Source code for MDMC.MD.ase.cif

"""Module for interfacing to ASE configuration readers (such as cif and pdb)

It should be possible to extend the CIF reader to include the additional
definitions in the mmCIF format.

It is expected that some of the behaviour in ``ase_read_cif`` will be extracted
into ``MDMC.readers.configurations.cif`` when an MDMC CIF reader is implemented.
"""

from itertools import groupby
from typing import TYPE_CHECKING, Callable

from ase.io.cif import read_cif
import numpy as np

from MDMC.MD.ase.conversions import ASEAtoms, convert_from_ase_atom
from MDMC.MD.structures import (
    BoundingBox, get_reduced_chemical_formula)
from MDMC.MD.interactions import Coulombic, Bond, BondAngle, DihedralAngle
from MDMC.MD.interaction_functions import Coulomb

if TYPE_CHECKING:
    from MDMC.MD.structures import Atom
    import ase


[docs]def ase_read_cif(file: str, **settings: dict) -> 'list[Atom]': """ Reads a configuration file and returns a list of ``Atom`` objects. These Atom objects can optionally have ``Coulombic`` interactions and also ``BondedInteraction`` objects if bonded interactions are defined in the CIF file. If ``names`` or ``atom_types`` is passed, then equivalent interactions (``Coulombic`` and ``BondedInteraction``, if bonded interactions are defined in the CIF file) will be initialized as a single object. For instance if the CIF file includes a benzene ring, then as long as the correct ``names`` or ``atom_types`` are passed, then there will only be a single C-C ``Bond`` object, which will include all 6 of the atom pairs. If both ``names`` and ``atom_types`` are passed, ``atom_types`` will be used to group ``Atom`` objects. If neither ``names`` or ``atom_types`` is passed then each interaction will become a separate object. .. note:: Not all CIF files contain bonded interactions (it is only common for biomolecules). .. note:: improper dihedrals are not explicitly defined in CIF, so these must be set after initialization of ``DihedralAngle`` objects. .. note:: CIF reader cannot parse CIF files with user defined text sections, so these must be stripped out before reading. Parameters ---------- file : file, str A ``file``, or the absolute file name of the configuration file **settings ``index`` (`int`, optional) The index of the configuration in the CIF file. Only a single configuration can be read from a CIF file, with the default being the first (``index=0``) configuration. ``names`` : (`list` of `str`) A `list` of ``names`` for the atoms in the CIF file. These ``names`` must have the same order as the order the atoms in the file. A ``name`` must be be provided for each atom in the CIF file. ``atom_types`` : (`list` of `int`) A `list` of `int` for atom types of the atoms in the CIF file. These names must have the same order as the order the atoms in the file. An ``atom_type`` must be provided for each atom in the CIF file. ``cutoff`` : (`float`) A distance (in Ang) at which the ``Coulombic`` interactions are cutoff. If this is not passed, the ``cutoff`` will be set to 10. ``add_bonds`` : (`bool`, optional) Whether or not any bonded interactions defined in the CIF file will be included. By default this is `True`. ``add_charges`` : (`bool`, optional) Whether or not each atom in the CIF file will be assigned a ``Coulombic`` interaction with a ``Coulomb`` function. CIF files do not contain charge information, so the charge of the ``Coulombic`` interaction will be set to 0. This enables the charges to be set by the application of a ``ForceField`` object. By default this is True. Returns ------- `list` of ``Atom`` The ``Atom`` objects corresponding to the data in the CIF file """ index = settings.get('index', 0) add_bonds = settings.get('add_bonds', True) add_charges = settings.get('add_charges', True) # ASE does not explicitly define bonds, however when it reads a CIF file, it # also reads the bonding information (if this is defined in the file). # This information is not used by ASE, however it is included in the info # attribute of any Atoms objects if the store_tags parameter is set to True images = read_cif(file, index=slice(index, None, None), store_tags=True) # images is a generator with a single element, an ase.atoms.Atoms object try: ase_atoms = list(images)[0] except Exception as error: msg = ('MDMC uses the ASE (Atomic Simulation Environment) module ' f'for reading your CIF file ({file.name}), ' 'which failed. Please see the ASE CIF documentation for help ' 'with the CIF format ASE requires.' ' Please note that the ASE CIF reader cannot parse CIF files ' 'with user defined text sections so these ' 'must be stripped out before reading. ' 'The full Python stack error message should be shown above.') raise Exception(msg).with_traceback(error.__traceback__) ase_atoms = _reduce_ase_unit_cell(ase_atoms) # Use provided names or None for each Atom names = settings.get('names', [None] * len(ase_atoms)) atom_types = settings.get('atom_types', [None] * len(ase_atoms)) # dict of CIF atom label to MDMC atom atoms_labels = {label: convert_from_ase_atom(atom, name=name, atom_type=atom_type, set_charge=False) for label, atom, name, atom_type in zip(ase_atoms.info['_atom_site_label'], ase_atoms, names, atom_types)} atoms = list(atoms_labels.values()) # The keys are used to group atoms so that the same atoms (atom tuples) are # used for a single Coulombic (BondedInteraction). So if atom_types are # used, a single Coulombic interaction will be created for all atoms with # atom_type 1. if atom_types[0]: def coulombic_key(atom): return atom.atom_type def bonded_key(atom_arr): return [atom.atom_type for atom in atom_arr] elif names[0]: def coulombic_key(atom): return atom.name def bonded_key(atom_arr): return [atom.name for atom in atom_arr] else: coulombic_key = bonded_key = None if add_charges: _create_coulombic_interactions(atoms, settings.get('cutoff', 10.0), coulombic_key) if add_bonds: # The CIF defintions which relate to bonded interactions. Note that # _chemical_conn_bond is not included in tags which describe bonds, even # though it could be used to create bonds. This is because there is no # equivalent tag for angles and dihedrals. It also does not include H # bonds, as these are not currently defined in MDMC. cif_geom_defs = ['_geom_bond_atom_site_label_', '_geom_angle_atom_site_label_', '_geom_torsion_atom_site_label_'] for cif_geom_def in cif_geom_defs: # For the CIF file to contain all the labels of the corresponding # cif_geom_def, it must contain at least the first label, '1'. if cif_geom_def + '1' in ase_atoms.info: inters_atoms = get_bonded_interactions_atoms(ase_atoms.info, cif_geom_def, atoms_labels) _create_bonded_interactions(inters_atoms, bonded_key) _make_atom_positions_valid(atoms) return atoms
[docs]def get_bonded_interactions_atoms(ase_atoms_info: dict, cif_geom_def: str, atoms_labels: dict) -> np.ndarray: """ Gets the atoms for each bonded interaction Parameters ---------- ase_atoms_info : dict A `dict` containing site labels for one or more of bonds, angles, and torsions. The corresponding values are a list with label (str) for each interaction. The number of site label keys is 2 for bonds, 3 for angles and 4 for torsions. For instance, for bonds there should be '_geom_bond_atom_site_label_1' and '_geom_bond_atom_site_label_2', with each being a list containing the first (or second) site label for each interaction. cif_geom_def : str Specifies whether the interaction type is a bond, angle, or torsion atoms_labels : dict (label:atom) pairs, where label is a str with the _atom_site_label from the CIF file, and atom is the corresponding MDMC ``Atom`` object. Returns ------- numpy.ndarray A 2D ``array`` with dimensions (n_interactions, n_atoms_per_interaction). So for 5 bond interactions, the dimensions of the array will be (5, 2), with the zeroeth index containing the two ``Atoms`` involved in the zeroeth bond, the first index containing the two ``Atoms`` involved in the first bond etc. For bond angle and dihedral interactions, the order of the atoms corresponds to the order required for ``BondAngle`` and ``DihedralAngle`` interactions. """ # There are a maximum of 4 atom sites in a geometry definition (for # torsions) cif_geom_defs = [cif_geom_def + str(index) for index in range(1, 5)] site_labels = np.array([ase_atoms_info[geom_def] for geom_def in cif_geom_defs if geom_def in ase_atoms_info]).T label_to_atom = np.vectorize(atoms_labels.__getitem__) interactions_atoms = label_to_atom(site_labels) return interactions_atoms
def _create_coulombic_interactions(atoms: 'list[Atom]', cutoff: float, key: Callable = None) -> None: """ Creates ``Coulombic`` interactions Parameters ---------- atoms : list of Atom The ``Atom`` objects for which a ``Coulombic`` interaction will be created cutoff: float The cutoff distance for the atom's Coulombic interaction. key : function The key which will be used to group ``Atom`` objects. Grouped ``Atom`` objects will have a single ``Coulombic`` interaction. """ atoms = _group_atoms(atoms, key) # If no grouping then each atom_group is a single atom for atom_group in atoms: # A Coulomb function is set so that the interaction can be parametrized Coulombic(atoms=atom_group, cutoff=cutoff, function=Coulomb(0.)) def _create_bonded_interactions(interactions_atoms: np.ndarray, key: Callable = None, **settings: dict) -> None: """ Creates ``BondedInteraction`` objects Parameters ---------- interaction_atoms : numpy.ndarray A 2D ``array`` with dimensions (n_interactions, n_atoms_per_interaction). So for 5 bond interactions, the dimensions of the array must be (5, 2), with the zeroeth index containing the two ``Atoms`` involved in the zeroeth bond, the first index containing the two ``Atoms`` involved in the first bond etc. For bond angle and dihedral interactions, the order of the atoms must correspond to the order required for ``BondAngle`` and ``DihedralAngle`` interactions. key : function A function by which the atoms tuples are grouped. Each group will have a single bonded interaction applied. For example, for a bonded interaction where the key is atom_type, if an atom pair has ``atom_types`` (1, 2), this will be grouped with all other atom pairs with ``atom_types`` (1, 2), and a single ``Bond`` will be applied to the group. **settings Settings to be passed for ``BondedInteraction`` initialization. For example ``improper=True`` can be passed to initialize a ``DihedralAngle`` to be an improper dihedral. Raises ------ TypeError If the number of atoms for each interaction is not a valid number to create a bonded interaction """ # Determine bonded interaction type based on number of atoms it is between n_inter_atoms = np.shape(interactions_atoms)[1] if n_inter_atoms == 2: bond_type = Bond elif n_inter_atoms == 3: bond_type = BondAngle elif n_inter_atoms == 4: bond_type = DihedralAngle else: raise TypeError(f'{n_inter_atoms} is not a valid number of atoms for a bonded' ' interaction') # If no key is passed, group by id - this will result in each tuple of atoms # being in its own group (i.e. no tuples of atoms are grouped together). # This ensures that interactions_atoms will always have the correct # dimensions if key is None: key = id interactions_atoms = _group_atoms(interactions_atoms, key) for interaction_atoms in interactions_atoms: # BondedInteractions require atoms to hashable, so tuple of np.arrays # must be mapped to tuple of tuples bond_type(*tuple(map(tuple, interaction_atoms)), **settings) def _group_atoms(atoms: 'list[Atom]', key: Callable) -> 'list[tuple[Atom]]': """ Groups atoms based on a key Parameters ---------- atoms : list of Atom The ``Atom`` objects to be grouped key : function The ``key`` with which the ``Atom`` objects will be grouped Returns ------- list of tuples Where each tuple contains a group of equivalent ``Atom`` objects, based on ``key`` """ try: atoms = sorted(atoms, key=key) except TypeError: pass # If the key passed in is None then it has no idea how to compare `Atom`s return [tuple(group) for _, group in groupby(atoms, key=key)] def _reduce_ase_unit_cell(ase_atoms: 'ase.atoms.Atoms') -> ASEAtoms: """ Reduces an ``ase.atoms.Atoms`` object from a unit cell of molecules to a single molecule Uses the number of atoms present in the CIF file to determine the size of the molecule Parameters ---------- ase_atoms : ase.atoms.Atoms An ``ase.atoms.Atoms`` object from which a single molecule will be extracted Returns ------- ASEAtoms An ``ASEAtoms`` object containing the atoms of a single molecule """ # The number of atoms in a molecule can be determined from info in CIF file n_atoms_molecule = len(ase_atoms.info['_atom_site_label']) # If the ase_atoms object already contains correct number of atoms, return # it if len(ase_atoms) == n_atoms_molecule: return ase_atoms # Otherwise build a new atoms object from the CIF fractional atom positions, # which are also stored in ase_atoms.info positions = np.array([ase_atoms.info['_atom_site_fract_' + dim] for dim in ['x', 'y', 'z']]).T formula = get_reduced_chemical_formula(ase_atoms.get_chemical_symbols(), len(ase_atoms) // n_atoms_molecule, system=None) return ASEAtoms(formula, scaled_positions=positions, cell=ase_atoms.cell, info=ase_atoms.info) def _make_atom_positions_valid(atoms: 'list[Atom]') -> None: """ Sets the positions of all atoms are positive (including 0.) This is so that all positions are valid within ``Universe`` Parameters ---------- atoms : list A `list` of `Atom` which will have their positions set so that relative distances are preserved and the smallest positions are equal to 0. """ # Offset atom positions so that they are >= 0. box = BoundingBox(atoms) for atom in atoms: atom.position -= box.min