Source code for MDMC.utilities.tinker_converter

"""
Read TINKER ``.prm`` files and convert them to MDMC force field python files.
"""

from datetime import datetime
import os
import textwrap

import numpy as np
import pandas as pd
import periodictable

from MDMC.common.decorators import wrap_docstring
from MDMC.common.df_operations import filter_dataframe
from MDMC.common import units
from MDMC.MD import force_fields


[docs] def read_prm(fname: str, ncols: int = 14) -> pd.DataFrame: """ Read a TINKER ``.prm`` (force field) file. Ignores all errors raised by incorrectly formatted data, so will be susceptible to incorrectly formatted ``.prm`` files. Parameters ---------- fname : str The name of the ``.prm`` file. ncols : int, optional The largest number of columns that should be read. The default is 14, which is the number of columns required for the torsion (proper dihedral) in ``oplsaa.prm``. Returns ------- pandas.DataFrame A dataframe with columns (named c0, c1, ...) up to (but not including) ncols. Each row contains a row read correctly (i.e. no Error thrown) from the ``.prm`` file. """ return pd.read_csv(fname, delim_whitespace=True, error_bad_lines=False, header=None, names=dummy_headers(0, ncols))
[docs] def parse_prm(dataframe: pd.DataFrame) -> tuple[pd.DataFrame, ...]: """ Parse a ``.prm`` file into DataFrames. Parameters ---------- dataframe : pandas.DataFrame A dataframe of raw data read from a ``.prm`` file. Returns ------- tuple[pd.DataFrame, ...] A tuple of dataframes, with each dataframe possessing only the entries starting with the corresponding definition. """ ncols = len(dataframe.columns) dtypes = np.array([['atom', ['c3'] + dummy_headers(8, ncols), ['atom_type', 'atom_group', 'name', 'atomic_num', 'mass', 'nbonds']], ['vdw', dummy_headers(4, ncols), ['atom_type', 'sigma', 'epsilon']], ['bond', dummy_headers(5, ncols), ['atom_group1', 'atom_group2', 'potential_strength', 'equilibrium_state']], ['angle', dummy_headers(6, ncols), ['atom_group1', 'atom_group2', 'atom_group3', 'potential_strength', 'equilibrium_state']], ['imptors', dummy_headers(8, ncols), ['atom_group2', 'atom_group3', 'atom_group1', 'atom_group4', 'K1', 'd1', 'n1']], ['torsion', [], ['atom_group1', 'atom_group2', 'atom_group3', 'atom_group4', 'K1', 'd1', 'n1', 'K2', 'd2', 'n2', 'K3', 'd3', 'n3']], ['charge', dummy_headers(3, ncols), ['atom_type', 'charge']]]) atoms, disps, bonds, angles, impropers, propers, charges = \ tuple(parse_dataframe(filter_dataframe([dtype[0]], dataframe, ['c0']), *dtype[1:]) for dtype in dtypes) # Combine charges with atoms atoms.insert(3, 'charge', charges.charge.to_numpy()) # Change atomic number to element atoms['atomic_num'] = \ atoms['atomic_num'].apply(lambda x: periodictable.elements[int(x)]) atoms.rename(columns={'atomic_num': 'element'}, inplace=True) # Rearrange order of proper and improper dihedrals # For proper dihedrals, just the parameters need to be rearranged, as the # convention for the atom order is the same as MDMC (end1 central1 central2 # end2). propers = propers[['atom_group1', 'atom_group2', 'atom_group3', 'atom_group4', 'K1', 'n1', 'd1', 'K2', 'n2', 'd2', 'K3', 'n3', 'd3']] # For improper dihedrals, the atom_groups are rearranged so that the central # atom is first (in keeping with LAMMPS anf GROMACS). This occurs because # the atom order in TINKER is end1, end2, central, end3 for impropers. impropers = impropers[['atom_group1', 'atom_group2', 'atom_group3', 'atom_group4', 'K1', 'n1', 'd1']] # Convert units disps.epsilon = convert_units(disps.epsilon) bonds.potential_strength = convert_units(bonds.potential_strength) angles.potential_strength = convert_units(angles.potential_strength) impropers.K1 = convert_units(impropers.K1) for k_parameter in ['K1', 'K2', 'K3']: propers[k_parameter] = convert_units(propers[k_parameter]) return atoms, disps, bonds, angles, impropers, propers
[docs] def write_force_field_module(fname: str, atoms: pd.DataFrame, *interactions: pd.DataFrame, path: str = None, module_docstring: str = None, class_docstring: str = None, **settings): """ Write a temporary module to allow Tinker potentials to be imported. Parameters ---------- fname : str Name of the module (excluding the file extension) and the class. atoms : pandas.DataFrame Atoms to dump. *interactions An arbitrary number of `pandas.DataFrame` objects. path : str, optional Path to dump to. module_docstring : str, optional Docstring for created module. class_docstring : str, optional Docstring for created class. **settings Extra options. Notes ----- Windows and Mac OS are case insensitive, so if a file exists in the specified path with the same case insensitive name as fname (e.g. if fname='spce' and 'SPCE' already exists), the existing file will be overwritten. So on case insensitive OS, fname should always be unique, without considering the case. """ line_length = settings.get('line_length', 80) data_fname = settings.get('data_fname', fname) imports = 'from MDMC.MD.force_fields.ff import FileForceField\n' if path is None: path = os.path.abspath( force_fields.__file__).replace('__init__.py', '') if module_docstring is None: module_docstring = (f'"""A module for defining the {fname} force field. This' ' was generated from the corresponding TINKER' ' file."""') if class_docstring is None: class_docstring = ('"""\n' f'{fname} force field, with defined atoms and' ' interactions\n') data_fname = path + 'data/' + data_fname + '.dat' with open(path + fname + '.py', 'w', encoding='UTF-8') as module: module.write(wrap_docstring(module_docstring, line_length) + '\n' * 2) module.write(imports + '\n' * 2) module.write(f'class {fname}(FileForceField):\n\n') module.write(textwrap.indent(wrap_docstring(class_docstring, line_length), ' ' * 4) + '\n\n') module.write(textwrap.indent(f'file_name = "{data_fname}"\n', ' ' * 4))
# Write a dat file which contains all atoms and interactions # Use pandas to output as CSV
[docs] def write_data( fname: str, atoms: pd.DataFrame, path: str = None, **settings ) -> None: """ Write forcefield data to a ``.dat`` file. Parameters ---------- fname : str File name to write to. atoms : pd.DataFrame Force field to write. path : str, optional Path to read data from. **settings Extra options. Other Parameters ---------------- orig_file : str Original file forcefield read from. Notes ----- For each of `disp`, `bond`, `angle`, `proper` and `improper`, there exists a pair of optional arguments: ``<type>s`` and ``<type>_function`` Which define the presence of parameters and the function to compute them. """ inter_types = ['disps', 'bonds', 'angles', 'propers', 'impropers'] disps, bonds, angles, propers, impropers = [settings.get(inter_type, []) for inter_type in inter_types] inter_functions = [settings.get(inter_type[:-1] + '_function', None) for inter_type in inter_types] orig_file = settings.get('orig_file', None) if path is None: path = os.path.abspath(force_fields.__file__).replace('__init__.py', 'data/') full_fname = path + fname + '.dat' # First line describes number of each data type description = (f'Atoms={len(atoms)} Dispersions={len(disps)} Bonds={len(bonds)} ' f'BondAngles={len(angles)} Propers={len(propers)} Impropers={len(impropers)}\n') # Second line describes types of interactions inter_functions = ('Dispersion={0} Bond={1} BondAngle={2} Proper={3}' ' Improper={4}\n'.format(*inter_functions)) # Metadata about the file original_file_str = (' It was generated from {0}\n'.format(orig_file) if orig_file else '\n') date = datetime.today().strftime('%Y-%m-%d') metadata = wrap_docstring(f'\nThis file contains the {fname} force field. It was' f' created on {date}.' + original_file_str, 80) with open(full_fname, 'w', encoding='UTF-8') as out_datafile: out_datafile.write(description) out_datafile.write(inter_functions) out_datafile.write(metadata) for name, data in [('Atoms', atoms), ('Dispersions', disps), ('Bonds', bonds), ('Bond Angles', angles), ('Proper Dihedrals', propers), ('Improper Dihedrals', impropers)]: with open(full_fname, 'a', encoding='UTF-8') as out_datafile: out_datafile.write('\n' + name + '\n') data.to_csv(out_datafile, sep='\t', index=False)
[docs] def dummy_headers(start: int, end: int) -> list[str]: """ Generate dummy headers c0, c1, c2, ... until end (exclusive). Parameters ---------- start : int The start of the header (inclusive). end : int The end of the header (exclusive). Returns ------- list of str A `str` (``c{i}``) for each i between start and end (exclusive). """ return ['c{0}'.format(i) for i in range(start, end)]
[docs] def parse_dataframe(dataframe: pd.DataFrame, drop: list, names: list) -> pd.DataFrame: """ Parse a single dataframe. Parses a dataframe containing single datatype (e.g. atom or angles) by dropping any unnecessary columns and setting correct header names. If the dataframe has a header 'c0' this is always dropped Parameters ---------- dataframe : pandas.DataFrame The dataframe to be parsed. drop : list A list of names of headers which will be dropped. names : list A list of the header names which will be set for the remaining columns.. Returns ------- pandas.DataFrame A parsed ``DataFrame``. """ try: # Assume that dataframe will have a header 'c0' which is always dropped dataframe = dataframe.drop(columns=['c0'] + drop) except KeyError: dataframe = dataframe.drop(columns=drop) dataframe.columns = names return dataframe
[docs] def convert_units(series: pd.Series) -> str: """ Convert from TINKER units (kcal) to kJ. Parameters ---------- series : pandas.Series The ``Series`` for which the parameters will be converted. Returns ------- str Value in kJ. """ return (series.astype(float) * units.kcal / units.mol).round(decimals=5).astype(str)