"""
Read TINKER ``.prm`` files and convert them to MDMC force field python files.
"""
import os
import textwrap
from datetime import datetime
import numpy as np
import pandas as pd
import periodictable
from MDMC.common import units
from MDMC.common.decorators import wrap_docstring
from MDMC.common.df_operations import filter_dataframe
from MDMC.MD import force_fields
[docs]
def read_prm(fname: str, ncols: int = 14) -> pd.DataFrame:
"""
Read a TINKER ``.prm`` (force field) file.
Ignores all errors raised by incorrectly formatted data, so will be
susceptible to incorrectly formatted ``.prm`` files.
Parameters
----------
fname : str
The name of the ``.prm`` file.
ncols : int, optional
The largest number of columns that should be read. The default is 14,
which is the number of columns required for the torsion (proper
dihedral) in ``oplsaa.prm``.
Returns
-------
pandas.DataFrame
A dataframe with columns (named c0, c1, ...) up to (but not including)
ncols. Each row contains a row read correctly (i.e. no Error thrown)
from the ``.prm`` file.
"""
return pd.read_csv(fname, delim_whitespace=True, error_bad_lines=False,
header=None, names=dummy_headers(0, ncols))
[docs]
def parse_prm(dataframe: pd.DataFrame) -> tuple[pd.DataFrame, ...]:
"""
Parse a ``.prm`` file into DataFrames.
Parameters
----------
dataframe : pandas.DataFrame
A dataframe of raw data read from a ``.prm`` file.
Returns
-------
tuple[pd.DataFrame, ...]
A tuple of dataframes, with each dataframe possessing only the entries
starting with the corresponding definition.
"""
ncols = len(dataframe.columns)
dtypes = np.array([['atom',
['c3'] + dummy_headers(8, ncols),
['atom_type', 'atom_group', 'name', 'atomic_num',
'mass', 'nbonds']],
['vdw',
dummy_headers(4, ncols),
['atom_type', 'sigma', 'epsilon']],
['bond',
dummy_headers(5, ncols),
['atom_group1', 'atom_group2', 'potential_strength',
'equilibrium_state']],
['angle',
dummy_headers(6, ncols),
['atom_group1', 'atom_group2', 'atom_group3',
'potential_strength', 'equilibrium_state']],
['imptors',
dummy_headers(8, ncols),
['atom_group2', 'atom_group3', 'atom_group1',
'atom_group4', 'K1', 'd1', 'n1']],
['torsion',
[],
['atom_group1', 'atom_group2', 'atom_group3',
'atom_group4', 'K1', 'd1', 'n1', 'K2', 'd2', 'n2',
'K3', 'd3', 'n3']],
['charge',
dummy_headers(3, ncols),
['atom_type', 'charge']]])
atoms, disps, bonds, angles, impropers, propers, charges = \
tuple(parse_dataframe(filter_dataframe([dtype[0]], dataframe, ['c0']),
*dtype[1:])
for dtype in dtypes)
# Combine charges with atoms
atoms.insert(3, 'charge', charges.charge.to_numpy())
# Change atomic number to element
atoms['atomic_num'] = \
atoms['atomic_num'].apply(lambda x: periodictable.elements[int(x)])
atoms.rename(columns={'atomic_num': 'element'}, inplace=True)
# Rearrange order of proper and improper dihedrals
# For proper dihedrals, just the parameters need to be rearranged, as the
# convention for the atom order is the same as MDMC (end1 central1 central2
# end2).
propers = propers[['atom_group1', 'atom_group2', 'atom_group3',
'atom_group4', 'K1', 'n1', 'd1', 'K2', 'n2', 'd2',
'K3', 'n3', 'd3']]
# For improper dihedrals, the atom_groups are rearranged so that the central
# atom is first (in keeping with LAMMPS anf GROMACS). This occurs because
# the atom order in TINKER is end1, end2, central, end3 for impropers.
impropers = impropers[['atom_group1', 'atom_group2', 'atom_group3',
'atom_group4', 'K1', 'n1', 'd1']]
# Convert units
disps.epsilon = convert_units(disps.epsilon)
bonds.potential_strength = convert_units(bonds.potential_strength)
angles.potential_strength = convert_units(angles.potential_strength)
impropers.K1 = convert_units(impropers.K1)
for k_parameter in ['K1', 'K2', 'K3']:
propers[k_parameter] = convert_units(propers[k_parameter])
return atoms, disps, bonds, angles, impropers, propers
[docs]
def write_force_field_module(fname: str,
atoms: pd.DataFrame,
*interactions: pd.DataFrame,
path: str = None,
module_docstring: str = None,
class_docstring: str = None,
**settings):
"""
Write a temporary module to allow Tinker potentials to be imported.
Parameters
----------
fname : str
Name of the module (excluding the file extension) and the class.
atoms : pandas.DataFrame
Atoms to dump.
*interactions
An arbitrary number of `pandas.DataFrame` objects.
path : str, optional
Path to dump to.
module_docstring : str, optional
Docstring for created module.
class_docstring : str, optional
Docstring for created class.
**settings
Extra options.
Notes
-----
Windows and Mac OS are case insensitive, so if a file exists in the
specified path with the same case insensitive name as fname (e.g. if
fname='spce' and 'SPCE' already exists), the existing file will be
overwritten. So on case insensitive OS, fname should always be unique,
without considering the case.
"""
line_length = settings.get('line_length', 80)
data_fname = settings.get('data_fname', fname)
imports = 'from MDMC.MD.force_fields.ff import FileForceField\n'
if path is None:
path = os.path.abspath(
force_fields.__file__).replace('__init__.py', '')
if module_docstring is None:
module_docstring = (f'"""A module for defining the {fname} force field. This'
' was generated from the corresponding TINKER'
' file."""')
if class_docstring is None:
class_docstring = ('"""\n'
f'{fname} force field, with defined atoms and'
' interactions\n')
data_fname = path + 'data/' + data_fname + '.dat'
with open(path + fname + '.py', 'w', encoding='UTF-8') as module:
module.write(wrap_docstring(module_docstring, line_length) + '\n' * 2)
module.write(imports + '\n' * 2)
module.write(f'class {fname}(FileForceField):\n\n')
module.write(textwrap.indent(wrap_docstring(class_docstring,
line_length), ' ' * 4)
+ '\n\n')
module.write(textwrap.indent(f'file_name = "{data_fname}"\n',
' ' * 4))
# Write a dat file which contains all atoms and interactions
# Use pandas to output as CSV
[docs]
def write_data(
fname: str,
atoms: pd.DataFrame,
path: str = None,
**settings,
) -> None:
"""
Write forcefield data to a ``.dat`` file.
Parameters
----------
fname : str
File name to write to.
atoms : pd.DataFrame
Force field to write.
path : str, optional
Path to read data from.
**settings
Extra options.
Other Parameters
----------------
orig_file : str
Original file forcefield read from.
Notes
-----
For each of `disp`, `bond`, `angle`, `proper` and `improper`, there
exists a pair of optional arguments:
``<type>s`` and ``<type>_function``
Which define the presence of parameters and the function to compute them.
"""
inter_types = ['disps', 'bonds', 'angles', 'propers', 'impropers']
disps, bonds, angles, propers, impropers = [settings.get(inter_type, []) for
inter_type in inter_types]
inter_functions = [settings.get(inter_type[:-1] + '_function', None) for
inter_type in inter_types]
orig_file = settings.get('orig_file', None)
if path is None:
path = os.path.abspath(force_fields.__file__).replace('__init__.py',
'data/')
full_fname = path + fname + '.dat'
# First line describes number of each data type
description = (f'Atoms={len(atoms)} Dispersions={len(disps)} Bonds={len(bonds)} '
f'BondAngles={len(angles)} Propers={len(propers)} Impropers={len(impropers)}\n')
# Second line describes types of interactions
inter_functions = ('Dispersion={0} Bond={1} BondAngle={2} Proper={3}'
' Improper={4}\n'.format(*inter_functions))
# Metadata about the file
original_file_str = (' It was generated from {0}\n'.format(orig_file)
if orig_file else '\n')
date = datetime.today().strftime('%Y-%m-%d')
metadata = wrap_docstring(f'\nThis file contains the {fname} force field. It was'
f' created on {date}.'
+ original_file_str, 80)
with open(full_fname, 'w', encoding='UTF-8') as out_datafile:
out_datafile.write(description)
out_datafile.write(inter_functions)
out_datafile.write(metadata)
for name, data in [('Atoms', atoms),
('Dispersions', disps),
('Bonds', bonds),
('Bond Angles', angles),
('Proper Dihedrals', propers),
('Improper Dihedrals', impropers)]:
with open(full_fname, 'a', encoding='UTF-8') as out_datafile:
out_datafile.write('\n' + name + '\n')
data.to_csv(out_datafile, sep='\t', index=False)
[docs]
def parse_dataframe(dataframe: pd.DataFrame, drop: list, names: list) -> pd.DataFrame:
"""
Parse a single dataframe.
Parses a dataframe containing single datatype (e.g. atom or angles) by
dropping any unnecessary columns and setting correct header names.
If the dataframe has a header 'c0' this is always dropped
Parameters
----------
dataframe : pandas.DataFrame
The dataframe to be parsed.
drop : list
A list of names of headers which will be dropped.
names : list
A list of the header names which will be set for the remaining columns..
Returns
-------
pandas.DataFrame
A parsed ``DataFrame``.
"""
try:
# Assume that dataframe will have a header 'c0' which is always dropped
dataframe = dataframe.drop(columns=['c0'] + drop)
except KeyError:
dataframe = dataframe.drop(columns=drop)
dataframe.columns = names
return dataframe
[docs]
def convert_units(series: pd.Series) -> str:
"""
Convert from TINKER units (kcal) to kJ.
Parameters
----------
series : pandas.Series
The ``Series`` for which the parameters will be converted.
Returns
-------
str
Value in kJ.
"""
return (series.astype(float) * units.kcal
/ units.mol).round(decimals=5).astype(str)