"""A module for defining force fields that can be applied to a universe
Each force field consists of a combination of interaction functions, and also
the values of the parameters within these functions. In this instance water
models (such as SPCE and TIP3P) are also defined as force fields, even though
the parameter sets are restricted to describing water. Each force field module
is self contained, although adding a new force field may require changes to the
MD engine facades, so that a correspondence is established between the MDMC
force field and the MD engine equivalent."""
import logging
import os
from abc import ABC, abstractmethod
from inspect import signature
from itertools import chain, permutations
from re import escape, sub
import pandas as pd
from MDMC.common.decorators import repr_decorator, weakref_cache
from MDMC.common.df_operations import filter_dataframe, filter_ordered_dataframe
from MDMC.MD import interaction_functions
from MDMC.MD.interactions import BondedInteraction, Coulombic
LOGGER = logging.getLogger(__name__)
[docs]
@repr_decorator('interaction_dictionary')
class ForceField(ABC):
"""
Abstract class defining a force field
For each interaction type that it uses (non-bonded, bonds, bond angles etc),
a force field must define the interaction function (LJ, harmonic etc). It
must also define the parameters for each of these functions.
"""
@property
@abstractmethod
def interaction_dictionary(self):
"""
The `dict` of interactions that exist within the ``ForceField``
Returns
-------
dict
{``Interaction``:``Elements``} where ``Elements`` is an ordered
`tuple` of elemental symbols, and values of ``InteractionFunction``
objects.
"""
raise NotImplementedError
[docs]
def parameterize_interactions(self, interactions):
"""
Parameterizes the interactions with the parameters speicifed in the
``interaction`` `dict`
Parameters
----------
interactions : list
A `list` of ``Interaction`` objects to be parameterized
"""
for interaction in interactions:
self._parameterize_interaction(interaction)
def _parameterize_interaction(self, interaction):
"""
Parameterizes the interaction with the parameters specified in the
``interaction`` `dict`
Parameters
----------
interaction : Interaction
An ``Interaction`` to be parameterized
"""
elements = interaction.element_tuple()
try:
interaction.function = self.interaction_dictionary[
(type(interaction), elements)]
interaction.function.set_parameters_interactions(interaction)
except KeyError as error:
raise KeyError("This force field does not have defined interactions"
" for these element types") from error
[docs]
@repr_decorator('interaction_dictionary', 'n_body')
class WaterModel(ForceField):
"""
Abstract class for force fields that describe a water model
"""
@property
@abstractmethod
def n_body(self):
"""
This is the number of bodies in the water model.
.. note:: THIS MUST BE IMPLEMENTED USING A STATIC VARIABLE, INSTEAD OF A
PROPERTY.
Returns
-------
int
The number of bodies (atoms) in the water model.
"""
raise NotImplementedError
[docs]
@repr_decorator('file_name')
class FileForceField(ForceField):
"""
Abstract class for force fields that are read from files
"""
def __init__(self):
self.data = {}
self._interaction_dictionary = None
with open(self.absolute_path, encoding='UTF-8') as file:
n_datatypes = self._parse_header(file.readline(), int)
self.inter_functions = dict(self._parse_header(file.readline(),
str))
line = file.readline()
while 'Atoms' not in line:
line = file.readline()
for name, num in n_datatypes:
# +1 rows to account for header
datatype = pd.read_csv(file, nrows=num + 1, engine='python',
sep='\t', skiprows=(1 if name != 'atoms'
else 0), dtype='str')
# Convert columns to correct types
for col_name in datatype:
datatype[col_name] = self._set_col_type(datatype[col_name])
self.data[name] = datatype
self.atom_name_group = dict(zip(zip(self.atoms.name,
self.atoms.element),
self.atoms.atom_group))
self.atom_type_name = dict(zip(self.atoms.atom_type,
self.atoms.name))
@property
def absolute_path(self):
"""
Get the absolute path of the data
Returns
-------
str
The absolute path (including file name) of the force field data file
"""
# Either the file name is defined with the path, in which case it is the
# same as the absolute path
if os.path.isfile(self.file_name):
return self.file_name
# Or it is assumed that the path is the force field data directory
return (os.path.dirname(os.path.realpath(__file__))
+ '/data/'
+ self.file_name)
@property
@abstractmethod
def file_name(self):
"""
Get the file name of the data
"""
raise NotImplementedError
@property
def atoms(self):
"""
Get file parameters for the atoms defined within the force field
May includes the normal number of bonds that this atom type should
possess, which is not currently used but could be an additional check on
the validity of the topology
Returns
-------
pandas.DataFrame
A ``DataFrame`` with the force field atom type, atom group, element,
mass, charge, and name
"""
return self.data['atoms']
@property
def bonds(self):
"""
Get file parameters for the bonds of the force field
Returns
-------
pandas.DataFrame
A ``DataFrame`` with all bonds, atom groups, and the parameters of
the bonds
"""
return self.data['bonds']
@property
def bond_angles(self):
"""
Get file parameters for the bond angles of the force field
Returns
-------
pandas.DataFrame
A ``DataFrame`` with all bond angles, atom groups, and the
parameters of the bond angle
"""
return self.data['bondangles']
@property
def propers(self):
"""
Get file parameters for the proper dihedrals of the force field
Returns
-------
pandas.DataFrame
A ``DataFrame`` with all proper dihedrals, atom groups, and the
parameters of the proper dihedral
"""
return self.data['propers']
@property
def impropers(self):
"""
Get file parameters for the improper dihedrals of the force field
Returns
-------
pandas.DataFrame
A ``DataFrame`` with all improper dihedrals, atom groups, and the
parameters of the improper dihedral
"""
return self.data['impropers']
@property
def dispersions(self):
"""
Get file parameters for the dispersion interactions of the force field
Returns
-------
pandas.DataFrame
A ``DataFrame`` with all dispersion interactions, atom types, and
the parameters of the dispersion interaction
"""
return self.data['dispersions']
@property
def interaction_dictionary(self):
# Cache interaction dictionary
try:
return self._interaction_dictionary
except AttributeError:
self._interaction_dictionary = None
return self._interaction_dictionary
[docs]
def filter_element(self, element):
"""
Filters the atoms in the ``FileForceField`` by element
Parameters
----------
element : str
The element by which the atoms in the FileForceField will be
filtered
Returns
-------
pandas.DataFrame
A filtered ``DataFrame`` where each row is an atom type that has an
element specified by ``element``
"""
return filter_dataframe([element], self.atoms, column_names=['element'])
[docs]
def set_atom_mass(self, atom):
"""
Sets ``Atom.mass`` to the mass defined in the force field for that atom
type
Parameters
----------
atom : Atom
The ``Atom`` for which the mass will be set
"""
ff_atom = filter_ordered_dataframe([atom.name, atom.element.symbol],
self.atoms,
column_names=['name', 'element'])
atom.mass = ff_atom.mass
def _parameterize_interaction(self, interaction):
"""
Parameterizes the interaction with the parameters specified in the
``interaction`` `dict`
Arguments:
interaction : Interaction
An ``Interaction`` to be parameterized
"""
if isinstance(interaction, BondedInteraction):
self._parametrize_bonded(interaction)
elif isinstance(interaction, Coulombic):
self._parametrize_coulombic(interaction)
else:
self._parametrize_dispersion(interaction)
def _parametrize_bonded(self, bonded):
"""
Parametrizes a ``BondedInteraction``
Parameters
----------
bonded : BondedInteraction
A subclass of ``BondedInteraction`` which will be parametrized
"""
groups = set()
for atom_tuple in bonded.atoms:
tuple_groups = []
for atom in atom_tuple:
self._convert_atom_type_name(atom)
# Raise an error if the name/element combination can't be found
try:
tuple_groups.append(self.atom_name_group[(atom.name,
atom.element.symbol)])
except KeyError as error:
raise KeyError(f'Unable to find atom of element "{atom.element.symbol}" '
f'recorded with the name "{atom.name}" '
'in the specified force field file.') from error
groups.add(tuple(tuple_groups))
# Ensure that the groups of all atom tuples is consistent (i.e. each
# atom tuple should contain the same groups, in the same order)
# It is important to note that this is more restrictive than it needs to
# be - it could allow not raise an error where the mismatch was valid
# because wildcards would allow the interaction parameters to be the
# same regardless of the different atom groups, however this is not
# implemented.
if len(groups) != 1:
msg = (f'The atom groups of this interaction are not consistent {groups}.'
' Atom tuples should have the same groups in the same'
' order.')
LOGGER.error('%s: %s',
self.__class__,
msg)
raise ValueError(msg)
groups = groups.pop()
bonded_type = str(type(bonded).__name__)
if bonded_type == 'DihedralAngle':
bonded_type = 'improper' if bonded.improper else 'proper'
function_type = self._get_interaction_function(bonded_type)
parameter_df = getattr(self, sub('(?<!^)(?=[A-Z])',
'_',
bonded_type).lower() + 's')
# Filter for both the specified order of groups, and the reverse
# This is valid for all bonded types except improper dihedrals, which
# for which only the first atom occupies a unique position (the center);
# the other atoms can be in any permutation
matches = []
if bonded_type != 'improper':
for ordered_groups in [groups, tuple(reversed(groups))]:
regex = 'atom_group?'
matches.append(filter_ordered_dataframe(ordered_groups,
parameter_df,
column_regex=regex,
wildcard=0))
else:
col_names = ['atom_group1']
cen_atom_match = filter_ordered_dataframe([groups[0]],
parameter_df,
column_names=col_names,
wildcard=0)
for permuted_groups in list(permutations(groups[1:])):
# Only filter using atom_group2, atom_group3, atom_group4
regex = '^atom_group[2-4]$'
matches.append(filter_ordered_dataframe(permuted_groups,
cen_atom_match,
column_regex=regex,
wildcard=0))
# Duplicate dropping requied for groups == tuple(reversed(groups))
matches = pd.concat(matches).drop_duplicates()
# If there is more than one row which matches the groups (because of
# wildcards) then pick row with fewest wildcards
if len(matches) > 1:
matching_group = matches.loc[[(matches.filter(regex='atom_group?')
== 0).sum(axis='columns').idxmin()]]
elif len(matches) == 1:
matching_group = matches
else:
# If there are no matches in the file, then raise an error
msg = 'Unable to find bond information for specified atoms: '
for i, atom_type in enumerate(groups):
msg += f'atom_group{i} - {atom_type} ({self.atom_type_name[atom_type]}), '
raise ValueError(msg[:-2])
# Get the parameter names for the InteractionFunction. This means that
# the columns in the bonded DataFrame do not need to be ordered,
# as long as the column names are the same as the InteractionFunction
# parameter names (e.g. equilibrium_state and potential_strength must
# exist in self.bonds if it is a HarmonicPotential, but can be in any
# order).
parameter_names = self._get_parameter_names(function_type,
[name for name
in matching_group if
'atom_group' not in name])
settings = {}
if function_type == interaction_functions.HarmonicPotential:
settings['interaction_type'] = bonded_type
# As parameter_names is correctly sorted
parameters = [getattr(matching_group, name).values[0] for name
in parameter_names]
self._set_interaction_function(bonded, function_type, parameters,
settings)
def _parametrize_coulombic(self, coulombic):
"""
Assumes that all force fields have a ``Coulomb`` ``InteractionFunction``
Parameters
----------
coulombic : Coulombic
The ``Coulombic`` to be parametrized
"""
# Check we have atoms to apply interaction to
if len(coulombic.atoms) == 0:
raise ValueError(f'Unable to find any atoms of types {coulombic.atom_types} '
'in the Universe to apply the Coulombic interaction to')
# Different atom names could be defined within the same coulombic
# Both atom name and element are required to uniquely identify the atom
atom_names_elements = set()
for atom in coulombic.atoms:
self._convert_atom_type_name(atom)
atom_names_elements.add((atom.name, atom.element.symbol))
cols = ['name', 'element']
matching_atoms = pd.concat([filter_ordered_dataframe(name_element,
self.atoms,
column_names=cols)
for name_element in atom_names_elements])
# Check that charges for all atom names are the same
unique_charges = matching_atoms.charge.unique()
self._check_nonbonded_parameters(unique_charges, ['charge'],
matching_atoms)
if len(unique_charges) != 1:
# If not, show the corresponding atom rows in the error message
msg = ('All atoms of the Coulombic interaction must have the same'
f' OPLS charge ({matching_atoms})')
LOGGER.error('%s %s',
self.__class__,
msg)
raise ValueError(msg)
self._set_interaction_function(coulombic,
interaction_functions.Coulomb,
unique_charges)
def _parametrize_dispersion(self, dispersion):
"""
While dispersion interactions can be defined between unlike atom types,
this is not the case for any major force field implementation (e.g.
OPLS, CHARMM, AMBER). Therefore currently this only parametrizes
like-like dispersion interactions.
Parameters
----------
dispersion : Dispersion
The ``Dispersion`` to be parametrized
"""
# As the atoms in a dispersion are determined by dispersion.atom_type,
# each atom pair must be consistent (i.e. it is only necessary to
# consider two atoms from each pair as they will be the same as all
# other atoms in the pair)
matching_disps = []
for atom_pairs in dispersion.atoms:
# Checks that atom_pairs are like-like and that we have atoms to
# apply interaction to
atom_pairs = list(atom_pairs)
atom_pair = []
for i, atom_type_pair in enumerate(atom_pairs):
if len(atom_type_pair) == 0:
existing_types = []
for (key, value) in dispersion.universe.atom_types.items():
if len(value) > 0:
existing_types.append(key)
raise ValueError(f'No atoms of type "{dispersion.atom_types[0][i]}" '
'found, the Universe contains only the types '
f'{existing_types}')
atom_pair.append(atom_type_pair[0])
if len(set(atom_pair)) != 1:
msg = ('Currently only force fields which only use like-like'
' Dispersion terms are implemented')
LOGGER.error('%s: {atom_pair: %s}. %s',
self.__class__,
atom_pair,
msg)
raise ValueError(msg)
# As only like-like, just consider first index of atom_pairs
for atom in atom_pairs[0]:
self._convert_atom_type_name(atom)
# As all atoms within atom_pairs should be the same, only consider
# single atom when determining ff_atom_type
# ff_atom_type is different to MDMC atom_type
atom_name_element = (atom_pairs[1][0].name,
atom_pairs[1][0].element.symbol)
cols = ['name', 'element']
ff_atom_type = filter_ordered_dataframe(atom_name_element,
self.atoms,
column_names=cols).atom_type
matching_disps.append(filter_dataframe(list(ff_atom_type),
self.dispersions,
column_names=['atom_type']))
matching_disps = pd.concat(matching_disps)
function_type = self._get_interaction_function('dispersion')
parameter_names = self._get_parameter_names(function_type)
unique_parameters = list(chain.from_iterable([matching_disps[n].unique()
for n in parameter_names]),
)
self._check_nonbonded_parameters(unique_parameters, parameter_names,
matching_disps)
self._set_interaction_function(dispersion, function_type,
unique_parameters)
def _get_interaction_function(self, interaction_type):
"""
Gets the ``InteractionFunction`` for the corresponding ``Interaction``
Parameters
----------
interaction_type : str
A `str` specifying an interaction type, which must be a key in
``self._inter_functions``
Returns
-------
InteractionFunction
The subclass of ``InteractionFunction`` correspoding to
``interaction_type``
"""
function_name = self.inter_functions[interaction_type.lower()]
return getattr(interaction_functions, function_name)
@weakref_cache(maxsize=None)
def _convert_atom_type_name(self, atom):
"""
Converts all ``Atom`` objects with ``Atom.name`` that are a valid force
field type (i.e. can be cast to an `int`) into the corresponding force
field atom name.
Each ``Atom.name`` only needs to be converted once, however they will be
converted for every interaction in which they appear - avoid this with
lrucache
Parameters
----------
atom : Atom
The ``Atom`` for which the name will be converted
Raises
------
ValueError
If the ``Atom`` does not have a valid force field type or a force
field atom name, raise a ValueError. This atom cannot be interpreted
as valid within the force field.
"""
# int cast as atom names will be strings, even if they are numerical
# (e.g. an atom type)
try:
atom.name = self.atom_type_name[int(atom.name)]
except (KeyError, ValueError) as err:
# Check if atom.name is already a name in the atoms DataFrame
if filter_dataframe([atom.name], self.atoms,
column_names=['name']).empty:
msg = ('All atom names must be an OPLS atom type or an OPLS'
f' atom name. {atom.name} is not an OPLS atom type of atom'
' name.')
LOGGER.error('%s %s',
self.__class__,
msg,
exc_info=1)
raise ValueError(msg) from err
@staticmethod
def _get_parameter_names(function, file_parameter_names=None):
"""
Gets the names of the parameters of function, excluding ``self`` and
``**settings``
If ``*args`` is included in the function signature, then the parameter
names are taken from the file.
Parameters
----------
function : callable
Any callable (e.g. function, method)
file_parameter_names : list
`list` of `str` with the parameter names from the file
Returns
-------
`list` of `str`
The parameters of the function
Raises
------
ValueError
If the function signature has ``*args`` and the parameter names in
the file are incorrectly ordered.
"""
# VAR_POSITIONAL is the Python Parameter kind which is used for *args
var_positional = False
parameter_names = []
for function_parameters in signature(function).parameters.values():
# Test that the kind of parameter is a VAR_POSITIONAL
if function_parameters.kind == function_parameters.VAR_POSITIONAL:
var_positional = True
else:
name = function_parameters.name
if name not in ['self', 'settings']:
parameter_names.append(name)
if var_positional:
# If there is *args in the function signature, we cannot determine
# the parameter names. Check that the parameter names in the
# signature agree with the order of the parameter names in the file.
# If this is the case, assume file parameter names are correctly
# ordered and use these, otherwise raise a ValueError
if any(parameter_names[i] != file_parameter_names[i] for i
in range(len(parameter_names))):
msg = (f'The force field data file has incorrectly ordered {parameter_names}'
' parameters')
LOGGER.error('FileForceField: %s',
msg)
raise ValueError(msg)
parameter_names = file_parameter_names
return parameter_names
@staticmethod
def _check_nonbonded_parameters(function_parameters,
function_parameter_names,
matching_inters):
"""
Checks that the parameters for an ``InteractionFunction`` for a
``NonBondedInteraction`` are valid
Parameters
----------
function_parameters : list
The parameters which will be passed to an ``InteractionFunction``
function_parameter_names : list
A list of the names of the ``function_parameters``
matching_inters : pandas.DataFrame
A ``Dataframe`` of the lines of the interaction in the force field
file which match the ``NonBondedInteraction`` which is being
parametrized
Raises
------
ValueError
If atoms which have been added to the ``NonBondedInteraction`` do
not have the same ``InteractionFunction`` parameters for the force
field, they cannot be included in the same ``NonBondedInteraction``.
"""
if len(function_parameters) != len(function_parameter_names):
msg = ('All atoms of the interaction must have the same OPLS'
f' parameters ({matching_inters})')
LOGGER.error('FileForceField: %s',
msg)
raise ValueError(msg)
@staticmethod
def _set_interaction_function(interaction, function_type,
function_parameters, function_settings=None):
"""
Initialises an ``InteractionFunction`` with specified parameters and
settings and sets it for an ``Interaction``
Parameters
----------
interaction : Interaction
The ``Interaction`` for which the ``InteractionFunction`` will be
set
function_type : InteractionFunction class
The ``InteractionFunction`` class which will be initialised
function_parameters : list
The parameters which are passed to the ``function_type``
function_settings : dict, optional
Any ``**settings`` to be passed to the ``function_type`` for
initialising. The default is `None`, which results in no
``**settings`` being passed.
"""
function_settings = function_settings if function_settings else {}
interaction.function = function_type(*function_parameters,
**function_settings)
interaction.function.set_parameters_interactions(interaction)
@staticmethod
def _parse_header(header, dtype):
"""
Parses a force field file header line
Parameters
----------
header : str
The header line to be parsed
dtype : class
The expected datatype of the ``header`` parameters, e.g. `int` or
`str`
Returns
-------
list of tuples
(``keyword``, ``parameter``) where ``keyword`` is a `str` which
specifies a force field file section, and ``parameter`` is the
description associated with it.
"""
# Strip newline formatting
return [(name.strip('\n').lower(), dtype(parameter.strip('\n')))
for name, parameter in [s.split('=') for s in header.split(' ')]]
@staticmethod
def _set_col_type(column):
"""
Sets the type of a ``DataFrame`` column to either a `float` or an `int`,
if possible
Parameters
----------
column : pandas.Series
The column for which the type will be converted
"""
if any(column.astype(str).str.contains(escape('.'))):
try:
return column.astype(float)
except ValueError:
return column
else:
try:
return column.astype(int)
except ValueError:
return column