Source code for MDMC.common.units

"""
Module for all unit definitions and operations.

This includes defining units used in MDMC, converting units, and subclassing
data strucures (`float`, ``array``) so that they have a ``unit``
attribute.  This style follows that of the Atomic Simulation Environment.

As members of units.py are set dynamically, pylintrc excludes member checking
for units.py using the generated-members keyword. Care must be taken to ensure
members exist when importing from units.py, as these will not be linted.
"""

from __future__ import annotations

from collections import Counter, defaultdict
from copy import deepcopy
from numbers import Number
from typing import Literal, Union

import numpy as np
from pandas import Series

# pylint: disable=no-member
# as it raises a false positive for components

#: Version of CODATA constants to load.
CODATA_VERSION = '2014'


#: CODATA 2014 taken from ASE.units, originally from:
#: http://arxiv.org/pdf/1507.07956.pdf.
CODATA = {

    '2014': {'_c': 299792458.,
             '_mu0': 4.0e-7 * np.pi,
             '_Grav': 6.67408e-11,
             '_hplanck': 6.626070040e-34,
             '_e': 1.6021766208e-19,
             '_me': 9.10938356e-31,
             '_mp': 1.672621898e-27,
             '_Nav': 6.022140857e23,
             '_k': 1.38064852e-23,
             '_amu': 1.660539040e-27},
}



[docs]
class Unit(str):
    """
    A class for defining unit strings.

    It possesses additional ``*`` and ``/`` operands so that combined units can
    be returned.

    .. note:: NON-INTEGER POWER OPERATIONS ARE CURRENTLY NOT IMPEMENTED

    Parameters
    ----------
    string : str
        The unit, which can contain ``/`` to specify divisors and ``^`` to
        specify powers. It can contain `int` which are used to specify order of
        magnitude (e.g. ``10^6 Pa``).  It can also contain negative powers, but
        there must not be a space between the negative sign and the number
        (e.g. ``Ang ^ -1`` NOT ``Ang ^ - 1``). Brackets and parentheses are not
        supported, and any of the characters ``[]()`` will be ignored.
    components : defaultdict[list], optional
        Sets the ``components`` attribute (see Attributes).  Default is `None`.

    Attributes
    ----------
    components : defaultdict[list]
        Contains the ``components`` of the ``Unit``, separated into two `list` s
        (``numerator`` and ``denominator``) depending on which side of the
        fraction each component is on.  If the ``Unit`` is a ``base`` unit i.e.
        initialized using ``Unit()``, then the ``components`` only has a
        ``numerator`` and this is the ``Unit`` string. If it a combined ``Unit``
        (created by either ``__mul__``, ``__div__`` or ``__pow__``) then the
        ``Unit`` objects which combined to form it make up the ``components``.
    conversion_factor : float
        The factor by which to multiply a value in order to express it in
        system units. For example ``Unit('ps').conversion_factor`` is
        ``1000.`` as the system units are femtoseconds.

    Examples
    --------
    Base units can be set::

    >>> time_unit = Unit('s')

    Compound units can be set with spaces separating ``base`` units which are
    multiplied::

    >>> charge_unit = Unit('A s')

    Compound units can be set with ``/`` separating ``base`` units which are
    divided::

    >>> velocity_unit = Unit('m / s')

    Units raised to a power can be set with ``^``::

    >>> volume_unit = Unit('Ang ^ 3')

    Compound units can be set with a combination of these operands::

    >>> force_unit = Unit('kg m / s ^ 2')

    To set an inverse unit, the power operation can be applied to a ``Unit``::

    >>> frequency = Unit('s') ** -1

    Or set with ``^`` within the string::

    >>> frequency = Unit('s ^ -1')

    Or with ``/`` within the string::

    >>> frequency = Unit('1 / s')

    Orders of magnitude can also be included::

    >>> pressure = Unit('10 ^ 6 Pa')
    """

    def __new__(cls, string: str, components: defaultdict[list] = None) -> Unit:

        if string is None:
            return None
        if isinstance(string, Unit):
            return string

        # Remove square brackets and parentheses as these are not supported
        unsupported_chars = ['[', ']', '(', ')']
        for char in unsupported_chars:
            string = string.replace(char, '')

        unit = super().__new__(cls, string)
        if not components:
            components = defaultdict(list)
            # String is compound if it contains either ' ', '/' or '^' (e.g.
            # 'Ang^2')
            if any(x in string for x in [' ', '/', '^']):
                num, denom = unit._parse_unit_string(string)
                components['numerator'] = num
                components['denominator'] = denom
            else:
                components['numerator'].append(unit)
                components['denominator'] = []
        unit.components = components
        return unit

    def __mul__(self, other: Unit) -> Unit:  # type: ignore
        """
        Multiply the ``Unit`` by another ``Unit``.

        Parameters
        ----------
        other : Unit
            The ``Unit`` object to multiply by.

        Returns
        -------
        Unit
            A compound ``unit``.

        Raises
        ------
        TypeError
            If `other` is not a ``Unit``.
        """

        try:
            components = self._calculate_components(other, 'mul')
        except AttributeError as error:
            raise TypeError(
                'A Unit can only be multipled by another Unit') from error
        return self.__class__(self._calculate_string(components), components)

    def __truediv__(self, other: Unit) -> Unit:
        """
        Divide the ``Unit`` by another ``Unit``.

        Parameters
        ----------
        other : Unit
            The ``Unit`` object to divide by.

        Returns
        -------
        Unit
            A compound ``unit``.

        Raises
        ------
        TypeError
            If `other` is not a ``Unit``.
        """

        try:
            components = self._calculate_components(other, 'div')
        except AttributeError as error:
            raise TypeError(
                'A Unit can only be divided by another Unit') from error
        return self.__class__(self._calculate_string(components), components)

    def __pow__(self, exponent: Number) -> Unit:
        """
        Perform the power operation on the ``Unit``.

        Parameters
        ----------
        exponent : numeric (inherits from numbers.Number)
            The number the ``Unit`` is raised to the power of.

        Returns
        -------
        Unit
            A compound ``Unit``.
        """

        if not isinstance(exponent, Number):
            try:
                exponent = float(exponent)
            except (TypeError, ValueError) as error:
                raise TypeError('Only numeric types can be used as a power for'
                                ' Units') from error

        components = self._calculate_components(exponent, 'pow')
        return self.__class__(self._calculate_string(components), components)

    @property
    def base(self) -> bool:
        """
        Get whether the ``Unit`` is a ``base`` or compound ``Unit``.

        Returns
        -------
        bool
            If `True`, ``Unit`` is a ``base`` ``Unit`` (only has a single
            element in the ``components`` ``numerator`` `list`).
        """

        return (not self.components['denominator'] and
                self.components['numerator'] == [self])

    @property
    def conversion_factor(self) -> float:
        """
        Multiplicative factor to get system units.

        Calculates the factor by which a value with this ``Unit`` should be
        multiplied in order to express it in system units. This takes into
        account any orders of magnitude and compound units.

        The `conversion_factor` for a ``Unit`` composed only of system units is
        therefore always 1.

        Returns
        -------
        float
            The conversion factor to system units for the relevant property.

        Raises
        ------
        KeyError
            If invalid unit provided.
        """

        factor = 1.
        factors_dict = create_units(CODATA_VERSION)[0]
        components = self.components
        numerator = components['numerator']
        denominator = components['denominator']

        try:
            for unit in numerator:
                try:
                    factor *= int(str(unit))
                except ValueError:
                    factor *= factors_dict[str(unit)]

            for unit in denominator:
                try:
                    factor /= int(str(unit))
                except ValueError:
                    factor /= factors_dict[str(unit)]

        except KeyError as error:
            raise KeyError(f'Unknown unit {str(unit)} provided, cannot convert to system'
                           ' units') from error

        return factor

    @property
    def physical_property(self) -> str:
        """
        The physical property (e.g. 'LENGTH', 'TIME', ...) that the unit measures.

        Note that compound units may not be supported, for the list of supported units see
        ``create_units``.

        Returns
        -------
        str or None
            The physical property.

        Raises
        ------
        KeyError
            If invalid unit provided.
        """

        properties_dict = create_units(CODATA_VERSION)[1]
        try:
            return properties_dict[str(self)]
        except KeyError as error:
            raise KeyError(f'Unknown unit {str(self)} provided, cannot determine the '
                           'physical property it measures ') from error

    def _calculate_components(self,
                              other: Unit,
                              op: Literal['mul', 'div', 'pow']) -> defaultdict[list]:
        """
        Calculate the ``components`` for a new ``Unit`` generated from an operation.

        These ``components`` are separated into whether they are in the
        ``numerator`` or the ``denominator`` of the new ``Unit``.

        Parameters
        ----------
        other : unit
            The ``Unit`` which is operating on this ``Unit`` (i.e. ``self``).
        op : str
            An operation, either ``mul``, ``div``, or ``pow``.

        Returns
        -------
        defaultdict[list]
            Contains the ``numerator`` and ``denominator`` of the new ``Unit``
            generated from the operation.
        """

        # Creating another defaultdict and then populating it by deepcopying
        # every unit in the numerator and denominator avoids issues with
        # multiple component dictionaries referencing the same object - this
        # previously led to units which were base units being transformed into
        # combined units as the lists in their components dictionary were
        # modified
        components = defaultdict(list)
        for k, lst in self.components.items():
            components[k] = [deepcopy(unit) for unit in lst]
        if op == 'mul':
            components['numerator'] += other.components['numerator']
            components['denominator'] += other.components['denominator']
        elif op == 'div':
            components['numerator'] += other.components['denominator']
            components['denominator'] += other.components['numerator']
        elif op == 'pow':
            # Ensure other is an integer
            other = int(other)
            if other >= 1:
                components['numerator'] *= other
                components['denominator'] *= other
            else:
                numerator = components['numerator']
                components['numerator'] = components['denominator'] * \
                    abs(other)
                components['denominator'] = numerator * abs(other)

        return components

    @staticmethod
    def _calculate_string(components: defaultdict(list)) -> str:
        """
        Calculate the `str` for a new ``Unit`` generated from an operation.

        Parameters
        ----------
        components : defaultdict[list]
            Contains the ``numerator`` and ``denominator`` of the new ``Unit``.

        Returns
        -------
        str
            The `str` representing the new ``Unit``.
        """

        def _calculate_expr_string(expr) -> str:
            """
            Calculate the `str` from a `list` of ``components``.

            ``Counter`` is used to determined the number of occurences of each
            ``Unit`` `str` and then create power notation if there is more than
            one occurence.

            Parameters
            ----------
            expr : list[Unit]
                Expression to compute new powers of.

            Returns
            -------
            str
                Computed string.
            """

            component_powers = Counter(expr)
            # List used rather than string so that sorting can be implemented
            component_list = []
            for comp, power in component_powers.items():
                if power == 1:
                    component_list.append(comp)
                else:
                    component_list.append(comp + ' ^ ' + str(power))
            return ' '.join(component_list)

        numerator = _calculate_expr_string(components['numerator'])
        denominator = _calculate_expr_string(components['denominator'])

        # Different string styles for the three cases of just numerator, just
        # denominator, and both
        if not components['numerator']:
            return '1 / ' + denominator
        if not components['denominator']:
            return numerator
        return numerator + ' / ' + denominator

    @staticmethod
    def _parse_unit_string(unit_string: str) -> tuple[list[Unit], list[Unit]]:
        """
        Convert a ``Unit`` `str` into ``Unit`` objects.

        Parameters
        ----------
        unit_string : str
            A `str` representing a ``Unit``.

        Returns
        -------
        numerator : list[Unit]
            Base ``Unit`` objects in numerator.
        denominator : list[Unit]
            Base ``Unit`` objects in denominator.

        Examples
        --------
        Parse ``e mol / K ^ 2``::

            >>> parse_unit_string('e mol / K ^ 2')
            ([Unit('e'), Unit('e'), Unit('mol')], [Unit('K'), Unit('K')])
        """

        def parse_powers(string: str) -> tuple[list[Unit], list[Unit]]:
            """
            Transform powers into respective ``Unit`` s.

            Parameters
            ----------
            string : str
                A compound ``Unit`` `str` containing zero or more powers
                (with powers specified by ``^``) but no denominators (i.e.
                ``/``), such as ``Ang``, ``Ang mol``, ``Ang ^ 2 mol kJ^2``.

            Returns
            -------
            numerator : list[Unit]
                Base ``Unit`` objects in numerator.
            denominator : list[Unit]
                Base ``Unit`` objects in denominator.

            Examples
            --------
            Parse ``Ang ^ 2 mol kJ^-2``::
                >>> parse_powers('Ang ^ 2 mol kJ^2')
                [Unit('Ang'), Unit('Ang'), Unit('mol)'], [Unit('kJ'), Unit('kJ')]
            """

            if '^' in string:
                # Joining with ' ' before stripping out spaces means that
                # 'Ang ^ 2' and 'Ang^2' are equivalent
                string = ' '.join(string.split('^'))
            splt_space = string.split(' ')
            # Strip out spaces
            strip = list(filter(lambda x: x != '', splt_space))
            parsed = [Unit(strip[0])]
            inverse_parsed = []
            # For all elements apart from the first, determine if element is an
            # int. If so, and it is positive, append n-1 copies of the previous
            # unit, where n is the integer value of the element. If it is
            # negative, remove the previous element from the numerator and
            # instead add n copies to the denominator. If n is zero, simply
            # remove the element. If not an int, append a Unit object
            # initialized from the element (which should be a string specifying)
            # a unit
            for i in range(1, len(strip)):
                element = strip[i]
                try:
                    exponent = int(element)
                    if exponent > 0:
                        for _ in range(exponent - 1):
                            parsed.append(parsed[-1])
                    else:
                        base = parsed.pop()
                        for _ in range(- exponent):
                            inverse_parsed.append(base)
                except ValueError:
                    parsed.append(Unit(element))
            return parsed, inverse_parsed

        # Start by splitting the compound unit into a numerator and denominator
        if '/' in unit_string:
            num_string, denom_string = unit_string.split('/')
            denom, inverse_denom = parse_powers(denom_string)
        else:
            num_string = unit_string
            denom, inverse_denom = [], []
        num, inverse_num = parse_powers(num_string)

        # Combine the numerator with elements from the denominator that had
        # negative powers, and vice versa
        return num + inverse_denom, denom + inverse_num



# Define the unit system used in MDMC
SYSTEM = {
    'LENGTH': Unit('Ang'),
    'TIME': Unit('fs'),
    'MASS': Unit('amu'),
    'CHARGE': Unit('e'),
    'ANGLE': Unit('deg'),
    'TEMPERATURE': Unit('K'),
    'ENERGY': Unit('kJ') / Unit('mol'),
    'FORCE': Unit('kJ') / (Unit('Ang') * Unit('mol')),
    'PRESSURE': Unit('Pa'),
    'ENERGY_TRANSFER': Unit('meV'),
    'ARBITRARY': Unit('arb'),
}



[docs]
def create_units(codata_version: str) -> dict[Unit, float]:
    """
    Create a `dict` of ``Unit`` based on the CODATA version.

    Parameters
    ----------
    codata_version : str
        The CODATA version to be used.

    Returns
    -------
    dict
        Contains (``Unit``: conversion factor) pairs.
    """

    # SYSTEM units are defined to 1.0, and have the physical properties they
    # measure defined (e.g. {'Ang': 'LENGTH', ...})
    units = {unit: 1.0 for unit in SYSTEM.values()}
    unit_properties = {unit: prop for prop, unit in SYSTEM.items()}

    # CODATA version
    codata = CODATA[codata_version]

    # Length
    # 1 m = 1e10 Ang
    units['m'] = units['Ang'] * 1e10
    unit_properties['m'] = 'LENGTH'
    # 1 cm = 1e8 Ang
    units['cm'] = units['Ang'] * 1e8
    unit_properties['cm'] = 'LENGTH'
    # 1 nm = 1e1 Ang
    units['nm'] = units['Ang'] * 1e1
    unit_properties['nm'] = 'LENGTH'
    # 1 AA = 1 Ang
    units['AA'] = units['Ang']
    unit_properties['AA'] = 'LENGTH'

    # Area
    # 1 barn = 1e-28 m^2 = 1e-8 Ang^2
    units['barn'] = units['Ang'] * units['Ang'] * 1e8

    # Time
    # 1 s = 1e15 fs
    units['s'] = units['fs'] * 1e15
    unit_properties['s'] = 'TIME'
    # 1 ns = 1e6 fs
    units['ns'] = units['fs'] * 1e6
    unit_properties['ns'] = 'TIME'
    # 1 ps = 1e3 fs
    units['ps'] = units['fs'] * 1e3
    unit_properties['ps'] = 'TIME'

    # Mass
    # 1 kg = (1000 * N_av) amu = (1/u) amu
    units['kg'] = units['amu'] / codata['_amu']
    unit_properties['kg'] = 'MASS'
    # 1 g = N_av amu = (1/1000u) amu = (1/1000) kg
    units['g'] = units['kg'] / 1000.
    unit_properties['g'] = 'MASS'
    # 1 g mol^-1 = 1 amu by definition
    units['g / mol'] = units['amu']
    unit_properties['g / mol'] = 'MASS'

    # Energy
    # 1 kcal mol^-1 = 4.184 kJ mol^-1
    units['kcal / mol'] = units['kJ / mol'] * 4.184
    unit_properties['kcal / mol'] = 'ENERGY'
    # 1 kJ = 1 kJ mol^-1 * Nav
    units['kJ'] = units['kJ / mol'] * codata['_Nav']
    unit_properties['kJ'] = 'ENERGY'
    # 1 J = (1/1000) kJ
    units['J'] = units['kJ'] / 1000.
    unit_properties['J'] = 'ENERGY'
    # 1 kcal = 4.184 kJ
    units['kcal'] = units['kJ'] * 4.184
    unit_properties['kcal'] = 'ENERGY'

    # Energy transfer
    units['ueV'] = units['meV'] * 1000.
    unit_properties['ueV'] = 'ENERGY_TRANSFER'

    # Force
    # 1 kcal Ang^-1 mol^-1 = 4.184 kJ Ang^-1 mol^-1
    units['kcal / Ang mol'] = units['kJ / Ang mol'] * 4.184
    unit_properties['kcal / Ang mol'] = 'FORCE'

    # Pressure
    # 1 atm = 101325 Pa
    units['atm'] = units['Pa'] * 101325.
    unit_properties['atm'] = 'PRESSURE'
    # 1 bar = 1e5 Pa
    units['bar'] = units['Pa'] * 1e5
    unit_properties['bar'] = 'PRESSURE'

    # Angle
    # 1 rad = (180 / pi) deg
    units['rad'] = units['deg'] * (180. / np.pi)
    unit_properties['rad'] = 'ANGLE'

    # Amount
    units['mol'] = codata['_Nav']

    return units, unit_properties




[docs]
class UnitFloat(float):
    """
    Subclasses `float` so that it contains a ``unit`` attribute.

    ``unit`` attribute is returned when __repr__ or __str__ are called.

    Parameters
    ----------
    value : float
        The value of the ``UnitFloat``.
    unit : Unit, str
        A ``Unit`` or a `str` representing the unit.

    Notes
    -----
    As both ``__repr__`` and ``__deepcopy__`` rely on the `float` being real,
    this class is not compatible with complex numbers.  This should be
    immaterial as no quantity which possesses units is complex.
    """

    def __new__(cls, value: float, unit: Union[Unit, str]):

        if value is None:
            return None
        if isinstance(value, Series):
            # sometimes we add a unit to a single element Pandas series;
            # doing this directly via float.__new__ is deprecated
            return float(value.iloc[0])
        return float.__new__(cls, value)

    def __init__(self, value: float, unit: Union[Unit, str]):

        float.__init__(value)
        self.unit = unit

    @property
    def unit(self) -> Unit:
        """
        Get or set the ``unit``.

        Either a `str` or a ``Unit`` can be passed to the setter.

        Returns
        -------
        Unit
            The ``Unit`` equivalent to the passed ``unit`` parameter.
        """

        return self._unit

    @unit.setter
    def unit(self, value: float) -> None:

        if not (isinstance(value, str) or value is None):
            raise TypeError('unit must be a string')
        self._unit = Unit(value)

    def __deepcopy__(self, memo: dict):
        """
        Copy the ``UnitFloat`` and all attributes.

        This method is required because otherwise the ``float.__deepcopy__`` is
        used, which attempts to create a new ``UnitFloat`` class using only 2
        arguments i.e. the ``value``.  ``UnitFloat.__new__`` takes exactly 3
        arguments.

        It simply creates a new ``UnitFloat`` and sets all of its attributes to
        deepcopies of the current attributes (where possible), along with
        updating the ``memo``.

        Parameters
        ----------
        memo : dict
            Dictionary of previously computed copies for recursion.

        Returns
        -------
        UnitFloat
            Copied ``UnitFloat`` object.
        """

        cls = self.__class__
        unit_float = cls.__new__(cls, self.real, self.unit)
        memo[id(self)] = unit_float
        for k, v in self.__dict__.items():
            setattr(unit_float, k, deepcopy(v, memo))
        return unit_float

    def __repr__(self):

        return repr(self.real) + ' ' + self.unit

    def __str__(self):

        return self.__repr__()




[docs]
class UnitNDArray(np.ndarray):

    """
    Subclasses :any:`ndarray` so that it contains a ``unit`` attribute.

    ``unit`` attribute is returned when ``__repr__`` or ``__str__`` are called

    Parameters
    ----------
    shape : tuple of ints
        Shape of created ``array``.
    unit : Unit, str
        A ``Unit`` or a `str` representing the unit.
    dtype : data-type, optional
        Any object that can be interpreted as a NumPy data type.
    buffer : object exposing NumPy buffer interface, optional
        Used to fill the ``array`` with data.
    offset : int, optional
        Offset of ``array`` data in ``buffer``.
    strides : tuple of ints, optional
        Strides of data in memory.
    order : str, optional
        Either ``C`` for row-major or ``F`` for column-major. Default is ``C``.
    """

    def __new__(cls, shape, unit, dtype=float, buffer=None, offset=0,
                strides=None, order=None):
        obj = super().__new__(cls, shape, dtype, buffer, offset, strides, order)
        obj.unit = unit
        return obj

    def __array_finalize__(self, obj):
        self.unit = getattr(obj, 'unit', None)

    @property
    def unit(self) -> None:
        """
        Get or set the ``unit``.

        Either a ``str`` or a ``Unit`` can be passed to the setter.

        Returns
        -------
        Unit
            The ``Unit`` equivalent to the passed ``unit`` parameter.
        """

        return self._unit

    @unit.setter
    def unit(self, value: float):

        if not (isinstance(value, str) or value is None):
            raise TypeError('unit must be a string')
        self._unit = Unit(value)

    def __repr__(self):
        try:
            return super().__repr__() + ' ' + self.unit
        except TypeError:
            return super().__repr__()

    def __str__(self):
        try:
            return super().__str__() + ' ' + self.unit
        except TypeError:
            return super().__str__()




[docs]
def unit_array(obj, unit: Union[Unit, str], dtype=None) -> UnitNDArray:
    """
    Create a ``UnitNDArray`` from an ``array`` or any nested sequence.

    This mimics the manner in which NumPy creates arrays (although is in Python
    not C), except several arguments are excluded.

    Also, unlike ``np.array(None)``, passing ``obj=None`` to ``unit_array``
    results in `None` being returned. This allows classes to have properties
    with units which can be either have a ``value`` or be undefined.

    Parameters
    ----------
    obj : None or array_like
        An object derived from ``collections.Sequence``. If `None`, then `None`
        is returned.
    unit : Unit, str
        A ``Unit`` or a `str` representing the unit.
    dtype : data-type, optional
        Any object that can be interpreted as a NumPy data type.

    Returns
    -------
    UnitArray
        A ``UnitArray`` object satisfying the specified requirements.
    """

    if obj is None:
        return None

    if not isinstance(unit, str):
        raise TypeError(f'unit must be a string, but was {unit}')

    # Significantly faster to create np.array and view it than to loop
    if not isinstance(obj, np.ndarray):
        obj = np.array(obj, dtype=dtype)

    unit_arr = obj.view(UnitNDArray)
    unit_arr.unit = unit
    return unit_arr



# Update the module scope to include the SYSTEM and units keys
globals().update(SYSTEM)
globals().update(create_units(CODATA_VERSION)[0])