Source code for MDMC.readers.configurations.pdb

"""Module for reading pdb files"""
# pylint: disable=no-name-in-module
import itertools
from typing import List

import numpy as np

from MDMC.MD.interactions import Bond
from MDMC.MD.structures import Atom
from MDMC.readers.configurations.conf_reader import ConfigurationReader


[docs] class ProteinDataBankReader(ConfigurationReader): """ A class for reading pdb configuration files Examples -------- To use a pdb reader to read a file called 'paracetamol.pdb' and create a set of ``Molecule``s from it: .. highlight:: python .. code-block:: python file = 'water.pdb' pdb_reader = pdb(file) with pdb_reader: pdb_reader.parse() water = pdb_reader.molecules[0] """ extension = 'pdb' def __init__(self, file_name: str): super().__init__(file_name) self._bonds: List['Bond'] = []
[docs] def parse(self, **settings: dict) -> None: # This follows https://www.wwpdb.org/documentation/file-format v3.30 (line 180 of A4 pdf) # Link to PDF of file format: # https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_A4.pdf (page 180) # pylint: disable=unused-variable # as molecule_id is unused but is part of the standard! molecule = {} _molecule_id = 0 for line in self.file: record_name = line[0:6] if record_name in ("ATOM ", "HETATM"): # chars 23-26 identify molecule atom_id = int(line[6:12].split()[-1]) _molecule_id = int(line[22:26].split()[-1]) element = line[76:78].split()[-1] current_atom_pos = [float(pos.split()[-1]) for pos in (line[30:38], line[38:46], line[46:54])] # xyz positions atom_name = line[12:16].split()[-1] current_atom_obj = Atom(element.capitalize(), position=current_atom_pos, name=atom_name) self._atoms.append(current_atom_obj) molecule[atom_id] = current_atom_obj elif line[0:6] == "CONECT": atoms_to_connect = line[6:].split() # pylint: disable=no-member for atom1_id, atom2_id in itertools.pairwise(atoms_to_connect): self.create_bond(molecule[int(atom1_id)], molecule[int(atom2_id)])
[docs] def create_bond(self, atom1: Atom, atom2: Atom) -> None: """ Checks the bond lengths of the atoms in the molecule and creates a bond if it is below a certain threshold This is needed because PDB files are able to include H-bonds (which MDMC does not support) alongside other types of bonds, which are undistinguishable from each other in a pdb file. Therefore, cutting off the bond length at a reasonable distance prevents an extremely long bond being introduced into a molecule structure """ # 2.1 Ang used as bonded interactions should not usually go beyond this, and to prevent # bonds that are way too long in the context of the whole molecule value. # Value comes from: https://doi.org/10.1002/anie.202102967, where 2 Ang is given as the cutoff = 2.1 difference = np.subtract(atom1.position, atom2.position) bond_length = np.linalg.norm(difference) if bond_length < cutoff: self._bonds += [Bond((atom1, atom2))]
@property def atoms(self) -> 'list[Atom]': return self._atoms @property def bonds(self) -> 'list[Bond]': """Returns the bonds within the molecule, as specified by "CONECT" statements in the pdb file""" return self._bonds