# Source code for MDMC.readers.configurations.packmol_pdb

"""A reader for reading in the PDB configuration of whole packmol systems"""
from typing import List, Dict, TYPE_CHECKING

from MDMC.MD.structures import Molecule, Atom
from MDMC.readers.configurations.conf_reader import ConfigurationReader

if TYPE_CHECKING:
    from MDMC.MD.structures import Structure

class PackmolPDBReader(ConfigurationReader):
    """
    A class to read in packmol PDB output files

    Atoms are taken from the ``ATOM`` and ``HETATM`` records of the file and
    grouped by chain identifier and residue sequence number. Each group
    becomes one entry of ``structures``.

    Parameters
    ----------
    file_name : str
        Forwarded unchanged to ``ConfigurationReader.__init__``.
    """

    extension = "NONE"

    def __init__(self, file_name: str):
        super().__init__(file_name)
        # Filled by ``parse``; exposed through the ``structures`` property.
        self._structures: List['Structure'] = []

    def parse(self, **settings: dict) -> None:
        """
        Parses a .pdb file into a list of `Structure`s (atoms and molecules)
        which can then be accessed as the `structures` property of the reader.

        The parsed structures are appended to any that are already stored on
        the reader.

        Parameters
        ----------
        **settings: dict, optional
            None are necessary for this reader.

        Raises
        ------
        IndexError
            Propagated from ``_parse_atom_record`` for a record that is too
            short or has a blank name or element field.
        ValueError
            Propagated from ``_parse_atom_record`` for a record with a
            non-numeric residue number or coordinate.
        """
        # 'chain id' is an identifier for the unique type of molecule in the
        # file and 'molecule id' is an identifier for a specific copy of said
        # molecule. For, e.g. molecules of two different types, the chains
        # dict has the following structure:
        # chains_dict = {
        #     'A': {1: [...], 2: [...], ...},
        #     'B': {1: [...], 2: [...], ...}
        # }
        # where [...] is a list of atoms belonging to a specific molecule.
        # The molecule ids are ints because that is what
        # ``_parse_atom_record`` returns.
        #
        # NOTE(review): the molecule id is read from a 4-column field, so it
        # cannot distinguish more than 10^4 values per chain. If the writer
        # reuses numbers for larger systems, separate molecules would be
        # merged here -- confirm against packmol's numbering behaviour.
        chains_dict: Dict[str, Dict[int, List[Atom]]] = {}

        # assumes ``self.file`` (set up outside this class) yields the lines
        # of the PDB file -- TODO confirm against ConfigurationReader
        for line in self.file:
            # Columns 1-6 identify what the line is describing. The field is
            # left-justified and space padded, so the trailing padding is
            # stripped before comparing; matching the raw six-character slice
            # against a shorter literal would silently skip every ATOM record.
            record_name = line[0:6].rstrip()
            if record_name not in ("ATOM", "HETATM"):
                continue

            record = self._parse_atom_record(line)
            current_atom_obj = Atom(record['element_symbol'].capitalize(),
                                    position=record['atom_position'],
                                    name=record['name'])
            molecules_dict = chains_dict.setdefault(record['chain_id'], {})
            molecules_dict.setdefault(record['molecule_id'],
                                      []).append(current_atom_obj)

        # if multiple atoms share an id, this means they are part of the same
        # molecule; create a Molecule object for them
        for molecules_dict in chains_dict.values():
            for atom_list in molecules_dict.values():
                if len(atom_list) == 1:
                    self._structures.append(atom_list[0])
                else:
                    self._structures.append(Molecule(atoms=atom_list))

    def _parse_atom_record(self, line: str) -> dict:
        """
        A function to get the necessary information out of an `ATOM` or
        `HETATM` record

        This follows https://www.wwpdb.org/documentation/file-format v3.30
        (page 180 of A4 pdf)
        Link to PDF of file format:
        https://files.wwpdb.org/pub/pdb/doc/format_descriptions/Format_v33_A4.pdf
        (page 180)

        Parameters
        ----------
        line : str
            A single `ATOM` or `HETATM` line of the file.

        Returns
        -------
        dict
            With the keys:

            - ``'name'``: last whitespace-separated token of columns 13-16
            - ``'chain_id'``: the character in column 22
            - ``'molecule_id'``: columns 23-26 converted to ``int``
            - ``'atom_position'``: ``tuple`` of three ``float`` read from
              columns 31-38, 39-46 and 47-54
            - ``'element_symbol'``: last whitespace-separated token of
              columns 77-78

        Raises
        ------
        IndexError
            If the line ends before column 22, or if the name or element
            columns are blank or missing.
        ValueError
            If the residue number or a coordinate cannot be converted to a
            number.
        """
        coordinate_fields = (line[30:38], line[38:46], line[46:54])
        record = {
            'name': line[12:16].split()[-1],
            'chain_id': line[21],
            'molecule_id': int(line[22:26]),
            'atom_position': tuple(float(pos) for pos in coordinate_fields),
            'element_symbol': line[76:78].split()[-1]
        }
        return record

    @property
    def structures(self) -> List['Structure']:
        """
        Returns the list of structures built by ``parse``: an ``Atom`` for
        each group containing a single atom and a ``Molecule`` for each group
        containing several atoms
        """
        return self._structures