Source code for ccpnmodel.ccpncore.lib.Io.PyMMLibPDB

#=========================================================================================
# Licence, Reference and Credits
#=========================================================================================
__copyright__ = "Copyright (C) CCPN project (http://www.ccpn.ac.uk) 2014 - 2017"
__credits__ = ("Wayne Boucher, Ed Brooksbank, Rasmus H Fogh, Luca Mureddu, Timothy J Ragan & Geerten W Vuister")
__licence__ = ("CCPN licence. See http://www.ccpn.ac.uk/v3-software/downloads/license",
               "or ccpnmodel.ccpncore.memops.Credits.CcpnLicense for licence text")
__reference__ = ("For publications, please use reference from http://www.ccpn.ac.uk/v3-software/downloads/license",
               "or ccpnmodel.ccpncore.memops.Credits.CcpNmrReference")
"""Brookhaven PDB v2.2 file parser. All records in the PDB v2.2
specification have corresponding classes defined here. PDB files are
loaded into a list of these classes, and also can be constructed/modified
and written back out as PDB files.

Modified from PyMMLib pdb.py 17/11/2015
By Rasmus Fogh and Geerten Vuister, CCPN project.

The PyMMLib original version is governed by the Artistic license v2.0,
and the authors of the modified version do not claim copyright on their modifications

Original authors:

Project Lead
------------
Dr. Ethan Merritt <merritt@u.washington.edu>

Programmer
----------
Jay Painter <jpaint@u.washington.edu>
Christoph Champ <champc@u.washington.edu>


Original copyright statement:

## Copyright 2002-2010 by PyMMLib Development Group (see AUTHORS file)
## This code is part of the PyMMLib distribution and governed by
## its license.  Please see the LICENSE file that should have been
## included as part of this package.


Modifications:
- Changed syntax and imports to conform to Python 3
- replaced fpformat calls by '%' formatting
- Changed processing of REMARK records, merging remark number into main text field
- Changed ATOM resName records to 4-char (18-21), as some programs use
  4-char residue types.

Compared to earlier, CING-specific modified version, this:
- uses a newer PyMMlib version, with different file I/O code
- lacks CING-specific warning and error tracking, and progress bar handling
- lacks wrapper I/O functionality (e.g. reading zipped files), which should be done elsewhere
- Writes out ATOM.resNames shorter than 3 characters in accordance with the original specification.

"""

#=========================================================================================
# Last code modification
#=========================================================================================
__modifiedBy__ = "$modifiedBy: CCPN $"
__dateModified__ = "$dateModified: 2017-07-07 16:33:13 +0100 (Fri, July 07, 2017) $"
__version__ = "$Revision: 3.0.0 $"
#=========================================================================================
# Created
#=========================================================================================

__author__ = "$Author: CCPN $"
__date__ = "$Date: 2017-04-07 10:28:48 +0000 (Fri, April 07, 2017) $"
#=========================================================================================
# Start of code
#=========================================================================================


# from __future__ import generators
# import fpformat


class PDBError(Exception):
    """Root of the exception hierarchy raised by this PDB module."""
class PDBValueError(PDBError):
    """Error carrying a message about a field value that could not be used.

    The message is kept in ``text`` and is what ``str()`` returns.
    """

    def __init__(self, text):
        self.text = text

    def __str__(self):
        message = self.text
        return message
class PDBRecord(dict):
    """Base class for all PDB file records.

    A record is a dictionary mapping field names to values.  Subclasses set
    ``_name`` (the text the output line starts with) and ``_field_list``, a
    list of ``(field, start, end, ftype, just, get_func)`` tuples where:

    * ``start``/``end`` are 1-based, inclusive column numbers;
    * ``ftype`` starts with "string", "integer" or "float"; for floats the
      character at index 6 is the number of decimals (e.g. "float.3");
    * ``just`` starts with "ljust" for left-justified output (anything else
      is right-justified) and may end in "lstrip"/"rstrip" to control how
      string fields are stripped when read;
    * ``get_func``, when set, is called with the record and must return the
      text to append for that field.
    """
    _name = None
    _field_list = None

    def __str__(self):
        return self.write()

    def write(self):
        """Return a properly formed PDB record string from the instance
        dictionary values.

        Raises:
            AssertionError: if the text written so far already extends past
                the start column of the next field, or if a value is not a
                string once converted.
            PDBValueError: if a float field holds a value that cannot be
                converted to a float.
        """
        ln = self._name

        for (field, start, end, ftype, just, get_func) in self._field_list:
            # Explicit check rather than ``assert`` so that it is still
            # performed when Python runs with -O.
            if len(ln) > start - 1:
                raise AssertionError(
                    "[ASSERT] field=%s starts at column %d but line is "
                    "already: %s" % (field, start, ln))

            ## add spaces to the end if necessary
            ln = ln.ljust(start - 1)

            ## used later
            field_char_len = end - start + 1

            ## if a special function is defined for retrieving this field,
            ## its output is used verbatim
            if get_func:
                ln += get_func(self)
                continue

            ## get the data
            s = self.get(field, "")

            ## if the data is blank, then just add the spaces and continue
            if s is None or s == "":
                ln += " " * field_char_len
                continue

            ## convert integer and float types
            if ftype.startswith("string"):
                pass
            elif ftype.startswith("integer"):
                s = str(s)
            elif ftype.startswith("float"):
                try:
                    # float() lets numeric strings through; '%f' raises
                    # TypeError (not ValueError) for non-numbers, so both
                    # are mapped to PDBValueError.
                    s = "%.*f" % (int(ftype[6]), float(s))
                except (TypeError, ValueError) as err:
                    raise PDBValueError(
                        "field=%s %s not float" % (field, s)) from err

            ## the value must be text by now
            if not isinstance(s, str):
                raise AssertionError(
                    "### s %s %s %s %s | %s"
                    % (type(s), s, ftype, field, ln))

            ## truncate over-long values, otherwise pad to the field width
            if len(s) > field_char_len:
                ln += s[:field_char_len]
            elif just.startswith("ljust"):
                ln += s.ljust(field_char_len)
            else:
                ln += s.rjust(field_char_len)

        return ln

    def read(self, line):
        """Read the PDB record line and convert the fields to the appropriate
        dictionary values for this class.

        Blank fields, and integer/float fields that fail to parse, are left
        out of the dictionary.
        """
        for (field, start, end, ftype, just, get_func) in self._field_list:
            s = line[start - 1:end]

            ## ignore blank fields
            if s == "" or s.isspace():
                continue

            if ftype.startswith("string"):
                if just.endswith("lstrip"):
                    s = s.lstrip()
                elif just.endswith("rstrip"):
                    s = s.rstrip()
                else:
                    s = s.strip()
            elif ftype.startswith("integer"):
                try:
                    s = int(s)
                except ValueError:
                    continue
            elif ftype.startswith("float"):
                try:
                    s = float(s)
                except ValueError:
                    continue

            self[field] = s

    def reccat(self, rec_list, field):
        """Return the concatenation of field in all the records in rec_list.

        A single record may be passed instead of a list.  Records without
        the field are skipped.
        """
        if not isinstance(rec_list, list):
            rec_list = [rec_list]
        values = (rec.get(field) for rec in rec_list)
        return "".join(x for x in values if x is not None)

    def reccat_list(self, rec_list, field, sep):
        """Call reccat, then split the result by the separator and strip
        surrounding whitespace from every item.
        """
        return [x.strip() for x in self.reccat(rec_list, field).split(sep)]

    def reccat_tuplelist(self, rec_list, field, sep1, sep2):
        """Call reccat_list with sep1 as the list separator, then split the
        items into (key, value) tuples at the first occurrence of sep2.

        Items that do not contain sep2 are dropped.
        """
        listx = []
        for x in self.reccat_list(rec_list, field, sep1):
            i = x.find(sep2)
            if i == -1:
                continue
            key = x[:i].strip()
            # skip the whole separator, which may be longer than one char
            val = x[i + len(sep2):].strip()
            listx.append((key, val))
        return listx

    def reccat_dictlist(self, rec_list, field, master_key):
        """Return a list of dictionaries built from "key: value;" text.

        The field text of all records is split into (key, value) pairs using
        ";" between pairs and ":" inside a pair.  A new dictionary is started
        each time master_key is met while the current one is non-empty.
        """
        listx = []
        dictx = {}
        for (key, val) in self.reccat_tuplelist(rec_list, field, ";", ":"):
            if key == master_key and dictx:
                listx.append(dictx)
                dictx = {}
            dictx[key] = val

        if dictx:
            listx.append(dictx)

        return listx

    def reccat_multi(self, rec_list, primary_key, translations):
        """Create a list of dictionaries from a list of records.

        The primary key is used to separate the dictionaries within the
        list: records sharing a primary key value are merged into the same
        dictionary, and a record without the primary key inherits the key of
        the last dictionary created (it is skipped if there is none).

        ``translations`` is a list of strings or 2-tuples.  For a string, the
        value of that record field is copied to the return dictionary.  For
        a 2-tuple ``t``, ``t[0]`` is the return dictionary key whose value is
        a list formed from the record fields named in ``t[1]``.
        """
        if not isinstance(rec_list, list):
            rec_list = [rec_list]

        listx = []
        for rec in rec_list:
            ## XXX: add primary key generation for bad records
            try:
                pkey = rec[primary_key]
            except KeyError:
                ## no primary key: take it from the last dictionary, which
                ## is in the same order as the record list
                try:
                    pkey = listx[-1][primary_key]
                except (KeyError, IndexError):
                    continue

            ## search for a dictionary in listx with the same primary key
            dictx = None
            for dx in listx:
                if dx[primary_key] == pkey:
                    dictx = dx
                    break

            ## new dictx if not found
            if dictx is None:
                dictx = {primary_key: pkey}
                listx.append(dictx)

            ## translate the PDB record into dictx
            for trans in translations:
                if isinstance(trans, tuple):
                    ## gather the present source fields in a list under dest
                    (dest, srcs) = trans
                    for sx in srcs:
                        if sx in rec:
                            dictx.setdefault(dest, []).append(rec[sx])
                elif trans in rec:
                    ## single field copied under the same key
                    dictx[trans] = rec[trans]

        return listx
############################################################################### ## BEGIN PDB RECORD DEFINITIONS ## SECTION 2: Title Section
class OBSLTE(PDBRecord):
    """OBSLTE flags an entry that has been withdrawn from distribution.

    It indicates which new entries, if any, have replaced the withdrawn
    entry; several replacement entries may be listed for one entry.
    """
    __slots__ = []
    _name = "OBSLTE"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("repDate", 12, 20, "string", "rjust", None),
        ("idCode", 22, 25, "string", "rjust", None),
        ("rIdCode1", 32, 35, "string", "rjust", None),
        ("rIdCode2", 37, 40, "string", "rjust", None),
        ("rIdCode3", 42, 45, "string", "rjust", None),
        ("rIdCode4", 47, 50, "string", "rjust", None),
        ("rIdCode5", 52, 55, "string", "rjust", None),
        ("rIdCode6", 57, 60, "string", "rjust", None),
        ("rIdCode7", 62, 65, "string", "rjust", None),
        ("rIdCode8", 67, 70, "string", "rjust", None),
    ]

    def process(self, recs):
        """Turn a continued record list into a list of dictionaries, one
        per OBSLTE idCode, each holding repDate and the rIdCodes list.
        """
        replacement_fields = ["rIdCode%d" % n for n in range(1, 9)]
        translations = ["repDate", ("rIdCodes", replacement_fields)]
        return self.reccat_multi(recs, "idCode", translations)
class TITLE(PDBRecord):
    """TITLE holds a title for the experiment or analysis represented in
    the entry, identifying it the way a title identifies a paper.
    """
    __slots__ = []
    _name = "TITLE "
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("title", 11, 70, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return the title text of all continuation records joined."""
        full_title = self.reccat(recs, "title")
        return full_title
class CAVEAT(PDBRecord):
    """CAVEAT warns of severe errors in an entry. Use caution when using an
    entry containing this record.
    """
    __slots__ = []
    _name = "CAVEAT"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("idCode", 12, 15, "string", "rjust", None),
        ("comment", 20, 70, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return a list of dictionaries with keys idCode and comment.

        Records sharing an idCode are merged into one dictionary, their
        comments concatenated in record order; records without an idCode
        are skipped.  ``recs`` may be a single record or an iterable of
        records.
        """
        # A record is itself a dict; accept a lone record the same way
        # reccat() and reccat_multi() do instead of iterating its keys.
        if isinstance(recs, dict):
            recs = [recs]

        cavet_list = []
        by_id = {}  # idCode -> dictionary already placed in cavet_list
        for rec in recs:
            idCode = rec.get("idCode")
            if idCode is None:
                continue

            cav = by_id.get(idCode)
            if cav is None:
                cav = {"idCode": idCode}
                by_id[idCode] = cav
                cavet_list.append(cav)

            comment = rec.get("comment")
            if comment is not None:
                if "comment" in cav:
                    cav["comment"] += comment
                else:
                    cav["comment"] = comment

        return cavet_list
class COMPND(PDBRecord):
    """COMPND describes the macromolecular contents of an entry.

    Each macromolecule is described by a set of "token: value" pairs (a
    COMPND record component) giving the molecule name, synonyms, Enzyme
    Commission (EC) number and other relevant details.  PDB staff may
    exercise editorial judgment in assigning these names.
    """
    __slots__ = []
    _name = "COMPND"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("compound", 11, 70, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return one dictionary of token/value pairs per MOL_ID."""
        components = self.reccat_dictlist(recs, "compound", "MOL_ID")
        return components
class SOURCE(PDBRecord):
    """SOURCE specifies the biological and/or chemical source of each
    biological molecule in the entry.

    Sources are described by common and scientific name (e.g. genus and
    species); strain and/or cell-line are given when they help to uniquely
    identify the entity studied.
    """
    __slots__ = []
    _name = "SOURCE"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("srcName", 11, 70, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return one dictionary of token/value pairs per MOL_ID."""
        sources = self.reccat_dictlist(recs, "srcName", "MOL_ID")
        return sources
class KEYWDS(PDBRecord):
    """KEYWDS contains a set of terms relevant to the entry.

    The terms give a simple, computer-searchable way of categorizing
    entries and complement the classification field of the HEADER record.
    """
    __slots__ = []
    _name = "KEYWDS"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("keywds", 11, 70, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return the comma-separated keywords as a list of strings."""
        keywords = self.reccat_list(recs, "keywds", ",")
        return keywords
class EXPDTA(PDBRecord):
    """EXPDTA identifies the experimental technique used.

    This may refer to the type of radiation and sample, or include the
    spectroscopic or modeling technique.  Permitted values include:

        ELECTRON DIFFRACTION
        FIBER DIFFRACTION
        FLUORESCENCE TRANSFER
        NEUTRON DIFFRACTION
        NMR
        THEORETICAL MODEL
        X-RAY DIFFRACTION
    """
    __slots__ = []
    _name = "EXPDTA"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("technique", 11, 70, "string", "ljust", None),
    ]
    _technique_list = [
        "ELECTRON DIFFRACTION",
        "FIBER DIFFRACTION",
        "FLUORESCENCE TRANSFER",
        "NEUTRON DIFFRACTION",
        "NMR",
        "THEORETICAL MODEL",
        "X-RAY DIFFRACTION",
    ]

    def process(self, recs):
        """Return a list of 2-tuples (technique, comment).

        The technique text is split on ";".  Items that start with one of
        the accepted techniques yield that technique plus whatever text
        follows it (None when nothing follows); other items are dropped.
        """
        results = []
        for entry in self.reccat_list(recs, "technique", ";"):
            # first accepted technique that prefixes the entry, if any
            matched = next(
                (name for name in self._technique_list
                 if entry.startswith(name)),
                None)
            if matched is None:
                continue
            remainder = entry[len(matched):].strip()
            results.append((matched, remainder or None))
        return results
class AUTHOR(PDBRecord):
    """AUTHOR lists the names of the people responsible for the contents
    of the entry.
    """
    __slots__ = []
    _name = "AUTHOR"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("authorList", 11, 70, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return the comma-separated author names as a list of strings."""
        authors = self.reccat_list(recs, "authorList", ",")
        return authors
class REVDAT(PDBRecord):
    """REVDAT records hold the history of modifications made to an entry
    since its release.
    """
    __slots__ = []
    _name = "REVDAT"
    _multi_record = "continuation"
    _field_list = [
        ("modNum", 8, 10, "integer", "rjust", None),
        ("continuation", 11, 12, "integer", "rjust", None),
        ("modDate", 14, 22, "string", "rjust", None),
        ("modID", 24, 28, "string", "rjust", None),
        ("modType", 32, 32, "integer", "rjust", None),
        ("record1", 40, 45, "string", "ljust", None),
        ("record2", 47, 52, "string", "ljust", None),
        ("record3", 54, 59, "string", "ljust", None),
        ("record4", 61, 66, "string", "ljust", None),
    ]

    def process(self, recs):
        """Return one dictionary per modNum with modDate, modID, modType
        and the list of affected record names under "records".
        """
        record_fields = ["record%d" % n for n in range(1, 5)]
        translations = ["modDate", "modID", "modType",
                        ("records", record_fields)]
        return self.reccat_multi(recs, "modNum", translations)
class SPRSDE(PDBRecord):
    """SPRSDE lists the ID codes of entries made obsolete by this
    coordinate entry and withdrawn from the PDB release set.

    One entry may replace many.  It is PDB policy that only the principal
    investigator of a structure has the authority to withdraw it.
    """
    __slots__ = []
    _name = "SPRSDE"
    _multi_record = "continuation"
    _field_list = [
        ("continuation", 9, 10, "integer", "rjust", None),
        ("sprsdeDate", 12, 20, "string", "rjust", None),
        ("idCode", 22, 25, "string", "rjust", None),
        ("sIdCode1", 32, 35, "string", "rjust", None),
        ("sIdCode2", 37, 40, "string", "rjust", None),
        ("sIdCode3", 42, 45, "string", "rjust", None),
        ("sIdCode4", 47, 50, "string", "rjust", None),
        ("sIdCode5", 52, 55, "string", "rjust", None),
        ("sIdCode6", 57, 60, "string", "rjust", None),
        ("sIdCode7", 62, 65, "string", "rjust", None),
        ("sIdCode8", 67, 70, "string", "rjust", None),
    ]

    def process(self, recs):
        """Return one dictionary per idCode with sprsdeDate and the list of
        superseded ID codes under "sIdCodes".
        """
        superseded_fields = ["sIdCode%d" % n for n in range(1, 9)]
        translations = ["sprsdeDate", ("sIdCodes", superseded_fields)]
        return self.reccat_multi(recs, "idCode", translations)
class JRNL(PDBRecord):
    """The JRNL record contains the primary literature citation that
    describes the experiment which resulted in the deposited coordinate set.

    There is at most one JRNL reference per entry.  If there is no primary
    reference, then there is no JRNL reference.  Other references are given
    in REMARK 1.
    """
    __slots__ = []
    # Padded to the six-character width used by the other short record
    # names in this module ("TITLE ", "DBREF ", "HELIX ", "SHEET ").
    _name = "JRNL  "
    _field_list = [
        ("text", 13, 70, "string", "ljust", None),
    ]
class REMARK(PDBRecord):
    """REMARK records carry experimental details, annotations, comments and
    information not included in other records.

    In a number of cases REMARKs expand the contents of other record types.
    A new level of structure is being used for some REMARK records, which is
    expected to facilitate searching and conversion to a relational
    database.

    CHANGED FROM ORIGINAL
    gv: 3 Feb 2006: Omitted the remarkNum field, and moved all to text field
    """
    __slots__ = []
    _name = "REMARK"
    # The previous layout was remarkNum (integer, columns 8-10) followed by
    # text (columns 12-70); both are now read as one text field.
    _field_list = [
        ("text", 8, 70, "string", "ljust", None),
    ]
## SECTION 3: Primary Structure Section
class DBREF(PDBRecord):
    """DBREF provides cross-reference links between PDB sequences and the
    corresponding database entry or entries.

    A cross reference to the sequence database is mandatory for each
    peptide chain longer than ten (10) residues.  For nucleic acid entries
    a DBREF record pointing to the Nucleic Acid Database (NDB) is mandatory
    when the corresponding entry exists in NDB.
    """
    __slots__ = []
    _name = "DBREF "
    _field_list = [
        ("idCode", 8, 11, "string", "rjust", None),
        ("chain_ID", 13, 13, "string", "rjust", None),
        ("seqBegin", 15, 18, "integer", "rjust", None),
        ("insertBegin", 19, 19, "string", "rjust", None),
        ("seqEnd", 21, 24, "integer", "rjust", None),
        ("insertEnd", 25, 25, "string", "rjust", None),
        ("database", 27, 32, "string", "ljust", None),
        ("dbAccession", 34, 41, "string", "ljust", None),
        ("dbIdCode", 43, 54, "string", "ljust", None),
        ("dbseqBegin", 56, 60, "integer", "rjust", None),
        ("idbnsBeg", 61, 61, "string", "rjust", None),
        ("dbseqEnd", 63, 67, "integer", "rjust", None),
        ("dbinsEnd", 68, 68, "string", "rjust", None),
    ]
class SEQADV(PDBRecord):
    """SEQADV identifies conflicts between sequence information in the ATOM
    records of the PDB entry and the sequence database entry given on DBREF.

    These records identify differences, not errors: no assumption is made
    as to which database contains the correct data.  PDB may include REMARK
    records reflecting the depositor's view of which sequence is correct.
    """
    __slots__ = []
    _name = "SEQADV"
    # NOTE(review): "convlict" looks like a misspelling of "conflict"; it is
    # the dictionary key under which the field is stored, so it is kept.
    _field_list = [
        ("idCode", 8, 11, "string", "rjust", None),
        ("resName", 13, 15, "string", "rjust", None),
        ("chainID", 17, 17, "string", "rjust", None),
        ("seqNum", 19, 22, "integer", "rjust", None),
        ("iCode", 23, 23, "string", "rjust", None),
        ("database", 25, 28, "string", "ljust", None),
        ("dbIDCode", 30, 38, "string", "ljust", None),
        ("dbRes", 40, 42, "string", "rjust", None),
        ("dbSeq", 44, 48, "integer", "rjust", None),
        ("convlict", 50, 70, "string", "ljust", None),
    ]
class SEQRES(PDBRecord):
    """The SEQRES records contain the amino acid or nucleic acid sequence of
    residues in each chain of the macromolecule that was studied.
    """
    __slots__ = []
    _name = "SEQRES"
    _multi_record = "serNum"
    _field_list = [
        ("serNum", 9, 10, "integer", "rjust", None),
        ("chainID", 12, 12, "string", "rjust", None),
        ("numRes", 14, 17, "integer", "rjust", None),
        ("resName1", 20, 22, "string", "rjust", None),
        ("resName2", 24, 26, "string", "rjust", None),
        ("resName3", 28, 30, "string", "rjust", None),
        ("resName4", 32, 34, "string", "rjust", None),
        ("resName5", 36, 38, "string", "rjust", None),
        ("resName6", 40, 42, "string", "rjust", None),
        ("resName7", 44, 46, "string", "rjust", None),
        ("resName8", 48, 50, "string", "rjust", None),
        ("resName9", 52, 54, "string", "rjust", None),
        ("resName10", 56, 58, "string", "rjust", None),
        ("resName11", 60, 62, "string", "rjust", None),
        ("resName12", 64, 66, "string", "rjust", None),
        ("resName13", 68, 70, "string", "rjust", None),
    ]

    def process(self, recs):
        """Return a dictionary with keys chain_id, num_res and
        sequence_list.

        chain_id and num_res are taken from the last record processed;
        sequence_list collects the residue names of all records in order
        and is absent when no record holds a residue name.  ``recs`` may be
        a single record or an iterable of records; an empty iterable gives
        an empty dictionary.
        """
        # A record is itself a dict; accept a lone record the same way
        # reccat() and reccat_multi() do instead of iterating its keys.
        if isinstance(recs, dict):
            recs = [recs]

        res_fields = ["resName%d" % n for n in range(1, 14)]
        seqres = {}
        for rec in recs:
            seqres["chain_id"] = rec.get("chainID", "")
            seqres["num_res"] = rec.get("numRes", 0)
            for field in res_fields:
                if field in rec:
                    seqres.setdefault("sequence_list", []).append(rec[field])

        return seqres
class MODRES(PDBRecord):
    """MODRES describes modifications (e.g. chemical or post-translational)
    to protein and nucleic acid residues, including a mapping between the
    residue names given in a PDB entry and standard residues.
    """
    __slots__ = []
    _name = "MODRES"
    _field_list = [
        ("idCode", 8, 11, "string", "rjust", None),
        ("resName", 13, 15, "string", "rjust", None),
        ("chainID", 17, 17, "string", "rjust", None),
        ("seqNum", 19, 22, "integer", "rjust", None),
        ("iCode", 23, 23, "string", "rjust", None),
        ("stdRes", 25, 27, "string", "rjust", None),
        ("comment", 30, 70, "string", "ljust", None),
    ]
## SECTION 4: Heterogen Section
class HET(PDBRecord):
    """The HET records are used to describe non-standard residues, such as
    prosthetic groups, inhibitors, solvent molecules, and ions for which
    coordinates are supplied.

    Groups are considered HET if they are:
      - not one of the standard amino acids, and
      - not one of the nucleic acids (C, G, A, T, U, and I), and
      - not one of the modified versions of nucleic acids (+C, +G, +A, +T,
        +U, and +I), and
      - not an unknown amino acid or nucleic acid where UNK is used to
        indicate the unknown residue name.

    Het records also describe heterogens for which the chemical identity is
    unknown, in which case the group is assigned the hetID UNK.
    """
    __slots__ = []
    # Padded to the six-character width used by the other short record
    # names in this module ("TITLE ", "DBREF ", "HELIX ", "SHEET ").
    _name = "HET   "
    _field_list = [
        ("hetID", 8, 10, "string", "rjust", None),
        ("chainID", 13, 13, "string", "rjust", None),
        ("seqNum", 14, 17, "integer", "rjust", None),
        ("iCode", 18, 18, "string", "rjust", None),
        ("numHetAtoms", 21, 25, "integer", "rjust", None),
        ("text", 31, 70, "string", "ljust", None),
    ]
class HETNAM(PDBRecord):
    """This record gives the chemical name of the compound with the given
    hetID.
    """
    __slots__ = []
    _name = "HETNAM"
    _multi_record = "continuation"
    _field_list = [
        # Right-justified like every other continuation field in this
        # module; only the written form changes, reading is unaffected.
        ("continuation", 9, 10, "integer", "rjust", None),
        ("hetID", 12, 14, "string", "rjust", None),
        ("text", 16, 70, "string", "ljust", None),
    ]
class HETSYN(PDBRecord):
    """This record provides synonyms, if any, for the compound in the
    corresponding (i.e., same hetID) HETNAM record.

    This is to allow greater flexibility in searching for HET groups.
    """
    __slots__ = []
    _name = "HETSYN"
    _multi_record = "continuation"
    _field_list = [
        # Right-justified like every other continuation field in this
        # module; only the written form changes, reading is unaffected.
        ("continuation", 9, 10, "integer", "rjust", None),
        ("hetID", 12, 14, "string", "rjust", None),
        ("hetSynonyms", 16, 70, "string", "ljust", None),
    ]
class FORMUL(PDBRecord):
    """FORMUL presents the chemical formula and charge of a non-standard
    group.  (The formulas for the standard residues are given in
    Appendix 5.)
    """
    __slots__ = []
    _name = "FORMUL"
    _multi_record = "continuation"
    _field_list = [
        ("compNum", 9, 10, "integer", "rjust", None),
        ("hetID", 13, 15, "string", "rjust", None),
        ("continuation", 17, 18, "integer", "rjust", None),
        ("asterisk", 19, 19, "string", "rjust", None),
        ("text", 20, 70, "string", "ljust", None),
    ]
## SECTION 5: Secondary Structure Section
class HELIX(PDBRecord):
    """HELIX records identify the position of helices in the molecule.

    Helices are both named and numbered; the residues where the helix
    begins and ends are noted, as well as the total length.
    """
    __slots__ = []
    _name = "HELIX "
    _field_list = [
        ("serNum", 8, 10, "integer", "rjust", None),
        ("helixID", 12, 14, "string", "rjust", None),
        ("initResName", 16, 18, "string", "rjust", None),
        ("initChainID", 20, 20, "string", "rjust", None),
        ("initSeqNum", 22, 25, "integer", "rjust", None),
        ("initICode", 26, 26, "string", "rjust", None),
        ("endResName", 28, 30, "string", "rjust", None),
        ("endChainID", 32, 32, "string", "rjust", None),
        ("endSeqNum", 34, 37, "integer", "rjust", None),
        ("endICode", 38, 38, "string", "rjust", None),
        ("helixClass", 39, 40, "integer", "rjust", None),
        ("comment", 41, 70, "string", "ljust", None),
        ("length", 72, 76, "integer", "rjust", None),
    ]
class SHEET(PDBRecord):
    """SHEET records identify the position of sheets in the molecule.

    Sheets are both named and numbered; the residues where the sheet
    begins and ends are noted.
    """
    __slots__ = []
    _name = "SHEET "
    _field_list = [
        ("strand", 8, 10, "integer", "rjust", None),
        ("sheetID", 12, 14, "string", "rjust", None),
        ("numStrands", 15, 16, "integer", "rjust", None),
        ("initResName", 18, 20, "string", "rjust", None),
        ("initChainID", 22, 22, "string", "rjust", None),
        ("initSeqNum", 23, 26, "integer", "rjust", None),
        ("initICode", 27, 27, "string", "rjust", None),
        ("endResName", 29, 31, "string", "rjust", None),
        ("endChainID", 33, 33, "string", "rjust", None),
        ("endSeqNum", 34, 37, "integer", "rjust", None),
        ("endICode", 38, 38, "string", "rjust", None),
        ("sense", 39, 40, "integer", "rjust", None),
        ("curAtom", 42, 45, "string", "rjust", None),
        ("curResName", 46, 48, "string", "rjust", None),
        ("curChainID", 50, 50, "string", "rjust", None),
        ("curResSeq", 51, 54, "integer", "rjust", None),
        ("curICode", 55, 55, "string", "rjust", None),
        ("prevAtom", 57, 60, "string", "rjust", None),
        ("prevResName", 61, 63, "string", "rjust", None),
        ("prevChainID", 65, 65, "string", "rjust", None),
        ("prevResSeq", 66, 69, "integer", "rjust", None),
        ("prevICode", 70, 70, "string", "rjust", None),
    ]
class TURN(PDBRecord):
    """The TURN records identify turns and other short loop turns which
    normally connect other secondary structure segments.
    """
    __slots__ = []
    # Padded to the six-character width used by the other short record
    # names in this module ("TITLE ", "DBREF ", "HELIX ", "SHEET ").
    _name = "TURN  "
    _field_list = [
        ("seq", 8, 10, "integer", "rjust", None),
        ("turnID", 12, 14, "string", "rjust", None),
        ("initResName", 16, 18, "string", "rjust", None),
        ("initChainID", 20, 20, "string", "rjust", None),
        ("initSeqNum", 21, 24, "integer", "rjust", None),
        ("initICode", 25, 25, "string", "rjust", None),
        ("endResName", 27, 29, "string", "rjust", None),
        ("endChainID", 31, 31, "string", "rjust", None),
        ("endSeqNum", 32, 35, "integer", "rjust", None),
        ("endICode", 36, 36, "string", "rjust", None),
        ("comment", 41, 70, "string", "ljust", None),
    ]
## SECTION 6: Connectivity Annotation Section
class SSBOND(PDBRecord):
    """SSBOND identifies each disulfide bond in protein and polypeptide
    structures by naming the two residues involved in the bond.
    """
    __slots__ = []
    _name = "SSBOND"
    _field_list = [
        ("serNum", 8, 10, "integer", "rjust", None),
        ("resName1", 12, 14, "string", "rjust", None),
        ("chainID1", 16, 16, "string", "rjust", None),
        ("seqNum1", 18, 21, "integer", "rjust", None),
        ("iCode1", 22, 22, "string", "rjust", None),
        ("resName2", 26, 28, "string", "rjust", None),
        ("chainID2", 30, 30, "string", "rjust", None),
        ("seqNum2", 32, 35, "integer", "rjust", None),
        ("iCode2", 36, 36, "string", "rjust", None),
        ("sym1", 60, 65, "string", "rjust", None),
        ("sym2", 67, 72, "string", "rjust", None),
    ]
class HYDBND(PDBRecord):
    """HYDBND records specify the hydrogen bonds in the entry."""
    __slots__ = []
    _name = "HYDBND"
    _field_list = [
        ("name1", 13, 16, "string", "rjust", None),
        ("altLoc1", 17, 17, "string", "rjust", None),
        ("resName1", 18, 20, "string", "rjust", None),
        ("chainID1", 22, 22, "string", "rjust", None),
        ("resSeq1", 23, 27, "integer", "rjust", None),
        ("iCode1", 28, 28, "string", "rjust", None),
        ("nameH", 30, 33, "string", "rjust", None),
        ("altLocH", 34, 34, "string", "rjust", None),
        ("chainH", 36, 36, "string", "rjust", None),
        ("resSeqH", 37, 41, "integer", "rjust", None),
        ("iCodeH", 42, 42, "string", "rjust", None),
        ("name2", 44, 47, "string", "rjust", None),
        ("altLoc2", 48, 48, "string", "rjust", None),
        ("resName2", 49, 51, "string", "rjust", None),
        ("chainID2", 53, 53, "string", "rjust", None),
        ("resSeq2", 54, 58, "integer", "rjust", None),
        ("iCode2", 59, 59, "string", "rjust", None),
        ("sym1", 60, 65, "string", "rjust", None),
        ("sym2", 67, 72, "string", "rjust", None),
    ]
class SLTBRG(PDBRecord):
    """SLTBRG records specify the salt bridges in the entry."""
    __slots__ = []
    _name = "SLTBRG"
    _field_list = [
        ("name1", 13, 16, "string", "rjust", None),
        ("altLoc1", 17, 17, "string", "rjust", None),
        ("resName1", 18, 20, "string", "rjust", None),
        ("chainID1", 22, 22, "string", "rjust", None),
        ("resSeq1", 23, 26, "integer", "rjust", None),
        ("iCode1", 27, 27, "string", "rjust", None),
        ("name2", 43, 46, "string", "rjust", None),
        ("altLoc2", 47, 47, "string", "rjust", None),
        ("resName2", 48, 50, "string", "rjust", None),
        ("chainID2", 52, 52, "string", "rjust", None),
        ("resSeq2", 53, 56, "integer", "rjust", None),
        ("iCode2", 57, 57, "string", "rjust", None),
        ("sym1", 60, 65, "string", "rjust", None),
        ("sym2", 67, 72, "string", "rjust", None),
    ]
class CISPEP(PDBRecord):
    """CISPEP records specify the prolines and other peptides found to be
    in the cis conformation.

    This record replaces the use of footnote records to list cis peptides.
    """
    __slots__ = []
    _name = "CISPEP"
    _field_list = [
        ("serial", 8, 10, "integer", "rjust", None),
        ("resName1", 12, 14, "string", "rjust", None),
        ("chainID1", 16, 16, "string", "rjust", None),
        ("seqNum1", 18, 21, "integer", "rjust", None),
        ("iCode1", 22, 22, "string", "rjust", None),
        ("resName2", 26, 28, "string", "rjust", None),
        ("chainID2", 30, 30, "string", "rjust", None),
        ("seqNum2", 32, 35, "integer", "rjust", None),
        ("iCode2", 36, 36, "string", "rjust", None),
        ("modNum", 44, 46, "integer", "rjust", None),
        ("measure", 54, 59, "float.2", "rjust", None),
    ]
## SECTION 7: Miscellaneous Features Section
class SITE(PDBRecord):
    """The SITE records supply the identification of groups comprising
    important sites in the macromolecule.
    """
    __slots__ = []
    # Padded to the six-character width used by the other short record
    # names in this module ("TITLE ", "DBREF ", "HELIX ", "SHEET ").
    _name = "SITE  "
    _field_list = [
        ("seqNum", 8, 10, "integer", "rjust", None),
        ("siteID", 12, 14, "string", "rjust", None),
        ("numRes", 16, 17, "integer", "rjust", None),
        ("resName1", 19, 21, "string", "rjust", None),
        ("chainID1", 23, 23, "string", "rjust", None),
        ("seq1", 24, 27, "integer", "rjust", None),
        ("iCode1", 28, 28, "string", "rjust", None),
        ("resName2", 30, 32, "string", "rjust", None),
        ("chainID2", 34, 34, "string", "rjust", None),
        ("seq2", 35, 38, "integer", "rjust", None),
        ("iCode2", 39, 39, "string", "rjust", None),
        ("resName3", 41, 43, "string", "rjust", None),
        ("chainID3", 45, 45, "string", "rjust", None),
        ("seq3", 46, 49, "integer", "rjust", None),
        ("iCode3", 50, 50, "string", "rjust", None),
        ("resName4", 52, 54, "string", "rjust", None),
        ("chainID4", 56, 56, "string", "rjust", None),
        ("seq4", 57, 60, "integer", "rjust", None),
        ("iCode4", 61, 61, "string", "rjust", None),
    ]
## SECTION 8: Crystallographic and Coordinate Transformation Section
class CRYSTn(PDBRecord):
    """Shared layout of the CRYSTn (n = 1, 2, 3) records.

    A CRYSTn record carries the unit cell parameters, the space group and
    the Z value.  For structures not determined by crystallographic means
    the record simply defines a unit cube.

    No ``_name`` is set here; the numbered subclasses provide it.
    """

    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        # cell edges
        ("a", 7, 15, "float.3", "rjust", None),
        ("b", 16, 24, "float.3", "rjust", None),
        ("c", 25, 33, "float.3", "rjust", None),
        # cell angles
        ("alpha", 34, 40, "float.3", "rjust", None),
        ("beta", 41, 47, "float.3", "rjust", None),
        ("gamma", 48, 54, "float.3", "rjust", None),
        # space group and Z value (the only left-justified fields)
        ("sgroup", 56, 66, "string", "ljust", None),
        ("z", 67, 70, "integer", "ljust", None),
    ]
class CRYST1(CRYSTn):
    """CRYSTn record with n = 1; the field layout comes from CRYSTn."""

    _name = "CRYST1"
    __slots__ = []
class CRYST2(CRYSTn):
    """CRYSTn record with n = 2; the field layout comes from CRYSTn."""

    _name = "CRYST2"
    __slots__ = []
class CRYST3(CRYSTn):
    """CRYSTn record with n = 3; the field layout comes from CRYSTn."""

    _name = "CRYST3"
    __slots__ = []
class ORIGXn(PDBRecord):
    """Shared layout of the ORIGXn (n = 1, 2 or 3) records.

    These records give the transformation from the orthogonal coordinates
    contained in the entry to the submitted coordinates.

    No ``_name`` is set here; the numbered subclasses provide it.
    """

    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        # the three o[n][*] elements
        ("o[n][1]", 11, 20, "float.6", "rjust", None),
        ("o[n][2]", 21, 30, "float.6", "rjust", None),
        ("o[n][3]", 31, 40, "float.6", "rjust", None),
        # the t[n] element
        ("t[n]", 46, 55, "float.5", "rjust", None),
    ]
class ORIGX1(ORIGXn):
    """ORIGXn record with n = 1; the field layout comes from ORIGXn."""

    _name = "ORIGX1"
    __slots__ = []
class ORIGX2(ORIGXn):
    """ORIGXn record with n = 2; the field layout comes from ORIGXn."""

    _name = "ORIGX2"
    __slots__ = []
class ORIGX3(ORIGXn):
    """ORIGXn record with n = 3; the field layout comes from ORIGXn."""

    _name = "ORIGX3"
    __slots__ = []
class SCALEn(PDBRecord):
    """Shared layout of the SCALEn (n = 1, 2 or 3) records.

    These records give the transformation from the orthogonal coordinates
    contained in the entry to fractional crystallographic coordinates.
    Non-standard coordinate systems should be explained in the remarks.

    No ``_name`` is set here; the numbered subclasses provide it.
    """

    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        # the three s[n][*] elements
        ("s[n][1]", 11, 20, "float.6", "rjust", None),
        ("s[n][2]", 21, 30, "float.6", "rjust", None),
        ("s[n][3]", 31, 40, "float.6", "rjust", None),
        # the u[n] element
        ("u[n]", 46, 55, "float.5", "rjust", None),
    ]
class SCALE1(SCALEn):
    """SCALEn record with n = 1; the field layout comes from SCALEn."""

    _name = "SCALE1"
    __slots__ = []
class SCALE2(SCALEn):
    """SCALEn record with n = 2; the field layout comes from SCALEn."""

    _name = "SCALE2"
    __slots__ = []
class SCALE3(SCALEn):
    """SCALEn record with n = 3; the field layout comes from SCALEn."""

    _name = "SCALE3"
    __slots__ = []
class MTRIXn(PDBRecord):
    """Shared layout of the MTRIXn (n = 1, 2 or 3) records.

    These records give transformations expressing non-crystallographic
    symmetry.

    No ``_name`` is set here; the numbered subclasses provide it.
    """

    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        ("serial", 8, 10, "integer", "rjust", None),
        # the three s[n][*] elements
        ("s[n][1]", 11, 20, "float.6", "rjust", None),
        ("s[n][2]", 21, 30, "float.6", "rjust", None),
        ("s[n][3]", 31, 40, "float.6", "rjust", None),
        # the v[n] element
        ("v[n]", 46, 55, "float.5", "rjust", None),
        # single-column integer field
        ("iGiven", 60, 60, "integer", "rjust", None),
    ]
class MTRIX1(MTRIXn):
    """MTRIXn record with n = 1; the field layout comes from MTRIXn."""

    _name = "MTRIX1"
    __slots__ = []
class MTRIX2(MTRIXn):
    """MTRIXn record with n = 2; the field layout comes from MTRIXn."""

    _name = "MTRIX2"
    __slots__ = []
class MTRIX3(MTRIXn):
    """MTRIXn record with n = 3; the field layout comes from MTRIXn."""

    _name = "MTRIX3"
    __slots__ = []
class TVECT(PDBRecord):
    """Translation-vector record.

    TVECT lines give the translation vector for infinite covalently
    connected structures.
    """

    _name = "TVECT "
    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        ("serial", 8, 10, "integer", "rjust", None),
        # vector components
        ("t[1]", 11, 20, "float.5", "rjust", None),
        ("t[2]", 21, 30, "float.5", "rjust", None),
        ("t[3]", 31, 40, "float.5", "rjust", None),
        # free text
        ("text", 41, 70, "string", "rjust", None),
    ]
## SECTION 9: Coordinate Selection
def ATOM_get_name(rec):
    """Return the atom name of ``rec`` laid out for the 4-column name field.

    Older applications ignore the element field of the ATOM record and rely
    on column alignment to tell e.g. calcium (CA) from an alpha-carbon (CA).
    The alignment rules applied here are:

    - two-character element: name starts in the first column, padded or
      cut to 4 characters;
    - otherwise an empty name becomes 4 blanks, a name starting with a
      digit is padded or cut to 4 characters from the first column, and any
      other name shorter than 4 characters is shifted right by one column;
    - names of 4 or more characters not covered above are returned as is.

    ``rec`` only needs a mapping-style ``get``; missing or empty "name" and
    "element" values are treated as "".
    """
    atom_name = rec.get("name") or ""
    symbol = rec.get("element") or ""

    # Two-letter element symbols occupy the first two columns of the field.
    if len(symbol) == 2:
        return atom_name.ljust(4)[:4]

    if not atom_name:
        return " " * 4

    # Names with a leading digit also start in the first column.
    if atom_name[0].isdigit():
        return atom_name.ljust(4)[:4]

    # Short names of one-letter elements start in the second column.
    if len(atom_name) < 4:
        return " " + atom_name.ljust(3)[:3]

    return atom_name
def ATOM_get_resName(rec):
    """Return the residue name of ``rec`` laid out for a 4-column field.

    CHANGED FROM ORIGINAL - added, to allow 4-character resName fields.

    Names shorter than 3 characters are right-justified to 3 columns and
    followed by one blank; longer names are left-justified and padded to
    4 columns (names over 4 characters are returned unchanged).  A missing
    or empty "resName" is treated as "".
    """
    res_name = rec.get("resName") or ""
    if len(res_name) >= 3:
        return res_name.ljust(4)
    return res_name.rjust(3) + ' '
class MODEL(PDBRecord):
    """Model-start record.

    MODEL gives the model serial number when a single coordinate entry
    holds several structures, as is often the case for structures
    determined by NMR.
    """

    _name = "MODEL "
    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        ("serial", 11, 14, "integer", "rjust", None),
    ]
class ATOM(PDBRecord):
    """The ATOM records present the atomic coordinates for standard
    residues.  They also present the occupancy and temperature factor for
    each atom.  Heterogen coordinates use the HETATM record type.  The
    element symbol is always present on each ATOM record; segment
    identifier and charge are optional.
    """

    __slots__ = []

    # Record names are looked up with a key padded to exactly six columns
    # (iter_pdb_records uses ``ln[:6].ljust(6)``), so the name must be
    # space-padded to six characters or the record is never recognised.
    _name = "ATOM  "

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    # Where the last element is a function it receives the record and
    # returns the text for that field (see ATOM_get_name / ATOM_get_resName).
    _field_list = [
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust.rstrip", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        #
        # CHANGED FROM ORIGINAL - resName field expanded to 4 chars,
        # including char 21.  The original entry was:
        #   ("resName", 18, 20, "string", "rjust", None),
        #
        # gv 4 positions for cyana 1.x GLU-, ASP- etc
        ("resName", 18, 21, "string", "ljust", ATOM_get_resName),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        ("x", 31, 38, "float.3", "rjust", None),
        ("y", 39, 46, "float.3", "rjust", None),
        ("z", 47, 54, "float.3", "rjust", None),
        ("occupancy", 55, 60, "float.2", "rjust", None),
        ("tempFactor", 61, 66, "float.2", "rjust", None),
        ("column6768", 67, 68, "string", "rjust", None),
        ("segID", 73, 76, "string", "rjust", None),  # Used by xplor-nih
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
class ANISOU(PDBRecord):
    """Anisotropic temperature factor record.

    Columns 7 - 27 and 73 - 80 of an ANISOU line are identical to those of
    the corresponding ATOM/HETATM record; the columns in between hold the
    anisotropic temperature factors.
    """

    _name = "ANISOU"
    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        # -- atom identification (columns 7 - 27)
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # -- the six u[i][j] values, stored as integers
        ("u[0][0]", 29, 35, "integer", "rjust", None),
        ("u[1][1]", 36, 42, "integer", "rjust", None),
        ("u[2][2]", 43, 49, "integer", "rjust", None),
        ("u[0][1]", 50, 56, "integer", "rjust", None),
        ("u[0][2]", 57, 63, "integer", "rjust", None),
        ("u[1][2]", 64, 70, "integer", "rjust", None),
        # -- trailing columns 73 - 80
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
class HETATM(ATOM):
    """Coordinate record for atoms within "non-standard" groups.

    HETATM is used for water molecules and for atoms presented in HET
    groups.  Only the record name differs from ATOM; the field layout is
    inherited.
    """

    _name = "HETATM"
    __slots__ = []
class SIGATM(PDBRecord):
    """The SIGATM records present the standard deviation of atomic
    parameters as they appear in ATOM and HETATM records.  Columns 7 - 27
    and 73 - 80 are identical to the corresponding ATOM/HETATM record.
    """

    # Declared for consistency with every other record class in this
    # module, all of which set an empty __slots__.
    __slots__ = []

    _name = "SIGATM"

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        # -- atom identification (columns 7 - 27)
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # -- standard deviations of the ATOM/HETATM parameters
        ("sigX", 31, 38, "float.3", "rjust", None),
        ("sigY", 39, 46, "float.3", "rjust", None),
        ("sigZ", 47, 54, "float.3", "rjust", None),
        ("sigOccupancy", 55, 60, "float.2", "rjust", None),
        ("sigTempFactor", 61, 66, "float.2", "rjust", None),
        # -- trailing columns 73 - 80
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
class SIGUIJ(PDBRecord):
    """Standard deviations of the anisotropic temperature factors.

    The values are scaled by a factor of 10**4 (Angstroms**2).  Columns
    7 - 27 and 73 - 80 are identical to those of the corresponding
    ATOM/HETATM record.
    """

    _name = "SIGUIJ"
    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        # -- atom identification (columns 7 - 27)
        ("serial", 7, 11, "integer", "rjust", None),
        ("name", 13, 16, "string", "ljust", ATOM_get_name),
        ("altLoc", 17, 17, "string", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
        # -- the six sig[i][j] values, stored as integers
        ("sig[1][1]", 29, 35, "integer", "rjust", None),
        ("sig[2][2]", 36, 42, "integer", "rjust", None),
        ("sig[3][3]", 43, 49, "integer", "rjust", None),
        ("sig[1][2]", 50, 56, "integer", "rjust", None),
        ("sig[1][3]", 57, 63, "integer", "rjust", None),
        ("sig[2][3]", 64, 70, "integer", "rjust", None),
        # -- trailing columns 73 - 80
        ("segID", 73, 76, "string", "rjust", None),
        ("element", 77, 78, "string", "rjust", None),
        ("charge", 79, 80, "string", "rjust", None),
    ]
class TER(PDBRecord):
    """The TER record indicates the end of a list of ATOM/HETATM records
    for a chain.
    """

    __slots__ = []

    # Record names are looked up with a key padded to exactly six columns
    # (iter_pdb_records uses ``ln[:6].ljust(6)``), so the name must be
    # space-padded to six characters or the record is never recognised.
    _name = "TER   "

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    _field_list = [
        ("serial", 7, 11, "integer", "rjust", None),
        ("resName", 18, 20, "string", "rjust", None),
        ("chainID", 22, 22, "string", "rjust", None),
        ("resSeq", 23, 26, "integer", "rjust", None),
        ("iCode", 27, 27, "string", "rjust", None),
    ]
class ENDMDL(PDBRecord):
    """Model-end record.

    ENDMDL lines are paired with MODEL lines to group the individual
    structures found in a coordinate entry.  The record has no fields.
    """

    _name = "ENDMDL"
    __slots__ = []

    _field_list = []
## SECTION 10: Connectivity Section
class CONECT(PDBRecord):
    """Connectivity record.

    CONECT lines specify connectivity between atoms for which coordinates
    are supplied, using the atom serial numbers found in the entry.  They
    are mandatory for HET groups (excluding water) and for other bonds not
    specified in the standard residue connectivity table which involve
    atoms in standard residues (see Appendix 4 for the list of standard
    residues).  These records are generated by the PDB.
    """

    _name = "CONECT"
    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    # Every field is a right-justified 5-column integer serial number.
    _field_list = [
        ("serial", 7, 11, "integer", "rjust", None),
        # bonded atoms
        ("serialBond1", 12, 16, "integer", "rjust", None),
        ("serialBond2", 17, 21, "integer", "rjust", None),
        ("serialBond3", 22, 26, "integer", "rjust", None),
        ("serialBond4", 27, 31, "integer", "rjust", None),
        # first group of hydrogen-bond / salt-bridge partners
        ("serialHydBond1", 32, 36, "integer", "rjust", None),
        ("serialHydBond2", 37, 41, "integer", "rjust", None),
        ("serialSaltBond1", 42, 46, "integer", "rjust", None),
        # second group of hydrogen-bond / salt-bridge partners
        ("serialHydBond3", 47, 51, "integer", "rjust", None),
        ("serialHydBond4", 52, 56, "integer", "rjust", None),
        ("serialSaltBond2", 57, 61, "integer", "rjust", None),
    ]
## SECTION 11: Bookkeeping Section
class MASTER(PDBRecord):
    """Bookkeeping control record.

    MASTER lists the number of lines in the coordinate entry or file for
    selected record types.
    """

    _name = "MASTER"
    __slots__ = []

    # Tuple layout, as consumed by PDBRecord (defined elsewhere -- confirm):
    # (field name, first column, last column, type, justification, getter).
    # Every field is a right-justified 5-column integer.
    _field_list = [
        ("numRemark", 11, 15, "integer", "rjust", None),
        ("O", 16, 20, "integer", "rjust", None),
        ("numHet", 21, 25, "integer", "rjust", None),
        ("numHelix", 26, 30, "integer", "rjust", None),
        ("numSheet", 31, 35, "integer", "rjust", None),
        ("numTurn", 36, 40, "integer", "rjust", None),
        ("numSite", 41, 45, "integer", "rjust", None),
        ("numXForm", 46, 50, "integer", "rjust", None),
        ("numCoord", 51, 55, "integer", "rjust", None),
        ("numTer", 56, 60, "integer", "rjust", None),
        ("numConect", 61, 65, "integer", "rjust", None),
        ("numSeq", 66, 70, "integer", "rjust", None),
    ]
class END(PDBRecord):
    """The END record marks the end of the PDB file.  It has no fields."""

    __slots__ = []

    # Record names are looked up with a key padded to exactly six columns
    # (iter_pdb_records uses ``ln[:6].ljust(6)``), so the name must be
    # space-padded to six characters or the record is never recognised.
    _name = "END   "

    _field_list = []
## PDB Record Name -> Record Class Map
## iter_pdb_records skips every line whose record name is not a key of this
## map, so each record class that should be readable must be listed here.
PDBRecordMap = {
    HEADER._name : HEADER,
    OBSLTE._name : OBSLTE,
    TITLE._name  : TITLE,
    CAVEAT._name : CAVEAT,
    COMPND._name : COMPND,
    SOURCE._name : SOURCE,
    KEYWDS._name : KEYWDS,
    EXPDTA._name : EXPDTA,
    AUTHOR._name : AUTHOR,
    REVDAT._name : REVDAT,
    SPRSDE._name : SPRSDE,
    JRNL._name   : JRNL,
    REMARK._name : REMARK,
    DBREF._name  : DBREF,
    SEQADV._name : SEQADV,
    SEQRES._name : SEQRES,
    MODRES._name : MODRES,
    HET._name    : HET,
    HETNAM._name : HETNAM,
    HETSYN._name : HETSYN,
    FORMUL._name : FORMUL,
    HELIX._name  : HELIX,
    SHEET._name  : SHEET,
    TURN._name   : TURN,
    SSBOND._name : SSBOND,
    LINK._name   : LINK,
    HYDBND._name : HYDBND,
    SLTBRG._name : SLTBRG,
    CISPEP._name : CISPEP,
    SITE._name   : SITE,
    CRYST1._name : CRYST1,
    CRYST2._name : CRYST2,
    CRYST3._name : CRYST3,
    ORIGX1._name : ORIGX1,
    ORIGX2._name : ORIGX2,
    ORIGX3._name : ORIGX3,
    SCALE1._name : SCALE1,
    SCALE2._name : SCALE2,
    SCALE3._name : SCALE3,
    MTRIX1._name : MTRIX1,
    MTRIX2._name : MTRIX2,
    MTRIX3._name : MTRIX3,
    # TVECT has a record class and an entry in PDBRecordOrder below; without
    # this mapping its lines would be dropped silently when reading.
    TVECT._name  : TVECT,
    MODEL._name  : MODEL,
    ATOM._name   : ATOM,
    ANISOU._name : ANISOU,
    HETATM._name : HETATM,
    SIGATM._name : SIGATM,
    SIGUIJ._name : SIGUIJ,
    TER._name    : TER,
    ENDMDL._name : ENDMDL,
    CONECT._name : CONECT,
    MASTER._name : MASTER,
    END._name    : END,
}

## this list defines the order the records have to appear in the PDB
## file; there is also an indicator if the record is optional or mandatory
PDBRecordOrder = [
    (HEADER._name, HEADER, "mandatory"),
    (OBSLTE._name, OBSLTE, "optional"),
    (TITLE._name, TITLE, "mandatory"),
    (CAVEAT._name, CAVEAT, "optional"),
    (COMPND._name, COMPND, "mandatory"),
    (SOURCE._name, SOURCE, "mandatory"),
    (KEYWDS._name, KEYWDS, "mandatory"),
    (EXPDTA._name, EXPDTA, "mandatory"),
    (AUTHOR._name, AUTHOR, "mandatory"),
    (REVDAT._name, REVDAT, "mandatory"),
    (SPRSDE._name, SPRSDE, "optional"),
    (JRNL._name, JRNL, "optional"),
    (REMARK._name, REMARK, "optional"),
    (DBREF._name, DBREF, "optional"),
    (SEQADV._name, SEQADV, "optional"),
    (SEQRES._name, SEQRES, "optional"),
    (MODRES._name, MODRES, "optional"),
    (HET._name, HET, "optional"),
    (HETNAM._name, HETNAM, "optional"),
    (HETSYN._name, HETSYN, "optional"),
    (FORMUL._name, FORMUL, "optional"),
    (HELIX._name, HELIX, "optional"),
    (SHEET._name, SHEET, "optional"),
    (TURN._name, TURN, "optional"),
    (SSBOND._name, SSBOND, "optional"),
    (LINK._name, LINK, "optional"),
    (HYDBND._name, HYDBND, "optional"),
    (SLTBRG._name, SLTBRG, "optional"),
    (CISPEP._name, CISPEP, "optional"),
    (SITE._name, SITE, "optional"),
    (CRYST1._name, CRYST1, "mandatory"),
    (ORIGX1._name, ORIGX1, "mandatory"),
    (ORIGX2._name, ORIGX2, "mandatory"),
    (ORIGX3._name, ORIGX3, "mandatory"),
    (SCALE1._name, SCALE1, "mandatory"),
    (SCALE2._name, SCALE2, "mandatory"),
    (SCALE3._name, SCALE3, "mandatory"),
    (MTRIX1._name, MTRIX1, "optional"),
    (MTRIX2._name, MTRIX2, "optional"),
    (MTRIX3._name, MTRIX3, "optional"),
    (TVECT._name, TVECT, "optional"),
    (MODEL._name, MODEL, "optional"),
    (ATOM._name, ATOM, "optional"),
    (SIGATM._name, SIGATM, "optional"),
    (ANISOU._name, ANISOU, "optional"),
    (SIGUIJ._name, SIGUIJ, "optional"),
    (TER._name, TER, "optional"),
    (HETATM._name, HETATM, "optional"),
    (ENDMDL._name, ENDMDL, "optional"),
    (CONECT._name, CONECT, "optional"),
    (MASTER._name, MASTER, "mandatory"),
    (END._name, END, "mandatory"),
]

## END PDB RECORD DEFINITIONS
###############################################################################
def iter_pdb_records(iterable):
    """Yield one PDB record object per recognised line of ``iterable``.

    Each line is stripped of trailing whitespace and its first six columns
    (space-padded to six characters) are used as the key into
    PDBRecordMap.  Lines whose key is not in the map are skipped; for the
    others a fresh instance of the mapped class is created, fed the line
    through its ``read`` method and yielded.
    """
    for raw_line in iter(iterable):
        line = raw_line.rstrip()

        ## pick the record class from the 6-column record name
        record_class = PDBRecordMap.get(line[:6].ljust(6))
        if record_class is None:
            continue

        ## create/parse the record
        record = record_class()
        record.read(line)
        yield record
class PDBFile(list):
    """Class for managing a PDB file.

    This class inherits from a Python list object, and contains a list of
    PDBRecord objects.  Load, save, edit, and create PDB files with this
    class.
    """

    # NBNB Modified __init__that autoloaded file is NOT implemented

    def __setitem__(self, i, rec):
        """Assign ``rec`` at index (or slice) ``i``; only PDBRecord objects
        are accepted.

        This is the item-assignment counterpart of append/insert.  It used
        to be spelled ``__setattr__``, which is never called for
        ``pdbfile[i] = rec`` and therefore checked nothing.
        """
        if isinstance(i, slice):
            # Materialise first so a one-shot iterable is not consumed by
            # the validation pass.
            rec = list(rec)
            for item in rec:
                assert isinstance(item, PDBRecord)
        else:
            assert isinstance(rec, PDBRecord)
        list.__setitem__(self, i, rec)

    def append(self, rec):
        """Append the PDBRecord ``rec`` to the end of the file."""
        assert isinstance(rec, PDBRecord)
        list.append(self, rec)

    def insert(self, i, rec):
        """Insert the PDBRecord ``rec`` before position ``i``."""
        assert isinstance(rec, PDBRecord)
        list.insert(self, i, rec)

    def load_file(self, fil):
        """Loads a PDB file from fil and appends its records to self.

        ``fil`` is either a path string, which is opened for reading and
        closed again here, or an already open iterable of lines, which is
        left open for the caller.
        """
        opened_here = isinstance(fil, str)
        fileobj = open(fil, "r") if opened_here else fil
        try:
            for pdb_record in iter_pdb_records(fileobj):
                self.append(pdb_record)
        finally:
            # Only close what this method opened.
            if opened_here:
                fileobj.close()

    def save_file(self, fil):
        """Saves the PDBFile object in PDB file format to fil.

        ``fil`` is either a path string, which is opened for writing and
        closed again here, or a writable file object, which is flushed but
        left open for the caller.  Each record is written as ``str(record)``
        followed by a newline.
        """
        opened_here = isinstance(fil, str)
        fileobj = open(fil, "w") if opened_here else fil
        try:
            for pdb_record in self:
                fileobj.write(str(pdb_record))
                fileobj.write("\n")
            # Flush the object actually written to; ``fil`` may be a path
            # string, which has no flush().
            fileobj.flush()
        finally:
            if opened_here:
                fileobj.close()
class RecordProcessor(object):
    """Dispatches PDB records to handler methods named after the record class.

    For a record of class ``XXX`` the processor calls ``process_XXX`` if
    this object defines it, otherwise ``process_default``.  If the record
    additionally has a ``process`` method, that method's result is passed
    to ``preprocess_XXX`` if defined, otherwise to ``preprocess_default``.
    Subclasses supply the handlers they are interested in.
    """

    def __is_sucsessive_record(self, prev_rec, rec):
        """Returns True if the current record looks like it is the
        successive PDB record in a list of records.  Fields like
        continuation and serNum are checked, as well as record name.
        """
        ## check record names
        if rec._name != prev_rec._name:
            return False

        ## NOTE: perhaps record type specific handlers could be put
        ##       here to catch common mistakes which are found in PDB
        ##       files

        ## check for "continuation" field continuous records; a missing
        ## field counts as 1
        if "continuation" in prev_rec or "continuation" in rec:
            prev_continuation = prev_rec.get("continuation", 1)
            continuation = rec.get("continuation", 1)
            return (prev_continuation + 1) == continuation

        ## check for "serNum" continuations; a missing field counts as 0
        if "serNum" in prev_rec or "serNum" in rec:
            prev_serial = prev_rec.get("serNum", 0)
            serial = rec.get("serNum", 0)
            return (prev_serial + 1) == serial

        return False

    def __dispatch(self, rec, payload):
        """Run the handlers selected by the class name of ``rec`` on
        ``payload`` (the record itself, or the list of related records).
        """
        name = rec.__class__.__name__
        raw_handler_name = "process_%s" % (name)
        result_handler_name = "preprocess_%s" % (name)

        ## handler receiving the raw record(s)
        if hasattr(self, raw_handler_name):
            getattr(self, raw_handler_name)(payload)
        else:
            self.process_default(payload)

        ## handler receiving the output of the record's own process()
        if hasattr(rec, "process"):
            presult = getattr(rec, "process")(payload)
            if hasattr(self, result_handler_name):
                getattr(self, result_handler_name)(presult)
            else:
                self.preprocess_default(presult)

    def __call_processor_multi(self, record_list):
        """Invoke callbacks expecting a list of related PDB records.  The
        handlers are chosen from the class of the first record.
        """
        self.__dispatch(record_list[0], record_list)

    def __call_processor(self, rec):
        """Invoke callbacks on self for the single record rec."""
        self.__dispatch(rec, rec)

    def process_pdb_records(self, pdb_rec_iter, filter_func = None):
        """Iterates the PDB records in pdb_rec_iter and hands them to the
        handler methods of this object.

        - Records having a ``_multi_record`` attribute are collected while
          each following record is judged successive (same record name and
          consecutive continuation/serNum); the collected list is then
          dispatched as one unit.
        - ATOM instances go straight to ``process_ATOM``.
        - Every other record is dispatched on its own.

        If ``filter_func`` is given, records for which it returns exactly
        False are skipped.  The filter is only consulted for records that
        do not continue a pending multi-record list.
        """
        record_list = None
        prev_rec = None

        for rec in pdb_rec_iter:
            if prev_rec is not None:
                if self.__is_sucsessive_record(prev_rec, rec):
                    record_list.append(rec)
                    prev_rec = rec
                    continue

                ## the pending group ended with the previous record
                self.__call_processor_multi(record_list)
                record_list = None
                prev_rec = None

            if filter_func and filter_func(rec) is False:
                continue

            if isinstance(rec, ATOM):
                self.process_ATOM(rec)
            elif hasattr(rec, "_multi_record"):
                record_list = [rec]
                prev_rec = rec
            else:
                self.__call_processor(rec)

        ## Flush a group still pending at the end of input.  Test against
        ## None rather than truthiness: records are used like mappings
        ## (``in`` / ``.get``), so a record without fields could be falsy
        ## and its group would be lost.
        if prev_rec is not None:
            self.__call_processor_multi(record_list)

    def process_default(self, rec):
        """Fallback for records (or record lists) without a specific
        process_<ClassName> handler; does nothing.
        """
        pass

    def preprocess_default(self, rec):
        """Fallback for processed results without a specific
        preprocess_<ClassName> handler; does nothing.
        """
        pass

    def process_ATOM(self, rec):
        """Handler for ATOM instances; forwards to process_default."""
        self.process_default(rec)
### <testing>
def test_module():
    """Read the PDB file named by the first command-line argument and
    write it back out to standard output.

    Prints a usage line and raises SystemExit when no argument is given.
    """
    import sys

    if len(sys.argv) < 2:
        print("usage: PDB.py <PDB file path>")
        raise SystemExit

    pdb_path = sys.argv[1]

    pdb_file = PDBFile()
    pdb_file.load_file(pdb_path)
    pdb_file.save_file(sys.stdout)


if __name__ == "__main__":
    test_module()
### </testing>