Source code for ample.util.pdb_model

"""
Created on 7 Aug 2013

@author: jmht

Classes for holding data from PDB files
"""

import copy
import os
import types


[docs]class OriginInfo(object): def __init__(self, spaceGroupLabel=None): # These are reset on each call self._spaceGroup = None self._redundantSet = None self._nonRedundantSet = None self._floating = False self._setData() if spaceGroupLabel: self._getAlternateOrigins(spaceGroupLabel) return def _setData(self): # Non-redundant origins from: # http://www.ccp4.ac.uk/dist/html/alternate_origins.html # Organised in tuples, with True for the second item if the origin is one of the non-redundant set self._origins = { # TRICLINIC '1aP': [(['x', 'y', 'z'], True)], # MONOCLINIC '2mP': [([0.0, 'y', 0.0], True), ([0.0, 'y', 0.5], True), ([0.5, 'y', 0.0], True), ([0.5, 'y', 0.5], True)], '2mC': [ ([0.0, 'y', 0.0], True), ([0.0, 'y', 0.5], True), ([0.5, 'y', 0.0], False), ([0.5, 'y', 0.5], False), ], '2mA': [ ([0.0, 'y', 0.0], True), ([0.0, 'y', 0.5], False), ([0.5, 'y', 0.0], True), ([0.5, 'y', 0.5], False), ], '2mI': [ ([0.0, 'y', 0.0], True), ([0.0, 'y', 0.5], True), ([0.5, 'y', 0.0], False), ([0.5, 'y', 0.5], False), ], # ORTHORHOMBIC '222oP': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True), ([0.0, 0.5, 0.0], True), ([0.0, 0.5, 0.5], True), ([0.5, 0.0, 0.0], True), ([0.5, 0.0, 0.5], True), ([0.5, 0.5, 0.0], True), ([0.5, 0.5, 0.5], True), ], '222oC': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.5, 0.0], False), ([0.0, 0.5, 0.5], False), ([0.0, 0.0, 0.5], True), ([0.5, 0.0, 0.0], True), ([0.5, 0.0, 0.5], True), ([0.5, 0.5, 0.0], False), ([0.5, 0.5, 0.5], False), ], '222oF': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], False), ([0.0, 0.5, 0.0], False), ([0.0, 0.5, 0.5], False), ([0.25, 0.25, 0.25], True), ([0.25, 0.25, 0.75], False), ([0.25, 0.75, 0.25], False), ([0.25, 0.75, 0.75], False), ([0.5, 0.0, 0.0], False), ([0.5, 0.0, 0.5], False), ([0.5, 0.5, 0.0], False), ([0.5, 0.5, 0.5], True), ([0.75, 0.25, 0.25], False), ([0.75, 0.25, 0.75], False), ([0.75, 0.75, 0.25], False), ([0.75, 0.75, 0.75], True), ], '222oI': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True), ([0.0, 0.5, 0.0], True), ([0.0, 0.5, 0.5], False), ([0.5, 0.0, 0.0], True), ([0.5, 0.0, 0.5], False), ([0.5, 0.5, 0.0], False), ([0.5, 0.5, 0.5], False), ], # TETRAGONAL '4tP': [([0.0, 0.0, 'z'], True), ([0.5, 0.5, 'z'], True)], '4tI': [([0.0, 0.0, 'z'], True), ([0.5, 0.5, 'z'], False)], '422tP': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True), ([0.5, 0.5, 0.0], True), ([0.5, 0.5, 0.5], True), ], '422tI': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True), ([0.5, 0.5, 0.0], False), ([0.5, 0.5, 0.5], False), ], # TRIGONAL '3hP': [ ([0.0, 0.0, 'z'], True), ([float(1 / 3), float(2 / 3), 'z'], True), ([float(2 / 3), float(1 / 3), 'z'], True), ], '3hR_1': [ ([0.0, 0.0, 'z'], True), ([float(1 / 3), float(2 / 3), 'z'], False), ([float(2 / 3), float(1 / 3), 'z'], False), ], '3hR_2': [(['x', 'x', 'x'], True)], '312hP': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True), ([float(1 / 3), float(2 / 3), 0.0], True), ([float(1 / 3), float(2 / 3), 0.5], True), ([float(2 / 3), float(1 / 3), 0.0], True), ([float(2 / 3), float(1 / 3), 0.5], True), ], '321hP': [([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True)], '32hR_1': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True), ([float(1 / 3), float(2 / 3), float(1 / 6)], False), ([float(1 / 3), float(2 / 3), float(2 / 3)], False), ([float(2 / 3), float(1 / 3), float(1 / 3)], False), ([float(2 / 3), float(1 / 3), float(5 / 6)], False), ], '32hR_2': [([0.0, 0.0, 0.0], True), ([0.5, 0.5, 0.5], True)], # HEXAGONAL '6hP': [([0.0, 0.0, 'z'], True)], '622hP': [([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], True)], # CUBIC '23cP': [([0.0, 0.0, 0.0], True), ([0.5, 0.5, 0.5], True)], '23cF': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], False), ([0.0, 0.5, 0.0], False), ([0.0, 0.5, 0.5], False), ([0.25, 0.25, 0.25], True), ([0.25, 0.25, 0.75], False), ([0.25, 0.75, 0.25], False), ([0.25, 0.75, 0.75], False), ([0.5, 0.0, 0.0], False), ([0.5, 0.0, 0.5], False), ([0.5, 0.5, 0.0], False), ([0.5, 0.5, 0.5], True), ([0.75, 0.25, 0.25], False), ([0.75, 0.25, 0.75], False), ([0.75, 0.75, 0.25], False), ([0.75, 0.75, 0.75], True), ], '23cI': [([0.0, 0.0, 0.0], True), ([0.5, 0.5, 0.5], False)], '432cP': [([0.0, 0.0, 0.0], True), ([0.5, 0.5, 0.5], True)], '432cF': [ ([0.0, 0.0, 0.0], True), ([0.0, 0.0, 0.5], False), ([0.0, 0.5, 0.0], False), ([0.0, 0.5, 0.5], False), ([0.5, 0.0, 0.0], False), ([0.5, 0.0, 0.5], False), ([0.5, 0.5, 0.0], False), ([0.5, 0.5, 0.5], True), ], '432cI': [([0.0, 0.0, 0.0], True), ([0.5, 0.5, 0.5], False)], } self._spacegroup2origin = { # Primitive 'P1': self._origins['1aP'], # MONOCLINIC 'P2': self._origins['2mP'], 'P21': self._origins['2mP'], 'C2': self._origins['2mC'], 'A2': self._origins['2mA'], 'I2': self._origins['2mI'], # ORTHORHOMBIC 'P 2 2 2': self._origins['222oP'], 'P 21 2 2': self._origins['222oP'], 'P 2 21 2': self._origins['222oP'], 'P 2 2 21': self._origins['222oP'], 'P 2 21 21': self._origins['222oP'], 'P 21 2 21': self._origins['222oP'], 'P 21 21 2': self._origins['222oP'], 'P 21 21 21': self._origins['222oP'], 'C 2 2 21': self._origins['222oC'], 'C 2 2 2': self._origins['222oC'], 'F 2 2 2': self._origins['222oF'], 'I 2 2 2': self._origins['222oI'], 'I 21 21 21': self._origins['222oI'], # TETRAGONAL 'P 4': self._origins['4tP'], 'P 41': self._origins['4tP'], 'P 42': self._origins['4tP'], 'P 43': self._origins['4tP'], 'I 4': self._origins['4tI'], 'I 41': self._origins['4tI'], 'P 4 2 2': self._origins['422tP'], 'P 4 21 2': self._origins['422tP'], 'P 41 2 2': self._origins['422tP'], 'P 41 21 2': self._origins['422tP'], 'P 42 2 2': self._origins['422tP'], 'P 42 21 2': self._origins['422tP'], 'P 43 2 2': self._origins['422tP'], 'P 43 21 2': self._origins['422tP'], 'I 4 2 2': self._origins['422tI'], 'I 41 2 2': self._origins['422tI'], # TRIGONAL 'P 3': self._origins['3hP'], 'P 31': self._origins['3hP'], 'P 32': self._origins['3hP'], 'H 3': self._origins['3hR_1'], 'R 3': self._origins['3hR_2'], 'P 3 1 2': self._origins['312hP'], 'P 31 1 2': self._origins['312hP'], 'P 32 1 2': self._origins['312hP'], 'P 3 2 1': self._origins['321hP'], 'P 31 2 1': self._origins['321hP'], 'P 32 2 1': self._origins['321hP'], 'H 3 2': self._origins['32hR_1'], 'R 3 2': self._origins['32hR_2'], # HEXAGONAL 'P 6': self._origins['6hP'], 'P 61': self._origins['6hP'], 'P 65': self._origins['6hP'], 'P 62': self._origins['6hP'], 'P 64': self._origins['6hP'], 'P 63': self._origins['6hP'], 'P 6 2 2': self._origins['622hP'], 'P 61 2 2': self._origins['622hP'], 'P 65 2 2': self._origins['622hP'], 'P 62 2 2': self._origins['622hP'], 'P 64 2 2': self._origins['622hP'], 'P 63 2 2': self._origins['622hP'], # CUBIC 'P 2 3': self._origins['23cP'], 'P 21 3': self._origins['23cP'], 'F 2 3': self._origins['23cF'], 'I 2 3': self._origins['23cI'], 'I 21 3': self._origins['23cI'], 'P 4 3 2': self._origins['432cP'], 'P 42 3 2': self._origins['432cP'], 'P 43 3 2': self._origins['432cP'], 'P 41 3 2': self._origins['432cP'], 'F 4 3 2': self._origins['432cF'], 'F 41 3 2': self._origins['432cF'], 'I 4 3 2': self._origins['432cI'], 'I 41 3 2': self._origins['432cI'], } return
[docs] def spaceGroup(self): return self._spaceGroup
[docs] def isFloating(self, spaceGroupLabel=None): if spaceGroupLabel is not None and self.spaceGroup() != spaceGroupLabel: self._getAlternateOrigins(spaceGroupLabel) return self._floating
[docs] def redundantAlternateOrigins(self, spaceGroupLabel=None): if spaceGroupLabel is not None and self.spaceGroup() != spaceGroupLabel: self._getAlternateOrigins(spaceGroupLabel) return copy.copy(self._redundantSet)
[docs] def nonRedundantAlternateOrigins(self, spaceGroupLabel=None): if spaceGroupLabel is not None and self.spaceGroup() != spaceGroupLabel: self._getAlternateOrigins(spaceGroupLabel) return copy.copy(self._nonRedundantSet)
def _getAlternateOrigins(self, spaceGroupLabel): """Given a space group label, return a list of (non-redundant) alternate origins as a list of float triples""" label = spaceGroupLabel if label not in self._spacegroup2origin: label = self._altlabel(label) self._spaceGroup = label originl = self._spacegroup2origin[label] # We build up a list of the full set (redundant) and also the non-redundant that are # the only ones we need to loop through when we are checking self._nonRedundantSet = [] self._redundantSet = [] self._floating = False for o in originl: if o[1]: self._nonRedundantSet.append(o[0]) self._redundantSet.append(o[0]) self._floating = any(map(lambda o: 'x' in o or 'y' in o or 'z' in o, self._redundantSet)) return # symoplib = "/Applications/ccp4-6.4.0/lib/data/symop.lib" def _altlabel(self, spaceGroup, symoplib=None): if not symoplib: symoplib = os.path.join(os.environ['CCP4'], "lib/data/symop.lib") for line in open(symoplib, 'r'): if "'" in line: # Assume first single-quote enclosed string is the one we want i = line.index("'") j = line.index("'", i + 1) sg = line[i + 1 : j] if spaceGroup == sg: return line.split()[3] raise KeyError(spaceGroup)
[docs]class CrystalInfo(object): def __init__(self, line=None): """foo""" self._reset() if line: self.fromLine(line) return def _reset(self): self.a = None self.b = None self.c = None self.alpha = None self.beta = None self.gamma = None self.spaceGroup = None self.z = None return
[docs] def fromLine(self, line): self.a = float(line[6:15].strip()) self.b = float(line[15:24].strip()) self.c = float(line[24:33].strip()) self.alpha = float(line[33:40]) self.beta = float(line[40:47]) self.gamma = float(line[47:54]) self.spaceGroup = line[55:66].strip() try: self.z = int(line[66:70]) except ValueError: # Z-info could be missing (shelxe output pdb) pass return
[docs]class PdbInfo(object): """A class to hold information extracted from a PDB file""" def __init__(self): self.models = [] # List of PdbModel objects self.pdbCode = None self.title = None # First line of the title self.resolution = None # http://www.wwpdb.org/documentation/format33/remarks1.html#REMARK%20280 self.solventContent = None self.matthewsCoefficient = None self.crystalInfo = None return
[docs] def getSequence(self): """Return the sequence for the first model/chain""" assert len(self.models) >= 1, "Need at least one model!" assert len(self.models[0].chains) >= 1, "Need at least one chain!" return self.sequences[0]
[docs] def numAtoms(self, modelIdx=0): """Return the total number of ATOM atoms in the model""" assert len(self.models) >= 1, "Need at least one model!" assert len(self.models[modelIdx].chains) >= 1, "Need at least one chain!" natoms = 0 for chainAtoms in self.models[modelIdx].atoms: natoms += len(chainAtoms) return natoms
[docs] def numChains(self, modelIdx=0): """Return the total number of chains in the model""" assert len(self.models) >= 1, "Need at least one model!" assert len(self.models[modelIdx].chains) >= 1, "Need at least one chain!" return len(self.models[modelIdx].chains)
[docs] def numCalpha(self, modelIdx=0): """Return the total number of CA ATOM atoms in the model""" assert len(self.models) >= 1, "Need at least one model!" assert len(self.models[modelIdx].chains) >= 1, "Need at least one chain!" ncalpha = 0 for chainAtoms in self.models[modelIdx].atoms: for atom in chainAtoms: if atom.name.strip() == 'CA': ncalpha += 1 return ncalpha
[docs]class PdbModel(object): """A class to hold information on a single model in a PDB file""" def __init__(self): self.pdb = None self.serial = None self.chains = [] # Ordered list of chain IDs self.atoms = [] # List of atoms in each chain self.resSeqs = [] # Ordered list of list of resSeqs for each chain - matches order in self.chains self.sequences = [] # Ordered list of list of sequences for each chain - matches order in self.chains self.caMask = [] # Ordered list of list of booleans of residues with no CA atoms - matches order in self.chains self.bbMask = ( [] ) # Ordered list of list of boleans of residues with no backbone atoms - matches order in self.chains return
[docs]class PdbAtom(object): """ COLUMNS DATA TYPE FIELD DEFINITION ------------------------------------------------------------------------------------- 1 - 6 Record name "ATOM " 7 - 11 Integer serial Atom serial number. 13 - 16 Atom name Atom name. 17 Character altLoc Alternate location indicator. 18 - 20 Residue name resName Residue name. 22 Character chainID Chain identifier. 23 - 26 Integer resSeq Residue sequence number. 27 AChar iCode Code for insertion of residues. 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms. 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms. 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms. 55 - 60 Real(6.2) occupancy Occupancy. 61 - 66 Real(6.2) tempFactor Temperature factor. 73 - 76 LString(4) segID Segment identifier, left-justified. 77 - 78 LString(2) element Element symbol, right-justified. 79 - 80 LString(2) charge Charge on the atom. """ def __init__(self, line=None): """Set up attributes""" self._setAtomType() if line: self.fromLine(line) return def _setAtomType(self): """This gets overridden in HETATM - otherwise everything the same""" self._atomType = "ATOM " return def _reset(self): self.line = None # the line we were created from self.serial = None self.name = None self.altLoc = None self.resName = None self.chainID = None self.resSeq = None self.iCode = None self.x = None self.y = None self.z = None self.occupancy = None self.tempFactor = None self.segID = None self.element = None self.charge = None return def _readCharge(self, line): s = line[78:80] minus = '-' signs = ['+', minus] mult = +1 if s[0] in signs: sign = s[0] val = s[1] elif s[1] in signs: sign = s[1] val = s[0] else: raise RuntimeError("Error getting charge sign ({0}) from line: {1}".format(line[78:80], line)) if sign == minus: mult = -1 try: return int(val) * mult except: raise RuntimeError("Error getting charge ({0}) from line: {1}".format(line[78:80], line)) def _sanityCheck(self, line): assert line[0:6] == self._atomType, "Line did not begin with an {0} record!: {1}".format(self._atomType, line) assert len(line) >= 54, "Line length was: {0}\n{1}".format(len(line), line)
[docs] def fromLine(self, line): """Initialise from the line from a PDB""" self._sanityCheck(line) self._reset() self.line = line self.serial = int(line[6:11]) self.name = line[12:16] # Use for all so None means an empty field if line[16].strip(): self.altLoc = line[16] self.resName = line[17:20].strip() if line[21].strip(): self.chainID = line[21] if line[22:26].strip(): self.resSeq = int(line[22:26]) if line[26].strip(): self.iCode = line[26] self.x = float(line[30:38]) self.y = float(line[38:46]) self.z = float(line[46:54]) if len(line) >= 60 and line[54:60].strip(): self.occupancy = float(line[54:60]) if len(line) >= 66 and line[60:66].strip(): self.tempFactor = float(line[60:66]) if len(line) >= 76 and line[72:76].strip(): self.segID = line[72:76].strip() if len(line) >= 77 and line[76:78].strip(): self.element = line[76:78].strip() if len(line) >= 80 and line[78:80].strip(): self.charge = self._readCharge(line) return
[docs] def toLine(self): """Create a line suitable for printing to a PDB file""" s = self._atomType # 1-6 s += "{0:5d}".format(self.serial) # 7-11 s += " " # 12 blank if len(self.name) != 4: raise RuntimeError("Name must be 4 characters long!") s += "{0:4}".format(self.name) # 13-16 if not self.altLoc: # 17 s += " " else: s += "{0:1}".format(self.altLoc) s += "{0:3}".format(self.resName) # 18-20 s += " " # 21 blank if not self.chainID: # 22 s += " " else: s += "{0:1}".format(self.chainID) s += "{0:4}".format(self.resSeq) # 23-26 if not self.iCode: # 27 s += " " else: s += "{0:1}".format(self.iCode) s += " " # 28-30 blank s += "{0:8.3F}".format(self.x) # 31-38 s += "{0:8.3F}".format(self.y) # 39-46 s += "{0:8.3F}".format(self.z) # 47-54 if not self.occupancy: # 55-60 s += " " else: s += "{0:6.2F}".format(self.occupancy) if not self.tempFactor: # 61-66 s += " " else: s += "{0:6.2F}".format(self.tempFactor) s += " " # 67-72 blank if not self.segID: # 73-76 s += " " else: s += "{0:>4}".format(self.segID) if not self.element: # 77-78 s += " " else: s += "{0:>2}".format(self.element) if not self.charge: # 79-80 s += " " else: s += "{0:2d}".format(self.charge) return s
[docs] def fromHetatm(self, hetatm): """Create Atom from Hetatm""" self.serial = hetatm.serial self.name = hetatm.name self.altLoc = hetatm.altLoc self.resName = hetatm.resName self.chainID = hetatm.chainID self.resSeq = hetatm.resSeq self.iCode = hetatm.iCode self.x = hetatm.x self.y = hetatm.y self.z = hetatm.z self.occupancy = hetatm.occupancy self.tempFactor = hetatm.tempFactor self.segID = hetatm.segID self.element = hetatm.element self.charge = hetatm.charge return self
def __str__(self): """List the data attributes of this object""" me = {} for slot in dir(self): attr = getattr(self, slot) if not slot.startswith("__") and not ( isinstance(attr, types.MethodType) or isinstance(attr, types.FunctionType) ): me[slot] = attr return "{0} : {1}".format(self.__repr__(), str(me))
[docs]class PdbHetatm(PdbAtom): """Identical to PdbAtom but just with a different _atomType""" def _setAtomType(self): self._atomType = "HETATM" return
[docs]class PdbModres(object): """ COLUMNS DATA TYPE FIELD DEFINITION -------------------------------------------------------------------------------- 1 - 6 Record name "MODRES" 8 - 11 IDcode idCode ID code of this entry. 13 - 15 Residue name resName Residue name used in this entry. 17 Character chainID Chain identifier. 19 - 22 Integer seqNum Sequence number. 23 AChar iCode Insertion code. 25 - 27 Residue name stdRes Standard residue name. 30 - 70 String comment Description of the residue modification. """ def __init__(self, line): """Set up attributes""" self.fromLine(line) def _reset(self): self.idCode = None self.resName = None self.chainID = None self.seqNum = None self.iCode = None self.stdRes = None self.comment = None return
[docs] def fromLine(self, line): """Initialise from the line from a PDB""" assert line[0:6] == "MODRES", "Line did not begin with an MODRES record!: {0}".format(line) self._reset() self.idCode = line[7:11] self.resName = line[12:15].strip() # Use for all so None means an empty field if line[16].strip(): self.chainID = line[16] self.seqNum = int(line[18:22]) if line[22].strip(): self.iCode = line[22] self.stdRes = line[24:27].strip() if line[29:70].strip(): self.comment = line[29:70].strip() return
[docs] def toLine(self): """Create a line suitable for printing to a PDB file""" s = "MODRES" # 1-6 s += " " # 7 blank s += "{0:4}".format(self.idCode) # 8-11 s += " " # 12 blank s += "{0:>3}".format(self.resName) # 13-15 s += " " # 16 blank if not self.chainID: # 17 s += " " else: s += "{0:1}".format(self.chainID) s += " " # 18 blank s += "{0:4d}".format(self.seqNum) # 19-22 if not self.iCode: # 23 s += " " else: s += "{0:1}".format(self.iCode) s += " " # 24 blank s += "{0:>3}".format(self.stdRes) # 25-27 s += " " # 28-29 blank if self.comment: # 30-70 s += "{:<}".format(self.comment) return s
def __str__(self): """List the data attributes of this object""" me = {} for slot in dir(self): attr = getattr(self, slot) if not slot.startswith("__") and not ( isinstance(attr, types.MethodType) or isinstance(attr, types.FunctionType) ): me[slot] = attr return "{0} : {1}".format(self.__repr__(), str(me))