Source code for ample.parsers.dssp_parser

[docs]class DsspParser(object): """ Class """ def __init__(self, pfile): self.dsspfile = pfile self.chainIds = [] self.resNames = [] self.resSeqs = [] self.assignment = [] self.percentH = [] self.percentC = [] self.percentE = [] self.parse() return
[docs] def parse(self): """Info from: http://swift.cmbi.ru.nl/gv/dssp/HTML/descrip.html """ self.chainIds = [] self.resNames = [] self.resSeqs = [] self.assignment = [] capture = False currentChain = None for line in open(self.dsspfile, 'r'): if "# RESIDUE" in line: capture = True continue if capture: # Ignore chain break characters - we use the chainId if "!" in line: continue resSeq = int(line[5:10].strip()) chainId = line[10:12].strip() resName = line[12:14].strip() assign = line[16] if currentChain != chainId: currentChain = chainId self.chainIds.append(chainId) self.resNames.append([]) self.resSeqs.append([]) self.assignment.append([]) self.resNames[-1].append(resName) self.resSeqs[-1].append(resSeq) self.assignment[-1].append(assign) if not len(self.resNames[0]) or not len(self.assignment[0]): raise RuntimeError("Got no assignment!") for chain in range(len(self.chainIds)): nH = 0 nC = 0 nE = 0 for p in self.assignment[chain]: if p == "H": nH += 1 elif p == "E": nE += 1 # Just assume everything else is a coil else: nC += 1 self.percentC.append(float(nC) / len(self.assignment[chain]) * 100) self.percentH.append(float(nH) / len(self.assignment[chain]) * 100) self.percentE.append(float(nE) / len(self.assignment[chain]) * 100) return
[docs] def asDict(self): d = {} d['chainIds'] = self.chainIds d['assignment'] = self.assignment d['resNames'] = self.resNames d['resSeqs'] = self.resSeqs d['percentC'] = self.percentC d['percentE'] = self.percentE d['percentH'] = self.percentH return d
[docs] def getAssignment(self, resSeq, chainId, resName=None): ci = self.chainIds.index(chainId) ri = self.resSeqs[ci].index(resSeq) if resName: # Just a check to make sure things are working - ignore X as it'll be a non-standard residue e.g. N-FORMYLMETHIONINE dsspResName = self.resNames[ci][ri] # in dssp cysteine bridges are signified by lower-case letters if dsspResName != resName and not dsspResName.islower() and dsspResName != 'X': raise RuntimeError( "Missmatching residues id {0} chain {1}: {2}: {3}".format( resSeq, chainId, self.resNames[ci][ri], resName ) ) return self.assignment[ci][ri]
[docs] def getResName(self, resSeq, chainId): ci = self.chainIds.index(chainId) ri = self.resSeqs[ci].index(resSeq) # Lower case indicates cysteine name = self.resNames[ci][ri] if name.islower(): return 'C' else: return name