Source code for ample.parsers.dssp_parser
class DsspParser(object):
    """Parse the per-residue secondary-structure table of a DSSP output file.

    The file is parsed on construction.  Results are stored per chain, in the
    order the chains are first encountered in the file:

    * ``chainIds``   -- list of chain identifiers
    * ``resNames``   -- list (per chain) of lists of residue-name codes
    * ``resSeqs``    -- list (per chain) of lists of integer residue numbers
    * ``assignment`` -- list (per chain) of lists of one-character
      secondary-structure codes
    * ``percentH`` / ``percentE`` / ``percentC`` -- per-chain percentage of
      residues assigned ``H``, ``E`` and anything else respectively
    """

    def __init__(self, pfile):
        """Store the path *pfile* and parse it immediately.

        Raises:
            RuntimeError: if the file contains no residue records.
        """
        self.dsspfile = pfile
        self.chainIds = []
        self.resNames = []
        self.resSeqs = []
        self.assignment = []
        self.percentH = []
        self.percentC = []
        self.percentE = []
        self.parse()
        return

    def parse(self):
        """(Re-)read ``self.dsspfile`` and rebuild every result attribute.

        Info from: http://swift.cmbi.ru.nl/gv/dssp/HTML/descrip.html

        Raises:
            RuntimeError: if no residue records were found.
        """
        self.chainIds = []
        self.resNames = []
        self.resSeqs = []
        self.assignment = []
        # Reset the percentages too, otherwise calling parse() a second time
        # would append a duplicate set of values to the existing lists.
        self.percentH = []
        self.percentC = []
        self.percentE = []

        capture = False
        currentChain = None
        # 'with' guarantees the file handle is closed, even on a parse error.
        with open(self.dsspfile, 'r') as dsspFh:
            for line in dsspFh:
                if not capture:
                    # The residue table starts after the "#  RESIDUE ..."
                    # header.  Collapse runs of whitespace before comparing so
                    # the match does not depend on how many spaces follow '#'.
                    if "# RESIDUE" in " ".join(line.split()):
                        capture = True
                    continue

                # Ignore chain break characters - we use the chainId
                if "!" in line:
                    continue
                # A blank line carries no residue record; skip it rather than
                # failing on int('').
                if not line.strip():
                    continue

                # Fixed-column fields of a residue record.
                resSeq = int(line[5:10].strip())
                chainId = line[10:12].strip()
                resName = line[12:14].strip()
                assign = line[16]

                # A change of chain identifier starts a new set of per-chain lists.
                if currentChain != chainId:
                    currentChain = chainId
                    self.chainIds.append(chainId)
                    self.resNames.append([])
                    self.resSeqs.append([])
                    self.assignment.append([])

                self.resNames[-1].append(resName)
                self.resSeqs[-1].append(resSeq)
                self.assignment[-1].append(assign)

        # Every chain entry is created together with its first residue, so an
        # empty chain list is exactly the "nothing parsed" case.  Testing the
        # list itself avoids an IndexError from indexing element 0.
        if not self.chainIds:
            raise RuntimeError("Got no assignment!")

        self._calcPercentages()
        return

    def _calcPercentages(self):
        """Fill percentH/percentE/percentC from the parsed assignments."""
        for chainAssignment in self.assignment:
            total = len(chainAssignment)
            nH = chainAssignment.count("H")
            nE = chainAssignment.count("E")
            # Just assume everything else is a coil
            nC = total - nH - nE
            self.percentC.append(float(nC) / total * 100)
            self.percentH.append(float(nH) / total * 100)
            self.percentE.append(float(nE) / total * 100)

    def asDict(self):
        """Return the parsed data as a dictionary keyed by attribute name.

        The values are the parser's own lists, not copies.
        """
        return {
            'chainIds': self.chainIds,
            'assignment': self.assignment,
            'resNames': self.resNames,
            'resSeqs': self.resSeqs,
            'percentC': self.percentC,
            'percentE': self.percentE,
            'percentH': self.percentH,
        }

    def getAssignment(self, resSeq, chainId, resName=None):
        """Return the secondary-structure code of residue *resSeq* in *chainId*.

        If *resName* is given it is compared with the residue name read from
        the DSSP file as a sanity check.

        Raises:
            ValueError: if *chainId* or *resSeq* was not found in the file.
            RuntimeError: if *resName* does not match the parsed residue name.
        """
        ci = self.chainIds.index(chainId)
        ri = self.resSeqs[ci].index(resSeq)
        if resName:
            # Just a check to make sure things are working - ignore X as it'll
            # be a non-standard residue e.g. N-FORMYLMETHIONINE
            dsspResName = self.resNames[ci][ri]
            # in dssp cysteine bridges are signified by lower-case letters
            if dsspResName != resName and not dsspResName.islower() and dsspResName != 'X':
                raise RuntimeError(
                    "Missmatching residues id {0} chain {1}: {2}: {3}".format(
                        resSeq, chainId, dsspResName, resName
                    )
                )
        return self.assignment[ci][ri]

    def getResName(self, resSeq, chainId):
        """Return the residue name of residue *resSeq* in *chainId*.

        Lower-case names (DSSP's marker for bridged cysteines) are reported
        as ``'C'``.

        Raises:
            ValueError: if *chainId* or *resSeq* was not found in the file.
        """
        ci = self.chainIds.index(chainId)
        ri = self.resSeqs[ci].index(resSeq)
        # Lower case indicates cysteine
        name = self.resNames[ci][ri]
        if name.islower():
            return 'C'
        return name