# Source code for ample.modelling.octopus_predict
'''
Created on 18 Feb 2013
@author: jmht
Query the octopus server http://octopus.cbr.su.se to get transmembrane predictions
'''
import os
from HTMLParser import HTMLParser
import logging
import urllib
import urllib2
class ParseFileUrl(HTMLParser):
    """
    Scan the HTML page returned by an octopus search for the result file links.

    After feed() has been called, ``topo`` holds the href of the last anchor
    seen whose target ends in ".topo" and ``nnprf`` the href of the last
    anchor whose target ends in ".nnprf". Either stays None when no such
    link was found.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.topo = None
        self.nnprf = None

    def handle_starttag(self, tag, attrs):
        """Record the href of any <a> tag that points at a .topo or .nnprf file"""
        # Only anchors that actually carry attributes are of interest
        if tag != 'a' or not attrs:
            return
        for key, val in attrs:
            if key != "href":
                continue
            link = str(val)
            if link.endswith(".topo"):
                self.topo = link
            elif link.endswith(".nnprf"):
                self.nnprf = link
# End ParseFileUrl
class OctopusPredict(object):
    """
    Query the octopus server http://octopus.cbr.su.se to get transmembrane predictions

    Attributes:
        logger -- the root logger, used for all messages
        octopus_url -- base url of the octopus server
        fasta -- placeholder for the query sequence (initialised to None and
                 not assigned by any method of this class)
        topo -- path of the topo file written to disk (None until written)
        nnprf -- path of the nnprf file written to disk (None if not written)
        topo_url -- url of the topo file on the server
        nnprf_url -- url of the nnprf file on the server (None if the result
                     page did not link to one)
    """

    def __init__(self):
        self.logger = logging.getLogger()
        self.octopus_url = "http://octopus.cbr.su.se/"
        # The fasta sequence to query with
        self.fasta = None
        # path to the topo file
        self.topo = None
        # path to the nnprf file
        self.nnprf = None
        # url of topo & nnprf files on server
        self.topo_url = None
        self.nnprf_url = None

    def getPredict(self, name, fasta, directory=None):
        """
        Get the octopus prediction for the given fasta sequence string
        Args:
        name -- name for the files
        fasta -- fasta sequence string
        directory -- directory to write files to (defaults to the current
                     working directory)
        Sets the topo and nnprf attributes
        """
        if not directory:
            directory = os.getcwd()
        self.octopusFileUrls(fasta)
        self.writeFiles(name, directory)

    def octopusFileUrls(self, fasta):
        """
        Query the server for a prediction for the given fasta sequence.
        Args:
        fasta -- a single fasta sequence as a string
        Sets the urls of the topo and nnprf files
        Raises:
        RuntimeError -- if the returned page contains no link to a topo file;
                        the page is saved as OCTOPUS_ERROR.html in the current
                        working directory for inspection
        """
        data = {'do': 'Submit OCTOPUS', 'sequence': fasta}
        edata = urllib.urlencode(data)
        req = urllib2.urlopen(self.octopus_url, edata)
        try:
            html = req.read()
        finally:
            req.close()

        m = ParseFileUrl()
        # Calls handle_starttag for every tag in the page
        m.feed(html)
        if not m.topo:
            with open("OCTOPUS_ERROR.html", "w") as f:
                f.write(html)
            # Report the first line of the sequence (normally the header) but
            # do not let an empty/None sequence mask the real error
            lines = fasta.splitlines() if fasta else []
            header = lines[0] if lines else fasta
            msg = "Error getting prediction for fasta:{}\nCheck file: OCTOPUS_ERROR.html".format(header)
            self.logger.critical(msg)
            raise RuntimeError(msg)

        self.topo_url = self.octopus_url + m.topo
        if m.nnprf:
            self.nnprf_url = self.octopus_url + m.nnprf
        else:
            # A page with a topo link but no nnprf link used to crash with a
            # TypeError (str + None); treat the nnprf file as unavailable
            self.nnprf_url = None
            self.logger.warning("No nnprf file link found in octopus results page")
        return

    def _writeResponse(self, response, fname):
        """Write the content of an open url response to fname and close the response"""
        try:
            with open(fname, "w") as f:
                f.writelines(response.readlines())
        finally:
            response.close()

    def writeFiles(self, name, directory):
        """
        Write the files on the server to disk
        Args:
        name: name for files (with suffix .topo and .nnprf)
        directory: where to write files
        Sets the topo attribute to the path of the topo file, and the nnprf
        attribute to the path of the nnprf file, or to None when the nnprf
        file could not be retrieved.
        Raises:
        RuntimeError -- if no topo url has been set or the topo file cannot
                        be accessed
        """
        if not self.topo_url:
            msg = "No topo file url set - octopusFileUrls must be run first"
            self.logger.critical(msg)
            raise RuntimeError(msg)

        # The topo file is mandatory: failure to fetch it is fatal
        try:
            topo_req = urllib2.urlopen(self.topo_url)
        except urllib2.HTTPError as e:
            msg = "Error accessing topo file: {}\n{}".format(self.topo_url, e)
            self.logger.critical(msg)
            raise RuntimeError(msg)
        fname = os.path.join(directory, name + ".topo")
        self._writeResponse(topo_req, fname)
        self.logger.debug("Wrote topo file: {}".format(fname))
        self.topo = fname

        # The nnprf file is optional: warn and carry on without it
        self.nnprf = None
        if not self.nnprf_url:
            self.logger.warning("No nnprf file url set - not writing nnprf file")
            return
        try:
            nnprf_req = urllib2.urlopen(self.nnprf_url)
        except urllib2.HTTPError as e:
            msg = "Error accessing nnprf file: {}\n{}\nTransmembrane prediction may have failed!".format(self.nnprf_url, e)
            self.logger.warning(msg)
            return
        fname = os.path.join(directory, name + ".nnprf")
        self._writeResponse(nnprf_req, fname)
        self.logger.debug("Wrote nnprf file: {}".format(fname))
        self.nnprf = fname

    def getFasta(self, fastafile):
        """
        Given a fastafile, extract the first sequence
        and return it as \\n separated string

        Blank lines are skipped and reading stops at the second header
        (line starting with ">"). Returns None if the file contains no
        non-blank lines.
        """
        fasta = []
        header = False
        with open(fastafile, "r") as f:
            for line in f:
                line = line.strip()
                # skip empty
                if not line:
                    continue
                # Only read one sequence
                if line.startswith(">"):
                    if header:
                        break
                    header = True
                fasta.append(line)
        if not fasta:
            return None
        return "\n".join(fasta)