Source code for ample.modelling.octopus_predict

'''
Created on 18 Feb 2013

@author: jmht

Query the octopus server http://octopus.cbr.su.se to get transmembrane predictions
'''

import os

from HTMLParser import HTMLParser
import logging
import urllib
import urllib2


class ParseFileUrl(HTMLParser):
    """HTML parser that picks the result-file links out of an octopus results page.

    After feeding a page to the parser, ``topo`` holds the href of the last
    anchor whose link ends in ``.topo`` and ``nnprf`` the href of the last
    anchor whose link ends in ``.nnprf``. Either stays ``None`` when no
    matching anchor was seen.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.topo = None
        self.nnprf = None

    def handle_starttag(self, tag, attrs):
        """Record the href of any ``<a>`` tag that points at a .topo or .nnprf file."""
        # Only anchors that actually carry attributes are of interest
        if tag != 'a' or not attrs:
            return

        for key, raw in attrs:
            if key != "href":
                continue
            link = str(raw)
            if link.endswith(".topo"):
                self.topo = link
            elif link.endswith(".nnprf"):
                self.nnprf = link
# End ParseFileUrl
class OctopusPredict(object):
    """Query the octopus server http://octopus.cbr.su.se to get transmembrane predictions.

    Typical use is ``getPredict(name, fasta, directory)``, which submits the
    sequence to the server and then downloads the result files, leaving their
    local paths in the ``topo`` and ``nnprf`` attributes.
    """

    def __init__(self):
        self.logger = logging.getLogger()
        self.octopus_url = "http://octopus.cbr.su.se/"

        # The fasta sequence to query with
        self.fasta = None
        # path to the topo file
        self.topo = None
        # path to the nnprf file
        self.nnprf = None
        # url of topo & nnprf files on server
        self.topo_url = None
        self.nnprf_url = None

    def getPredict(self, name, fasta, directory=None):
        """Get the octopus prediction for the given fasta sequence string.

        Args:
        name -- name for the files
        fasta -- fasta sequence string
        directory -- directory to write files to (defaults to the current
                     working directory)

        Sets the fasta, topo and nnprf attributes.
        """
        if not directory:
            directory = os.getcwd()

        self.fasta = fasta
        self.octopusFileUrls(fasta)
        self.writeFiles(name, directory)

    def octopusFileUrls(self, fasta):
        """Query the server for a prediction for the given fasta sequence.

        Args:
        fasta -- a single fasta sequence as a string

        Sets the urls of the topo and nnprf files. nnprf_url is set to None
        when the returned page contains no link to an nnprf file.

        Raises:
        RuntimeError -- if the returned page contains no link to a topo file;
                        the page is saved to OCTOPUS_ERROR.html in the current
                        working directory for inspection.
        """
        data = {'do': 'Submit OCTOPUS', 'sequence': fasta}
        edata = urllib.urlencode(data)

        req = urllib2.urlopen(self.octopus_url, edata)
        try:
            html = req.read()
        finally:
            req.close()

        # Feeding the page triggers ParseFileUrl.handle_starttag for each tag
        m = ParseFileUrl()
        m.feed(html)

        if not m.topo:
            with open("OCTOPUS_ERROR.html", "w") as f:
                f.write(html)
            # Guard against an empty query so the real error is not masked
            # by an IndexError while building the message
            lines = fasta.splitlines() if fasta else []
            first_line = lines[0] if lines else ""
            msg = "Error getting prediction for fasta:{}\nCheck file: OCTOPUS_ERROR.html".format(first_line)
            self.logger.critical(msg)
            raise RuntimeError(msg)

        self.topo_url = self.octopus_url + m.topo
        if m.nnprf:
            self.nnprf_url = self.octopus_url + m.nnprf
        else:
            # A page may list a topo file without an nnprf file
            self.nnprf_url = None
        return

    def writeFiles(self, name, directory):
        """Write the files on the server to disk.

        Args:
        name -- name for files (with suffix .topo and .nnprf)
        directory -- where to write files

        Sets the topo attribute to the path of the written topo file. The
        nnprf attribute is set to the path of the written nnprf file, or to
        None (with a warning logged) when the nnprf file is not available.

        Raises:
        RuntimeError -- if requesting the topo file gives an HTTP error.
        """
        try:
            topo_req = urllib2.urlopen(self.topo_url)
        except urllib2.HTTPError as e:
            msg = "Error accessing topo file: {}\n{}".format(self.topo_url, e)
            self.logger.critical(msg)
            raise RuntimeError(msg)

        # The nnprf file is best-effort: a failure is only a warning, so the
        # handle stays None and the file is skipped further down
        nnprf_req = None
        if self.nnprf_url:
            try:
                nnprf_req = urllib2.urlopen(self.nnprf_url)
            except urllib2.HTTPError as e:
                msg = "Error accessing nnprf file: {}\n{}\nTransmembrane prediction may have failed!".format(self.nnprf_url, e)
                self.logger.warning(msg)
        else:
            self.logger.warning("No nnprf file url available\nTransmembrane prediction may have failed!")

        try:
            fname = os.path.join(directory, name + ".topo")
            with open(fname, "w") as f:
                f.writelines(topo_req.readlines())
            self.logger.debug("Wrote topo file: {}".format(fname))
            self.topo = fname

            if nnprf_req is None:
                # Do not leave a stale path behind from an earlier call
                self.nnprf = None
            else:
                fname = os.path.join(directory, name + ".nnprf")
                with open(fname, "w") as f:
                    f.writelines(nnprf_req.readlines())
                self.logger.debug("Wrote nnprf file: {}".format(fname))
                self.nnprf = fname
        finally:
            topo_req.close()
            if nnprf_req is not None:
                nnprf_req.close()

    def getFasta(self, fastafile):
        """Given a fastafile, extract the first sequence and return it as a
        newline-separated string.

        Blank lines are skipped and every kept line is stripped of
        surrounding whitespace. Reading stops at the second ">" header line.

        Returns None if the file contains no non-blank lines.
        """
        fasta = []
        header = False
        with open(fastafile, "r") as f:
            for line in f:
                line = line.strip()
                # skip empty
                if not line:
                    continue
                # Only read one sequence
                if line.startswith(">"):
                    if header:
                        break
                    header = True
                fasta.append(line)

        if not fasta:
            return None

        return "\n".join(fasta)