Source code for ample.modelling.octopus_predict

"""
Created on 18 Feb 2013

@author: jmht

Query the octopus server http://octopus.cbr.su.se to get transmembrane predictions
"""

import logging
import os
import sys
import urllib

if sys.version_info.major < 3:
    from HTMLParser import HTMLParser
    from urllib import urlencode
    from urllib2 import urlopen, HTTPError
else:
    from html.parser import HTMLParser
    from urllib.error import HTTPError
    from urllib.parse import urlencode
    from urllib.request import urlopen

# Timeout passed as the ``timeout`` argument to the urlopen calls in this module
TIMEOUT = 10.0  # in seconds


class ParseFileUrl(HTMLParser):
    """
    HTML parser that picks the result-file links out of an octopus results page.

    After feeding a page, ``topo`` holds the href of the last anchor seen whose
    target ends in ".topo" and ``nnprf`` the href of the last anchor whose
    target ends in ".nnprf". Either stays None if no such link was found.
    """

    def __init__(self):
        self.topo = None
        self.nnprf = None
        # Explicit base-class call (rather than super()) as in the original code
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Record the href of any <a> tag that points at a .topo or .nnprf file"""
        # Only anchors that carry at least one attribute are of interest
        if tag != 'a' or not attrs:
            return
        for key, val in attrs:
            if key != "href":
                continue
            link = str(val)
            if link.endswith(".topo"):
                self.topo = link
            elif link.endswith(".nnprf"):
                self.nnprf = link
# End ParseFileUrl
class OctopusPredict(object):
    """
    Query the octopus server http://octopus.cbr.su.se to get transmembrane predictions
    """

    def __init__(self):
        self.logger = logging.getLogger()
        self.octopus_url = "http://octopus.cbr.su.se/"

        # The fasta sequence to query with
        self.fasta = None

        # path to the topo file
        self.topo = None

        # path to the nnprf file (stays None if the file could not be fetched)
        self.nnprf = None

        # url of topo & nnprf files on server
        self.topo_url = None
        self.nnprf_url = None

    def getPredict(self, name, fasta, directory=None):
        """
        Get the octopus prediction for the given fasta sequence string

        Args:
        name -- name for the files
        fasta -- fasta sequence string
        directory -- directory to write files to (defaults to the current working directory)

        Sets the topo and nnprf attributes
        """
        if not directory:
            directory = os.getcwd()
        self.octopusFileUrls(fasta)
        self.writeFiles(name, directory)

    def octopusFileUrls(self, fasta):
        """
        Query the server for a prediction for the given fasta sequence.

        Args:
        fasta -- a single fasta sequence as a string

        Sets the urls of the topo and nnprf files (nnprf_url is None when the
        results page carries no .nnprf link).

        Raises:
        RuntimeError -- if the returned page has no link to a .topo file; the
                        page is saved to OCTOPUS_ERROR.html for inspection
        """
        data = dict(do='Submit OCTOPUS', sequence=fasta)
        edata = urlencode(data)
        # urlopen requires bytes for POST data on Python 3; on Python 2 the
        # urlencode result is already a byte string
        if not isinstance(edata, bytes):
            edata = edata.encode("ascii")

        req = urlopen(self.octopus_url, edata, timeout=TIMEOUT)
        try:
            raw = req.read()
        finally:
            req.close()

        # HTMLParser.feed needs text on Python 3.
        # NOTE(review): the server's charset is not known here; utf-8 with
        # replacement is assumed - confirm against the server's response headers
        html = raw if isinstance(raw, str) else raw.decode("utf-8", "replace")

        m = ParseFileUrl()
        m.feed(html)

        if not m.topo:
            # Keep the page exactly as received so the failure can be diagnosed
            with open("OCTOPUS_ERROR.html", "wb") as f:
                f.write(raw)
            lines = (fasta or "").splitlines()
            raise RuntimeError(
                "Error getting prediction for fasta:{}\nCheck file: OCTOPUS_ERROR.html".format(
                    lines[0] if lines else ""
                )
            )

        self.topo_url = self.octopus_url + m.topo
        self.nnprf_url = self.octopus_url + m.nnprf if m.nnprf else None

    def writeFiles(self, name, directory):
        """
        Write the files on the server to disk

        Args:
        name -- name for files (with suffix .topo and .nnprf)
        directory -- where to write files

        Sets the topo attribute to the path of the written topo file. The nnprf
        file is best-effort: if it is unavailable a warning is logged and the
        nnprf attribute is left as None.

        Raises:
        RuntimeError -- if the topo file cannot be accessed
        """
        try:
            topo_req = urlopen(self.topo_url, timeout=TIMEOUT)
        except HTTPError as e:
            raise RuntimeError("Error accessing topo file: {}\n{}".format(self.topo_url, e))

        fname = os.path.join(directory, name + ".topo")
        self._writeResponse(topo_req, fname)
        self.logger.debug("Wrote topo file: {}".format(fname))
        self.topo = fname

        # Reset so a failed fetch never leaves a stale path from an earlier call
        self.nnprf = None
        nnprf_req = None
        if self.nnprf_url:
            try:
                nnprf_req = urlopen(self.nnprf_url, timeout=TIMEOUT)
            except HTTPError as e:
                msg = "Error accessing nnprf file: {}\n{}\nTransmembrane prediction may have failed!".format(
                    self.nnprf_url, e
                )
                self.logger.warning(msg)
        else:
            self.logger.warning("No nnprf file url available\nTransmembrane prediction may have failed!")

        if nnprf_req is not None:
            fname = os.path.join(directory, name + ".nnprf")
            self._writeResponse(nnprf_req, fname)
            self.logger.debug("Wrote nnprf file: {}".format(fname))
            self.nnprf = fname

    @staticmethod
    def _writeResponse(response, fname):
        """Write the body of an open url response to fname and close the response"""
        try:
            # Binary mode: the response body is bytes on Python 3
            with open(fname, "wb") as f:
                f.write(response.read())
        finally:
            response.close()

    def getFasta(self, fastafile):
        """Given a fastafile, extract the first sequence and return it as \\n separated string

        Returns None if the file contains no non-blank lines.
        """
        fasta = []
        header = False
        with open(fastafile, "r") as f:
            for line in f:
                line = line.strip()
                # skip empty
                if not line:
                    continue
                # Only read one sequence: stop at the second header line
                if line.startswith(">"):
                    if header:
                        break
                    header = True
                fasta.append(line)

        if not fasta:
            return None
        return "\n".join(fasta)