Source code for ample.parsers.alignment_parser

import os
import sys

BIOPYTHON_AVAILABLE = False
try:
    from Bio import AlignIO, Alphabet, pairwise2, Seq, SeqIO
    BIOPYTHON_AVAILABLE = True
except ImportError:
    msg = "Cannot import Biopython - some functionality will not be available." + os.linesep
    sys.stderr.write(msg)


[docs]class AlignmentParser(object): """ Parser for manipulation of MSAs """
[docs] def a3mToTrimmed(self, alnFile, outFile): """Convert an .a3m HH-suite file to a FASTA format :alnFile: A3M format alignment file :outFile: MSA output file """ # A3m is similar to FASTA except indels and lower case letters records = list(SeqIO.parse(open(alnFile, 'r'), 'fasta')) records_formatted = self._a3mToTrimmed(records) SeqIO.write(records_formatted, open(outFile, 'w'), "fasta") return
def _a3mToTrimmed(self, records): """Takes two file handlers""" for record in records: record.seq = Seq.Seq( "".join([c for c in str(record.seq) if not c.islower()]), Alphabet.single_letter_alphabet ) return records
[docs] def align_sequences(self, seq1, seq2): """Global pairwise alignment of two sequences :returns: aligned sequences as tuple """ alignment = pairwise2.align.globalms(seq1, seq2, 2, -1, -0.5, -0.1) return (alignment[-1][0], alignment[-1][1])
[docs] def resetA3M(self, alnFile, outFile): """ Reset a A3M alignment to FASTA sequences Wrapper function for a3mToTrimmed() and removeGaps() :alnFile: A3M format alignment file :outFile: FASTA sequence file """ records = list(SeqIO.parse(open(alnFile, 'r'), 'fasta')) records_trimmed = self._a3mToTrimmed(records) records_trimmed_gapped = self._removeGaps(records) SeqIO.write(records_trimmed_gapped, open(outFile, 'w'), "fasta") return
[docs] def read(self, alnFile, alnFormat): """Read a multiple sequence alignment :alnFile: MSA file :alnFormat: MSA format :returns: Biopython MSA generator """ return AlignIO.parse(open(alnFile, 'r'), alnFormat)
[docs] def removeGaps(self, alnFile, outFile): """ Remove all gaps in a MSA :alnFile: FASTA alignment file :outFile: FASTA sequence file """ # Instead of reading an AlignIO object read as SeqIO object to treat them # straight away as individual sequences records = list(SeqIO.parse(open(alnFile, 'r'), 'fasta')) records_formatted = self._removeGaps(records) SeqIO.write(records_formatted, open(outFile, 'w'), "fasta") return
def _removeGaps(self, records): """ Format the string to remove the gaps """ for record in records: record.seq = Seq.Seq(str(record.seq).replace("-", ""), Alphabet.single_letter_alphabet) return records
[docs] def write(self, alignment, file, format): """Write a MSA object to a file""" AlignIO.write(alignment, open(file, 'w'), format) return