# Copyright 2001-2009 Brad Chapman. # Revisions copyright 2009-2010 by Peter Cock. # Revisions copyright 2009 by David Winter. # Revisions copyright 2009-2010 by Leighton Pritchard. # All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Code to interact with and run various EMBOSS programs. These classes follow the AbstractCommandline interfaces for running programs. """ from __future__ import print_function from Bio.Application import _Option, _Switch, AbstractCommandline class _EmbossMinimalCommandLine(AbstractCommandline): """Base Commandline object for EMBOSS wrappers (PRIVATE). This is provided for subclassing, it deals with shared options common to all the EMBOSS tools: - auto Turn off prompts - stdout Write standard output - filter Read standard input, write standard output - options Prompt for standard and additional values - debug Write debug output to program.dbg - verbose Report some/full command line options - help Report command line options. More information on associated and general qualifiers can be found with -help -verbose - warning Report warnings - error Report errors - fatal Report fatal errors - die Report dying program messages """ def __init__(self, cmd=None, **kwargs): assert cmd is not None extra_parameters = [ _Switch(["-auto", "auto"], """Turn off prompts. Automatic mode disables prompting, so we recommend you set this argument all the time when calling an EMBOSS tool from Biopython. """), _Switch(["-stdout", "stdout"], "Write standard output."), _Switch(["-filter", "filter"], "Read standard input, write standard output."), _Switch(["-options", "options"], """Prompt for standard and additional values. If you are calling an EMBOSS tool from within Biopython, we DO NOT recommend using this option. """), _Switch(["-debug", "debug"], "Write debug output to program.dbg."), _Switch(["-verbose", "verbose"], "Report some/full command line options"), _Switch(["-help", "help"], """Report command line options. More information on associated and general qualifiers can be found with -help -verbose """), _Switch(["-warning", "warning"], "Report warnings."), _Switch(["-error", "error"], "Report errors."), _Switch(["-die", "die"], "Report dying program messages."), ] try: #Insert extra parameters - at the start just in case there #are any arguments which must come last: self.parameters = extra_parameters + self.parameters except AttributeError: #Should we raise an error? The subclass should have set this up! self.parameters = extra_parameters AbstractCommandline.__init__(self, cmd, **kwargs) class _EmbossCommandLine(_EmbossMinimalCommandLine): """Base Commandline object for EMBOSS wrappers (PRIVATE). This is provided for subclassing, it deals with shared options common to all the EMBOSS tools plus: - outfile Output filename """ def __init__(self, cmd=None, **kwargs): assert cmd is not None extra_parameters = [ _Option(["-outfile", "outfile"], "Output filename", filename=True), ] try: #Insert extra parameters - at the start just in case there #are any arguments which must come last: self.parameters = extra_parameters + self.parameters except AttributeError: #Should we raise an error? The subclass should have set this up! self.parameters = extra_parameters _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) def _validate(self): #Check the outfile, filter, or stdout option has been set. #We can't simply do this via the required flag for the outfile #output - this seems the simplest solution. if not (self.outfile or self.filter or self.stdout): raise ValueError("You must either set outfile (output filename), " "or enable filter or stdout (output to stdout).") return _EmbossMinimalCommandLine._validate(self) class Primer3Commandline(_EmbossCommandLine): """Commandline object for the Primer3 interface from EMBOSS. The precise set of supported arguments depends on your version of EMBOSS. This version accepts arguments current at EMBOSS 6.1.0, but in order to remain backwards compatible also support the old argument names as well. e.g. Using EMBOSS 6.1.0 or later, >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True) >>> cline.explainflag = True >>> cline.osizeopt=20 >>> cline.psizeopt=200 >>> cline.outfile = "myresults.out" >>> cline.bogusparameter = 1967 # Invalid parameter Traceback (most recent call last): ... ValueError: Option name bogusparameter was not found. >>> print(cline) eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True The equivalent for anyone still using an older version of EMBOSS would be: >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True) >>> cline.explainflag = True >>> cline.oligosize=20 # Old EMBOSS, instead of osizeopt >>> cline.productosize=200 # Old EMBOSS, instead of psizeopt >>> cline.outfile = "myresults.out" >>> print(cline) eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -productosize=200 -oligosize=20 -explainflag=True """ def __init__(self, cmd="eprimer3", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Sequence to choose primers from.", is_required=True), _Option(["-task", "task"], "Tell eprimer3 what task to perform."), _Option(["-hybridprobe", "hybridprobe"], "Find an internal oligo to use as a hyb probe."), _Option(["-numreturn", "numreturn"], "Maximum number of primer pairs to return."), _Option(["-includedregion", "includedregion"], "Subregion of the sequence in which to pick primers."), _Option(["-target", "target"], "Sequence to target for flanking primers."), _Option(["-excludedregion", "excludedregion"], "Regions to exclude from primer picking."), _Option(["-forwardinput", "forwardinput"], "Sequence of a forward primer to check."), _Option(["-reverseinput", "reverseinput"], "Sequence of a reverse primer to check."), _Option(["-gcclamp", "gcclamp"], "The required number of Gs and Cs at the 3' of each primer."), _Option(["-osize", "osize"], "Optimum length of a primer oligo."), _Option(["-minsize", "minsize"], "Minimum length of a primer oligo."), _Option(["-maxsize", "maxsize"], "Maximum length of a primer oligo."), _Option(["-otm", "otm"], """Melting temperature for primer oligo (OBSOLETE). Option replaced in EMBOSS 6.6.0 by -opttm """), _Option(["-opttm", "opttm"], """Optimum melting temperature for a primer oligo. Option added in EMBOSS 6.6.0, replacing -otm """), _Option(["-mintm", "mintm"], "Minimum melting temperature for a primer oligo."), _Option(["-maxtm", "maxtm"], "Maximum melting temperature for a primer oligo."), _Option(["-maxdifftm", "maxdifftm"], "Maximum difference in melting temperatures between " "forward and reverse primers."), _Option(["-ogcpercent", "ogcpercent"], "Optimum GC% for a primer."), _Option(["-mingc", "mingc"], "Minimum GC% for a primer."), _Option(["-maxgc", "maxgc"], "Maximum GC% for a primer."), _Option(["-saltconc", "saltconc"], "Millimolar salt concentration in the PCR."), _Option(["-dnaconc", "dnaconc"], "Nanomolar concentration of annealing oligos in the PCR."), _Option(["-maxpolyx", "maxpolyx"], "Maximum allowable mononucleotide repeat length in a primer."), #Primer length: _Option(["-productosize", "productosize"], """Optimum size for the PCR product (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -psizeopt """), _Option(["-psizeopt", "psizeopt"], """Optimum size for the PCR product. Option added in EMBOSS 6.1.0, replacing -productosize """), _Option(["-productsizerange", "productsizerange"], """Acceptable range of length for the PCR product (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -prange """), _Option(["-prange", "prange"], """Acceptable range of length for the PCR product. Option added in EMBOSS 6.1.0, replacing -productsizerange """), #Primer temperature: _Option(["-productotm", "productotm"], """Optimum melting temperature for the PCR product (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ptmopt """), _Option(["-ptmopt", "ptmopt"], """Optimum melting temperature for the PCR product. Option added in EMBOSS 6.1.0, replacing -productotm """), _Option(["-productmintm", "productmintm"], """Minimum allowed melting temperature for the amplicon (OBSOLETE) Option replaced in EMBOSS 6.1.0 by -ptmmin """), _Option(["-ptmmin", "ptmmin"], """Minimum allowed melting temperature for the amplicon."), Option added in EMBOSS 6.1.0, replacing -productmintm """), _Option(["-productmaxtm", "productmaxtm"], """Maximum allowed melting temperature for the amplicon (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ptmmax """), _Option(["-ptmmax", "ptmmax"], """Maximum allowed melting temperature for the amplicon."), Option added in EMBOSS 6.1.0, replacing -productmaxtm """), #Note to self, should be -oexcludedregion not -oexcluderegion _Option(["-oexcludedregion", "oexcludedregion"], """Do not pick internal oligos in this region."), Option added in EMBOSS 6.1.0, replacing -oligoexcludedregion. """), _Option(["-oligoexcludedregion", "oligoexcludedregion"], """Do not pick internal oligos in this region (OBSOLETE)."), Option replaced in EMBOSS 6.1.0 by -oexcluderegion. """), _Option(["-oligoinput", "oligoinput"], "Sequence of the internal oligo."), #Oligo length: _Option(["-oligosize", "oligosize"], """Optimum length of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -osizeopt. """), _Option(["-osizeopt", "osizeopt"], """Optimum length of internal oligo. Option added in EMBOSS 6.1.0, replaces -oligosize """), _Option(["-oligominsize", "oligominsize"], """Minimum length of internal oligo (OBSOLETE)."), Option replaced in EMBOSS 6.1.0 by -ominsize. """), _Option(["-ominsize", "ominsize"], """Minimum length of internal oligo." Option added in EMBOSS 6.1.0, replaces -oligominsize """), _Option(["-oligomaxsize", "oligomaxsize"], """Maximum length of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -omaxsize. """), _Option(["-omaxsize", "omaxsize"], """Maximum length of internal oligo. Option added in EMBOSS 6.1.0, replaces -oligomaxsize """), #Oligo GC temperature: _Option(["-oligotm", "oligotm"], """Optimum melting temperature of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -otmopt. """), _Option(["-otmopt", "otmopt"], """Optimum melting temperature of internal oligo. Option added in EMBOSS 6.1.0. """), _Option(["-oligomintm", "oligomintm"], """Minimum melting temperature of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -otmmin. """), _Option(["-otmmin", "otmmin"], """Minimum melting temperature of internal oligo. Option added in EMBOSS 6.1.0, replacing -oligomintm """), _Option(["-oligomaxtm", "oligomaxtm"], """Maximum melting temperature of internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -otmmax. """), _Option(["-otmmax", "otmmax"], """Maximum melting temperature of internal oligo. Option added in EMBOSS 6.1.0, replacing -oligomaxtm """), #Oligo GC percent: _Option(["-oligoogcpercent", "oligoogcpercent"], """Optimum GC% for internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ogcopt. """), _Option(["-ogcopt", "ogcopt"], """Optimum GC% for internal oligo." Option added in EMBOSS 6.1.0, replacing -oligoogcpercent """), _Option(["-oligomingc", "oligomingc"], """Minimum GC% for internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -ogcmin. """), _Option(["-ogcmin", "ogcmin"], """Minimum GC% for internal oligo. Option added in EMBOSS 6.1.0, replacing -oligomingc """), _Option(["-oligomaxgc", "oligomaxgc"], """Maximum GC% for internal oligo. Option replaced in EMBOSS 6.1.0 by -ogcmax """), _Option(["-ogcmax", "ogcmax"], """Maximum GC% for internal oligo."), Option added in EMBOSS 6.1.0, replacing -oligomaxgc """), #Oligo salt concentration: _Option(["-oligosaltconc", "oligosaltconc"], """Millimolar concentration of salt in the hybridisation."), Option replaced in EMBOSS 6.1.0 by -osaltconc """), _Option(["-osaltconc", "osaltconc"], """Millimolar concentration of salt in the hybridisation."), Option added in EMBOSS 6.1.0, replacing -oligosaltconc """), _Option(["-oligodnaconc", "oligodnaconc"], """Nanomolar concentration of internal oligo in the hybridisation. Option replaced in EMBOSS 6.1.0 by -odnaconc """), _Option(["-odnaconc", "odnaconc"], """Nanomolar concentration of internal oligo in the hybridisation. Option added in EMBOSS 6.1.0, replacing -oligodnaconc """), #Oligo self complementarity _Option(["-oligoselfany", "oligoselfany"], """Maximum allowable alignment score for self-complementarity (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -oanyself """), _Option(["-oanyself", "oanyself"], """Maximum allowable alignment score for self-complementarity."), Option added in EMBOSS 6.1.0, replacing -oligoselfany """), _Option(["-oligoselfend", "oligoselfend"], """Maximum allowable 3`-anchored global alignment score " for self-complementarity (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -oendself """), _Option(["-oendself", "oendself"], """Max 3`-anchored self-complementarity global alignment score. Option added in EMBOSS 6.1.0, replacing -oligoselfend """), _Option(["-oligomaxpolyx", "oligomaxpolyx"], """Maximum length of mononucleotide repeat in internal oligo (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -opolyxmax """), _Option(["-opolyxmax", "opolyxmax"], """Maximum length of mononucleotide repeat in internal oligo."), Option added in EMBOSS 6.1.0, replacing -oligomaxpolyx """), _Option(["-mispriminglibraryfile", "mispriminglibraryfile"], "File containing library of sequences to avoid amplifying"), _Option(["-maxmispriming", "maxmispriming"], "Maximum allowed similarity of primers to sequences in " "library specified by -mispriminglibrary"), _Option(["-oligomaxmishyb", "oligomaxmishyb"], """Maximum alignment score for hybridisation of internal oligo to library specified by -oligomishyblibraryfile (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -omishybmax """), _Option(["-omishybmax", "omishybmax"], """Maximum alignment score for hybridisation of internal oligo to library specified by -mishyblibraryfile. Option added in EMBOSS 6.1.0, replacing -oligomaxmishyb """), _Option(["-oligomishyblibraryfile", "oligomishyblibraryfile"], """Library file of seqs to avoid internal oligo hybridisation (OBSOLETE). Option replaced in EMBOSS 6.1.0 by -mishyblibraryfile """), _Option(["-mishyblibraryfile", "mishyblibraryfile"], """Library file of seqs to avoid internal oligo hybridisation. Option added in EMBOSS 6.1.0, replacing -oligomishyblibraryfile """), _Option(["-explainflag", "explainflag"], "Produce output tags with eprimer3 statistics"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) class PrimerSearchCommandline(_EmbossCommandLine): """Commandline object for the primersearch program from EMBOSS. """ def __init__(self, cmd="primersearch", **kwargs): self.parameters = [ _Option(["-seqall", "-sequences", "sequences", "seqall"], "Sequence to look for the primer pairs in.", is_required=True), #When this wrapper was written primersearch used -sequences #as the argument name. Since at least EMBOSS 5.0 (and #perhaps earlier) this has been -seqall instead. _Option(["-infile", "-primers", "primers", "infile"], "File containing the primer pairs to search for.", filename=True, is_required=True), #When this wrapper was written primersearch used -primers #as the argument name. Since at least EMBOSS 5.0 (and #perhaps earlier) this has been -infile instead. _Option(["-mismatchpercent", "mismatchpercent"], "Allowed percentage mismatch (any integer value, default 0).", is_required=True), _Option(["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"), _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FDNADistCommandline(_EmbossCommandLine): """Commandline object for the fdnadist program from EMBOSS. fdnadist is an EMBOSS wrapper for the PHYLIP program dnadist for calulating distance matrices from DNA sequence files. """ def __init__(self, cmd = "fdnadist", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "seq file to use (phylip)", filename=True, is_required=True), _Option(["-method", "method"], "sub. model [f,k,j,l,s]", is_required=True), _Option(["-gamma", "gamma"], "gamma [g, i,n]"), _Option(["-ncategories", "ncategories"], "number of rate catergories (1-9)"), _Option(["-rate", "rate"], "rate for each category"), _Option(["-categories", "categories"], "File of substitution rate categories"), _Option(["-weights", "weights"], "weights file"), _Option(["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)"), _Option(["-invarfrac", "invarfrac"], "proportoin of invariant sites"), _Option(["-ttratio", "ttratio"], "ts/tv ratio"), _Option(["-freqsfrom", "freqsfrom"], "use emprical base freqs"), _Option(["-basefreq", "basefreq"], "specify basefreqs"), _Option(["-lower", "lower"], "lower triangle matrix (y/N)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FTreeDistCommandline(_EmbossCommandLine): """Commandline object for the ftreedist program from EMBOSS. ftreedist is an EMBOSS wrapper for the PHYLIP program treedist used for calulating distance measures between phylogentic trees. """ def __init__(self, cmd = "ftreedist", **kwargs): self.parameters = [ _Option(["-intreefile", "intreefile"], "tree file to score (phylip)", filename=True, is_required=True), _Option(["-dtype", "dtype"], "distance type ([S]ymetric, [b]ranch score)"), _Option(["-pairing", "pairing"], "tree pairing method ([A]djacent pairs, all [p]ossible pairs)"), _Option(["-style", "style"], "output style - [V]erbose, [f]ill, [s]parse"), _Option(["-noroot", "noroot"], "treat trees as rooted [N/y]"), _Option(["-outgrno", "outgrno"], "which taxon to root the trees with (starts from 0)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FNeighborCommandline(_EmbossCommandLine): """Commandline object for the fneighbor program from EMBOSS. fneighbor is an EMBOSS wrapper for the PHYLIP program neighbor used for calulating neighbor-joining or UPGMA trees from distance matrices. """ def __init__(self, cmd = "fneighbor", **kwargs): self.parameters = [ _Option(["-datafile", "datafile"], "dist file to use (phylip)", filename=True, is_required=True), _Option(["-matrixtype", "matrixtype"], "is martrix [S]quare pr [u]pper or [l]ower"), _Option(["-treetype", "treetype"], "nj or UPGMA tree (n/u)"), _Option(["-outgrno", "outgrno" ], "taxon to use as OG"), _Option(["-jumble", "jumble"], "randommise input order (Y/n)"), _Option(["-seed", "seed"], "provide a random seed"), _Option(["-trout", "trout"], "write tree (Y/n)"), _Option(["-outtreefile", "outtreefile"], "filename for output tree"), _Option(["-progress", "progress"], "print progress (Y/n)"), _Option(["-treeprint", "treeprint"], "print tree (Y/n)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FSeqBootCommandline(_EmbossCommandLine): """Commandline object for the fseqboot program from EMBOSS. fseqboot is an EMBOSS wrapper for the PHYLIP program seqboot used to pseudo-sample alignment files. """ def __init__(self, cmd = "fseqboot", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "seq file to sample (phylip)", filename=True, is_required=True), _Option(["-categories", "catergories"], "file of input categories"), _Option(["-weights", "weights"], " weights file"), _Option(["-test", "test"], "specify operation, default is bootstrap"), _Option(["-regular", "regular"], "absolute number to resample"), _Option(["-fracsample", "fracsample"], "fraction to resample"), _Option(["-rewriteformat", "rewriteformat"], "output format ([P]hyilp, [n]exus, [x]ml"), _Option(["-seqtype", "seqtype"], "output format ([D]na, [p]rotein, [r]na"), _Option(["-blocksize", "blocksize"], "print progress (Y/n)"), _Option(["-reps", "reps"], "how many replicates, defaults to 100)"), _Option(["-justweights", "jusweights"], "what to write out [D]atasets of just [w]eights"), _Option(["-seed", "seed"], "specify random seed"), _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"),] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FDNAParsCommandline(_EmbossCommandLine): """Commandline object for the fdnapars program from EMBOSS. fdnapars is an EMBOSS version of the PHYLIP program dnapars, for estimating trees from DNA sequences using parsiomny. Calling this command without providing a value for the option "-intreefile" will invoke "interactive mode" (and as a result fail if called with subprocess) if "-auto" is not set to true. """ def __init__(self, cmd = "fdnapars", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "seq file to use (phylip)", filename=True, is_required=True), _Option(["-intreefile", "intreefile"], "Phylip tree file"), _Option(["-weights", "weights"], "weights file"), _Option(["-maxtrees", "maxtrees"], "max trees to save during run"), _Option(["-thorough", "thorough"], "more thorough search (Y/n)"), _Option(["-rearrange", "rearrange"], "Rearrange on jsut 1 best tree (Y/n)"), _Option(["-transversion", "transversion"], "Use tranversion parsimony (y/N)"), _Option(["-njumble", "njumble"], "number of times to randomise input order (default is 0)"), _Option(["-seed", "seed"], "provide random seed"), _Option(["-outgrno", "outgrno"], "Specify outgroup"), _Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), _Option(["-threshold", "threshold"], "Threshold value"), _Option(["-trout", "trout"], "Write trees to file (Y/n)"), _Option(["-outtreefile", "outtreefile"], "filename for output tree"), _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FProtParsCommandline(_EmbossCommandLine): """Commandline object for the fdnapars program from EMBOSS. fprotpars is an EMBOSS version of the PHYLIP program protpars, for estimating trees from protein sequences using parsiomny. Calling this command without providing a value for the option "-intreefile" will invoke "interactive mode" (and as a result fail if called with subprocess) if "-auto" is not set to true. """ def __init__(self, cmd = "fprotpars", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "seq file to use (phylip)", filename=True, is_required=True), _Option(["-intreefile", "intreefile"], "Phylip tree file to score"), _Option(["-outtreefile", "outtreefile"], "phylip tree output file", filename=True, is_required=True), _Option(["-weights", "weights"], "weights file"), _Option(["-whichcode", "whichcode"], "which genetic code, [U,M,V,F,Y]]"), _Option(["-njumble", "njumble"], "number of times to randomise input order (default is 0)"), _Option(["-seed", "seed"], "provide random seed"), _Option(["-outgrno", "outgrno"], "Specify outgroup"), _Option(["-thresh", "thresh"], "Use threshold parsimony (y/N)"), _Option(["-threshold", "threshold"], "Threshold value"), _Option(["-trout", "trout"], "Write trees to file (Y/n)"), _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FProtDistCommandline(_EmbossCommandLine): """Commandline object for the fprotdist program from EMBOSS. fprotdist is an EMBOSS wrapper for the PHYLIP program protdist used to estimate trees from protein sequences using parsimony """ def __init__(self, cmd = "fprotdist", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "seq file to use (phylip)", filename=True, is_required=True), _Option(["-ncategories", "ncategories"], "number of rate catergories (1-9)"), _Option(["-rate", "rate"], "rate for each category"), _Option(["-catergories", "catergories"], "file of rates"), _Option(["-weights", "weights"], "weights file"), _Option(["-method", "method"], "sub. model [j,h,d,k,s,c]"), _Option(["-gamma", "gamma"], "gamma [g, i,c]"), _Option(["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)"), _Option(["-invarcoefficient", "invarcoefficient"], "float for variation of substitution rate among sites"), _Option(["-aacateg", "aacateg"], "Choose the category to use [G,C,H]"), _Option(["-whichcode", "whichcode"], "genetic code [c,m,v,f,y]"), _Option(["-ease", "ease"], "Pob change catergory (float between -0 and 1)"), _Option(["-ttratio", "ttratio"], "Transition/transversion ratio (0-1)"), _Option(["-basefreq", "basefreq"], "DNA base frequencies (space separated list)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FConsenseCommandline(_EmbossCommandLine): """Commandline object for the fconsense program from EMBOSS. fconsense is an EMBOSS wrapper for the PHYLIP program consense used to calculate consensus trees. """ def __init__(self, cmd = "fconsense", **kwargs): self.parameters = [ _Option(["-intreefile", "intreefile"], "file with phylip trees to make consensus from", filename=True, is_required=True), _Option(["-method", "method"], "consensus method [s, mr, MRE, ml]"), _Option(["-mlfrac", "mlfrac"], "cut-off freq for a branch to appear in consensus (0.5-1.0)"), _Option(["-root", "root"], "treat trees as rooted (YES, no)"), _Option(["-outgrno", "outgrno"], "OTU to use as outgroup (starts from 0)"), _Option(["-trout", "trout"], "treat trees as rooted (YES, no)"), _Option(["-outtreefile", "outtreefile"], "Phylip tree output file (optional)")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class WaterCommandline(_EmbossCommandLine): """Commandline object for the water program from EMBOSS. """ def __init__(self, cmd="water", **kwargs): self.parameters = [ _Option(["-asequence", "asequence"], "First sequence to align", filename=True, is_required=True), _Option(["-bsequence", "bsequence"], "Second sequence to align", filename=True, is_required=True), _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), _Option(["-gapextend", "gapextend"], "Gap extension penalty", is_required=True), _Option(["-datafile", "datafile"], "Matrix file", filename=True), _Switch(["-nobrief", "nobrief"], "Display extended identity and similarity"), _Switch(["-brief", "brief"], "Display brief identity and similarity"), _Option(["-similarity", "similarity"], "Display percent identity and similarity"), _Option(["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"), _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), _Option(["-aformat", "aformat"], "Display output in a different specified output format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class NeedleCommandline(_EmbossCommandLine): """Commandline object for the needle program from EMBOSS. """ def __init__(self, cmd="needle", **kwargs): self.parameters = [ _Option(["-asequence", "asequence"], "First sequence to align", filename=True, is_required=True), _Option(["-bsequence", "bsequence"], "Second sequence to align", filename=True, is_required=True), _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), _Option(["-gapextend", "gapextend"], "Gap extension penalty", is_required=True), _Option(["-datafile", "datafile"], "Matrix file", filename=True), _Option(["-endweight", "endweight"], "Apply And gap penalties"), _Option(["-endopen", "endopen"], "The score taken away when an end gap is created."), _Option(["-endextend", "endextend"], "The score added to the end gap penality for each base or " "residue in the end gap."), _Switch(["-nobrief", "nobrief"], "Display extended identity and similarity"), _Switch(["-brief", "brief"], "Display brief identity and similarity"), _Option(["-similarity", "similarity"], "Display percent identity and similarity"), _Option(["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"), _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), _Option(["-aformat", "aformat"], "Display output in a different specified output format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class NeedleallCommandline(_EmbossCommandLine): """Commandline object for the needleall program from EMBOSS. """ def __init__(self, cmd="needleall", **kwargs): self.parameters = [ _Option(["-asequence", "asequence"], "First sequence to align", filename=True, is_required=True), _Option(["-bsequence", "bsequence"], "Second sequence to align", filename=True, is_required=True), _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True), _Option(["-gapextend", "gapextend"], "Gap extension penalty", is_required=True), _Option(["-datafile", "datafile"], "Matrix file", filename=True), _Option(["-minscore", "minscore"], "Exclude alignments with scores below this threshold score."), _Option(["-errorfile", "errorfile"], "Error file to be written to."), _Option(["-endweight", "endweight"], "Apply And gap penalties"), _Option(["-endopen", "endopen"], "The score taken away when an end gap is created."), _Option(["-endextend", "endextend"], "The score added to the end gap penality for each base or " "residue in the end gap."), _Switch(["-nobrief", "nobrief"], "Display extended identity and similarity"), _Switch(["-brief", "brief"], "Display brief identity and similarity"), _Option(["-similarity", "similarity"], "Display percent identity and similarity"), _Option(["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"), _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), _Option(["-aformat", "aformat"], "Display output in a different specified output format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class StretcherCommandline(_EmbossCommandLine): """Commandline object for the stretcher program from EMBOSS. """ def __init__(self, cmd="stretcher", **kwargs): self.parameters = [ _Option(["-asequence", "asequence"], "First sequence to align", filename=True, is_required=True), _Option(["-bsequence", "bsequence"], "Second sequence to align", filename=True, is_required=True), _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True, checker_function=lambda value: isinstance(value, int)), _Option(["-gapextend", "gapextend"], "Gap extension penalty", is_required=True, checker_function=lambda value: isinstance(value, int)), _Option(["-datafile", "datafile"], "Matrix file", filename=True), _Option(["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"), _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"), _Option(["-aformat", "aformat"], "Display output in a different specified output format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class FuzznucCommandline(_EmbossCommandLine): """Commandline object for the fuzznuc program from EMBOSS. """ def __init__(self, cmd="fuzznuc", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Sequence database USA", is_required=True), _Option(["-pattern", "pattern"], "Search pattern, using standard IUPAC one-letter codes", is_required=True), _Option(["-mismatch", "mismatch"], "Number of mismatches", is_required=True), _Option(["-complement", "complement"], "Search complementary strand"), _Option(["-rformat", "rformat"], "Specify the report format to output in.")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class Est2GenomeCommandline(_EmbossCommandLine): """Commandline object for the est2genome program from EMBOSS. """ def __init__(self, cmd="est2genome", **kwargs): self.parameters = [ _Option(["-est", "est"], "EST sequence(s)", is_required=True), _Option(["-genome", "genome"], "Genomic sequence", is_required=True), _Option(["-match", "match"], "Score for matching two bases"), _Option(["-mismatch", "mismatch"], "Cost for mismatching two bases"), _Option(["-gappenalty", "gappenalty"], "Cost for deleting a single base in either sequence, " "excluding introns"), _Option(["-intronpenalty", "intronpenalty"], "Cost for an intron, independent of length."), _Option(["-splicepenalty", "splicepenalty"], "Cost for an intron, independent of length " "and starting/ending on donor-acceptor sites"), _Option(["-minscore", "minscore"], "Exclude alignments with scores below this threshold score."), _Option(["-reverse", "reverse"], "Reverse the orientation of the EST sequence"), _Option(["-splice", "splice"], "Use donor and acceptor splice sites."), _Option(["-mode", "mode"], "This determines the comparion mode. 'both', 'forward' " "'reverse'"), _Option(["-best", "best"], "You can print out all comparisons instead of just the best"), _Option(["-space", "space"], "for linear-space recursion."), _Option(["-shuffle", "shuffle"], "Shuffle"), _Option(["-seed", "seed"], "Random number seed"), _Option(["-align", "align"], "Show the alignment."), _Option(["-width", "width"], "Alignment width") ] _EmbossCommandLine.__init__(self, cmd, **kwargs) class ETandemCommandline(_EmbossCommandLine): """Commandline object for the etandem program from EMBOSS. """ def __init__(self, cmd="etandem", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Sequence", filename=True, is_required=True), _Option(["-minrepeat", "minrepeat"], "Minimum repeat size", is_required=True), _Option(["-maxrepeat", "maxrepeat"], "Maximum repeat size", is_required=True), _Option(["-threshold", "threshold"], "Threshold score"), _Option(["-mismatch", "mismatch"], "Allow N as a mismatch"), _Option(["-uniform", "uniform"], "Allow uniform consensus"), _Option(["-rformat", "rformat"], "Output report format")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class EInvertedCommandline(_EmbossCommandLine): """Commandline object for the einverted program from EMBOSS. """ def __init__(self, cmd="einverted", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Sequence", filename=True, is_required=True), _Option(["-gap", "gap"], "Gap penalty", filename=True, is_required=True), _Option(["-threshold", "threshold"], "Minimum score threshold", is_required=True), _Option(["-match", "match"], "Match score", is_required=True), _Option(["-mismatch", "mismatch"], "Mismatch score", is_required=True), _Option(["-maxrepeat", "maxrepeat"], "Maximum separation between the start and end of repeat"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) class PalindromeCommandline(_EmbossCommandLine): """Commandline object for the palindrome program from EMBOSS. """ def __init__(self, cmd="palindrome", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Sequence", filename=True, is_required=True), _Option(["-minpallen", "minpallen"], "Minimum palindrome length", is_required=True), _Option(["-maxpallen", "maxpallen"], "Maximum palindrome length", is_required=True), _Option(["-gaplimit", "gaplimit"], "Maximum gap between repeats", is_required=True), _Option(["-nummismatches", "nummismatches"], "Number of mismatches allowed", is_required=True), _Option(["-overlap", "overlap"], "Report overlapping matches", is_required=True), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) class TranalignCommandline(_EmbossCommandLine): """Commandline object for the tranalign program from EMBOSS. """ def __init__(self, cmd="tranalign", **kwargs): self.parameters = [ _Option(["-asequence", "asequence"], "Nucleotide sequences to be aligned.", filename=True, is_required=True), _Option(["-bsequence", "bsequence"], "Protein sequence alignment", filename=True, is_required=True), _Option(["-outseq", "outseq"], "Output sequence file.", filename=True, is_required=True), _Option(["-table", "table"], "Code to use")] _EmbossCommandLine.__init__(self, cmd, **kwargs) class DiffseqCommandline(_EmbossCommandLine): """Commandline object for the diffseq program from EMBOSS. """ def __init__(self, cmd="diffseq", **kwargs): self.parameters = [ _Option(["-asequence", "asequence"], "First sequence to compare", filename=True, is_required=True), _Option(["-bsequence", "bsequence"], "Second sequence to compare", filename=True, is_required=True), _Option(["-wordsize", "wordsize"], "Word size to use for comparisons (10 default)", is_required=True), _Option(["-aoutfeat", "aoutfeat"], "File for output of first sequence's features", filename=True, is_required=True), _Option(["-boutfeat", "boutfeat"], "File for output of second sequence's features", filename=True, is_required=True), _Option(["-rformat", "rformat"], "Output report file format") ] _EmbossCommandLine.__init__(self, cmd, **kwargs) class IepCommandline(_EmbossCommandLine): """Commandline for EMBOSS iep: calculated isoelectric point and charge. Example: >>> from Bio.Emboss.Applications import IepCommandline >>> iep_cline = IepCommandline(sequence="proteins.faa", ... outfile="proteins.txt") >>> print(iep_cline) iep -outfile=proteins.txt -sequence=proteins.faa You would typically run the command line with iep_cline() or via the Python subprocess module, as described in the Biopython tutorial. """ def __init__(self, cmd="iep", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Protein sequence(s) filename", filename=True, is_required=True), _Option(["-amino", "amino"], """Number of N-termini Integer 0 (default) or more. """), _Option(["-carboxyl", "carboxyl"], """Number of C-termini Integer 0 (default) or more. """), _Option(["-lysinemodified", "lysinemodified"], """Number of modified lysines Integer 0 (default) or more. """), _Option(["-disulphides", "disulphides"], """Number of disulphide bridges Integer 0 (default) or more. """), #Should we implement the -termini switch as well? _Option(["-notermini", "notermini"], "Exclude (True) or include (False) charge at N and C terminus."), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) #seqret uses -outseq, not -outfile, so use the base class: class SeqretCommandline(_EmbossMinimalCommandLine): """Commandline object for the seqret program from EMBOSS. This tool allows you to interconvert between different sequence file formats (e.g. GenBank to FASTA). Combining Biopython's Bio.SeqIO module with seqret using a suitable intermediate file format can allow you to read/write to an even wider range of file formats. This wrapper currently only supports the core functionality, things like feature tables (in EMBOSS 6.1.0 onwards) are not yet included. """ def __init__(self, cmd="seqret", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Input sequence(s) filename", filename=True), _Option(["-outseq", "outseq"], "Output sequence file.", filename=True), _Option(["-sformat", "sformat"], "Input sequence(s) format (e.g. fasta, genbank)"), _Option(["-osformat", "osformat"], "Output sequence(s) format (e.g. fasta, genbank)"), ] _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs) def _validate(self): #Check the outfile, filter, or stdout option has been set. #We can't simply do this via the required flag for the outfile #output - this seems the simplest solution. if not (self.outseq or self.filter or self.stdout): raise ValueError("You must either set outfile (output filename), " "or enable filter or stdout (output to stdout).") if not (self.sequence or self.filter or self.stdint): raise ValueError("You must either set sequence (input filename), " "or enable filter or stdin (input from stdin).") return _EmbossMinimalCommandLine._validate(self) class SeqmatchallCommandline(_EmbossCommandLine): """ Commandline object for the seqmatchall program from EMBOSS e.g. >>> cline = SeqmatchallCommandline(sequence="opuntia.fasta", outfile="opuntia.txt") >>> cline.auto = True >>> cline.wordsize = 18 >>> cline.aformat = "pair" >>> print(cline) seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair """ def __init__(self, cmd="seqmatchall", **kwargs): self.parameters = [ _Option(["-sequence", "sequence"], "Readable set of sequences", filename=True, is_required=True), _Option(["-wordsize", "wordsize"], "Word size (Integer 2 or more, default 4)"), _Option(["-aformat", "aformat"], "Display output in a different specified output format"), ] _EmbossCommandLine.__init__(self, cmd, **kwargs) def _test(): """Run the Bio.Emboss.Applications module doctests.""" import doctest doctest.testmod() if __name__ == "__main__": #Run the doctests _test()