# Copyright 2009 by Osvaldo Zagordi. All rights reserved. # Revisions copyright 2010 by Peter Cock. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Command line wrapper for the short read aligner Novoalign by Novocraft.""" from __future__ import print_function from Bio.Application import _Option, AbstractCommandline class NovoalignCommandline(AbstractCommandline): """Command line wrapper for novoalign by Novocraft. See www.novocraft.com - novoalign is a short read alignment program. Examples -------- >>> from Bio.Sequencing.Applications import NovoalignCommandline >>> novoalign_cline = NovoalignCommandline(database='some_db', ... readfile='some_seq.txt') >>> print(novoalign_cline) novoalign -d some_db -f some_seq.txt As with all the Biopython application wrappers, you can also add or change options after creating the object: >>> novoalign_cline.format = 'PRBnSEQ' >>> novoalign_cline.r_method='0.99' # limited valid values >>> novoalign_cline.fragment = '250 20' # must be given as a string >>> novoalign_cline.miRNA = 100 >>> print(novoalign_cline) novoalign -d some_db -f some_seq.txt -F PRBnSEQ -r 0.99 -i 250 20 -m 100 You would typically run the command line with novoalign_cline() or via the Python subprocess module, as described in the Biopython tutorial. Last checked against version: 2.05.04 """ def __init__(self, cmd="novoalign", **kwargs): """Initialize the class.""" READ_FORMAT = ["FA", "SLXFQ", "STDFQ", "ILMFQ", "PRB", "PRBnSEQ"] REPORT_FORMAT = ["Native", "Pairwise", "SAM"] REPEAT_METHOD = ["None", "Random", "All", "Exhaustive", "0.99"] self.parameters = [ _Option( ["-d", "database"], "database filename", filename=True, equate=False ), _Option(["-f", "readfile"], "read file", filename=True, equate=False), _Option( ["-F", "format"], "Format of read files.\n\nAllowed values: %s" % ", ".join(READ_FORMAT), checker_function=lambda x: x in READ_FORMAT, equate=False, ), # Alignment scoring options _Option( ["-t", "threshold"], "Threshold for alignment score", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-g", "gap_open"], "Gap opening penalty [default: 40]", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-x", "gap_extend"], "Gap extend penalty [default: 15]", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-u", "unconverted"], "Experimental: unconverted cytosines penalty in bisulfite mode\n\n" "Default: no penalty", checker_function=lambda x: isinstance(x, int), equate=False, ), # Quality control and read filtering _Option( ["-l", "good_bases"], "Minimum number of good quality bases [default: log(N_g, 4) + 5]", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-h", "homopolymer"], "Homopolymer read filter [default: 20; disable: negative value]", checker_function=lambda x: isinstance(x, int), equate=False, ), # Read preprocessing options _Option( ["-a", "adapter3"], "Strips a 3' adapter sequence prior to alignment.\n\n" "With paired ends two adapters can be specified", checker_function=lambda x: isinstance(x, str), equate=False, ), _Option( ["-n", "truncate"], "Truncate to specific length before alignment", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-s", "trimming"], "If fail to align, trim by s bases until they map or become shorter than l.\n\n" "Ddefault: 2", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-5", "adapter5"], "Strips a 5' adapter sequence.\n\n" "Similar to -a (adaptor3), but on the 5' end.", checker_function=lambda x: isinstance(x, str), equate=False, ), # Reporting options _Option( ["-o", "report"], "Specifies the report format.\n\nAllowed values: %s\nDefault: Native" % ", ".join(REPORT_FORMAT), checker_function=lambda x: x in REPORT_FORMAT, equate=False, ), _Option( ["-Q", "quality"], "Lower threshold for an alignment to be reported [default: 0]", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-R", "repeats"], "If score difference is higher, report repeats.\n\n" "Otherwise -r read method applies [default: 5]", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-r", "r_method"], "Methods to report reads with multiple matches.\n\n" "Allowed values: %s\n" "'All' and 'Exhaustive' accept limits." % ", ".join(REPEAT_METHOD), checker_function=lambda x: x.split()[0] in REPEAT_METHOD, equate=False, ), _Option( ["-e", "recorded"], "Alignments recorded with score equal to the best.\n\n" "Default: 1000 in default read method, otherwise no limit.", checker_function=lambda x: isinstance(x, int), equate=False, ), _Option( ["-q", "qual_digits"], "Decimal digits for quality scores [default: 0]", checker_function=lambda x: isinstance(x, int), equate=False, ), # Paired end options _Option( ["-i", "fragment"], "Fragment length (2 reads + insert) and standard deviation [default: 250 30]", checker_function=lambda x: len(x.split()) == 2, equate=False, ), _Option( ["-v", "variation"], "Structural variation penalty [default: 70]", checker_function=lambda x: isinstance(x, int), equate=False, ), # miRNA mode _Option( ["-m", "miRNA"], "Sets miRNA mode and optionally sets a value for the region scanned [default: off]", checker_function=lambda x: isinstance(x, int), equate=False, ), # Multithreading _Option( ["-c", "cores"], "Number of threads, disabled on free versions [default: number of cores]", checker_function=lambda x: isinstance(x, int), equate=False, ), # Quality calibrations _Option( ["-k", "read_cal"], "Read quality calibration from file (mismatch counts)", checker_function=lambda x: isinstance(x, str), equate=False, ), _Option( ["-K", "write_cal"], "Accumulate mismatch counts and write to file", checker_function=lambda x: isinstance(x, str), equate=False, ), ] AbstractCommandline.__init__(self, cmd, **kwargs) if __name__ == "__main__": from Bio._utils import run_doctest run_doctest()