# Copyright 2009 by Cymon J. Cox.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Command line wrapper for the multiple alignment program DIALIGN2-2.
"""

from __future__ import print_function

__docformat__ = "epytext en"  # Don't just use plain text in epydoc API pages!

from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline


class DialignCommandline(AbstractCommandline):
    """Command line wrapper for the multiple alignment program DIALIGN2-2.

    http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html

    Example:

    To align a FASTA file (unaligned.fasta) with the output files names
    aligned.* including a FASTA output file (aligned.fa), use:

    >>> from Bio.Align.Applications import DialignCommandline
    >>> dialign_cline = DialignCommandline(input="unaligned.fasta",
    ...                                    fn="aligned", fa=True)
    >>> print(dialign_cline)
    dialign2-2 -fa -fn aligned unaligned.fasta

    You would typically run the command line with dialign_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence
    Alignment at BiBiServ. Nucleic Acids Research 32, W33-W36.

    Last checked against version: 2.2
    """

    def __init__(self, cmd="dialign2-2", **kwargs):
        """Initialize the wrapper.

        Arguments:
         - cmd - name or path of the DIALIGN executable
           (default "dialign2-2").
         - kwargs - initial values for any of the parameters declared
           below, keyed by their Python names (e.g. ``fa=True``,
           ``fn="aligned"``, ``input="unaligned.fasta"``); they are
           forwarded unchanged to AbstractCommandline.__init__.
        """
        self.program_name = cmd
        # The parameters are listed in the order in which they should
        # appear on the command line; the input file name comes last.
        self.parameters = [
            _Switch(["-afc", "afc"],
                    "Creates additional output file '*.afc' "
                    "containing data of all fragments considered "
                    "for alignment WARNING: this file can be HUGE !"),
            _Switch(["-afc_v", "afc_v"],
                    "Like '-afc' but verbose: fragments are explicitly "
                    "printed. WARNING: this file can be EVEN BIGGER !"),
            _Switch(["-anc", "anc"],
                    "Anchored alignment. Requires a file <seq_file>.anc "
                    "containing anchor points."),
            _Switch(["-cs", "cs"],
                    "If segments are translated, not only the `Watson "
                    "strand' but also the `Crick strand' is looked at."),
            _Switch(["-cw", "cw"],
                    "Additional output file in CLUSTAL W format."),
            _Switch(["-ds", "ds"],
                    "`dna alignment speed up' - non-translated nucleic acid "
                    "fragments are taken into account only if they start "
                    "with at least two matches. Speeds up DNA alignment at "
                    "the expense of sensitivity."),
            _Switch(["-fa", "fa"],
                    "Additional output file in FASTA format."),
            _Switch(["-ff", "ff"],
                    "Creates file *.frg containing information about all "
                    "fragments that are part of the respective optimal "
                    "pairwise alignmnets plus information about "
                    "consistency in the multiple alignment"),
            _Option(["-fn", "fn"],
                    "Output files are named <out_file>.<extension>.",
                    equate=False),
            _Switch(["-fop", "fop"],
                    "Creates file *.fop containing coordinates of all "
                    "fragments that are part of the respective pairwise alignments."),
            _Switch(["-fsm", "fsm"],
                    "Creates file *.fsm containing coordinates of all "
                    "fragments that are part of the final alignment"),
            _Switch(["-iw", "iw"],
                    "Overlap weights switched off (by default, overlap "
                    "weights are used if up to 35 sequences are aligned). "
                    "This option speeds up the alignment but may lead "
                    "to reduced alignment quality."),
            _Switch(["-lgs", "lgs"],
                    "`long genomic sequences' - combines the following "
                    "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                    "-fop, -ff, -cs, -ds, -pst "),
            _Switch(["-lgs_t", "lgs_t"],
                    "Like '-lgs' but with all segment pairs assessed "
                    "at the peptide level (rather than 'mixed alignments' "
                    "as with the '-lgs' option). Therefore faster than "
                    "-lgs but not very sensitive for non-coding regions."),
            _Option(["-lmax", "lmax"],
                    "Maximum fragment length = x  (default: x = 40 or "
                    "x = 120 for `translated' fragments). Shorter x "
                    "speeds up the program but may affect alignment quality.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-lo", "lo"],
                    "(Long Output) Additional file *.log with information "
                    "about fragments selected for pairwise alignment and "
                    "about consistency in multi-alignment proceedure."),
            _Switch(["-ma", "ma"],
                    "`mixed alignments' consisting of P-fragments and "
                    "N-fragments if nucleic acid sequences are aligned."),
            _Switch(["-mask", "mask"],
                    "Residues not belonging to selected fragments are "
                    "replaced by `*' characters in output alignment "
                    "(rather than being printed in lower-case characters)"),
            _Switch(["-mat", "mat"],
                    "Creates file *mat with substitution counts derived "
                    "from the fragments that have been selected for alignment."),
            # NOTE(review): the help text refers to a threshold 't', which
            # suggests DIALIGN expects a value after -mat_thr; left as a
            # switch until that is confirmed against the DIALIGN manual.
            _Switch(["-mat_thr", "mat_thr"],
                    "Like '-mat' but only fragments with weight score "
                    "> t are considered"),
            _Switch(["-max_link", "max_link"],
                    "'maximum linkage' clustering used to construct "
                    "sequence tree (instead of UPGMA)."),
            _Switch(["-min_link", "min_link"],
                    "'minimum linkage' clustering used."),
            _Option(["-mot", "mot"],
                    "'motif' option.",
                    equate=False),
            _Switch(["-msf", "msf"],
                    "Separate output file in MSF format."),
            _Switch(["-n", "n"],
                    "Input sequences are nucleic acid sequences. "
                    "No translation of fragments."),
            _Switch(["-nt", "nt"],
                    "Input sequences are nucleic acid sequences and "
                    "`nucleic acid segments' are translated to `peptide "
                    "segments'."),
            _Switch(["-nta", "nta"],
                    "`no textual alignment' - textual alignment suppressed. "
                    "This option makes sense if other output files are of "
                    "intrest -- e.g. the fragment files created with -ff, "
                    "-fop, -fsm or -lo."),
            _Switch(["-o", "o"],
                    "Fast version, resulting alignments may be slightly "
                    "different."),
            _Switch(["-ow", "ow"],
                    "Overlap weights enforced (By default, overlap weights "
                    "are used only if up to 35 sequences are aligned since "
                    "calculating overlap weights is time consuming)."),
            _Switch(["-pst", "pst"],
                    "'print status'. Creates and updates a file *.sta with "
                    "information about the current status of the program "
                    "run.  This option is recommended if large data sets "
                    "are aligned since it allows the user to estimate the "
                    "remaining running time."),
            # -smin carries a numeric value (cf. "-smin 8" in the -lgs help
            # text above), so it has to be an _Option; as a value-less
            # _Switch it could only ever emit a bare "-smin".
            _Option(["-smin", "smin"],
                    "Minimum similarity value for first residue pair "
                    "(or codon pair) in fragments. Speeds up protein "
                    "alignment or alignment of translated DNA fragments "
                    "at the expense of sensitivity.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-stars", "stars"],
                    "Maximum number of `*' characters indicating degree "
                    "of local similarity among sequences. By default, no "
                    "stars are used but numbers between 0 and 9, instead.",
                    checker_function=lambda x: x in range(0, 10),
                    equate=False),
            _Switch(["-stdo", "stdo"],
                    "Results written to standard output."),
            _Switch(["-ta", "ta"],
                    "Standard textual alignment printed (overrides "
                    "suppression of textual alignments in special "
                    "options, e.g. -lgs)"),
            _Option(["-thr", "thr"],
                    "Threshold T = x.",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-xfr", "xfr"],
                    "'exclude fragments' - list of fragments can be "
                    "specified that are NOT considered for pairwise alignment"),
            _Argument(["input"],
                      "Input file name. Must be FASTA format",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)


def _test():
    """Execute the doctests embedded in this module (PRIVATE).

    A short banner is printed before the run and "Done" afterwards;
    doctest itself reports any failing examples.
    """
    import doctest

    print("Running modules doctests...")
    doctest.testmod()
    print("Done")

# When executed as a script (rather than imported), run the doctests.
if __name__ == "__main__":
    _test()