# -*- coding: utf-8 -*-
# Copyright 2011 by Andreas Wilm. All rights reserved.
# Based on ClustalW wrapper copyright 2009 by Cymon J. Cox.
#
# Wrapper for Clustal Omega by Andreas Wilm (2011). Used _Clustalw.py
# as template.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Command line wrapper for the multiple alignment program Clustal Omega.
"""

from __future__ import print_function

__docformat__ = "epytext en"  # Don't just use plain text in epydoc API pages!

from Bio.Application import _Option, _Switch, AbstractCommandline


class ClustalOmegaCommandline(AbstractCommandline):
    """Command line wrapper for clustal omega

    http://www.clustal.org/omega

    Example:

    >>> from Bio.Align.Applications import ClustalOmegaCommandline
    >>> in_file = "unaligned.fasta"
    >>> out_file = "aligned.fasta"
    >>> clustalomega_cline = ClustalOmegaCommandline(infile=in_file, outfile=out_file, verbose=True, auto=True)
    >>> print(clustalomega_cline)
    clustalo -i unaligned.fasta -o aligned.fasta --auto -v


    You would typically run the command line with clustalomega_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    Citation:

    Sievers F, Wilm A, Dineen DG, Gibson TJ, Karplus K, Li W, Lopez R,
    McWilliam H, Remmert M, Söding J, Thompson JD, Higgins DG (2011).
    Fast, scalable generation of high-quality protein multiple
    sequence alignments using Clustal Omega.
    Molecular Systems Biology 7:539 doi:10.1038/msb.2011.75

    Last checked against versions: 1.2.0
    """
    def __init__(self, cmd="clustalo", **kwargs):
        # order parameters in the same order as clustalo --help
        self.parameters = \
            [
            # Sequence Input
            _Option(["-i", "--in", "--infile", "infile"],
                    "Multiple sequence input file",
                    filename=True,
                    equate=False),
            _Option(["--hmm-in", "HMM input", "hmm_input"],
                    "HMM input files",
                    filename=True,
                    equate=False),
            _Switch(["--dealign", "dealign"],
                    "Dealign input sequences"),
            _Option(["--profile1", "--p1", "profile1"],
                    "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                    filename=True,
                    equate=False),
            _Option(["--profile2", "--p2", "profile2"],
                    "Pre-aligned multiple sequence file (aligned columns will be kept fix).",
                    filename=True,
                    equate=False),
            _Option(["-t", "--seqtype", "seqtype"],
                    "{Protein, RNA, DNA} Force a sequence type (default: auto).",
                    equate=False,
                    checker_function=lambda x: x in ["protein", "rna", "dna",
                                                     "Protein", "RNA", "DNA",
                                                     "PROTEIN"]),
            _Switch(["--is-profile", "isprofile"],
                    "disable check if profile, force profile (default no)"),
            _Option(["--infmt", "infmt"],
                    """Forced sequence input file format (default: auto)

                    Allowed values: a2m, fa[sta], clu[stal], msf, phy[lip], selex, st[ockholm], vie[nna]
                    """,
                    equate=False,
                    checker_function=lambda x: x in ["a2m", "fa", "fasta",
                                                     "clu", "clustal",
                                                     "msf",
                                                     "phy", "phylip",
                                                     "selex",
                                                     "st", "stockholm",
                                                     "vie", "vienna"]),

            # Clustering
            _Option(["--distmat-in", "distmat_in"],
                    "Pairwise distance matrix input file (skips distance computation).",
                    filename=True,
                    equate=False),
            _Option(["--distmat-out", "distmat_out"],
                    "Pairwise distance matrix output file.",
                    filename=True,
                    equate=False),
            _Option(["--guidetree-in", "guidetree_in"],
                    "Guide tree input file (skips distance computation and guide-tree clustering step).",
                    filename=True,
                    equate=False),
            _Option(["--guidetree-out", "guidetree_out"],
                    "Guide tree output file.",
                    filename=True,
                    equate=False),
            _Switch(["--full", "distmat_full"],
                    "Use full distance matrix for guide-tree calculation (slow; mBed is default)"),
            _Switch(["--full-iter", "distmat_full_iter"],
                    "Use full distance matrix for guide-tree calculation during iteration (mBed is default)"),
            _Option(["--cluster-size", "clustersize"],
                    "soft maximum of sequences in sub-clusters",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--clustering-out", "clusteringout"],
                    "Clustering output file",
                    filename=True),
            _Switch(["--use-kimura", "usekimura"],
                    "use Kimura distance correction for aligned sequences (default no)"),
            _Switch(["--percent-id", "percentid"],
                    "convert distances into percent identities (default no)"),

            # Alignment Output
            _Option(["-o", "--out", "--outfile", "outfile"],
                    "Multiple sequence alignment output file (default: stdout).",
                    filename=True,
                    equate=False),
            _Option(["--outfmt", "outfmt"],
                    "MSA output file format:"
                    " a2m=fa[sta],clu[stal],msf,phy[lip],selex,st[ockholm],vie[nna]"
                    " (default: fasta).",
                    equate=False,
                    checker_function=lambda x: x in ["a2m", "fa", "fasta",
                                                     "clu", "clustal",
                                                     "msf",
                                                     "phy", "phylip",
                                                     "selex",
                                                     "st", "stockholm",
                                                     "vie", "vienna"]),
            _Switch(["--residuenumber", "--resno", "residuenumber"],
                    "in Clustal format print residue numbers (default no)"),
            _Option(["--wrap", "wrap"],
                    "number of residues before line-wrap in output",
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--output-order", "outputorder"],
                    "MSA output order like in input/guide-tree",
                    checker_function=lambda x: x in ["input-order", "tree-order"]),

           # Iteration
            _Option(["--iterations", "--iter", "iterations"],
                    "Number of (combined guide-tree/HMM) iterations",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--max-guidetree-iterations", "max_guidetree_iterations"],
                    "Maximum number of guidetree iterations",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--max-hmm-iterations", "max_hmm_iterations"],
                    "Maximum number of HMM iterations",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),

            # Limits (will exit early, if exceeded):
            _Option(["--maxnumseq", "maxnumseq"],
                    "Maximum allowed number of sequences",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["--maxseqlen", "maxseqlen"],
                    "Maximum allowed sequence length",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),

            # Miscellaneous:
            _Switch(["--auto", "auto"],
                    "Set options automatically (might overwrite some of your options)"),
            _Option(["--threads", "threads"],
                    "Number of processors to use",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-l", "--log", "log"],
                    "Log all non-essential output to this file.",
                    filename=True,
                    equate=False),
            _Switch(["-h", "--help", "help"],
                    "Print help and exit."),
            _Switch(["-v", "--verbose", "verbose"],
                    "Verbose output"),
            _Switch(["--version", "version"],
                    "Print version information and exit"),
            _Switch(["--long-version", "long_version"],
                    "Print long version information and exit"),
            _Switch(["--force", "force"],
                    "Force file overwriting."),

            ]
        AbstractCommandline.__init__(self, cmd, **kwargs)


def _test():
    """Run the module's doctests (PRIVATE)."""
    print("Running ClustalOmega doctests...")
    import doctest
    doctest.testmod()
    print("Done")

if __name__ == "__main__":
    _test()