#!/usr/bin/env python

#  Copyright (c) 2006, The Regents of the University of California, through 
#  Lawrence Berkeley National Laboratory (subject to receipt of any required
#  approvals from the U.S. Dept. of Energy).  All rights reserved.

#  This software is distributed under the new BSD Open Source License.
#  <http://www.opensource.org/licenses/bsd-license.html>
#  Please see the LICENSE.txt file that should have been included
#  as part of this package.


""" A tool for converting multiple sequence alignments from 
one format to another.
"""

from corebio.utils.deoptparse import DeOptionParser
from optparse import OptionGroup
from corebio import seq_io
from corebio.seq import SeqList, Seq, nucleic_alphabet

import sys

__version__ = "1.0.0"
description = """ A tool for converting multiple sequence alignments from 
one format to another. """


def main(): 
    # ------ Parse Command line ------
    parser = _build_option_parser()                
    (opts, args) = parser.parse_args(sys.argv[1:])
    if args : parser.error("Unparsable arguments: %s " % args)

    seqs = opts.reader.read(opts.fin)
    
    if opts.trans_seg: 
        from corebio.transform import mask_low_complexity
        seqs = SeqList([mask_low_complexity(s) for s in seqs])
    
    if opts.subsample is not None:
        from random import random
        frac = opts.subsample
        ss = []
        for s in seqs :
            if random() < frac : ss.append(s)
        seqs = SeqList(ss)

    if opts.reverse :
        seqs = SeqList([s.reverse() for s in seqs])
 
    if opts.complement :
         seqs = SeqList([Seq(s,alphabet=nucleic_alphabet) for s in seqs]) 
         seqs = SeqList([s.complement() for s in seqs])
 
    
    opts.writer.write( opts.fout, seqs)


def _build_option_parser() :
    parser = DeOptionParser(usage="%prog [options]  < sequence_data.fa > sequence_logo.eps",
        description = description,
        version     = __version__ ,
        add_verbose_options = False
        )    
        
    io_grp = OptionGroup(parser, "Input/Output Options",)
    parser.add_option_group(io_grp)
    
    io_grp.add_option( "-f", "--fin",
        dest="fin",
        action="store",
        type="file_in",
        default=sys.stdin,
        help="Sequence input file (default: stdin)",
        metavar="FILENAME")

    io_grp.add_option("", "--format-in", 
        dest="reader",
        action="store", type ="dict",
        default = seq_io,
        choices = seq_io.format_names(),
        help="Multiple sequence alignment format: (%s)" % 
           ', '.join([ f.names[0] for f in seq_io.formats]),
        metavar="FORMAT")

    io_grp.add_option("-o", "--fout", dest="fout",
        type="file_out",
        default=sys.stdout,
        help="Output file (default: stdout)",
        metavar="FILENAME")
    
    trans_grp = OptionGroup(parser, "Transformations",)
    parser.add_option_group(trans_grp)

    trans_grp.add_option( "", "--seg",
        dest="trans_seg",
        action="store_true",
        default=False,
        help="Mask low complexity regions in protein sequences.",
        metavar="TRUE/FALSE")

    trans_grp.add_option( "", "--subsample",
        dest="subsample",
        action="store",
        type= "float",
        default=None,
        help="Return a random subsample of the sequences.",
        metavar="FRACTION")

    trans_grp.add_option( "", "--reverse",
        dest="reverse",
        action="store_true",
        default=False,
        help="reverse sequences",
        metavar="TRUE/FALSE")

    trans_grp.add_option( "", "--complement",
        dest="complement",
        action="store_true",
        default=False,
        help="complement DNA sequences",
        metavar="TRUE/FALSE")


    # Writers      
    out_formats = []
    for f in seq_io.formats:
        if hasattr(f, "write") : out_formats.append(f)
    out_choices = {}
    for f in out_formats:
        out_choices[ f.names[0] ] = f
    out_names =  [f.names[0] for f in out_formats]
    
    io_grp.add_option("-F", "--format-out", 
        dest="writer",
        action="store", type ="dict",
        default = seq_io.fasta_io,
        choices = out_choices,
        help="Multiple sequence alignment output format: (%s) (Default: fasta)" % 
           ', '.join(out_names),
        metavar="FORMAT")


    return parser


# Standard python voodoo for CLI
if __name__ == "__main__":
    main()