#!/usr/bin/env python

import re
import os
import os.path
import subprocess
import sys

# Commands that are always piped between `jumble` and fastDNAml.  Each inner
# list is the argv of one command in that pipeline.
stdopt = [
    ["quickadd"],
    ["treefile"],
]

# Defaults, overridden by -max, -nice and -noclean on the command line.
jumbles, nice = 10, 10
cleanUp = True

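#  NOTE: this remark appears to be carried over from a /bin/sh version of
#  this script: there, the spaces in the echo and grep are required because of
#  a "feature" that causes /bin/sh echo to consume ANY leading argument that
#  begins with -n.  Nothing in the Python code below calls echo or grep.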

option_count = 1
while option_count < len(sys.argv[1:]):
    option = sys.argv[option_count]
    if not option.startswith("-"):
        break
    elif option == "-":
        option_count += 1
        break
    else:
        if   option == "-noclean" or option == "-c":
            cleanUp   = False
        elif option == "-max" or option == "-m":
            jumbles   = sys.argv[option_count]
            option_count += 1
        elif option == "-nice" or option == "-n":
            nice      = sys.argv[option_count]
            option_count += 1
        else:
            print "Bad flag:  $*"
            option_count = len(sys.argv)
            break
        option_count += 1

# After the flags, exactly two arguments (in_file, n_best) or three (plus one
# quoted string of extra fastDNAml options) must remain.
_positional = len(sys.argv) - option_count

if _positional in (2, 3):
    # Copy the defaults so the module-level stdopt list is never mutated.
    opts = [list(_stage) for _stage in stdopt]
    if _positional == 3:
        # "opt1 args | opt2 args" becomes one argv list per |-separated
        # command, matching how the pipeline is later built stage by stage.
        for _stage in sys.argv[option_count + 2].split("|"):
            if _stage.split():
                opts.append(_stage.split())
else:
    print("""
Usage: fastDNAml_loop.py [-max maxjumble] [-noclean] [-nice nicevalue] \\
                in_file  n_best  [ "dnaml_opt1 [ | dnaml_opt2 [...]]" ]

For the given input file, the sequence input order is jumbled (up to maxjumble
times) until the same best tree is found n_best times.  The output files are
then reduced to a summary of the scores produced by jumbling, and one example of
the best tree.

Jumble, treefile and quickadd are included by the script and should not
be specified by the user or in the data file.  Additional fastDNAml
program options are enclosed in quotes, and separated by vertical bars (|).

Flags and parameters:

    in_file -- name of the input data file
    n_best -- input order is jumbled (up to maxjumble times) until same tree
              is found n_best times
    -max maxjumble -- maximum attempts at replicating inferred tree
                      (Default=10)
    -nice nicevalue -- run fastDNAml with specified nice value (Default=10)
    -noclean -- inhibits cleanup of the output files
""")
    # Wrong usage is an error: report it through a non-zero exit status.
    sys.exit(1)

# Locate the data file: accept the name exactly as given, otherwise try it
# with a ".phy" and then a ".phylip" suffix appended.
#   infile -- path that is actually opened
#   root   -- the same path without its .phy/.phylip suffix
#   out    -- basename of root, used as the stem of all output file names
_given = sys.argv[option_count]

if os.path.exists(_given):
    infile = _given
    root = re.sub(r"\.phy(lip)?$", "", _given)
else:
    for _suffix in (".phy", ".phylip"):
        if os.path.exists(_given + _suffix):
            infile = _given + _suffix
            root = _given
            break
    else:
        print("fastDNAml_loop.py: Unable to find input file: " + _given)
        # A missing input file is an error, so exit with a non-zero status.
        sys.exit(1)

out = os.path.basename(root)

#  Don't clobber an existing file: when cleanup is enabled, refuse to run if
#  <out>.tree or <out>.out is already present.

if cleanUp and (os.path.exists(out + ".tree") or os.path.exists(out + ".out")):
    print("""
fastDNAml_loop.py: File(s) with the name(s) %s.out and/or %s.tree
already exist and would be clobbered by 'cleaning' the jumble output
files.  Move them to a new name and try again.
""" % (out, out))
    # Refusing to run is an error condition; signal it to the caller.
    sys.exit(1)

#  Loop over jumble orders:

def _tally_jumbles(paths, tolerance=0.001):
    """Count the likelihood lines in *paths* and how many tie for the best.

    Every line starting with "Ln Likelihood" counts as one completed jumble.
    The number after its "=" is that jumble's score.  Returns
    ``(n_jumble, n_best)`` where ``n_best`` is the number of scores within
    *tolerance* of the highest score; the highest score itself is included.
    With no usable scores ``n_best`` is 0.
    """
    n_jumble = 0
    scores = []
    for path in paths:
        with open(path, "r") as handle:
            for line in handle:
                if not line.startswith("Ln Likelihood"):
                    continue
                n_jumble += 1
                fields = line.partition("=")[2].split()
                try:
                    scores.append(float(fields[0]))
                except (IndexError, ValueError):
                    # Still a finished jumble, but without a comparable
                    # score it cannot be counted as a best tree.
                    pass
    if not scores:
        return n_jumble, 0
    cutoff = max(scores) - tolerance
    return n_jumble, sum(1 for score in scores if score >= cutoff)


# Output of earlier jumbles: file names that begin with "<out>." and a digit.
_jumble_file = re.compile(re.escape(out) + r"\.[0-9]")

# n_best is the positional argument that follows in_file.
try:
    _n_best_wanted = int(sys.argv[option_count + 1])
except ValueError:
    print("fastDNAml_loop.py: n_best must be an integer: "
          + sys.argv[option_count + 1])
    sys.exit(1)

loopable = True

while loopable:
    files = [filename for filename in os.listdir(os.getcwd())
             if _jumble_file.match(filename) and os.path.isfile(filename)]

    if cleanUp and os.path.exists(out + ".summary"):
        print("""
fastDNAml_loop.py: Jumbling stopped by existence of summary file:
""" + out + """.summary
""")
        # Zeroing the limit makes the loop condition below false.
        jumbles = 0
        nBest   = 0
        nJumble = 0
    elif files:
        nJumble, nBest = _tally_jumbles(files)
    else:
        nBest = 0
        nJumble = 0

    # Compare as integers; comparing against strings never ends the loop.
    loopable = nBest < _n_best_wanted and nJumble < int(jumbles)

    if loopable:
        # Equivalent of the shell pipeline
        #   jumble < infile | opt1 | opt2 ... |
        #       nice -n NICE out.PID fastDNAml out  >/dev/null || exit
        with open(infile, "r") as infile_h:
            stages = [subprocess.Popen(["jumble"], stdin=infile_h,
                                       stdout=subprocess.PIPE)]
            for option_cmd in opts:
                upstream = stages[-1].stdout
                stages.append(subprocess.Popen(option_cmd, stdin=upstream,
                                               stdout=subprocess.PIPE))
                # The child owns its own copy of the pipe; closing ours lets
                # the producer see a broken pipe if the consumer dies.
                upstream.close()

            # Run only the child under `nice`, so this script's own
            # priority is left untouched from one iteration to the next.
            with open(os.devnull, "w") as sink:
                status = subprocess.call(
                    ["nice", "-n", str(int(nice)), "out.PID", "fastDNAml", out],
                    stdin=stages[-1].stdout, stdout=sink)
            stages[-1].stdout.close()
            for stage in stages:
                stage.wait()

        if status != 0:
            # A failed run would add no new likelihood lines, and the loop
            # would then never terminate.
            sys.exit(status if status > 0 else 1)

# Unless -noclean was given, hand the output stem to the external
# clean_jumbles command.
if cleanUp:
    cleaner_cmd = ["clean_jumbles", out]
    subprocess.call(cleaner_cmd)