#!/usr/bin/env python

import os
import os.path
import re
import subprocess
import sys
import time

#  Default settings; the command-line option loop further down may override
#  most of them.

#  Filter commands that are always inserted into the pipeline, one argv list
#  per pipeline stage.
stdopt    = [["jumble"], ["quickadd"], ["treefile"]]
#  Flag text describing the cleanup mode; blanked later when cleanup is on.
cleanflag = "-noclean"
#  Seed for the first bootstrap: process ID followed by the current minutes
#  and seconds (the Python equivalent of the shell's  $$`date +%M%S`).
seed      = "%d%s" % (os.getpid(), time.strftime("%M%S"))
#  Number of bootstrap samples still to be analysed.
remaining = 1
#  Maximum number of jumbled input orders tried per bootstrap seed.
jumbles   = 10
#  Nice value used when running fastDNAml.
nice      = 10
#  Whether per-seed output files are reduced to a summary afterwards.
cleanUp   = True

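#  NOTE: this remark appears to be carried over from the shell version of this
#  script and does not describe the Python option loop below.  There, the
#  spaces in the echo and grep were required because of a "feature" that
#  causes /bin/sh echo to consume ANY leading argument that begins with -n.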

#  Flags that take a value: flag -> (name of the setting, converter).
_VALUE_FLAGS = {
    "-max":   ("jumbles", int),
    "-m":     ("jumbles", int),
    "-nice":  ("nice", int),
    "-n":     ("nice", int),
    "-seed":  ("seed", str),
    "-s":     ("seed", str),
    "-boots": ("remaining", int),
    "-b":     ("remaining", int),
}


def parse_options(argv):
    """Parse the leading option flags of *argv* (argv[0] is the program name).

    Scanning stops at the first argument that does not start with "-", or
    just after a lone "-" (which explicitly ends the options).

    Returns a tuple (index, overrides):
      index     -- position in argv of the first positional argument.  On an
                   unknown flag, a missing value or a non-numeric value for a
                   numeric flag, a "Bad flag" message is printed and index is
                   len(argv), so no positional arguments are left and the
                   caller ends up showing the usage text.
      overrides -- dict mapping setting names ("cleanUp", "jumbles", "nice",
                   "seed", "remaining") to the values given on the command
                   line.  Numeric settings are converted to int; the seed is
                   kept as a string.
    """
    overrides = {}
    index = 1
    while index < len(argv):
        option = argv[index]
        if not option.startswith("-"):
            break
        if option == "-":
            index += 1
            break
        if option in ("-noclean", "-c"):
            overrides["cleanUp"] = False
            index += 1
            continue
        try:
            name, convert = _VALUE_FLAGS[option]
            # The value is the argument FOLLOWING the flag.
            overrides[name] = convert(argv[index + 1])
        except (KeyError, IndexError, ValueError):
            print("Bad flag:  " + option)
            return len(argv), overrides
        index += 2
    return index, overrides


#  Apply the command line on top of the defaults defined earlier.  Only the
#  settings actually present on the command line are reassigned.
option_count, _overrides = parse_options(sys.argv)
if "cleanUp" in _overrides:
    cleanUp = _overrides["cleanUp"]
if "jumbles" in _overrides:
    jumbles = _overrides["jumbles"]
if "nice" in _overrides:
    nice = _overrides["nice"]
if "seed" in _overrides:
    seed = _overrides["seed"]
if "remaining" in _overrides:
    remaining = _overrides["remaining"]

#    subprocess.call(["fastDNAml_boot.py", "-boots", remaining, "-max", jumbles, cleanflag, "-nice", nice, "$@"])
# $@
#  Main driver.  Relies on names set earlier in the file: option_count,
#  stdopt, cleanflag, seed, remaining, jumbles, nice and cleanUp.

def _usage():
    """Print the command-line help text and exit with a failure status."""
    print("""
Usage: fastDNAml_boot.py  [-boots nboot]  [-seed seed]  \\
              [-max maxjumble]  [-nice nicevalue]  [-noclean]  \\
              in_file  n_best  [ "dnaml_opt1 [ | dnaml_opt2 [...]]" ]

For the current bootstrap seed, the sequence input order is jumbled (up to
maxjumble times) until the same best tree is found n_best times.  The output
files are then reduced to a summary of the scores produced by jumbling, and one
example of the best tree.  The process is then repeated with new
bootstrap seeds until nboot samples have been analyzed.

Boot, jumble, treefile and quickadd are included by the script and should not
be specified by the user or in the data file.  Additional fastDNAml
program options are enclosed in quotes, and separated by vertical bars (|).

Flags and parameters:

    in_file -- name of the input data file
    n_best -- input order is jumbled (up to maxjumble times) until same tree
              is found n_best times
    -boots nboot -- number of different bootstrap samples (Default=1)
    -seed seed -- seed for first bootstrap (Default is based on the process
                  ID and time of day)
    -max maxjumble -- maximum attempts at replicating inferred tree
                      (Default=10)
    -nice nicevalue -- run fastDNAml with specified nice value (Default=10)
    -noclean -- inhibits cleanup of the output files for the individual seeds
""")
    sys.exit(1)


def _jumble_outputs(out):
    """Return the regular files in the current directory named <out>.<digit>*.

    Equivalent of the shell glob  $out.[0-9]*  (the name is escaped and the
    match is anchored at the start of the file name).
    """
    pattern = re.compile(re.escape(out) + r"\.[0-9]")
    return sorted(name for name in os.listdir(os.getcwd())
                  if pattern.match(name) and os.path.isfile(name))


def _read_likelihoods(filenames):
    """Collect the value of every 'Ln Likelihood = <value>' line in the files.

    Scores are accumulated across ALL files.  Lines whose value cannot be
    parsed as a number are skipped.
    """
    scores = []
    for filename in filenames:
        with open(filename, "r") as handle:
            for line in handle:
                if not line.startswith("Ln Likelihood"):
                    continue
                try:
                    scores.append(float(line.split("=", 1)[1]))
                except (IndexError, ValueError):
                    pass
    return scores


def _count_best(scores):
    """Return how many scores lie within 0.001 of the best (largest) score.

    The best score itself is counted, matching the awk expression
    NR==1{b=$1-0.001} $1>=b{c++}  applied to the descending-sorted scores.
    """
    if not scores:
        return 0
    threshold = max(scores) - 0.001
    return sum(1 for score in scores if score >= threshold)


def _run_jumble(infile, seed, opts, nice, out):
    """Run one  bootstrap seed < infile | opts... | nice out.PID fastDNAml out.

    Output of the final stage is discarded.  Returns the exit status of the
    final stage.
    """
    with open(infile, "r") as source:
        with open(os.devnull, "w") as sink:
            stages = [subprocess.Popen(["bootstrap", seed], stdin=source,
                                       stdout=subprocess.PIPE)]
            for command in opts:
                upstream = stages[-1].stdout
                stages.append(subprocess.Popen(command, stdin=upstream,
                                               stdout=subprocess.PIPE))
                # Drop the parent's copy so the upstream stage sees SIGPIPE
                # if the downstream stage exits early.
                upstream.close()
            last_output = stages[-1].stdout
            # Lower the priority of the child only; os.nice() would change
            # the priority of this script itself, and cumulatively.
            status = subprocess.call(
                ["nice", "-n", str(nice), "out.PID", "fastDNAml", out],
                stdin=last_output, stdout=sink)
            last_output.close()
            for stage in stages:
                stage.wait()
    return status


def _fresh_seed(used):
    """Return a new pid+time based seed whose padded form is not in *used*."""
    candidate = int("%d%s" % (os.getpid(), time.strftime("%M%S")))
    while str(candidate).zfill(9) in used:
        candidate += 1
    return str(candidate)


#  Positional arguments:  in_file  n_best  [ "opt1 | opt2 | ..." ]
positional = sys.argv[option_count:]
if len(positional) == 2:
    opts = list(stdopt)
elif len(positional) == 3:
    # Extra options are separated by vertical bars; each becomes one stage.
    opts = stdopt + [command.split()
                     for command in positional[2].split("|")
                     if command.strip()]
else:
    _usage()

try:
    n_best    = int(positional[1])
    remaining = int(remaining)
    jumbles   = int(jumbles)
    nice      = int(nice)
except ValueError:
    _usage()

if cleanUp:
    cleanflag = ""

#  Locate the input file, trying the bare name, then .phy, then .phylip.
target = positional[0]
if os.path.exists(target):
    root   = re.sub(r"\.phy(lip)?$", "", target)
    infile = target
elif os.path.exists(target + ".phy"):
    root   = target
    infile = target + ".phy"
elif os.path.exists(target + ".phylip"):
    root   = target
    infile = target + ".phylip"
else:
    print("fastDNAml_boot.py: Unable to find input file: " + target)
    sys.exit(1)

used_seeds = set()
while remaining > 0:
    # `echo $seed | awk '{printf("%09d",$1)}'`
    seed = str(seed).zfill(9)
    used_seeds.add(seed)
    out = os.path.basename(root + "_" + seed)

    #  Check for reuse of same random seed:
    if not os.path.exists(out + ".tree") and not os.path.exists(out + ".out"):
        #  Loop over jumble orders:
        while True:
            if cleanUp and os.path.exists(out + ".summary"):
                print("")
                print("fastDNAml_boot.py: Jumbling stopped by existence of summary file:")
                print(out + ".summary")
                print("")
                break

            scores  = _read_likelihoods(_jumble_outputs(out))
            nJumble = len(scores)
            nBest   = _count_best(scores)
            if nBest >= n_best or nJumble >= jumbles:
                break

            status = _run_jumble(infile, seed, opts, nice, out)
            if status != 0:
                # A failed run produces no new score; stop instead of
                # retrying forever.
                sys.exit(status)

        if cleanUp:
            subprocess.call(["clean_jumbles", out])

    #  Count this bootstrap whether or not it had to be computed, and pick a
    #  new seed for the next one.
    remaining -= 1
    if remaining > 0:
        seed = _fresh_seed(used_seeds)