#!/usr/bin/env python
"""Run fastDNAml on jumbled input until the best tree has been found n times.

The input file is piped through ``jumble``, the standard option filters
(``quickadd``, ``treefile``) and any user supplied filters, and finally into
``out.PID fastDNAml <out>``.  After every run the ``<out>.<digit>*`` files in
the current directory are scanned for ``Ln Likelihood`` lines.  Jumbling stops
when the best score has been reproduced ``n_best`` times, when ``maxjumble``
runs have been made, or when ``<out>.summary`` already exists.  Unless
``-noclean`` is given, ``clean_jumbles <out>`` is run at the end.

The code is written to run unchanged on Python 2.7 and Python 3.
"""
import re
import os
import os.path
import subprocess
import sys

# Filters that are always inserted between ``jumble`` and fastDNAml.
stdopt = [["quickadd"], ["treefile"]]
# Default maximum number of jumbled runs (-max).
jumbles = 10
# Default niceness increment for fastDNAml (-nice).
nice = 10
# Default for cleaning up the per-run output files (-noclean turns it off).
cleanUp = True

# Scores within this distance of the best score count as "the same tree".
LIKELIHOOD_TOLERANCE = 0.001

# A score line as written to the per-run output files.
LIKELIHOOD_RE = re.compile(r"^Ln Likelihood\s*=(.*)$")

USAGE = """
Usage: fastDNAml_loop.py [-max maxjumble] [-noclean] [-nice nicevalue] \\
              in_file  n_best  [ "dnaml_opt1 [ | dnaml_opt2 [...]]" ]

For the given input file, the sequence input order is jumbled (up to maxjumble
times) until the same best tree is found n_best times.  The output files are
then reduced to a summary of the scores produced by jumbling, and one example
of the best tree.  Jumble, treefile and quickadd are included by the script
and should not be specified by the user or in the data file.  Additional
fastDNAml program options are enclosed in quotes, and separated by vertical
bars (|).

Flags and parameters:

    in_file  -- name of the input data file
    n_best   -- input order is jumbled (up to maxjumble times) until same
                tree is found n_best times
    -max maxjumble  -- maximum attempts at replicating inferred tree (Default=10)
    -nice nicevalue -- run fastDNAml with specified nice value (Default=10)
    -noclean        -- inhibits cleanup of the output files
"""


class UsageError(Exception):
    """Raised by parse_args() when the command line is not acceptable."""


def split_user_options(spec):
    """Split a quoted ``"opt1 args | opt2 args"`` string into commands.

    Returns one argument list per ``|``-separated command; empty segments
    are dropped.
    """
    return [segment.split() for segment in spec.split("|") if segment.strip()]


def parse_args(argv):
    """Parse the command line (without the program name).

    Returns a dict with the keys ``max_jumbles``, ``nice``, ``clean_up``,
    ``in_file``, ``n_best`` and ``opts`` (the list of filter commands).
    Raises UsageError for unknown flags, missing or non-integer values, or a
    wrong number of positional arguments.
    """
    max_jumbles = jumbles
    nice_value = nice
    clean_up = cleanUp

    index = 0
    while index < len(argv):
        option = argv[index]
        if not option.startswith("-"):
            break
        index += 1
        if option == "-":
            # A bare "-" ends flag processing.
            break
        if option in ("-noclean", "-c"):
            clean_up = False
        elif option in ("-max", "-m", "-nice", "-n"):
            # These flags consume the NEXT argument as their integer value.
            if index >= len(argv):
                raise UsageError("Missing value for flag: " + option)
            try:
                value = int(argv[index])
            except ValueError:
                raise UsageError(
                    "Bad value for flag " + option + ": " + argv[index])
            index += 1
            if option in ("-max", "-m"):
                max_jumbles = value
            else:
                nice_value = value
        else:
            raise UsageError("Bad flag: " + option)

    positional = argv[index:]
    if len(positional) not in (2, 3):
        raise UsageError("")
    try:
        n_best = int(positional[1])
    except ValueError:
        raise UsageError("n_best must be an integer: " + positional[1])

    # Copy the defaults so the module-level list is never modified.
    opts = [list(command) for command in stdopt]
    if len(positional) == 3:
        opts.extend(split_user_options(positional[2]))

    return {
        "max_jumbles": max_jumbles,
        "nice": nice_value,
        "clean_up": clean_up,
        "in_file": positional[0],
        "n_best": n_best,
        "opts": opts,
    }


def resolve_input(name):
    """Locate the input file for ``name``.

    Tries ``name``, ``name.phy`` and ``name.phylip`` in that order.  Returns
    ``(root, infile)`` where ``root`` is the name without a ``.phy`` /
    ``.phylip`` suffix, or ``None`` when no such file exists.
    """
    if os.path.exists(name):
        return re.sub(r"\.phy(lip)?$", "", name), name
    for suffix in (".phy", ".phylip"):
        if os.path.exists(name + suffix):
            return name, name + suffix
    return None


def find_jumble_outputs(out, directory=None):
    """Return the sorted paths of regular files named ``<out>.<digit>...``.

    ``directory`` defaults to the current working directory.
    """
    if directory is None:
        directory = os.getcwd()
    # Anchored at the start and escaped, like the shell glob ``$out.[0-9]*``.
    wanted = re.compile(re.escape(out) + r"\.[0-9]")
    paths = [os.path.join(directory, entry)
             for entry in os.listdir(directory) if wanted.match(entry)]
    return sorted(path for path in paths if os.path.isfile(path))


def read_likelihoods(filenames):
    """Collect the score of every ``Ln Likelihood`` line in ``filenames``.

    Returns a list of floats, one per matching line across all files.  A
    line whose value cannot be parsed is recorded as ``-inf`` so that it is
    still counted as an attempt.
    """
    scores = []
    for filename in filenames:
        with open(filename, "r") as handle:
            for line in handle:
                found = LIKELIHOOD_RE.match(line)
                if found is None:
                    continue
                try:
                    scores.append(float(found.group(1)))
                except ValueError:
                    scores.append(float("-inf"))
    return scores


def count_best(scores, tolerance=LIKELIHOOD_TOLERANCE):
    """Count the scores within ``tolerance`` of the highest score.

    The highest score itself is included; an empty list gives 0.
    """
    if not scores:
        return 0
    threshold = max(scores) - tolerance
    return sum(1 for score in scores if score >= threshold)


def run_jumble(infile, opts, nice_value, out):
    """Run one ``jumble | opts... | nice out.PID fastDNAml out`` pipeline.

    ``opts`` is a list of argument lists, one per filter.  The final
    command's standard output is discarded.  Returns the exit status of the
    final command.
    """
    stages = []
    try:
        with open(infile, "r") as source, open(os.devnull, "w") as sink:
            upstream = subprocess.Popen(
                ["jumble"], stdin=source, stdout=subprocess.PIPE)
            stages.append(upstream)
            for command in opts:
                stage = subprocess.Popen(
                    command, stdin=upstream.stdout, stdout=subprocess.PIPE)
                # Drop our copy of the pipe so only the child holds it.
                upstream.stdout.close()
                stages.append(stage)
                upstream = stage
            # Use the nice utility so only fastDNAml is deprioritised; calling
            # os.nice() here would change this script's own priority instead.
            status = subprocess.call(
                ["nice", "-n", str(nice_value), "out.PID", "fastDNAml", out],
                stdin=upstream.stdout, stdout=sink)
            upstream.stdout.close()
    finally:
        for stage in stages:
            stage.wait()
    return status


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    if argv is None:
        argv = sys.argv[1:]

    try:
        config = parse_args(argv)
    except UsageError as error:
        if str(error):
            print(str(error))
        print(USAGE)
        return 1

    resolved = resolve_input(config["in_file"])
    if resolved is None:
        print("fastDNAml_loop.py: Unable to find input file: "
              + config["in_file"])
        return 1
    root, infile = resolved
    out = os.path.basename(root)
    clean_up = config["clean_up"]

    # Don't clobber an existing file
    if clean_up and (os.path.exists(out + ".tree")
                     or os.path.exists(out + ".out")):
        print("""
fastDNAml_loop.py: File(s) with the name(s) """ + out + """.out and/or """
              + out + """.tree
already exist and would be clobbered by 'cleaning' the jumble output
files.  Move them to a new name and try again.
""")
        return 1

    # Loop over jumble orders:
    while True:
        if clean_up and os.path.exists(out + ".summary"):
            print("""
fastDNAml_loop.py: Jumbling stopped by existence of summary file: """
                  + out + """.summary
""")
            break
        scores = read_likelihoods(find_jumble_outputs(out))
        if count_best(scores) >= config["n_best"]:
            break
        if len(scores) >= config["max_jumbles"]:
            break
        status = run_jumble(infile, config["opts"], config["nice"], out)
        if status != 0:
            # A failed run produces no new score; stop instead of looping.
            return status

    if clean_up:
        subprocess.call(["clean_jumbles", out])
    return 0


if __name__ == "__main__":
    sys.exit(main())