#!/usr/bin/env python3 import birchenv import os import os.path import phylip import shutil import subprocess import sys import time import random #Version Feb. 26, 2020 # Run discrete data parsimony programs as a command #Synopsis: discpars.py infile utree ufn method replicates percent\ # TCONMETH PARSTYPE THRESHOLD outgroup jumble numjum\ # termout alltrees outfile treefile alltreefile """ ensure that there are enough command line arguments to parse """ if len(sys.argv) < 16: print("Usage: discpars.py INFILE UFN METHOD REPLICATES PERCENT") print(" TCONMETH PARSTYPE THRESHOLD OUTGROUP JUMBLE NUMJUM TERMOUT") print(" OUTFILE TREEFILE ALLTREEFILE") exit(); #Convert arguments to variables INFILE = sys.argv[1] # bltree: If no file has been chosen for UFN, UFN will be blank. # Simply substituting $UFN into the command for restml.csh would # therefore result in a missing parameter. Here we concatenate # UTREE and UFN in the form UTREE_UFN. restml.csh has been # modified to read both parameters from the UTREE variable, # and UFN is no longer passed as a separate variable. UTREEPARAM = sys.argv[2] UTREE = UTREEPARAM[0] UFN = UTREEPARAM[1:] METHOD = sys.argv[3] REPLICATES = sys.argv[4] PERCENT = sys.argv[5] TCONMETH = sys.argv[6] PARSTYPE = sys.argv[7] THRESHOLD = sys.argv[8] OUTGROUP = sys.argv[9] JUMBLE = sys.argv[10] NUMJUM = sys.argv[11] TERMOUT = sys.argv[12] ALLTREES = sys.argv[13] OUTFILE = sys.argv[14] TREEFILE = sys.argv[15] ALLTREEFILE = sys.argv[16] # Debug statement. #p_testinfile = subprocess.call(["nedit", UFN]) # Make a temporary directory in which to run the program STARTDIR = os.getcwd() TEMPDIR = 'DISCPARS.' + str(os.getpid()) os.mkdir(TEMPDIR) shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.temp')) if UTREE == 'y': # Turn off bootstrapping when evaluating a user tree. # If bootstrapping is on, an empty file may be generated # which could cause drawtree or drawgram to loop infinitely METHOD = 'n' # Make sure that treefile begins with number of trees on first # line of file. If first line in file has parentheses, the # number must be added. phylip.ufn(UFN, TEMPDIR) os.chdir(TEMPDIR) # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile.temp"]) # ------------------ Choose program ------------------------------ PROGRAM = 'dollop' INFORMAT = 's' if TCONMETH == "d": PROGRAM = 'dollop' INFORMAT = 's' elif TCONMETH == "p": PROGRAM = 'dollop' INFORMAT = 's' elif TCONMETH == "c": PROGRAM = 'mix' INFORMAT = 's' elif TCONMETH == "w": PROGRAM = 'mix' INFORMAT = 's' elif TCONMETH == "P": PROGRAM = 'pars' INFORMAT = 'i' elif TCONMETH == "L": PROGRAM = 'pars' INFORMAT = 'i' elif TCONMETH == "1": PROGRAM = 'pars' INFORMAT = 'i' else: pass # x requires a file called 'mixed', which specifies, for each character, # which method to use. Tricky to implement, at present, so we'll leave # this commented out. # elif TCONMETH == "x" # PROGRAM = 'mix' msgfile_h = open('MSGFILE', 'a') msgfile_h.write("--------------------- DISCRETE DATA PARSIMONY METHODS ---------------------\n") msgfile_h.write("\n") msgfile_h.write('PROGRAM: ' + PROGRAM + '\n') msgfile_h.write("\n") # SHAZZBOT! DOLLOP and MIX can only read files in Phylip sequential format, # while PARS can only read files in Phylip interleaved format. Therefore, # we have to convert to the correct format, depending on the program. # The original infile.temp is in interleaved format. # # Dollop - can read multiple datasets either as datasets or weights # Mix - # Pars - # # If the programs read multiple datasets, the datasets must be in infile. # If the programs read weights, the original sequence is in infile, and weights # are in weights. if INFORMAT == "s": subprocess.call(['python', os.path.join(birchenv.BIRCH, 'script','phylcnv.py'), '-inf', 'pint', '-outf', 'pseq', 'infile.temp', 'infile.s.temp']) shutil.move('infile.s.temp', 'infile.temp') else: # For bootstrapping, both formats are needed. See below. subprocess.call(['python', os.path.join(birchenv.BIRCH, 'script','phylcnv.py'), '-inf', 'pint', '-outf', 'pseq', 'infile.temp', 'infile.s.temp']) #----------------- generated resampled datasets, if specified ----- # Choose resampling method #if PROGRAM == "pars": # shutil.move("infile.s.temp", "infile.temp") if METHOD == "n": msgfile_h.write(" ") shutil.copyfile('infile.temp', 'infile') if METHOD in ("b", "d"): phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 'r', INFORMAT) elif METHOD in ("ps", "po", "pw"): phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 'r', INFORMAT) # This statement reads from infile, so we have to run it before # running TREEPROGRAM, which also reads infile. NUMSEQ = phylip.get_numseq("infile") RSEED = phylip.phylip_random() msgfile_h.write(PROGRAM + ": SEED= " + str(RSEED) + "\n") #----------------- generate keyboard input to send to parsimony program ----- os.nice(8) comfile_h = open("DiscparsComfile", 'w') if UTREE == 'y': comfile_h.write('u\n') #Resampling makes no sense when evaluating a user-defined tree. METHOD = "n" #As well, when evaluating a user-defined tree, the Jumble option doesn't #appear in the menu, so we must turn jumble off. Otherwise, "J" will be #treated as an illegal option, and the program won't run. JUMBLE = "n" # Jumble - When multiple datasets are analyzed, some programs automatically # jumbles, and prompts for a random number seed for jumbling. Othersise, # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore # to set jumbling first, so that we don't need to handle a bunch of special cases in # which the program asks for jumbling at a later time. if METHOD != "n": JUMBLE = "J" if JUMBLE == "J": phylip.jumble(comfile_h, msgfile_h, NUMJUM) if METHOD in ("b", "d"): comfile_h.write('m\n') comfile_h.write('w\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') elif METHOD in ("ps", "po", "pw"): comfile_h.write('m\n') comfile_h.write('d\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') # Set program-specific parameters if TCONMETH == "p": comfile_h.write('p\n') elif TCONMETH == "c": comfile_h.write('p\n') elif TCONMETH == "w": pass elif TCONMETH == "P": pass elif TCONMETH == "L": comfile_h.write('s\n') comfile_h.write('n\n') elif TCONMETH == "1": comfile_h.write('s\n') comfile_h.write('y\n') else: pass # skip because it's the same as the default values # elif TCONMETH == "d": # set INFORMAT = 's' # x requires a file called 'mixed', which specifies, for each character, # which method to use. Tricky to implement, at present, so we'll leave # this commented out. # elif TCONMETH == "x" # comfile_h.write('x\n') # Outgroup if PROGRAM == 'mix' or PROGRAM == 'pars': # Outgroup OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h) # Threshold or ordinary parsimony if PARSTYPE == 't': msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + THRESHOLD + '\n') if THRESHOLD < 2: THRESHOLD = 2 comfile_h.write(PARSTYPE + '\n') comfile_h.write(THRESHOLD + '\n') # When resampling, turn off printing trees to outfile if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw': comfile_h.write('3\n') #accept current settings and do the analysis comfile_h.write('y\n') comfile_h.close() #----------------- Run DOLLOP, MIX or PARS ----- # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile"]) comfile_h = open("DiscparsComfile", 'r') start_time = time.time() p_PROGRAM = subprocess.Popen([PROGRAM], stdin=comfile_h) #Send program output to the terminal termout_h = open(TERMOUT, 'w') p_PROGRAM.wait() time_elapsed = time.time() - start_time #os.nice(0) termout_h.close() outfile_h = open('outfile', 'a') outfile_h.write("Execution times on " + os.uname()[1] + ": " + str(time_elapsed)) outfile_h.close() OUTfile2_h = open(os.path.join(STARTDIR, OUTFILE), 'w') msgfile_h.close() #----------- Return results to calling directory---------------- # When using resampling, filter the treefile through # consense to generate an unrooted consensus tree. if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw': outfile_h = open('outfile', 'a') outfile_h.write(" \n") outfile_h.write('-------------------------------------------\n') outfile_h.close() if PROGRAM == "dollop": ROOTEDTREE = "y" else: ROOTEDTREE = "n" subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), ROOTEDTREE, 'outfile.consense', os.path.join(STARTDIR, TREEFILE)]) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True) if ALLTREES == 'y': shutil.move('outtree', os.path.join(STARTDIR, ALLTREEFILE)) else: shutil.move('outtree', os.path.join(STARTDIR, TREEFILE)) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False) OUTfile2_h.close() os.chdir(STARTDIR) shutil.rmtree(TEMPDIR) print(PROGRAM + ' completed.')