#!/usr/bin/env python
"""Run PHYLIP proml (or promlk) as a non-interactive command.

The script copies the input alignment into a private temporary directory,
writes the keystrokes for the program's menu to a command file, runs the
program, and moves the results back to the calling directory.
"""
import os
import os.path
import shutil
import subprocess
import sys
import time

import phylip

#Version Feb. 26, 2020
# Run proml as a command
# Synopsis (positional arguments exactly as they are read below):
#   proml.py infile utree_ufn program pmodel speedy global method \
#            replicates blocksize percent jumble numjum outgroup \
#            termout printdata outfile treefile
# NOTE(review): an earlier synopsis also listed 'bseed' (after method) and
# 'jseed' (after jumble). This script does not read them; the random seed
# comes from phylip.phylip_random(). Confirm that callers pass 17 arguments.
USAGE = ("Usage: proml.py infile utree_ufn program pmodel speedy global "
         "method replicates blocksize percent jumble numjum outgroup "
         "termout printdata outfile treefile")

# Fail with a readable message rather than a bare IndexError when the
# caller supplies too few arguments.
if len(sys.argv) < 18:
    sys.exit(USAGE)

#Convert arguments to variables
INFILE = sys.argv[1]
#print 'INFILE: ' + INFILE

# If no file has been chosen for UFN, UFN will be blank. Passing UFN as a
# separate argument would therefore result in a missing parameter, so the
# caller concatenates UTREE and UFN into one argument of the form
# UTREE_UFN: the first character is the user-tree flag and the remainder
# is the user-tree file name.
UTREEPARAM = sys.argv[2]
# Slice instead of indexing so that an empty argument yields '' rather
# than raising IndexError.
UTREE = UTREEPARAM[:1]
UFN = UTREEPARAM[1:]
PROGRAM = sys.argv[3]
PMODEL = sys.argv[4]
SPEEDY = sys.argv[5]
GLOBAL = sys.argv[6]
METHOD = sys.argv[7]
REPLICATES = sys.argv[8]
# NOTE(review): BLOCKSIZE is parsed but not used in the visible code; the
# resampling calls pass a literal '1' instead.
BLOCKSIZE = sys.argv[9]
PERCENT = sys.argv[10]
JUMBLE = sys.argv[11]
NUMJUM = sys.argv[12]
OUTGROUP = sys.argv[13]
TERMOUT = sys.argv[14]
PRINTDATA = sys.argv[15]
OUTFILE = sys.argv[16]
TREEFILE = sys.argv[17]

# Make a temporary directory in which to run the program
# NOTE(review): the 'DNAML.' prefix looks inherited from the dnaml wrapper;
# it is kept unchanged so the directory name stays the same.
STARTDIR = os.getcwd()
TEMPDIR = 'DNAML.' + str(os.getpid())
os.mkdir(TEMPDIR)
shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.temp'))

if UTREE == 'y':
    # Turn off bootstrapping when evaluating a user tree.
    # If bootstrapping is on, an empty file may be generated
    # which could cause drawtree or drawgram to loop infinitely
    METHOD = 'n'

    # Make sure that treefile begins with number of trees on first
    # line of file. If first line in file has parentheses, the
    # number must be added.
    # NOTE(review): nesting reconstructed from a flattened source; the
    # call is assumed to apply only when a user tree is supplied.
    phylip.ufn(UFN, TEMPDIR)

# Everything from here on runs inside the temporary directory.
os.chdir(TEMPDIR)

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile.temp"])
# Debug statement.
#p_testinfile = subprocess.call(["nedit", "intree"])

#----------------- generate keyboard input to send to program -----
# MSGFILE receives the banner and run parameters; it stays open because the
# phylip helper calls below and the later steps also write to it.
msgfile_h = open("MSGFILE", "w")

# only two options, and the default is proml
if PROGRAM == "promlk":
    msgfile_h.write("-------- PROMLK - PROTEIN MAXIMUM LIKELIHOOD WITH EVOLUTIONARY CLOCK ---------\n")
else:
    msgfile_h.write("--------------------- PROML - PROTEIN MAXIMUM LIKELIHOOD ---------------------\n")
# NOTE(review): nesting reconstructed from a flattened source; the blank
# line is assumed to follow the banner for both programs.
msgfile_h.write(" \n")

# Percent of characters to sample.
# Parse once. A value that is not an integer, or is outside 5..100, falls
# back to sampling 100% instead of aborting the run with ValueError.
try:
    percent_value = int(PERCENT)
except ValueError:
    percent_value = None
if percent_value is None or percent_value < 5 or percent_value > 100:
    # NOTE(review): the fallback is the int 100 while a valid value stays a
    # string, exactly as in the original; confirm that the phylip resampling
    # helpers accept both types.
    PERCENT = 100

if METHOD == "n":
    # No resampling: the program reads the original data set unchanged.
    msgfile_h.write(" \n")
    shutil.copyfile('infile.temp', 'infile')
if METHOD in ("b", "d"):
    phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i')
elif METHOD in ("ps", "po", "pw"):
    phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i')

# Debug statement.
#p_testinfile = subprocess.call(["nedit", "infile"])

#-------- Run proml -----------
# NOTE(review): termout_h is opened here and closed after the run, but
# nothing in the visible code writes to it or hands it to the program.
termout_h = open(TERMOUT, 'w')
RSEED = phylip.phylip_random()
msgfile_h.write(PROGRAM + ": SEED= " + str(RSEED) + "\n")

# This statement reads from infile, so we have to run it before
# running the program, which also reads infile.
NUMSEQ = phylip.get_numseq("infile")

# PromlComfile holds the keystrokes that are fed to the program's menu.
comfile_h = open('PromlComfile', 'w')

if UTREE == 'y':
    comfile_h.write('u\n')

# Jumble - When multiple datasets are analyzed, some programs automatically
# jumble and prompt for a random number seed for jumbling. Otherwise,
# jumbling must be explicitly set. From a scripting perspective, it is
# safest therefore to set jumbling first, so that we don't need to handle
# a bunch of special cases in which the program asks for jumbling at a
# later time.
# Resampled runs are always jumbled.
if METHOD != "n":
    JUMBLE = "J"
if JUMBLE == "J":
    phylip.jumble(comfile_h, msgfile_h, NUMJUM)

# Unlike other programs, dnaml doesn't prompt for a random number
# seed if jumbling is set, which is why the RSEED writes below are
# commented out.
if METHOD in ("b", "d"):
    # Multiple data sets supplied as weights.
    comfile_h.write('m\n')
    comfile_h.write('w\n')
    comfile_h.write(REPLICATES + '\n')
#    comfile_h.write(str(RSEED) + '\n')
elif METHOD in ("ps", "po", "pw"):
    # Multiple data sets supplied as separate data sets.
    comfile_h.write('m\n')
    comfile_h.write('d\n')
    comfile_h.write(REPLICATES + '\n')
#    comfile_h.write(str(RSEED) + '\n')

# Speedier but rougher analysis: 's' is sent only when SPEEDY is 'n',
# presumably to toggle off a default that is on (see the PHYLIP proml menu).
if SPEEDY == 'n':
    comfile_h.write('s\n')

# Global rearrangements
if GLOBAL == 'y':
    comfile_h.write('g\n')

# Outgroup
OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h)

# Amino acid probability model. Per the PHYLIP proml menu, each 'p'
# keystroke advances the model (JTT -> PMB -> PAM), so one press selects
# PMB and two presses select PAM.
if PMODEL == "h":
    # echo 'Using Henikoff/Tillier PMB model' >> MSGFILE
    comfile_h.write('p\n')
elif PMODEL == "d":
    # echo 'Using Dayhoff PAM model' >> MSGFILE
    comfile_h.write('p\n')
    comfile_h.write('p\n')

# Should sequence data be printed?
if PRINTDATA == 'y' and METHOD == 'n':
    comfile_h.write("1\n")

# When resampling or jumbling, turn off printing trees to outfile
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    comfile_h.write("3\n")

#accept current settings and do the analysis
comfile_h.write("y\n")
comfile_h.close()

#-------- run proml -----------
os.nice(4)
start_time = time.time()
try:
    # Feed the prepared keystrokes to the program on standard input.
    with open('PromlComfile', 'r') as comfile_h:
        p_PROGRAM = subprocess.Popen([PROGRAM], stdin=comfile_h)
        p_PROGRAM.wait()
finally:
    # Close the message and terminal-output files even if the program
    # could not be started.
    termout_h.close()
    msgfile_h.close()
time_elapsed = time.time() - start_time
# NOTE(review): os.nice() adds its argument to the current niceness, so
# os.nice(0) leaves the priority unchanged; it does not undo os.nice(4).
os.nice(0)

# No trailing newline is written after the elapsed-time line (unchanged
# from the original).
with open('outfile', 'a') as outfile_h:
    outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds")

#----------- Return results to calling directory----------------
# When using resampling, filter the treefile through
# consense to generate an unrooted consensus tree.
if METHOD in ('b', 'd', 'ps', 'po', 'pw'):
    # Resampled run: append a separator to the program output, then pass
    # the trees in 'outtree' to consense.py, which writes TREEFILE.
    with open('outfile', 'a') as outfile_h:
        outfile_h.write(" \n")
        outfile_h.write('-------------------------------------------\n')
    subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', TREEFILE])
    shutil.move(TREEFILE, os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE))
else:
    # Single data set: the program's own tree file is the result.
    shutil.move('outtree', os.path.join(STARTDIR, TREEFILE))
    phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False)

# Leave the temporary directory before deleting it; TEMPDIR is a path
# relative to STARTDIR.
os.chdir(STARTDIR)
shutil.rmtree(TEMPDIR)
print("PROML completed.")