#!/usr/bin/env python import os import os.path import phylip import shutil import subprocess import sys import time # Run dnapars as a command #Synopsis: dnapars.py infile soption method replicates percent blocksize # jumble numjum outgroup parstype threshold transversion termout printdata outfile treefile alltreefile #Convert arguments to variables INFILE = sys.argv[1] SOPTION = sys.argv[2] METHOD = sys.argv[3] REPLICATES = sys.argv[4] PERCENT = sys.argv[5] BLOCKSIZE = sys.argv[6] JUMBLE = sys.argv[7] NUMJUM = sys.argv[8] OUTGROUP = sys.argv[9] PARSTYPE = sys.argv[10] THRESHOLD = sys.argv[11] TRANSVERSION = sys.argv[12] TERMOUT = sys.argv[13] ALLTREES = sys.argv[14] PRINTDATA = sys.argv[15] OUTFILE = sys.argv[16] TREEFILE = sys.argv[17] ALLTREEFILE = sys.argv[18] # Make a temporary directory in which to run the program STARTDIR = os.getcwd() TEMPDIR = 'DNAPARS.' + str(os.getpid()) os.mkdir(TEMPDIR) shutil.copyfile(INFILE, os.path.join(TEMPDIR, 'infile.temp')) os.chdir(TEMPDIR) msgfile_h = open('MSGFILE', 'a') msgfile_h.write("--------------------- DNAPARS - DNA PARSIMONY METHODS ---------------------\n") msgfile_h.write("\n") #----------------- generated resampled datasets, if specified ----- # Choose resampling method # Percent of characters to sample if PERCENT < 5 or PERCENT > 100: PERCENT = 100 if METHOD == "n": msgfile_h.write(" \n") shutil.copyfile('infile.temp', 'infile') if METHOD in ("b", "d"): phylip.stdresample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i') elif METHOD in ("ps", "po", "pw"): phylip.weightless_resample(METHOD, PERCENT, REPLICATES, '1', msgfile_h, 's', 'i') # Debug statement. #p_testinfile = subprocess.call(["nedit", "infile"]) #------------------------------- DNAPARS --------------------------------- termout_h = open(TERMOUT, 'w') RSEED = phylip.phylip_random() msgfile_h.write("DNAPARS: SEED= " + str(RSEED) + "\n") # This statement reads from infile, so we have to run it before # running TREEPROGRAM, which also reads infile. NUMSEQ = phylip.get_numseq("infile") os.nice(4) start_time = time.time() #----------------- generate keyboard input to send to program ----- # Jumble - When multiple datasets are analyzed, some programs automatically # jumbles, and prompts for a random number seed for jumbling. Othersise, # jumbling must be explicitly set. From a scripting perspecitve, it is safest therefore # to set jumbling first, so that we don't need to handle a bunch of special cases in # which the program asks for jumbling at a later time. comfile_h = open('Dnaparsfile', 'w') if METHOD != "n": JUMBLE = "J" if JUMBLE == "J": phylip.jumble(comfile_h, msgfile_h, NUMJUM) if METHOD in ("b", "d"): comfile_h.write('m\n') comfile_h.write('w\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') elif METHOD in ("ps", "po", "pw"): comfile_h.write('m\n') comfile_h.write('d\n') comfile_h.write(REPLICATES + '\n') comfile_h.write(str(RSEED) + '\n') # Search option if SOPTION == "m": msgfile_h.write('Search option: THOROUGH\n') elif SOPTION == "l": msgfile_h.write('Search option: LESS THOROUGH\n') comfile_h.write('s\n') comfile_h.write('n\n') elif SOPTION == "r": msgfile_h.write('Search option: REARRANGE ON JUST ONE BEST TREE\n') comfile_h.write('s\n') comfile_h.write('y\n') # Outgroup OUTGROUP = phylip.do_outgroup(OUTGROUP, NUMSEQ, comfile_h, msgfile_h) # Threshold or ordinary parsimony if PARSTYPE == 't': if int(THRESHOLD) < 2: THRESHOLD = 2 comfile_h.write(PARSTYPE + '\n') comfile_h.write(str(THRESHOLD) + '\n') msgfile_h.write('Using threshold parsimony, THRESHOLD= ' + str(THRESHOLD) + '\n') # Count all steps: a; Count only transversions: t if TRANSVERSION == 't': comfile_h.write('n\n') # Should sequence data be printed? if PRINTDATA == 'y' and METHOD == 'n': comfile_h.write('1\n') # When resampling or jumbling, turn off printing trees to outfile if METHOD in ('b', 'd', 'ps', 'po', 'pw'): comfile_h.write('3\n') #accept current settings and do the analysis comfile_h.write('y\n') comfile_h.close() #-------- Run dnapars using keyboard input from Dnaparsfile ----------- comfile_h = open('Dnaparsfile', 'r') p_PROGRAM = subprocess.Popen(['dnapars'], stdin=comfile_h) p_PROGRAM.wait() comfile_h.close() termout_h.close() msgfile_h.close() time_elapsed = time.time() - start_time os.nice(0) outfile_h = open('outfile', 'a') outfile_h.write("Elapsed time on " + os.uname()[1] + ": " + str(time_elapsed) + " seconds") outfile_h.close() #----------- Return results to calling directory---------------- # When using resampling or jumbling, filter the outtree through # consense to generate an unrooted consensus tree. if METHOD == 'b' or METHOD == 'd' or METHOD == 'ps' or METHOD == 'po' or METHOD == 'pw': outfile_h = open('outfile', 'a') outfile_h.write(" \n") outfile_h.write('-------------------------------------------\n') outfile_h.close() subprocess.call(['consense.py', 'outtree', 'e', '1', str(OUTGROUP), 'n', 'outfile.consense', os.path.join(STARTDIR, TREEFILE)]) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), True, True) if ALLTREES == 'y': shutil.move('outtree', os.path.join(STARTDIR, ALLTREEFILE)) else: shutil.move('outtree', os.path.join(STARTDIR, TREEFILE)) phylip.merge_msg(os.path.join(STARTDIR, OUTFILE), False) os.chdir(STARTDIR) shutil.rmtree(TEMPDIR) print 'DNAPARS Completed'