#!/usr/bin/env python
import os
import os.path
import re
import sys
import subprocess

#Version June 9, 2016
# Run NUMSEQ as a command
#Synopsis: numseq.py infile outfile gcfile start finish nuccase startno group gpl which strands kind numbers nucs peptides frames form
#
#NUMSEQ is an interactive program, so we first generate a file containing the
#keystrokes the user would normally type, and then run the program using that file.

# Positional command line arguments, in the order they appear after the
# script name (sys.argv[1] .. sys.argv[17]).
ARG_NAMES = ('INFILE', 'OUTFILE', 'GCFILE', 'START', 'FINISH', 'NUCCASE',
             'STARTNO', 'GROUP', 'GPL', 'WHICH', 'STRANDS', 'KIND',
             'NUMBERS', 'NUCS', 'PEPTIDES', 'FRAMES', 'FORM')

# Arguments that are converted to int; all others are passed through as text.
INT_ARGS = frozenset(('START', 'FINISH', 'STARTNO', 'GROUP', 'GPL'))


def parse_args(argv):
    """Convert the positional arguments in argv into a dict keyed by name.

    argv is a full argument vector (argv[0] is the script name).  Values
    named in INT_ARGS are converted with int(), so a non-numeric value
    raises ValueError.  The caller must already have checked that argv
    holds at least 18 items.
    """
    params = {}
    for name, raw in zip(ARG_NAMES, argv[1:]):
        params[name] = int(raw) if name in INT_ARGS else raw
    return params


def read_seq_length(path):
    """Return the sequence length from line 1 of a GenBank file.

    The length is the third space-delimited token of a first line that
    begins with 'LOCUS'.  Returns None when the first line is not a LOCUS
    line, or when that token is missing or is not an integer.
    """
    with open(path, 'r') as infile:
        firstline = infile.readline()
    if not firstline.startswith('LOCUS'):
        return None
    tokens = re.split('[ ]+', firstline)
    try:
        return int(tokens[2])
    except (IndexError, ValueError):
        return None


def build_commands(params, outfile_exists):
    """Return the keystrokes NUMSEQ should read, as one string.

    params is a dict as returned by parse_args(); it is not modified.
    outfile_exists says whether OUTFILE is already present, in which case
    an extra 'Y' line is written after the output filename.
    """
    group = params['GROUP']
    # If PEPTIDES = Yes, round GROUP down to a multiple of three.
    # NOTE(review): a GROUP of 1 or 2 becomes 0 here - confirm that NUMSEQ
    # accepts that value.
    if params['PEPTIDES'] == 'Y':
        group = group - group % 3

    #initial filenames
    lines = [params['INFILE'],          #input filename
             'g',                       #GenBank format
             params['OUTFILE']]         #outfile
    if outfile_exists:
        lines.append('Y')
    lines += ['3', params['GCFILE']]    #Genetic Code file

    # Set parameters
    lines.append('4')                                   #Choose parameter menu
    lines += ['1', str(params['START'])]                #choose start
    lines += ['2', str(params['FINISH'])]               #choose finish
    lines += ['3', str(params['NUCCASE'])]              #choose nuccase
    # STARTNO = 0 means do not set STARTNO (default = START)
    if params['STARTNO'] != 0:
        lines += ['4', str(params['STARTNO'])]          #choose startno
    lines += ['5', str(group)]                          #choose group
    lines += ['6', str(params['GPL'])]                  #choose gpl
    lines += ['7', str(params['WHICH'])]                #choose which
    lines += ['8', str(params['STRANDS'])]              #choose strands
    lines += ['9', str(params['KIND'])]                 #choose kind
    lines += ['10', str(params['NUMBERS'])]             #choose numbers
    lines += ['11', str(params['NUCS'])]                #choose nucs
    lines += ['12', str(params['PEPTIDES'])]            #choose peptides
    lines += ['13', str(params['FRAMES'])]              #choose frames
    lines += ['14', str(params['FORM'])]                #choose form

    # exit from NUMSEQ
    lines += ['0',      #exit parameter menu
              '6',      #Print numbered sequence
              '',       #dummy prompt line
              '0']      #exit program
    return '\n'.join(lines) + '\n'


def main(argv):
    """Run NUMSEQ non-interactively using the arguments in argv.

    Returns the process exit status.  It is 0 in every handled case,
    including the usage and missing-file messages, which matches the
    script's historical behaviour.
    """
    # ensure that there are enough command line arguments to parse
    if len(argv) < 18:
        print("Usage: numseq.py INFILE OUTFILE GCFILE START FINISH NUCCASE STARTNO")
        print(" GROUP GPL WHICH STRANDS KIND NUMBERS NUCS PEPTIDES FRAMES FORM")
        return 0

    #Convert arguments to variables
    params = parse_args(argv)
    infile = params['INFILE']

    # Abort if INFILE does not exist or holds no more than one byte
    if not (os.path.exists(infile) and os.path.getsize(infile) > 1):
        print("Error: Cannot open file!")
        return 0

    # a bit of sanity checking: START and FINISH may not exceed the length
    # given on the LOCUS line.  When no length can be read, the values are
    # passed on unchanged instead of failing on an undefined length.
    seqlength = read_seq_length(infile)
    if seqlength is None:
        sys.stderr.write("Warning: no sequence length found on line 1 of " +
                         infile + "; START and FINISH not checked\n")
    else:
        params['START'] = min(params['START'], seqlength)
        params['FINISH'] = min(params['FINISH'], seqlength)

    # ------------------- Generate a file of commands to be read by NUMSEQ ----------
    # The process id keeps command files of concurrent runs apart.
    cfn = infile + '.' + str(os.getpid())
    commands = build_commands(params, os.path.exists(params['OUTFILE']))
    try:
        with open(cfn, 'w') as comfile:
            comfile.write(commands)

        #---------------- Run NUMSEQ using the command file as input --------------------
        with open(cfn, 'r') as comfile:
            subprocess.Popen(['numseq'], stdin=comfile).wait()
    finally:
        # Remove the command file even when numseq could not be started.
        if os.path.exists(cfn):
            os.remove(cfn)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))