#!/usr/bin/env python
"""Convert a free-format sequence file to a FASTA file.

Usage: free2fasta.py INFILE OUTFILE

The external program ``funnel`` is run on INFILE to delete non-sequence
characters, writing a temporary ``<pid>.raw`` file in the current
directory.  OUTFILE then receives a FASTA header line (``>`` followed by
the INFILE base name without its extension) and the contents of the
temporary file with ``;`` comments removed.

The former readseq step that converted the result to pseudo GenBank
format is disabled, so OUTFILE is plain FASTA.
"""

import errno
import os
import os.path
import subprocess
import sys

# Version 10/28/09

# Number of nucleotides per line requested from funnel.
LINE_WIDTH = 50


def sequence_name(path):
    """Return the sequence name for *path*.

    INFILE may be a fully qualified path, so the directory part and the
    file extension (if any) are dropped and only the base name is kept.
    """
    return os.path.splitext(os.path.basename(path))[0]


def strip_comment(line):
    """Return *line* with any ``;`` comment removed.

    A line that starts with ``;`` is a pure comment and yields ``''``.
    When a comment follows sequence data, only the comment is removed;
    the line terminator is kept so that the data is not joined onto the
    next line of the output.
    """
    cut = line.find(';')
    if cut < 0:
        return line
    if cut == 0:
        return ''
    kept = line[:cut]
    if line.endswith('\n'):
        kept += '\n'
    return kept


def run_funnel(infile, rawfile, width=LINE_WIDTH):
    """Run ``funnel`` on *infile*, writing cleaned sequence to *rawfile*.

    funnel reads its parameters from standard input: the input file
    name, the output file name and the number of characters per line.
    Returns funnel's exit status.
    """
    answers = '%s\n%s\n%d\n' % (infile, rawfile, width)
    # universal_newlines=True opens the pipe in text mode, so writing a
    # str works on Python 3 as well as Python 2.
    proc = subprocess.Popen(['funnel'], stdin=subprocess.PIPE,
                            universal_newlines=True)
    proc.communicate(answers)  # writes the answers, closes stdin, waits
    return proc.returncode


def remove_if_exists(path):
    """Delete *path*, ignoring the case where it does not exist."""
    try:
        os.remove(path)
    except OSError as err:
        if err.errno != errno.ENOENT:
            raise


def main(argv=None):
    """Entry point; *argv* defaults to ``sys.argv``.

    Returns the process exit status.  As in earlier versions, a usage
    error and a missing or unusable INFILE both return 0.
    """
    if argv is None:
        argv = sys.argv

    # Ensure that there are enough command line arguments to parse.
    if len(argv) < 3:
        print("Usage: free2fasta.py INFILE OUTFILE")
        return 0

    infile = argv[1]
    outfile = argv[2]

    # Abort if INFILE does not exist, is not a regular file, or is of
    # zero length.
    if not os.path.isfile(infile) or os.path.getsize(infile) == 0:
        return 0

    pid = str(os.getpid())  # process id keeps temporary names unique
    rawfile = pid + '.raw'
    try:
        run_funnel(infile, rawfile)

        with open(outfile, 'w') as fasta:
            fasta.write('>' + sequence_name(infile) + '\n')
            # Copy any non-comment text to the end of the FASTA file.
            with open(rawfile, 'r') as raw:
                for line in raw:
                    fasta.write(strip_comment(line))
    finally:
        # Delete temporary files, even when a step above failed.  The
        # .wrp file was produced by the disabled readseq conversion and
        # normally does not exist, so its absence is not an error.
        remove_if_exists(rawfile)
        remove_if_exists(pid + '.wrp')
    return 0


if __name__ == "__main__":
    sys.exit(main())