#!/usr/local/bin/python

# July 31, 2004, Dr. Brian Fristensky, University of Manitoba

# Description: SeqHound functions such as SHoundGetGenBankff
# return a list of GenBank entries, but omit the '//' line
# that terminates each entry. This script puts that line
# back. Hopefully, SLRI will eventually fix this problem,
# and this script can be discarded.

# Synopsis: SHound.fixGenBank.py infile outfile

# Files: infile    output from SHound
#        outfile   correctly-formatted GenBank file

import sys
import string
import re

# Streams used by FindOrigin/FindEndOfSeq when the caller does not pass
# explicit ones.  Kept so the one-argument call form FindOrigin(LINE) /
# FindEndOfSeq(LINE) still works for code that assigns these globals itself.
INFILE = None
OUTFILE = None

# Every sequence line has a number right-justified in the first 9 columns.
_SEQ_NUMFIELD = re.compile(r' +[0-9]+')

# The line that terminates an entry.
_TERMINATOR = '//\n'


def _streams(infile, outfile):
    """Return (infile, outfile), substituting the module globals for None."""
    if infile is None:
        infile = INFILE
    if outfile is None:
        outfile = OUTFILE
    return infile, outfile


def _copy_through_origin(LINE, infile, outfile):
    """Copy lines to outfile up to and including the ORIGIN line.

    Returns a (found, next_line) pair.  found is False when end of file was
    reached before any line starting with 'ORIGIN'; next_line is then ''.
    Otherwise next_line is the line read right after the ORIGIN line
    ('' if ORIGIN was the last line of the file).
    """
    while LINE != '' and LINE[:6] != 'ORIGIN':
        outfile.write(LINE)
        LINE = infile.readline()
    if LINE == '':
        return False, ''
    outfile.write(LINE)
    return True, infile.readline()


def FindOrigin(LINE, infile=None, outfile=None):
    """Find the origin line and read the next line.

    Starting with LINE, each line read is copied to outfile until a line
    beginning with 'ORIGIN' has been copied or end of file is reached.

    LINE     -- the current, not yet written, input line
    infile   -- stream to read from (default: module-level INFILE)
    outfile  -- stream to write to (default: module-level OUTFILE)

    Returns the line following the ORIGIN line, or '' at end of file.
    """
    infile, outfile = _streams(infile, outfile)
    return _copy_through_origin(LINE, infile, outfile)[1]


def FindEndOfSeq(LINE, infile=None, outfile=None):
    """Copy the sequence lines to outfile and terminate the entry with '//'.

    Starting with LINE, lines are copied while their first 9 columns hold a
    right-justified number.  A '//' line is then written.  The first
    non-sequence line is handled as follows:

      * blank line, end of file, or an existing '//' line: it is replaced
        by the terminator, so the output has exactly one '//' line;
      * anything else (e.g. the first line of the next entry): it is
        written unchanged AFTER the terminator instead of being glued onto
        the '//'.

    LINE     -- the first line after the ORIGIN line
    infile   -- stream to read from (default: module-level INFILE)
    outfile  -- stream to write to (default: module-level OUTFILE)

    Returns the last line written to outfile.
    """
    infile, outfile = _streams(infile, outfile)
    while LINE != '' and _SEQ_NUMFIELD.match(LINE[:9]):
        outfile.write(LINE)
        LINE = infile.readline()
    outfile.write(_TERMINATOR)
    if LINE.strip() == '' or LINE.rstrip() == '//':
        return _TERMINATOR
    outfile.write(LINE)
    return LINE


#========================== MAIN PROCEDURE ==========================
def main(argv=None):
    """Copy argv[1] to argv[2], restoring the '//' line after each entry.

    argv -- argument list in sys.argv layout (default: sys.argv)

    Returns 0 on success and 2 when the file arguments are missing.
    Errors from opening, reading or writing the files propagate.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write('Usage: SHound.fixGenBank.py infile outfile\n')
        return 2

    # 'with' guarantees both files are closed even if copying fails.
    with open(argv[1], 'r') as infile:
        with open(argv[2], 'w') as outfile:
            line = infile.readline()
            while line != '':
                found, line = _copy_through_origin(line, infile, outfile)
                # Only an entry that actually reached ORIGIN gets a
                # terminator; trailing lines after the last entry do not.
                if found:
                    FindEndOfSeq(line, infile, outfile)
                line = infile.readline()
    return 0


if __name__ == '__main__':
    sys.exit(main())