#!/usr/bin/env python
"""
May 27, 2016, Dr. Brian Fristensky, University of Manitoba

delgi.py - remove GI numbers from GenBank flatfiles, in compliance
with the NCBI phase out of GI numbers.
http://www.ncbi.nlm.nih.gov/news/03-02-2016-phase-out-of-GI-numbers/

Synopsis: delgi.py infile outfile

    infile  - a GenBank flat file with GI numbers
    outfile - a GenBank flat file without GI numbers

infile is copied to outfile with two kinds of change:
  * every line containing a /db_xref="GI: qualifier is dropped;
  * on lines beginning with VERSION, the "GI:<digits>" field and the
    blanks in front of it are removed.
All other lines are copied unchanged. infile itself is never modified,
so infile and outfile must name different files.

If "-test" appears anywhere on the command line, main() is not run
(documentation mode).

@modified: May 27 2016
@author: Brian Fristensky
@contact: frist@cc.umanitoba.ca
"""

import os
import re
import shutil
import sys

PROGRAM = "delgi.py: "
USAGE = "\n\t USAGE: delgi.py infile outfile"

# Feature-table qualifier carrying a GI number; lines containing it are dropped.
_GI_XREF = '/db_xref="GI:'

# A VERSION line that still carries a GI field, e.g.
#     VERSION     U49845.1  GI:1293613
# Group 1 is what is kept.  Its ".+?" is deliberately non-greedy so that ALL
# of the blanks in front of "GI:" land in group 2 and are removed with it;
# a greedy ".+" would keep one of them and leave a trailing blank behind.
_VERSION_GI = re.compile(r'(^VERSION[ ]+.+?)([ ]+GI:\d+)')


def _strip_gi(lines):
    """Yield each of *lines* with GI numbers removed; GI db_xref lines are skipped."""
    for line in lines:
        if _GI_XREF in line:
            continue
        yield _VERSION_GI.sub(r'\g<1>', line)


#======================== MAIN PROCEDURE ==========================
def main():
    """
    Copy the GenBank flat file sys.argv[1] to sys.argv[2] without GI numbers.

    Called when not in documentation mode.  Arguments after the first two
    are ignored.

    Exits with status 1, after writing a message to stderr, when fewer than
    two file names are given or when both names refer to the same file.
    Errors from opening or reading the files propagate to the caller.
    """
    if len(sys.argv) < 3:
        sys.stderr.write(PROGRAM + "input and output file names are required"
                         + USAGE + "\n")
        sys.exit(1)

    # set filenames
    IFN = sys.argv[1]
    OFN = sys.argv[2]

    # Opening the output truncates it, so writing onto the input would
    # destroy the data before a single line had been read.
    if os.path.realpath(IFN) == os.path.realpath(OFN):
        sys.stderr.write(PROGRAM + "infile and outfile must be different files"
                         + USAGE + "\n")
        sys.exit(1)

    # 'with' closes both files even if reading or writing fails part way.
    with open(IFN, 'r') as infile:
        with open(OFN, 'w') as outfile:
            outfile.writelines(_strip_gi(infile))


# Run only as a script, and not in documentation mode ("-test"), so that
# importing this module never touches sys.argv or the file system.
if __name__ == "__main__" and "-test" not in sys.argv:
    main()