#!/usr/bin/env python
"""get_protein_sql.py -- get protein sequences from a local MySQL database.

Each command-line argument is an accession looked up in a local Uniprot or
NCBI/RefSeq MySQL database, and the matching record is written to stdout in
FASTA format.  An argument may:

  * carry a database prefix, e.g. ``sp|P09488|GSTM1_HUMAN`` (the second
    ``|``-separated field is used as the accession);
  * carry a residue sub-range suffix, e.g. ``P09488:10-50`` (1-based,
    inclusive).

The script stops with exit status 1 at the first accession that is not found.
"""

import re
import sys
import textwrap

# The accession is passed to cursor.execute() as a bound parameter, so the
# placeholder is a bare %s: the driver does the quoting/escaping.  Never build
# these statements with Python string formatting -- the accession comes
# straight from the command line.
sql_get_uniprot = ('select db, acc, id, descr, seq '
                   'from annot2 join protein using(acc) where acc=%s')
sql_get_refseq = ('select db, acc, "" as id, descr, seq '
                  'from seqdb_demox.annot join seqdb_demox.protein '
                  'using(prot_id) where acc=%s')

# Arguments such as "sp|P09488|GSTM1_HUMAN" carry the accession in field 2.
_db_prefix_re = re.compile(r'^(sp|tr|iso|ref)\|')
# Accessions shaped like "NP_000552" are looked up with the RefSeq query.
_refseq_acc_re = re.compile(r'[A-Z]P_\d+')


def parse_accession(arg):
    """Split one command-line argument into ``(accession, sub_range)``.

    ``sub_range`` is the text after the first ``:`` (e.g. ``"10-50"``) or
    ``''`` when the argument has no range.  It is computed afresh for every
    argument, so a range given for one accession never carries over to the
    next one.
    """
    acc, sub_range = arg, ''
    if ':' in acc:
        acc, sub_range = acc.split(':', 1)
    if _db_prefix_re.match(acc):
        acc = acc.split('|')[1]
    return acc, sub_range


def select_query(acc):
    """Return the SQL statement used to look up ``acc``."""
    if _refseq_acc_re.match(acc):
        return sql_get_refseq
    return sql_get_uniprot


def format_fasta(row, sub_range=''):
    """Return the FASTA text (header plus wrapped sequence) for ``row``.

    ``row`` is a mapping with the keys ``db``, ``acc``, ``id``, ``descr`` and
    ``seq``.  ``sub_range`` is ``''`` for the full sequence or ``"START-STOP"``
    (1-based, inclusive); a START of 0 or less also yields the full sequence.

    Raises ValueError if ``sub_range`` is not of the form ``START-STOP``.
    """
    header = ">%s|%s" % (row['db'], row['acc'])
    if row['id']:
        header += "|%s" % (row['id'],)
    header += " " + row['descr']

    start = 0
    if sub_range:
        try:
            start_text, stop_text = sub_range.split('-')
            start, stop = int(start_text), int(stop_text)
        except ValueError:
            raise ValueError(
                "invalid sub-range %r; expected START-STOP" % (sub_range,))

    if start > 0:
        seq = row['seq'][start - 1:stop]
        # NOTE(review): the offset tag is emitted as "@C<start+1>" exactly as
        # before; confirm against the consumer of this output whether
        # "@C:<start>" is what is actually expected.
        header = "%s @C%d" % (header, start + 1)
    else:
        seq = row['seq']

    return header + '\n' + '\n'.join(textwrap.wrap(seq))


def main(argv=None):
    """Look up every accession in ``argv`` and print it as FASTA.

    ``argv`` defaults to ``sys.argv[1:]``.  Returns the process exit status:
    0 on success, 1 as soon as an accession is not found (an error message is
    written to stderr and the remaining accessions are not processed).
    """
    if argv is None:
        argv = sys.argv[1:]

    # Imported here rather than at module level so the pure parsing and
    # formatting helpers above stay importable without the MySQL driver.
    import MySQLdb.cursors

    db = MySQLdb.connect(db='uniprot', host='wrpxdb.bioch.virginia.edu',
                         user='web_user', passwd='fasta_www',
                         cursorclass=MySQLdb.cursors.DictCursor)
    cur = db.cursor()
    try:
        for arg in argv:
            acc, sub_range = parse_accession(arg)

            # Bound parameter, not string interpolation: acc is untrusted.
            cur.execute(select_query(acc), (acc,))
            row = cur.fetchone()
            if not row:
                sys.stderr.write("*** %s *** not found\n" % (acc,))
                return 1

            print(format_fasta(row, sub_range))
    finally:
        cur.close()
        db.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())