#!/usr/bin/env python
#########################
# KEGG locus tag mapper #
#########################
# SYNOPSIS
#    This program determines which locus tags in a GenBANK file belong
#    to a given KEGG pathway.  The program does this by mapping the EC
#    numbers for each locus tag in a GenBANK file to the EC numbers in
#    a KEGG pathway.  NOTE: incomplete EC numbers are skipped.
#
# USAGE
#    kegg_mapper.py <kegg_pathway_id> <genbank_genome_file>
#
#    Examples:
#       kegg_mapper.py map00010 ecoli_k12_genome.gb
#
# INPUT FILES
#    This program takes a KEGG pathway ID and one GenBANK file as input.
#    Only the locus tags with EC numbers in the GenBANK file will be
#    comprared.  Any incomplete EC numbers (e.g. 1.-.-.-, etc.) will be
#    skipped (unless the KEGG pathway also contains the exact same
#    incomplete number - which seems unlikely).
#
# PREREQUISITES
#    This program requires the following modules (version numbers in
#    parenthesis are the version numbers I used, NOT the minimum or
#    maximum version numbers):
#
#      Python (2.6.1) -- the below are probably installed by default
#         CSV (1.0)
#         URL-lib (1.17)
#         URL-lib 2 (2.5)
#      BioPython (1.6.0)
#          To install BioPython, visit: http://biopython.org/wiki/Download
#
# OUTPUT
#    The program output consists of the locus tags in the GenBANK file
#    which belong to the KEGG pathway.  Each line of output consists of
#    a locus tag name from the GenBANK file.
#
#    Example (E. coli K12 w/map00010):
#
#       b0356
#       b1241
#       b1478
#       b1779
#       .... (continued)
#
###################################
# THIS IS OPEN SOURCE CODE (YAY!) #
###################################
# LICENSE
#    This code is licensed under the Creative Commons 3.0
#    Attribution + ShareAlike license - for details see:
#       http://creativecommons.org/licenses/by-sa/3.0/
#
# PROGRAM AUTHOR
#    Graham Alvare - home.cc.umanitoba.ca/~alvare
#
# ACKNOWLEDGEMENTS/CO-AUTHORS
#    Dr. Brian Fristensky - my work supervisor, and the man who introduced
#                           me to the wonderful field of bioinformatics.
#    Justin Zhang         - for asking me to design such a tool to help with his work
#    MGCB2 project        - for providing me with the funding to build this script
#    Manitoba Government (Innovation, Energy and Mines) - MGCB2 co-funding
#    Richard Sparling and David Levin - MGCB2 Project leads
#
# QUESTIONS & COMMENTS
#    If you have any questions, please contact me: alvare@cc.umanitoba.ca
#    I usually get back to people within 1-2 weekdays (weekends, I am slower)
#
#    P.S. please also let me know of any bugs, or if you have any suggestions
#         I am generally happy to help create new tools, or modify my existing
#         tools to make them more useful.
#
# Happy usage!

import os, os.path, sys, csv, collections
sys.path.append('/home/u4/zhangju/script/Python/lib')
from Bio import GenBank
from Bio import SeqIO
import urllib
from urllib.request import urlopen
import re
from parse_genbank import *
import glob
#import urllib2

########
# locus_to_ec (filename)
########
# SYNOPSIS
# --------
# Creates a map between ec numbers and locus tags.  The map is specified
# as a dict object containing a list object for each value.  Each key in
# the dict object is an ec number read from the GenBANK file.  Each value
# is a list of locus tags which correspond to that EC number.
#
# PARAMETERS
# ----------
# 'filename' - the GenBANK file to read
########
def locus_to_ec (filename):
	# dictionary for converting locus tags to EC numbers
	ec2tag = dict()
	gene_product=dict()

	input_handle = open(filename)
	BASE = SeqIO.parse(input_handle, "genbank")
	
	# iterate through every record in the GenBANK file
	for record in BASE:
		ckey = record.name
		
		# iterate through feature in the GenBANK record
		for feature in record.features:
			if (('locus_tag' in feature.qualifiers.keys()) and ('EC_number' in feature.qualifiers.keys())):
				# ensure that there is only one locus tag
				# for the current GenBank feature
				if len(feature.qualifiers['locus_tag']) > 1:
					print( "INVALID LOCUS TAG: " + str(feature.qualifiers['locus_tag']))
					sys.exit(1)

				# extract the feature data
				tag = feature.qualifiers['locus_tag'][0]

				# extract the EC number information	
				ecnums = feature.qualifiers['EC_number']
				
				#extract protein function
				if("product" in feature.qualifiers.keys()):
					gene_product[tag]=feature.qualifiers["product"][0]
				else:
					gene_product[tag]="NA"

				# add the locus tags to the 
				for num in ecnums:
					if not num in ec2tag.keys():
						ec2tag[num] = list()
					ec2tag[num].append(tag)

				# extract the DNA sequence
				# seq = feature.extract(record.seq)

	input_handle.close()
	return [ec2tag, gene_product]

#Find cluster of genes: assume locus tag increasing by 5
def locus_cluster (filename):
	
	IMGF = open(filename,"r")
	line=IMGF.readline()
	line=IMGF.readline()
	cluster_tag=line.split('\t')[0]
	last_tag_index=cluster_tag.split('_')[1]
	ClusterF=open(filename+"_cluster","w")
	while(line):
		arr1=line.split('\t')
		arr2=arr1[0].split('_')
		if((arr2[1][0:2]=="0r") or (arr2[1][0:2]=="0t")):
			print(arr2)
		elif(int(arr2[1])==int(last_tag_index)+5):
			cluster_tag=cluster_tag+','+arr1[0]
			last_tag_index=arr2[1]
		else:
			if(len(cluster_tag.split(','))>1):
				ClusterF.write(str(len(cluster_tag.split(',')))+"\t"+cluster_tag+"\n")
			cluster_tag=arr1[0]
			last_tag_index=arr2[1]
		line=IMGF.readline()
	IMGF.close()
	


#Mapping locus tag to COG number

def locus_to_COG (filename):
	# dictionary for converting locus tags to EC numbers
	cog2tag = dict()
	#gene_product=dict()
	cog=dict()
	
	IMGF = open(filename,"r")
	line=IMGF.readline()
	while(line):
		if re.search(r'PPUTLS46_\d+\tCOG_category',line):
			arr1=line.split('\t')
			tag=arr1[1]
			# add the locus tags to the 
			if not tag in cog2tag.keys():
				cog2tag[tag] = list()
			cog2tag[tag].append(arr1[3])
			if not arr1[3] in cog.keys():
				cog[arr1[3]]=list()
			cog[arr1[3]].append(tag)
		line=IMGF.readline()
	IMGF.close()
	
	CogFile=open(filename+"_cog.txt","w")
	for cog_key in cog.keys():
		CogFile.write(cog_key+"\t"+str(len(cog[cog_key]))+"\t"+":".join(cog[cog_key])+"\n")
	CogFile.close()
	#return cog2tag
	#return [cog2tag, gene_product]
	
def IMGTag2COG(IMG_file):
	tag2cog=dict()
	IMGF = open(IMG_file,"r")
	line=IMGF.readline()
	while(line):
		if re.search(r'SMa.+\tCOG_category',line):#if re.search(r'PPUTLS46_.+\tCOG_category',line):
			arr1=line.rstrip().split('\t')
			if(arr1[1] in tag2cog.keys()):
				tag2cog[arr1[1]]=tag2cog[arr1[1]]+":"+arr1[3]
			else:
				tag2cog[arr1[1]]=arr1[3]
		line=IMGF.readline()
	IMGF.close()
	return tag2cog

def Sm_IMGTag2Product(IMG_file):
	tag2cog=dict()
	tag2length=dict()
	IMGF = open(IMG_file,"r")
	line=IMGF.readline()
	while(line):
		if re.search(r'.+\tProduct_name',line):
			arr1=line.rstrip().split('\t')
			if(len(arr1)<4):
				tag2cog[arr1[1]]="N/A"
			else:
				tag2cog[arr1[1]]=arr1[4]
		elif re.search(r'.+\tDNA_length',line):
			arr1=line.rstrip().split('\t')
			tag2length[arr1[1]]=arr1[4]
		line=IMGF.readline()
		
	IMGF.close()
	return (tag2cog,tag2length)

def IMGTag2Product(IMG_file):
	tag2cog=dict()
	tag2length=dict()
	IMGF = open(IMG_file,"r")
	line=IMGF.readline()
	while(line):
		if re.search(r'PPUTLS46_.+\tProduct_name',line):
			arr1=line.rstrip().split('\t')
			tag2cog[arr1[1]]=arr1[4]
		elif re.search(r'PPUTLS46_.+\tDNA_length',line):
			arr1=line.rstrip().split('\t')
			tag2length[arr1[1]]=arr1[4]
		line=IMGF.readline()
		
	IMGF.close()
	return (tag2cog,tag2length)


def FPKM_addIMGCOG():

	files_in_dir=glob.glob('*withLocusTag')
	tag2cog=IMGTag2COG("/local/workspace01/zhangju/PutidaLS46_RNASeq/KEGG/IMG_gene.txt")
	for file_fpkm in files_in_dir:
		CogFile=open(file_fpkm+"_multi_cog.txt","w")
		FPKM=open(file_fpkm,"r")
		line=FPKM.readline()
		CogFile.write("COG\t"+line)
		line=FPKM.readline()
		while(line):
			arr1=line.split('\t')
			#if(arr1[1]=="PPUTLS46_000390"):
			    #print("390")
			#else:
			    #print(arr1[1])
			if(arr1[0] in tag2cog.keys()):
			    cog_array=tag2cog[arr1[0]].split(':')
			    for cog in cog_array:
			       CogFile.write(cog+"\t"+line)
			else:
			    CogFile.write("None\t"+line)
			line=FPKM.readline()
		CogFile.close()
		FPKM.close()

def Cuffdiff_addKEGG():	
	

	files_in_dir=glob.glob('*.txt')
	#files_in_dir=glob.glob('*_cuffdiff')
	tag2kegg=IMGTag2KEGG('/local/workspace01/zhangju/PutidaLS46_RNASeq/KEGG/kegg_ko_gene_set.txt')
	#tag2cog=IMGTag2COG("/local/workspace01/zhangju/PutidaLS46_RNASeq/KEGG/IMG_gene.txt")
	for file_fpkm in files_in_dir:
		CogFile=open(file_fpkm+"_KEGG","w")
		FPKM=open(file_fpkm,"r")
		line=FPKM.readline()
		CogFile.write("KEGG\t"+line)
		line=FPKM.readline()
		while(line):
		    arr1=line.split('\t')
		    if(arr1[0] in tag2kegg.keys()):
			    kegg_array=tag2kegg[arr1[0]].split(':')
			    for cog in kegg_array:
			       CogFile.write(cog+"\t"+line)
		    else:
			    CogFile.write("None\t"+line)
		    line=FPKM.readline()
		CogFile.close()
		FPKM.close()
		
def IMGTag2KEGG(filename):
	tag2kegg=dict()
	IMGF = open(filename,"r")
	line=IMGF.readline()
	while(line):
		arr1=line.rstrip().split('\t')
		kegg=arr1[0]
		for tag in arr1[1:]:
			if(tag in tag2kegg):
				tag2kegg[tag]=tag2kegg[tag]+":"+kegg
			else:
				tag2kegg[tag]=kegg
		line=IMGF.readline()
	IMGF.close()
	return tag2kegg
		
def Cuffdiff_addIMGCOG():	
	

	files_in_dir=glob.glob('*diff_product.txt')#files_in_dir=glob.glob('*.txt')
	#files_in_dir=glob.glob('*_cuffdiff')
	#tag2cog=IMGTag2COG("/local/workspace01/zhangju/PutidaLS46_RNASeq/KEGG/IMG_gene.txt")
	tag2cog=IMGTag2COG("/local/workspace01/zhangju/sm1021/IMG/IMG_gene.txt")
	for file_fpkm in files_in_dir:
		CogFile=open(file_fpkm+"_COG","w")
		FPKM=open(file_fpkm,"r")
		line=FPKM.readline()
		line=FPKM.readline()
		CogFile.write("COG\t"+line)
		line=FPKM.readline()
		while(line):
		    arr1=line.split('\t')
		    if(arr1[1] in tag2cog.keys()): #if(arr1[0] in tag2cog.keys()):
			    cog_array=tag2cog[arr1[1]].split(':')#cog_array=tag2cog[arr1[0]].split(':')
			    for cog in cog_array:
			       CogFile.write(cog+"\t"+line)
		    else:
			    CogFile.write("None\t"+line)
		    line=FPKM.readline()
		CogFile.close()
		FPKM.close()
		
def FPKM_or_Cuffdiff_addIMGProduct():
	#files_in_dir=glob.glob('*.fpkm_tracking_withLocusTag')
	#files_in_dir=glob.glob('*.diff')
	#files_in_dir=glob.glob('*')
	#files_in_dir=glob.glob('*ATCC27405*tracking')
	#files_in_dir=glob.glob('*DSM1313_genes.fpkm_tracking')
	#files_in_dir=glob.glob('*refDSM1313*')
	files_in_dir=glob.glob('*refATCC27405*')
	#(tag2cog,tag2length)=IMGTag2Product("/local/workspace01/zhangju/PutidaLS46_RNASeq/KEGG/IMG_gene.txt")
	#(tag2cog,tag2length)=Sm_IMGTag2Product("/local/workspace01/zhangju/sm1021/KEGG/IMG_gene.txt")
	#(tag2cog,tag2length)=Sm_IMGTag2Product("/local/workspace01/zhangju/Caldi_sac/IMGGeneCaldSac.xls")
	
	#(tag2cog,tag2length)=Sm_IMGTag2Product("/local/workspace01/zhangju/Thermo_petrophila/IMGGene_ThermoPetro.xls")
	(tag2cog,tag2length)=Sm_IMGTag2Product("/local/workspace01/zhangju/Cthermocellum/ATCC27405_IMGGene.xls")
	#(tag2cog,tag2length)=Sm_IMGTag2Product("/local/workspace01/zhangju/Cthermocellum/DSM1313_IMGGene.xls")
	#(tag2cog,tag2length)=IMGTag2Product("/local/workspace01/zhangju/sm1021/KEGG/IMG_gene.txt")
	for file_fpkm in files_in_dir:
		#CogFile=open(file_fpkm+"_product.txt","w")
		#CogFile=open("/local/workspace01/zhangju/Caldi_sac/FPKM_withIMGFeature/"+file_fpkm,"w")
		#CogFile=open("/local/workspace01/zhangju/Thermo_petrophila/FPKM_withIMGFeature/"+file_fpkm,"w")
		CogFile=open("/local/workspace01/zhangju/Cthermocellum/FPKM_withFeature/"+file_fpkm,"w")
		FPKM=open(file_fpkm,"r")
		line=FPKM.readline()
		#CogFile.write("Product\t"+line)#.rstrip()+"\tDNA length\n")
		CogFile.write("Product\t"+line)#.rstrip()+"\tDNA length\n")
		line=FPKM.readline()
		while(line):
		    arr1=line.split('\t')
		    #CogFile.write(tag2cog[arr1[0]]+"\t"+line.rstrip()+"\t"+tag2length[arr1[0]][:-2]+"\n")
		    CogFile.write(tag2cog[arr1[0]]+"\t"+line)#.rstrip()+"\t"+tag2length[arr1[0]][:-2]+"\n")
		    line=FPKM.readline()
		CogFile.close()
		FPKM.close()
		
#Mapping locus tag to ko number

def locus_to_ko (filename):
	# dictionary for converting locus tags to EC numbers
	ko2tag = dict()
	gene_product=dict()
	
	IMGF = open(filename,"r")
	line=IMGF.readline()
	while(line):
		if re.search(r'PPUTLS46_\d+\tKO:K\d+',line):
			arr1=line.split()
			tag=arr1[1]
			num=arr1[2].split(':')[1]
			# add the locus tags to the 
			if not num in ko2tag.keys():
				ko2tag[num] = list()
			ko2tag[num].append(tag)
			
		line=IMGF.readline()
	IMGF.close()
	return [ko2tag, gene_product]
'''
	input_handle = open(filename)
	BASE = SeqIO.parse(input_handle, "genbank")
	
	# iterate through every record in the GenBANK file
	for record in BASE:
		ckey = record.name
		
		# iterate through feature in the GenBANK record
		for feature in record.features:
			if (('locus_tag' in feature.qualifiers.keys()) and ('EC_number' in feature.qualifiers.keys())):
				# ensure that there is only one locus tag
				# for the current GenBank feature
				if len(feature.qualifiers['locus_tag']) > 1:
					print( "INVALID LOCUS TAG: " + str(feature.qualifiers['locus_tag']))
					sys.exit(1)

				# extract the feature data
				tag = feature.qualifiers['locus_tag'][0]

				# extract the EC number information	
				ecnums = feature.qualifiers['EC_number']
				
				#extract protein function
				if("product" in feature.qualifiers.keys()):
					gene_product[tag]=feature.qualifiers["product"][0]
				else:
					gene_product[tag]="NA"

				# add the locus tags to the 
				for num in ecnums:
					if not num in ec2tag.keys():
						ec2tag[num] = list()
					ec2tag[num].append(tag)

				# extract the DNA sequence
				# seq = feature.extract(record.seq)

	input_handle.close()
'''
	

########
# get_keggpw_ecs(pathway)
########
# SYNOPSIS
# --------
# Obtains (via the KEGG API - http://www.kegg.jp/kegg/rest/keggapi.html)
# the list of EC numbers associated with a given pathway.
#
# PARAMETERS
# ----------
# 'pathway' - the KEGG pathway ID to obtain the EC numbers for.
########
def get_keggpw_ecs(pathway):
	url = 'http://rest.kegg.jp/link/ec/' + pathway
	#url='http://www.genome.jp/dbget-bin/get_linkdb?-t+orthology+path:'+pathway
	params = dict()
	handle = urlopen(url) #, params)
	reader=handle.read()
	#prog = re.compile(r"ec:\d+\.\d+\.\d+\.\d+")
	prog = re.compile(r"EC:\d+\.\d+\.\d+\.\d+")
	result = re.findall(prog,str(reader))
	#reader = csv.reader(handle, dialect='excel-tab')
	#result = [line[1][3:] for line in reader if len(str(line)) > 1 and line[1].lower().startswith('ec:')]
	#handle.close()
	return result

def get_keggpw_kos(pathway):
	#url = 'http://rest.kegg.jp/link/ec/' + pathway
	url='http://www.genome.jp/dbget-bin/get_linkdb?-t+orthology+path:'+pathway
	params = dict()
	handle = urlopen(url) #, params)
	reader=handle.read()
	#prog = re.compile(r"ec:\d+\.\d+\.\d+\.\d+")
	prog = re.compile(r">(K\d+)<")
	result = re.findall(prog,str(reader))
	#reader = csv.reader(handle, dialect='excel-tab')
	#result = [line[1][3:] for line in reader if len(str(line)) > 1 and line[1].lower().startswith('ec:')]
	#handle.close()
	return result

########
# get_keggpath(imgLS46_list, kegg_allPutida)
########
# --------
# imgLS46_list has pathway category and names of each pathway
# kegg_allPutida has all pathways of Putida with pathway id and name
# Obtain the list of path id for IMG LS46
##########

def get_keggid(imgLS46_list, kegg_allPathway_file):
	
	IMGF = open(imgLS46_list,"r")
	kegg_name=list()
	line=IMGF.readline()
	while(line):
		if re.search("\(\d",line):
			arr1 = re.split('\(\d',line)
			kegg_name.append(arr1[0].strip().rstrip())
		line=IMGF.readline()
	IMGF.close()
	kegg_id = dict()
	PutidaF = open(kegg_allPathway_file,"r")
	line=PutidaF.readline()
	while(line):
		
		arr1=line.rstrip().split('  ')
		kname=""
		if(len(arr1)==4):
			kname=arr1[3]
		if(kname in kegg_name):
			#KEGG pathway options - ec, ko, general
			#ko path has more items than map path
			#For example map02020 (two-component system has no entries
			#   but ko02020 has entries 
			# here we use ko to include 
			map_id="ko"+arr1[2].rstrip()
			kegg_id[map_id.replace(" ","")]=kname
		line=PutidaF.readline()
		
	PutidaF.close()
	return kegg_id

'''
		arr1=re.split(r'             ',line.rstrip())
		pre_size=3
		if(len(arr1)==1):
					arr1=re.split(r'            ',line.rstrip())
					pre_size=4
		kid="map"+arr1[0][pre_size:]
		kname=arr1[1].split("-")[0].rstrip()
		if(kname in kegg_name):
			kegg_id[kid]=kname
'''

def get_ec_geneset():
	
	img_kegg_file = "LS46KEGG_fromIMG.txt"
	kegg_allPathway_file="kegg_pathway.txt"
	
	gbk_file = "Nov20_IMG25372.gbk" #"ALPV02.gbf"
	
	gbk_file_convert=fixing_img_gbk(gbk_file)

	kegg_list = get_keggid(img_kegg_file,kegg_allPathway_file)
	
	[ec2tag,gene_product] = locus_to_ec(gbk_file_convert)	# GenBANK
	
	img_gene_file="IMG_gene.txt" # downloaed from IMG gene information
	
	
	KEGGF = open("kegg_gene_set.txt","w")
	count=0
	
	for keggid in kegg_list:
		count=count+1
		#KEGGF.write("set_"+str(count)+"\t"+kegg_list[keggid])
		KEGGF.write(kegg_list[keggid])
#		print(kegg_list[keggid]+"\n======\n")

			# map the EC numbers in the pathway to locus tags in the genome
		ecnums = get_keggpw_ecs(keggid)	# KEGG ID
		result = list()
		for ec in ecnums:
			#if ec.split(':')[1] in ec2tag.keys():
			#	result.extend(ec2tag[ec.split(':')[1]])
			if ec.split(':')[1] in ec2tag.keys():
				result.extend(ec2tag[ec.split(':')[1]])
		# print the results
		KEGGF.write("("+str(len(result))+")")
		for tag in result:
#			print(tag+'\t'+gene_product[tag])
			KEGGF.write("\t"+tag)
		KEGGF.write("\n")
	KEGGF.close()

def get_ko_geneset():

	img_kegg_file = "LS46KEGG_fromIMG.txt"
	kegg_allPathway_file="kegg_pathway.txt"
	
	gbk_file = "Nov20_IMG25372.gbk" #"ALPV02.gbf"
	
	#gbk_file_convert=fixing_img_gbk(gbk_file)

	kegg_list = get_keggid(img_kegg_file,kegg_allPathway_file)
	
	#[ec2tag,gene_product] = locus_to_ec(gbk_file_convert)	# GenBANK
	
	img_gene_file="IMG_gene.txt" # downloaed from IMG gene information
	
	[ko2tag,gene_product] = locus_to_ko(img_gene_file)	# GenBANK
	
	
	KEGGF = open("kegg_gene_set.txt","w")
	count=0
	
	for keggid in kegg_list:
		count=count+1
		#KEGGF.write("set_"+str(count)+"\t"+kegg_list[keggid])
		KEGGF.write(kegg_list[keggid])
#		print(kegg_list[keggid]+"\n======\n")

			# map the EC numbers in the pathway to locus tags in the genome
		kos = get_keggpw_kos(keggid)	# KEGG ID
		result = list()
		for ko in kos:
			#if ec.split(':')[1] in ec2tag.keys():
			#	result.extend(ec2tag[ec.split(':')[1]])
			if ko in ko2tag.keys():
				result.extend(ko2tag[ko])
		# print the results
		KEGGF.write("("+str(len(result))+")")
		for tag in result:
#			print(tag+'\t'+gene_product[tag])
			KEGGF.write("\t"+tag)
		KEGGF.write("\n")
	KEGGF.close()
########
# main - kegg_mapper.py <pathway> <genome>
########
# SYNOPSIS
# --------
# Reads in a GenBANK file and determines which locus tags (genes) are
# involved in a given KEGG pathway.  The tags are read from the GenBANK
# file using the method 'locus_to_ec'.  The pathway is read from KEGG
# using the method 'get_keggpw_ecs'.  All comparison procedures are
# currently done in the body of the main method (below).
########
if __name__ == "__main__":
	
	#locus_to_COG("IMG_gene.txt")
	#FPKM_addIMGCOG()
	#Cuffdiff_addIMGCOG()
	FPKM_or_Cuffdiff_addIMGProduct()
	#get_ko_geneset()
	#Cuffdiff_addKEGG()
	#locus_cluster(sys.argv[1])
	
		
'''	
	# Ensure that the proper command line structure is used
	# for calling the command.  (i.e. ensure that a pathway,
	# and a genome (GenBank file) are supplied).
	if len(sys.argv) < 2:
		print("Usage: kegg_mapper.py <pathway> <genome>")
		sys.exit()
	else:
		# initialize the results list
		result = list()
		
		# TODO (?): handle dashes!!!
		#print( >> sys.stderr, "NOTE: EC #'s with dashes are dropped!")
		
		# read the GenBANK file and load the KEGG pathway
		ecnums = get_keggpw_ecs(sys.argv[1])	# KEGG ID
		[ec2tag,gene_product] = locus_to_ec(sys.argv[2])	# GenBANK
	
		# map the EC numbers in the pathway to locus tags in the genome
		for ec in ecnums:
			if ec.split(':')[1] in ec2tag.keys():
				result.extend(ec2tag[ec.split(':')[1]])
		# print the results
		for tag in result:
			print(tag+'\t'+gene_product[tag])
'''