# Copyright 2001 by Tarjei Mikkelsen. All rights reserved.
# Copyright 2007 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code to work with the KEGG Enzyme database.

Functions:
 - parse - Returns an iterator giving Record objects.
 - read - Returns the single Record held in a handle.

Classes:
 - Record - Holds the information from a KEGG Enzyme record.

"""

from __future__ import print_function

from Bio.KEGG import _default_wrap, _struct_wrap, _wrap_kegg, _write_kegg


# Set up line wrapping rules (see Bio.KEGG._wrap_kegg)
rxn_wrap = [
    0,
    "",
    (" + ", "", 1, 1),
    (" = ", "", 1, 1),
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]
name_wrap = [0, "", (" ", "$", 1, 1), ("-", "$", 1, 1)]
id_wrap = _default_wrap
struct_wrap = _struct_wrap

# The parser reads the first 12 columns of every line as the keyword field;
# a field made up entirely of spaces marks a continuation of the previous
# keyword.
_KEYWORD_WIDTH = 12
_CONTINUATION = " " * _KEYWORD_WIDTH

# Keywords whose data is appended verbatim to a list attribute of Record.
_PLAIN_FIELDS = {
    "CLASS": "classname",
    "COFACTOR": "cofactor",
    "COMMENT": "comment",
}

# Keywords whose data is appended to a list attribute of Record after
# leading/trailing ";" characters have been removed.
_SEMICOLON_FIELDS = {
    "EFFECTOR": "effector",
    "INHIBITOR": "inhibitor",
    "NAME": "name",
    "PRODUCT": "product",
    "REACTION": "reaction",
    "SUBSTRATE": "substrate",
    "SYSNAME": "sysname",
}


class Record(object):
    """Holds info from a KEGG Enzyme record.

    Attributes:
     - entry       The EC number (without the 'EC ').
     - name        A list of the enzyme names.
     - classname   A list of the classification terms.
     - sysname     The systematic name of the enzyme.
     - reaction    A list of the reaction description strings.
     - substrate   A list of the substrates.
     - product     A list of the products.
     - inhibitor   A list of the inhibitors.
     - cofactor    A list of the cofactors.
     - effector    A list of the effectors.
     - comment     A list of the comment strings.
     - pathway     A list of 3-tuples: (database, id, pathway)
     - genes       A list of 2-tuples: (organism, list of gene ids)
     - disease     A list of 3-tuples: (database, id, disease)
     - structures  A list of 2-tuples: (database, list of struct ids)
     - dblinks     A list of 2-tuples: (database, list of db ids)

    """

    def __init__(self):
        """Initialize a new Record."""
        self.entry = ""
        self.name = []
        self.classname = []
        self.sysname = []
        self.reaction = []
        self.substrate = []
        self.product = []
        self.inhibitor = []
        self.cofactor = []
        self.effector = []
        self.comment = []
        self.pathway = []
        self.genes = []
        self.disease = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return a string representation of this Record.

        The sections are emitted in a fixed order and the result is
        terminated by the "///" record separator.
        """
        sections = (
            self._entry(),
            self._name(),
            self._classname(),
            self._sysname(),
            self._reaction(),
            self._substrate(),
            self._product(),
            self._inhibitor(),
            self._cofactor(),
            self._effector(),
            self._comment(),
            self._pathway(),
            self._genes(),
            self._disease(),
            self._structures(),
            self._dblinks(),
        )
        return "".join(sections) + "///"

    def _entry(self):
        """Return the formatted ENTRY section (PRIVATE)."""
        return _write_kegg("ENTRY", ["EC " + self.entry])

    def _name(self):
        """Return the formatted NAME section (PRIVATE)."""
        return _write_kegg(
            "NAME", [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.name]
        )

    def _classname(self):
        """Return the formatted CLASS section (PRIVATE)."""
        return _write_kegg("CLASS", self.classname)

    def _sysname(self):
        """Return the formatted SYSNAME section (PRIVATE)."""
        return _write_kegg(
            "SYSNAME",
            [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.sysname],
        )

    def _reaction(self):
        """Return the formatted REACTION section (PRIVATE)."""
        return _write_kegg(
            "REACTION",
            [_wrap_kegg(line, wrap_rule=rxn_wrap) for line in self.reaction],
        )

    def _substrate(self):
        """Return the formatted SUBSTRATE section (PRIVATE)."""
        return _write_kegg(
            "SUBSTRATE",
            [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.substrate],
        )

    def _product(self):
        """Return the formatted PRODUCT section (PRIVATE)."""
        return _write_kegg(
            "PRODUCT",
            [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.product],
        )

    def _inhibitor(self):
        """Return the formatted INHIBITOR section (PRIVATE)."""
        return _write_kegg(
            "INHIBITOR",
            [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.inhibitor],
        )

    def _cofactor(self):
        """Return the formatted COFACTOR section (PRIVATE)."""
        return _write_kegg(
            "COFACTOR",
            [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.cofactor],
        )

    def _effector(self):
        """Return the formatted EFFECTOR section (PRIVATE)."""
        return _write_kegg(
            "EFFECTOR",
            [_wrap_kegg(line, wrap_rule=name_wrap) for line in self.effector],
        )

    def _comment(self):
        """Return the formatted COMMENT section (PRIVATE)."""
        return _write_kegg(
            "COMMENT",
            [_wrap_kegg(line, wrap_rule=id_wrap(0)) for line in self.comment],
        )

    def _pathway(self):
        """Return the formatted PATHWAY section (PRIVATE).

        Each (database, id, pathway) tuple becomes one "database: id pathway"
        entry before wrapping.
        """
        entries = [
            entry[0] + ": " + entry[1] + " " + entry[2] for entry in self.pathway
        ]
        return _write_kegg(
            "PATHWAY", [_wrap_kegg(line, wrap_rule=id_wrap(16)) for line in entries]
        )

    def _genes(self):
        """Return the formatted GENES section (PRIVATE).

        Each (organism, gene ids) tuple becomes one "organism: id id ..."
        entry before wrapping.
        """
        entries = [entry[0] + ": " + " ".join(entry[1]) for entry in self.genes]
        return _write_kegg(
            "GENES", [_wrap_kegg(line, wrap_rule=id_wrap(5)) for line in entries]
        )

    def _disease(self):
        """Return the formatted DISEASE section (PRIVATE).

        Each (database, id, disease) tuple becomes one "database: id disease"
        entry before wrapping.
        """
        entries = [
            entry[0] + ": " + entry[1] + " " + entry[2] for entry in self.disease
        ]
        return _write_kegg(
            "DISEASE", [_wrap_kegg(line, wrap_rule=id_wrap(13)) for line in entries]
        )

    def _structures(self):
        """Return the formatted STRUCTURES section (PRIVATE).

        Each (database, struct ids) tuple becomes one "database: id id ... "
        entry (note the trailing space) before wrapping.
        """
        entries = [
            entry[0] + ": " + " ".join(entry[1]) + " " for entry in self.structures
        ]
        return _write_kegg(
            "STRUCTURES",
            [_wrap_kegg(line, wrap_rule=struct_wrap(5)) for line in entries],
        )

    def _dblinks(self):
        """Return the formatted DBLINKS section (PRIVATE)."""
        # This is a bit of a cheat that won't work if enzyme entries
        # have more than one link id per db id. For now, that's not
        # the case - storing links ids in a list is only to make
        # this class similar to the Compound.Record class.
        entries = [entry[0] + ": " + " ".join(entry[1]) for entry in self.dblinks]
        return _write_kegg("DBLINKS", entries)


def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> with open("KEGG/enzyme.sample") as handle:
    ...     for record in parse(handle):
    ...         print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 alcohol dehydrogenase
    1.1.1.62 17beta-estradiol 17-dehydrogenase
    1.1.1.68 Transferred to 1.5.1.20
    1.6.5.3 NADH:ubiquinone reductase (H+-translocating)
    1.14.13.28 3,9-dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 acetylesterase
    2.7.2.1 acetate kinase

    A record is yielded each time a line starting with "///" is read; any
    data after the last "///" is not yielded.  Lines carrying a keyword
    that is not handled below are skipped, together with their
    continuation lines.
    """
    record = Record()
    # Start with no active keyword so that a stray continuation line at the
    # top of the handle is skipped instead of hitting an unbound name.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        field = line[:_KEYWORD_WIDTH]
        if field != _CONTINUATION:
            # Compare keywords without their column padding; only trailing
            # whitespace is removed, so indented sub-keywords stay distinct.
            keyword = field.rstrip()
        data = line[_KEYWORD_WIDTH:].strip()
        if keyword == "ENTRY":
            # The data looks like "EC <number> ..."; keep the number only.
            words = data.split()
            record.entry = words[1]
        elif keyword in _PLAIN_FIELDS:
            getattr(record, _PLAIN_FIELDS[keyword]).append(data)
        elif keyword in _SEMICOLON_FIELDS:
            getattr(record, _SEMICOLON_FIELDS[keyword]).append(data.strip(";"))
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only, so identifiers that
                # themselves contain ":" do not break the unpacking.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation line: more ids for the previous database.
                record.dblinks[-1][1].extend(data.split())
        elif keyword == "DISEASE":
            if ":" in data:
                # Split on the first colon only; disease names may hold ":".
                database, data = data.split(":", 1)
                number, name = data.split(None, 1)
                record.disease.append((database, number, name))
            else:
                # Continuation line: the disease name carries on.
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "GENES":
            # Drop the parenthesised suffix that may follow a gene id.
            if data[3:5] == ": " or data[4:6] == ": ":
                # New organism: a 3- or 4-character code followed by ": ".
                key, values = data.split(":", 1)
                values = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, values))
            else:
                # Continuation line: more genes for the previous organism.
                record.genes[-1][1].extend(
                    value.split("(")[0] for value in data.split()
                )
        elif keyword == "PATHWAY":
            if data[:5] == "PATH:":
                _, map_num, name = data.split(None, 2)
                record.pathway.append(("PATH", map_num, name))
            else:
                # No explicit database prefix; it is recorded as "PATH".
                ec_num, name = data.split(None, 1)
                record.pathway.append(("PATH", ec_num, name))
        elif keyword == "STRUCTURES":
            if data[:4] == "PDB:":
                record.structures.append((data[:3], data[4:].split()))
            else:
                # Continuation line: more accessions for the previous entry.
                record.structures[-1][1].extend(data.split())


def read(handle):
    """Parse a KEGG Enzyme file with exactly one entry.

    If the handle contains no records, or more than one record,
    a ValueError is raised.  For example:

    >>> with open("KEGG/enzyme.new") as handle:
    ...     record = read(handle)
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    6.2.1.25 benzoate---CoA ligase
    """
    records = parse(handle)
    # parse() only ever yields Record instances, so None is a safe sentinel.
    first = next(records, None)
    if first is None:
        raise ValueError("No records found in handle")
    if next(records, None) is not None:
        raise ValueError("More than one record found in handle")
    return first


if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()