# Copyright 1999 by Jeffrey Chang. All rights reserved.
# Copyright 2009 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""Parse the enzyme.dat file from Enzyme at ExPASy.

See https://www.expasy.org/enzyme/

Tested with the release of 03-Mar-2009.

Functions:
 - read       Reads a file containing one ENZYME entry
 - parse      Reads a file containing multiple ENZYME entries

Classes:
 - Record     Holds ENZYME data.

"""


def parse(handle):
    """Parse ENZYME records.

    This function is for parsing ENZYME files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    Yields one Record per entry until the handle is exhausted.  The handle
    is iterated line by line once per record, so it must keep its position
    between iterations (as an open file does); a plain list of lines would
    restart from the top for every record.
    """
    while True:
        record = __read(handle)
        if not record:
            break
        yield record


def read(handle):
    """Read one ENZYME record.

    This function is for parsing ENZYME files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    Returns the Record that was read.  Raises ValueError if any data is
    left on the handle after the first record.
    """
    record = __read(handle)
    # We should have reached the end of the record by now
    remainder = handle.read()
    if remainder:
        raise ValueError("More than one ENZYME record found")
    return record


class Record(dict):
    """Holds information from an ExPASy ENZYME record as a Python dictionary.

    Each record contains the following keys:

     - ID: EC number
     - DE: Recommended name
     - AN: Alternative names (if any)
     - CA: Catalytic activity
     - CF: Cofactors (if any)
     - PR: Pointers to the Prosite documentation entries that
       correspond to the enzyme (if any)
     - DR: Pointers to the Swiss-Prot protein sequence entries
       that correspond to the enzyme (if any)
     - CC: Comments

    """

    def __init__(self):
        """Initialize the class."""
        dict.__init__(self)
        self["ID"] = ''
        self["DE"] = ''
        self["AN"] = []
        self["CA"] = ''
        self["CF"] = ''
        self["CC"] = []   # one entry per "-!-" comment
        self["PR"] = []
        self["DR"] = []

    def __repr__(self):
        """Return the class name followed by the ID and, if set, the DE."""
        if self["ID"]:
            if self["DE"]:
                return "%s (%s, %s)" % (self.__class__.__name__,
                                        self["ID"], self["DE"])
            else:
                return "%s (%s)" % (self.__class__.__name__,
                                    self["ID"])
        else:
            return "%s ( )" % (self.__class__.__name__)

    def __str__(self):
        """Return a multi-line summary; DR is shown only as a count."""
        output = ["ID: " + self["ID"],
                  "DE: " + self["DE"],
                  "AN: " + repr(self["AN"]),
                  "CA: '" + self["CA"] + "'",
                  "CF: " + self["CF"],
                  "CC: " + repr(self["CC"]),
                  "PR: " + repr(self["PR"]),
                  "DR: %d Records" % len(self["DR"])]
        return "\n".join(output)


# Everything below is private


def __read(handle):
    """Read lines from handle up to the end of the next record.

    Each line is split into a two-character key (columns 0-1) and a value
    (column 5 onwards, trailing whitespace removed).  An "ID" line starts a
    new Record and a "//" line ends it.  Lines with any other unrecognised
    key are ignored.

    Returns the Record, or None if the handle runs out before any "ID"
    line is seen.

    Raises ValueError if the handle ends in the middle of a record, or if
    a PR line does not start with "PROSITE; ".
    """
    record = None
    for line in handle:
        key, value = line[:2], line[5:].rstrip()
        if key == "ID":
            record = Record()
            record["ID"] = value
        elif key == "DE":
            record["DE"] += value
        elif key == "AN":
            # A name that does not yet end with "." continues on this line.
            if record["AN"] and not record["AN"][-1].endswith("."):
                record["AN"][-1] += " " + value
            else:
                record["AN"].append(value)
        elif key == "CA":
            record["CA"] += value
        elif key == "DR":
            # Semicolon-separated "accession, entry name" pairs.
            pair_data = value.rstrip(";").split(';')
            for pair in pair_data:
                t1, t2 = pair.split(',')
                row = [t1.strip(), t2.strip()]
                record["DR"].append(row)
        elif key == "CF":
            if record["CF"]:
                record["CF"] += " " + value
            else:
                record["CF"] = value
        elif key == "PR":
            # Raise explicitly rather than assert: asserts are stripped
            # under "python -O", which would let a malformed line through
            # with its first nine characters silently chopped off.
            if not value.startswith("PROSITE; "):
                raise ValueError(
                    "PR line does not start with 'PROSITE; ': %r" % value)
            value = value[9:].rstrip(";")
            record["PR"].append(value)
        elif key == 'CC':
            if value.startswith("-!- "):
                # Start of a new comment.
                record["CC"].append(value[4:])
            elif value.startswith(" ") and record["CC"]:
                # Continuation line; value[3:] keeps one leading space as
                # the separator from the text gathered so far.
                record["CC"][-1] += value[3:]
            # copyright notice is silently skipped
        elif key == "//":
            if record:
                return record
            else:  # This was the copyright notice
                continue
    if record:
        raise ValueError("Unexpected end of stream")