# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# Copyright 2009 by Michiel de Hoon.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Parse the enzyme.dat file from Enzyme at ExPASy.

See https://www.expasy.org/enzyme/

Tested with the release of 03-Mar-2009.

Functions:
 - read       Reads a file containing one ENZYME entry
 - parse      Reads a file containing multiple ENZYME entries

Classes:
 - Record     Holds ENZYME data.

"""


def parse(handle):
    """Iterate over the records of a multi-record ENZYME file.

    This is a generator: records are read from the handle lazily, one
    at a time, and iteration stops as soon as no further record can be
    read.

    Arguments:
     - handle   - handle to the file.

    """
    # Prime the loop with the first record, then keep fetching the next
    # one only after the current record has been handed to the caller.
    record = __read(handle)
    while record:
        yield record
        record = __read(handle)


def read(handle):
    """Return the single ENZYME record stored in a file.

    Use this function when the file is expected to contain exactly
    one record; a ValueError is raised if any data is left in the
    handle after the first record has been read.

    Arguments:
     - handle   - handle to the file.

    """
    record = __read(handle)
    # The handle must be exhausted now; leftover data means that the
    # file holds more than the one record we were asked for.
    if handle.read():
        raise ValueError("More than one ENZYME record found")
    return record


class Record(dict):
    """Dictionary holding the fields of one ExPASy ENZYME entry.

    A newly created record contains the following keys:

    - ID: EC number (string)
    - DE: Recommended name (string)
    - AN: Alternative names, if any (list)
    - CA: Catalytic activity (string)
    - CF: Cofactors, if any (string)
    - PR: Pointers to the Prosite documentation entries that
      correspond to the enzyme, if any (list)
    - DR: Pointers to the Swiss-Prot protein sequence entries
      that correspond to the enzyme, if any (list)
    - CC: Comments (list)

    """

    def __init__(self):
        """Create a record in which every field holds its empty value."""
        dict.__init__(self)
        # This tuple is rebuilt on every call, so each record receives
        # its own, unshared list objects.
        empty_fields = (
            ("ID", ''),
            ("DE", ''),
            ("AN", []),
            ("CA", ''),
            ("CF", ''),
            ("CC", []),   # one comment per list item
            ("PR", []),
            ("DR", []),
        )
        for field, blank in empty_fields:
            self[field] = blank

    def __repr__(self):
        """Return the class name followed by the ID and DE, when set."""
        name = self.__class__.__name__
        if not self["ID"]:
            return "%s ( )" % (name)
        if not self["DE"]:
            return "%s (%s)" % (name, self["ID"])
        return "%s (%s, %s)" % (name, self["ID"], self["DE"])

    def __str__(self):
        """Return a multi-line summary with one line per field.

        The DR field is summarized by its number of entries only.
        """
        lines = []
        lines.append("ID: " + self["ID"])
        lines.append("DE: " + self["DE"])
        lines.append("AN: " + repr(self["AN"]))
        lines.append("CA: '" + self["CA"] + "'")
        lines.append("CF: " + self["CF"])
        lines.append("CC: " + repr(self["CC"]))
        lines.append("PR: " + repr(self["PR"]))
        lines.append("DR: %d Records" % len(self["DR"]))
        return "\n".join(lines)

# Everything below is private


def __read(handle):
    """Read the next ENZYME record from the handle.

    Lines are consumed from the handle until the ``//`` terminator of a
    record is reached. Anything that precedes the first ID line and is
    closed by its own ``//`` (the file header with the copyright notice)
    is skipped.

    Arguments:
     - handle   - iterable yielding the lines of an ENZYME file.

    Returns the parsed Record, or None if the handle is exhausted
    without an ID line having been seen.

    Raises ValueError if the stream ends in the middle of a record, or
    if a PR line does not start with "PROSITE; ".
    """
    record = None
    for line in handle:
        # The first two characters are the line code; the data starts
        # at the sixth character.
        key, value = line[:2], line[5:].rstrip()
        if key == "ID":
            record = Record()
            record["ID"] = value
        elif key == "DE":
            record["DE"] += value
        elif key == "AN":
            # A name that does not yet end with a period is continued
            # on this line; otherwise this line starts a new name.
            if record["AN"] and not record["AN"][-1].endswith("."):
                record["AN"][-1] += " " + value
            else:
                record["AN"].append(value)
        elif key == "CA":
            record["CA"] += value
        elif key == "DR":
            # Each line holds several "accession, name;" pairs.
            pair_data = value.rstrip(";").split(';')
            for pair in pair_data:
                t1, t2 = pair.split(',')
                row = [t1.strip(), t2.strip()]
                record["DR"].append(row)
        elif key == "CF":
            if record["CF"]:
                record["CF"] += " " + value
            else:
                record["CF"] = value
        elif key == "PR":
            prefix = "PROSITE; "
            # Validate explicitly: an assert would be stripped under
            # "python -O" and the slice below would then silently
            # produce a garbled entry.
            if not value.startswith(prefix):
                raise ValueError(
                    "PR line does not start with 'PROSITE; ': %r" % value)
            record["PR"].append(value[len(prefix):].rstrip(";"))
        elif key == 'CC':
            if record is None:
                # CC lines before the first ID line belong to the file
                # header (copyright notice), which is silently skipped.
                continue
            if value.startswith("-!- "):
                record["CC"].append(value[4:])
            elif value.startswith("    ") and record["CC"]:
                # Continuation of the previous comment: keep one of the
                # four leading spaces as the word separator.
                record["CC"][-1] += value[3:]
            # any other CC line is silently skipped
        elif key == "//":
            if record is not None:
                return record
            else:  # This terminated the header, not a record
                continue
    if record is not None:
        raise ValueError("Unexpected end of stream")