# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# Copyright 2009 by Michiel de Hoon.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""
This module provides code to work with the enzyme.dat file from
Enzyme.
http://www.expasy.ch/enzyme/

Tested with the release of 03-Mar-2009.

Functions:
read       Reads a file containing one ENZYME entry
parse      Reads a file containing multiple ENZYME entries

Classes:
Record     Holds ENZYME data.

"""

def parse(handle):
    """Iterate over the records in a multi-record ENZYME file.

    Records are read from the handle one at a time and yielded as they
    are completed; iteration stops as soon as no further record can be
    read.

    handle   - handle to the file."""

    entry = __read(handle)
    while entry:
        yield entry
        # Fetch the next record only after the consumer asks for it.
        entry = __read(handle)

def read(handle):
    """Read a single ENZYME record.

    Use this function for ENZYME files that hold exactly one record.
    After the first record has been read, any data still left on the
    handle is treated as an error.

    handle   - handle to the file.

    Returns the record that was read (None if the handle held no record).
    Raises ValueError if data follows the first record."""

    entry = __read(handle)
    # Nothing may follow the terminator of the first record.
    if handle.read():
        raise ValueError("More than one ENZYME record found")
    return entry


class Record(dict):
    """Dictionary holding the fields of one ExPASy ENZYME entry.

    A freshly created record contains the following keys:
        ID: EC number (string)
        DE: Recommended name (string)
        AN: Alternative names, if any (list)
        CA: Catalytic activity (string)
        CF: Cofactors, if any (string)
        CC: Comments (list)
        PR: Pointers to the Prosite documentation entries that
            correspond to the enzyme, if any (list)
        DR: Pointers to the Swiss-Prot protein sequence entries
            that correspond to the enzyme, if any (list)
    """

    def __init__(self):
        """Create a record with every key present and empty."""
        dict.__init__(self)
        # Calling the factory gives each instance its own '' or [] value;
        # the keys are inserted in the same order as they are listed here.
        layout = (
            ("ID", str),
            ("DE", str),
            ("AN", list),
            ("CA", str),
            ("CF", str),
            ("CC", list),
            ("PR", list),
            ("DR", list),
        )
        for key, factory in layout:
            self[key] = factory()

    def __repr__(self):
        """Return the class name followed by the ID and DE, when set."""
        class_name = self.__class__.__name__
        ec_number = self["ID"]
        if not ec_number:
            return "%s ( )" % class_name
        description = self["DE"]
        if not description:
            return "%s (%s)" % (class_name, ec_number)
        return "%s (%s, %s)" % (class_name, ec_number, description)

    def __str__(self):
        """Return a one-line summary of all fields.

        The DR entries are only counted, not listed.
        """
        pieces = [
            "ID: " + self["ID"],
            "DE: " + self["DE"],
            "AN: " + repr(self["AN"]),
            "CA: '" + self["CA"] + "'",
            "CF: " + self["CF"],
            "CC: " + repr(self["CC"]),
            "PR: " + repr(self["PR"]),
            "DR: %d Records" % len(self["DR"]),
        ]
        return " ".join(pieces)

# Everything below is private

def __read(handle):
    """Read the next ENZYME record from the handle.

    Lines are consumed until a "//" terminator that closes a record is
    reached.  Each line is split into a two-character line code
    (columns 1-2) and its data (column 6 onwards, trailing whitespace
    removed).  CC lines and "//" terminators found before the first ID
    line belong to the leading copyright notice and are skipped.

    handle   - iterable yielding the lines of an ENZYME file.

    Returns the Record that was read, or None if the handle is exhausted
    before any ID line is found.

    Raises ValueError if the stream ends in the middle of a record, if a
    DE, AN, CA, CF, PR or DR line appears before the first ID line, if a
    PR line does not start with "PROSITE; ", or if a DR entry is not a
    single comma-separated pair.
    """
    record = None
    for line in handle:
        key, value = line[:2], line[5:].rstrip()
        if key == "ID":
            # An ID line opens a new record.
            record = Record()
            record["ID"] = value
            continue
        if key == "//":
            if record is not None:
                return record
            # Terminator of the copyright notice; keep looking for a record.
            continue
        if key not in ("DE", "AN", "CA", "CF", "PR", "DR", "CC"):
            # Blank lines and unknown line codes are ignored.
            continue
        if record is None:
            if key == "CC":
                # Copyright notice preceding the first record.
                continue
            # Fail with a clear message instead of subscripting None.
            raise ValueError("%s line found before the ID line of an "
                             "ENZYME record" % key)
        if key == "DE":
            record["DE"] += value
        elif key == "AN":
            # A name runs until a line ending in "."; lines that follow an
            # unfinished name are continuations of it.
            if record["AN"] and not record["AN"][-1].endswith("."):
                record["AN"][-1] += " " + value
            else:
                record["AN"].append(value)
        elif key == "CA":
            record["CA"] += value
        elif key == "DR":
            # Entries are separated by ";"; each entry is a "," separated
            # pair that is stored as a two-item list.
            for pair in value.rstrip(";").split(';'):
                t1, t2 = pair.split(',')
                record["DR"].append([t1.strip(), t2.strip()])
        elif key == "CF":
            if record["CF"]:
                record["CF"] += " " + value
            else:
                record["CF"] = value
        elif key == "PR":
            # Validate explicitly: an assert would vanish under "python -O"
            # and let the slice below silently mangle a malformed line.
            if not value.startswith("PROSITE; "):
                raise ValueError("PR line does not start with 'PROSITE; ': "
                                 "%r" % (value,))
            record["PR"].append(value[9:].rstrip(";"))
        else:  # key == "CC"
            if value.startswith("-!- "):
                # "-!- " starts a new comment.
                record["CC"].append(value[4:])
            elif value.startswith("    ") and record["CC"]:
                # Continuation line: keep one of the four leading blanks so
                # it is joined to the previous text with a single space.
                record["CC"][-1] += value[3:]
            # any other CC line (copyright notice) is silently skipped
    if record is not None:
        raise ValueError("Unexpected end of stream")
    return None