# Copyright 2004 by Iddo Friedberg.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Reduced alphabets which lump together several amino-acids into one letter.

Reduced (redundant or simplified) alphabets are used to represent protein
sequences using an alternative alphabet which lumps together several
amino-acids into one letter, based on physico-chemical traits. For example,
all the aliphatics (I, L, V) are usually quite interchangeable, so many
sequence studies group them into one letter.

Examples of reduced alphabets are available in:

http://viscose.ifg.uni-muenster.de/html/alphabets.html

The Murphy tables are from here:

Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid
alphabets for protein fold recognition and implications for folding.
Protein Eng. 13(3):149-152

Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of
the tables here, or a user-defined table.
"""

from Bio import Alphabet


# Murphy 15-letter reduction table: maps each of the 20 standard amino-acid
# letters to the letter standing for its group. Each pair below is
# (group letter, residues merged into that group).
murphy_15_tab = {
    residue: symbol
    for symbol, members in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "S"),
        ("T", "T"),
        ("P", "P"),
        ("F", "FY"),
        ("W", "W"),
        ("E", "E"),
        ("D", "D"),
        ("N", "N"),
        ("Q", "Q"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}

class Murphy15(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 15 letters (the values of murphy_15_tab)."""

    size = 15
    letters = "LCAGSTPFWEDNQKH"


# Ready-made instance of the 15-letter alphabet.
murphy_15 = Murphy15()

# Murphy 10-letter reduction table: maps each of the 20 standard amino-acid
# letters to the letter standing for its group. Each pair below is
# (group letter, residues merged into that group).
murphy_10_tab = {
    residue: symbol
    for symbol, members in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}
class Murphy10(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 10 letters (the values of murphy_10_tab)."""

    size = 10
    letters = "LCAGSPFEKH"


# Ready-made instance of the 10-letter alphabet.
murphy_10 = Murphy10()

# Murphy 8-letter reduction table: maps each of the 20 standard amino-acid
# letters to the letter standing for its group. Each pair below is
# (group letter, residues merged into that group).
murphy_8_tab = {
    residue: symbol
    for symbol, members in (
        ("L", "LVIMC"),
        ("A", "AG"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}

class Murphy8(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 8 letters (the values of murphy_8_tab)."""

    size = 8
    letters = "LASPFEKH"


# Ready-made instance of the 8-letter alphabet.
murphy_8 = Murphy8()

# Murphy 4-letter reduction table: maps each of the 20 standard amino-acid
# letters to the letter standing for its group. Each pair below is
# (group letter, residues merged into that group).
murphy_4_tab = {
    residue: symbol
    for symbol, members in (
        ("L", "LVIMC"),
        ("A", "AGSTP"),
        ("F", "FYW"),
        ("E", "EDNQKRH"),
    )
    for residue in members
}

class Murphy4(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 4 letters (the values of murphy_4_tab)."""

    size = 4
    letters = "LAFE"


# Ready-made instance of the 4-letter alphabet.
murphy_4 = Murphy4()

# Two-letter HP table: maps each of the 20 standard amino-acid letters to
# "P" (hydrophilic) or "H" (hydrophobic). Each pair below is
# (class letter, residues assigned to that class).
hp_model_tab = {
    residue: symbol
    for symbol, members in (
        ("P", "AGTSNQDEHRKP"),  # Hydrophilic
        ("H", "CMFILVWY"),      # Hydrophobic
    )
    for residue in members
}

class HPModel(Alphabet.ProteinAlphabet):
    """Two-letter protein alphabet, "H" and "P" (the values of hp_model_tab)."""

    size = 2
    letters = "HP"


# Ready-made instance of the two-letter HP alphabet.
hp_model = HPModel()

# Five-letter physico-chemical table: maps each of the 20 standard amino-acid
# letters to the letter of its class. Each pair below is
# (class letter, residues assigned to that class).
pc_5_table = {
    residue: symbol
    for symbol, members in (
        ("A", "IVL"),    # Aliphatic
        ("R", "FYWH"),   # Aromatic
        ("C", "KRDE"),   # Charged
        ("T", "GACS"),   # Tiny
        ("D", "TMQNP"),  # Diverse
    )
    for residue in members
}

class PC5(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet of 5 letters (the values of pc_5_table).

    The letters stand for the classes annotated in pc_5_table:
    A (aliphatic), R (aromatic), C (charged), T (tiny) and D (diverse).
    """

    letters = "ARCTD"
    size = 5


# Instantiate the PC5 alphabet defined just above. This line previously
# re-created hp_model, so no PC5 instance was ever exported; hp_model keeps
# the binding it received right after the HPModel class.
pc_5 = PC5()