# Copyright (c) 2003-2005 The Regents of the University of California.
# Copyright (c) 2005 Gavin E. Crooks
# This software is distributed under the MIT Open Source License.
#
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

""" Popular color codings for nucleic and amino acids.

Classes:
    ColorScheme -- A color scheme
    SymbolColor
    IndexColor
    RefSeqColor

Generic
    monochrome

Nucleotides
    nucleotide
    base pairing

Amino Acid
    hydrophobicity
    chemistry
    charge
    taylor

Status : Beta - Needs documentation.

"""
# Good online references include bioruby and the JalView alignment editor.
# Clamp, M., Cuff, J., Searle, S. M. and Barton, G. J. (2004),
# "The Jalview Java Alignment Editor," Bioinformatics, 12, 426-7
# http://www.jalview.org

from typing import List, Optional, Sequence

from . import seq
from .color import Color
from .seq import Alphabet


# TODO: Make as abstract
class ColorRule:
    """
    Define an interface for coloring individual symbols based on their position
    and identity.

    Subclasses should reimplement the symbol_color() method to return a Color
    object based on the given parameters, or None when the rule has no opinion
    about the symbol.
    """

    def symbol_color(self, seq_index: int, symbol: str, rank: int) -> Optional[Color]:
        """Return the color for ``symbol`` at ``seq_index``, or None.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError  # pragma: no cover


class ColorScheme(ColorRule):
    """
    Specify which color each symbol in a sequence logo should be.

    A color scheme is primarily a container of color rules.  These rules would
    be along the lines of "hydrophobic residues are blue" or "indices 5-10 are
    red" or "the wildtype sequence is black".  When a color is requested for a
    particular symbol, each rule is consulted in turn until one provides a
    color.  If no rule provides a color, the given default color will be used.
    """

    def __init__(
        self,
        rules: Optional[List[ColorRule]] = None,
        title: str = "",
        description: str = "",
        default_color: str = "black",
        alphabet: Alphabet = seq.generic_alphabet,
    ) -> None:
        """Create a color scheme.

        Args:
            rules: Color rules, consulted in order. When omitted, the scheme
                starts with its own empty list.
            title: Title of the scheme.
            description: Description of the scheme.
            default_color: Color string (parsed with ``Color.from_string``)
                used when no rule supplies a color.
            alphabet: Alphabet against which requested symbols are validated.
        """
        # A literal ``[]`` default would be created once and shared by every
        # scheme constructed without explicit rules, so mutating one scheme's
        # ``rules`` would silently change the others.  Build a fresh list
        # instead.  A list supplied by the caller is still stored by reference.
        self.rules = [] if rules is None else rules
        self.title = title
        self.description = description
        self.default_color = Color.from_string(default_color)
        self.alphabet = alphabet

    def symbol_color(self, seq_index: int, symbol: str, rank: int) -> Color:
        """Return the color of ``symbol`` at position ``seq_index``.

        The first rule that returns a non-None color wins; if none does, the
        scheme's default color is returned.

        Raises:
            KeyError: If ``symbol`` is not in this scheme's alphabet.
        """
        if symbol not in self.alphabet:
            raise KeyError("Colored symbol '%s' does not exist in alphabet." % symbol)

        for rule in self.rules:
            color = rule.symbol_color(seq_index, symbol, rank)
            if color is not None:
                return color

        return self.default_color


class SymbolColor(ColorRule):
    """
    Represent the given set of symbols (e.g. "DEHKR" for charged residues) with
    a single color.
    """

    def __init__(
        self, symbols: str, color: str, description: Optional[str] = None
    ) -> None:
        """Create the rule.

        Args:
            symbols: The symbols to color; stored upper-cased.
            color: Color string, parsed with ``Color.from_string``.
            description: Optional description of the rule.
        """
        self.symbols = symbols.upper()
        self.color = Color.from_string(color)
        self.description = description

    def symbol_color(self, seq_index: int, symbol: str, rank: int) -> Optional[Color]:
        """Return this rule's color if the upper-cased ``symbol`` occurs in the
        rule's symbol string, otherwise None."""
        if symbol.upper() in self.symbols:
            return self.color
        return None


class IndexColor(ColorRule):
    """
    Represent the given set of indices (e.g. range(10) for the first ten
    residues) with a single color.
    """

    def __init__(
        self, indices: Sequence[int], color: str, description: Optional[str] = None
    ) -> None:
        """Create the rule.

        Args:
            indices: The sequence positions to color (e.g. ``range(10)``).
            color: Color string, parsed with ``Color.from_string``.
            description: Optional description of the rule.
        """
        self.indices = indices
        self.color = Color.from_string(color)
        self.description = description

    def symbol_color(self, seq_index: int, symbol: str, rank: int) -> Optional[Color]:
        """Return this rule's color if ``seq_index`` is one of the rule's
        indices, otherwise None."""
        if seq_index in self.indices:
            return self.color
        return None


class RefSeqColor(ColorRule):
    """
    Color the given reference sequence in its own color, so you can easily see
    which positions match that sequence and which don't.
    """

    def __init__(
        self, ref_seq: str, color: str, description: Optional[str] = None
    ) -> None:
        """Create the rule.

        Args:
            ref_seq: The reference sequence; stored upper-cased.
            color: Color string, parsed with ``Color.from_string``.
            description: Optional description of the rule.
        """
        self.ref_seq = ref_seq.upper()
        self.color = Color.from_string(color)
        self.description = description

    def symbol_color(self, seq_index: int, symbol: str, rank: int) -> Optional[Color]:
        """Return this rule's color if the upper-cased ``symbol`` equals the
        reference symbol at ``seq_index``, otherwise None.

        Positions outside the reference sequence yield None.
        """
        # Without this guard an index past the end of the reference raises
        # IndexError, and a negative index wraps around and is compared with
        # the tail of the reference.  Neither position has a reference symbol,
        # so the rule offers no color for it.
        if not 0 <= seq_index < len(self.ref_seq):
            return None
        if symbol.upper() == self.ref_seq[seq_index]:
            return self.color
        return None


monochrome = ColorScheme([])  # This list intentionally left blank

# From makelogo
nucleotide = ColorScheme(
    [
        SymbolColor("G", "orange"),
        SymbolColor("TU", "red"),
        SymbolColor("C", "blue"),
        SymbolColor("A", "green"),
    ],
)

base_pairing = ColorScheme(
    [
        SymbolColor("TAU", "darkorange", "Weak (2 Watson-Crick hydrogen bonds)"),
        SymbolColor("GC", "blue", "Strong (3 Watson-Crick hydrogen bonds)"),
    ],
)

# From Crooks2004c-Proteins-SeqStr.pdf
hydrophobicity = ColorScheme(
    [
        SymbolColor("RKDENQ", "blue", "hydrophilic"),
        SymbolColor("SGHTAP", "green", "neutral"),
        SymbolColor("YVMCLFIW", "black", "hydrophobic"),
    ],
    alphabet=seq.unambiguous_protein_alphabet,
)

# from makelogo
chemistry = ColorScheme(
    [
        SymbolColor("GSTYC", "green", "polar"),
        SymbolColor("NQ", "purple", "neutral"),
        SymbolColor("KRH", "blue", "basic"),
        SymbolColor("DE", "red", "acidic"),
        SymbolColor("PAWFLIMV", "black", "hydrophobic"),
    ],
    alphabet=seq.unambiguous_protein_alphabet,
)

charge = ColorScheme(
    [SymbolColor("KRH", "blue", "Positive"), SymbolColor("DE", "red", "Negative")],
    alphabet=seq.unambiguous_protein_alphabet,
)

taylor = ColorScheme(
    [
        SymbolColor("A", "#CCFF00"),
        SymbolColor("C", "#FFFF00"),
        SymbolColor("D", "#FF0000"),
        SymbolColor("E", "#FF0066"),
        SymbolColor("F", "#00FF66"),
        SymbolColor("G", "#FF9900"),
        SymbolColor("H", "#0066FF"),
        SymbolColor("I", "#66FF00"),
        SymbolColor("K", "#6600FF"),
        SymbolColor("L", "#33FF00"),
        SymbolColor("M", "#00FF00"),
        SymbolColor("N", "#CC00FF"),
        SymbolColor("P", "#FFCC00"),
        SymbolColor("Q", "#FF00CC"),
        SymbolColor("R", "#0000FF"),
        SymbolColor("S", "#FF3300"),
        SymbolColor("T", "#FF6600"),
        SymbolColor("V", "#99FF00"),
        SymbolColor("W", "#00CCFF"),
        SymbolColor("Y", "#00FFCC"),
    ],
    title="Taylor",
    description="W. Taylor, Protein Engineering, Vol 10 , 743-746 (1997)",
    alphabet=seq.unambiguous_protein_alphabet,
)