# Copyright 2013 by Leighton Pritchard.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Classes to represent a KGML Pathway Map.

The KGML definition is as of release KGML v0.7.1
(http://www.kegg.jp/kegg/xml/docs/)

Classes:
 - Pathway - Specifies graph information for the pathway map
 - Relation - Specifies a relationship between two proteins or KOs,
   or protein and compound. There is an implied direction to the
   relationship in some cases.
 - Reaction - A specific chemical reaction between a substrate and
   a product.
 - Entry - A node in the pathway graph
 - Component - Entry subelement identifying a member of a complex node
 - Graphics - Entry subelement describing its visual representation

"""

import time
from itertools import chain
from xml.dom import minidom
import xml.etree.ElementTree as ET

from Bio._py3k import _is_int_or_long, _as_string


# Pathway
class Pathway(object):
    """Represents a KGML pathway from KEGG.

    Specifies graph information for the pathway map, as described in
    release KGML v0.7.1 (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - name - KEGGID of the pathway map
     - org - ko/ec/[org prefix]
     - number - map number (integer)
     - title - the map title
     - image - URL of the image map for the pathway
     - link - URL of information about the pathway
     - entries - Dictionary of entries in the pathway, keyed by node ID
     - reactions - List of reactions in the pathway
     - relations - List of relations in the pathway

    The name attribute has a restricted format, so we make it a property and
    enforce the formatting.

    The Pathway object is the only allowed route for adding/removing
    Entry, Reaction, or Relation elements.

    Entries are held in a dictionary and keyed by the node ID for the
    pathway graph - this allows for ready access via the Reaction/Relation
    etc. elements.  Entries must be added before reference by any other
    element.

    Reactions are held in a dictionary, keyed by node ID for the path.
    The elements referred to in the reaction must be added before the
    reaction itself.

    """

    def __init__(self):
        """Initialize the class."""
        self._name = ''
        self.org = ''
        self._number = None
        self.title = ''
        self.image = ''
        self.link = ''
        self.entries = {}
        self._reactions = {}
        self._relations = set()

    def get_KGML(self):
        """Return the pathway as a string in prettified KGML format.

        The serialised ``element`` is prefixed with an XML declaration,
        the KGML DOCTYPE and a creation-time comment, then re-parsed with
        minidom so that it can be pretty-printed.
        """
        header = '\n'.join(['<?xml version="1.0"?>',
                            '<!DOCTYPE pathway SYSTEM '
                            '"http://www.genome.jp/kegg/xml/'
                            'KGML_v0.7.1_.dtd">',
                            '<!-- Created by KGML_Pathway.py %s -->' %
                            time.asctime()])
        rough_xml = header + _as_string(ET.tostring(self.element, 'utf-8'))
        reparsed = minidom.parseString(rough_xml)
        return reparsed.toprettyxml(indent=" ")

    def add_entry(self, entry):
        """Add an Entry element to the pathway.

        Raises TypeError if the entry's node ID is not an integer.  An
        existing entry with the same node ID is replaced.
        """
        # We insist that the node ID is an integer
        if not _is_int_or_long(entry.id):
            raise TypeError("Node ID must be an integer, got %s (%s)" %
                            (type(entry.id), entry.id))
        entry._pathway = self  # Let the entry know about the pathway
        self.entries[entry.id] = entry

    def remove_entry(self, entry):
        """Remove an Entry element from the pathway.

        Raises TypeError if the entry's node ID is not an integer, and
        KeyError if no entry with that ID is held by the pathway.
        """
        if not _is_int_or_long(entry.id):
            raise TypeError("Node ID must be an integer, got %s (%s)" %
                            (type(entry.id), entry.id))
        # We need to remove the entry from any other elements that may
        # contain it, which means removing those elements
        # TODO
        del self.entries[entry.id]

    def add_reaction(self, reaction):
        """Add a Reaction element to the pathway.

        Raises ValueError if the reaction's node ID is not an integer, or
        if it does not match the node ID of an entry already in the
        pathway.
        """
        # We insist that the node ID is an integer and corresponds to an entry
        # NOTE(review): the other add/remove methods raise TypeError for a
        # non-integer ID; ValueError is kept here so existing callers that
        # catch it keep working.
        if not _is_int_or_long(reaction.id):
            raise ValueError("Node ID must be an integer, got %s (%s)" %
                             (type(reaction.id), reaction.id))
        if reaction.id not in self.entries:
            raise ValueError("Reaction ID %d has no corresponding"
                             " entry" % reaction.id)
        reaction._pathway = self  # Let the reaction know about the pathway
        self._reactions[reaction.id] = reaction

    def remove_reaction(self, reaction):
        """Remove a Reaction element from the pathway.

        Raises TypeError if the reaction's node ID is not an integer, and
        KeyError if no reaction with that ID is held by the pathway.
        """
        if not _is_int_or_long(reaction.id):
            raise TypeError("Node ID must be an integer, got %s (%s)" %
                            (type(reaction.id), reaction.id))
        # We need to remove the reaction from any other elements that may
        # contain it, which means removing those elements
        # TODO
        del self._reactions[reaction.id]

    def add_relation(self, relation):
        """Add a Relation element to the pathway."""
        relation._pathway = self  # Let the relation know about the pathway
        self._relations.add(relation)

    def remove_relation(self, relation):
        """Remove a Relation element from the pathway.

        Raises KeyError if the relation is not held by the pathway.
        """
        self._relations.remove(relation)

    def __str__(self):
        """Return a readable summary description string."""
        outstr = ['Pathway: %s' % self.title,
                  'KEGG ID: %s' % self.name,
                  'Image file: %s' % self.image,
                  'Organism: %s' % self.org,
                  'Entries: %d' % len(self.entries),
                  'Entry types:']
        # Only entry types that actually occur are reported
        for t in ['ortholog', 'enzyme', 'reaction',
                  'gene', 'group', 'compound', 'map']:
            etype = [e for e in self.entries.values() if e.type == t]
            if etype:
                outstr.append('\t%s: %d' % (t, len(etype)))
        return '\n'.join(outstr) + '\n'

    # Assert correct formatting of the pathway name, and other attributes
    def _getname(self):
        return self._name

    def _setname(self, value):
        if not value.startswith('path:'):
            raise ValueError("Pathway name should begin with 'path:', "
                             "got %s" % value)
        self._name = value

    def _delname(self):
        del self._name

    name = property(_getname, _setname, _delname,
                    "The KEGGID for the pathway map.")

    # The map number is always stored as an integer
    def _getnumber(self):
        return self._number

    def _setnumber(self, value):
        self._number = int(value)

    def _delnumber(self):
        del self._number

    number = property(_getnumber, _setnumber, _delnumber,
                      "The KEGG map number.")

    @property
    def compounds(self):
        """Get a list of entries of type compound."""
        return [e for e in self.entries.values() if e.type == 'compound']

    @property
    def maps(self):
        """Get a list of entries of type map."""
        return [e for e in self.entries.values() if e.type == 'map']

    @property
    def orthologs(self):
        """Get a list of entries of type ortholog."""
        return [e for e in self.entries.values() if e.type == 'ortholog']

    @property
    def genes(self):
        """Get a list of entries of type gene."""
        return [e for e in self.entries.values() if e.type == 'gene']

    @property
    def reactions(self):
        """Get a list of reactions in the pathway."""
        # Materialise the view so that callers really do get a list
        return list(self._reactions.values())

    @property
    def reaction_entries(self):
        """List of entries corresponding to each reaction in the pathway."""
        return [self.entries[i] for i in self._reactions]

    @property
    def relations(self):
        """Get a list of relations in the pathway."""
        return list(self._relations)

    @property
    def element(self):
        """Return the Pathway as a valid KGML element."""
        # The root is this Pathway element
        pathway = ET.Element('pathway')
        pathway.attrib = {'name': self._name,
                          'org': self.org,
                          'number': str(self._number),
                          'title': self.title,
                          'image': self.image,
                          'link': self.link,
                          }
        # We add the Entries in node ID order
        for eid, entry in sorted(self.entries.items()):
            pathway.append(entry.element)
        # Next we add Relations
        for relation in self._relations:
            pathway.append(relation.element)
        # Finally the Reactions, again in node ID order
        for eid, reaction in sorted(self._reactions.items()):
            pathway.append(reaction.element)
        return pathway

    @property
    def bounds(self):
        """Coordinate bounds for all Graphics elements in the Pathway.

        Returns the [(xmin, ymin), (xmax, ymax)] coordinates for all
        Graphics elements in the Pathway.

        Raises ValueError if the pathway has no entries (or an entry has
        no graphics), since there is then nothing to take a minimum of.
        """
        xlist, ylist = [], []
        for entry in self.entries.values():
            b = entry.bounds
            xlist.extend([b[0][0], b[1][0]])
            ylist.extend([b[0][1], b[1][1]])
        return [(min(xlist), min(ylist)),
                (max(xlist), max(ylist))]


# Entry
class Entry(object):
    """Represent an Entry from KGML.

    Each Entry element is a node in the pathway graph, as described in
    release KGML v0.7.1 (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - id - The ID of the entry in the pathway map (integer)
     - names - List of KEGG IDs for the entry
     - type - The type of the entry
     - link - URL of information about the entry
     - reaction - List of KEGG IDs of the corresponding reactions
       (integer)
     - graphics - List of Graphics objects describing the Entry's visual
       representation
     - components - List of component node ID for this Entry ('group')
     - alt - List of alternate names for the Entry

    NOTE: The alt attribute represents a subelement of the substrate and
    product elements in the KGML file

    """

    def __init__(self):
        """Initialize the class."""
        self._id = None
        self._names = []
        self.type = ''
        self.image = ''
        self.link = ''
        self.graphics = []
        self.components = set()
        self.alt = []
        self._pathway = None
        self._reactions = []

    def __str__(self):
        """Return readable descriptive string."""
        outstr = ['Entry node ID: %d' % self.id,
                  'Names: %s' % self.name,
                  'Type: %s' % self.type,
                  'Components: %s' % self.components,
                  'Reactions: %s' % self.reaction,
                  'Graphics elements: %d %s' % (len(self.graphics),
                                                self.graphics)]
        return '\n'.join(outstr) + '\n'

    def add_component(self, element):
        """Add an element to the entry.

        If the Entry is already part of a pathway, make sure
        the component already exists; ValueError is raised if it does not.
        """
        if self._pathway is not None:
            if element.id not in self._pathway.entries:
                raise ValueError("Component %s is not an entry in the "
                                 "pathway" % element.id)
        self.components.add(element)

    def remove_component(self, value):
        """Remove the passed component from the group.

        Raises KeyError if the component is not part of this entry.
        """
        self.components.remove(value)

    def add_graphics(self, entry):
        """Add the Graphics entry."""
        self.graphics.append(entry)

    def remove_graphics(self, entry):
        """Remove the passed Graphics entry from this Entry.

        Raises ValueError if the Graphics object is not present.
        """
        self.graphics.remove(entry)

    # Names may be given as a space-separated list of KEGG identifiers
    def _getname(self):
        return ' '.join(self._names)

    def _setname(self, value):
        self._names = value.split()

    def _delname(self):
        self._names = []

    name = property(_getname, _setname, _delname,
                    "List of KEGG identifiers for the Entry.")

    # Reactions may be given as a space-separated list of KEGG identifiers
    def _getreaction(self):
        return ' '.join(self._reactions)

    def _setreaction(self, value):
        self._reactions = value.split()

    def _delreaction(self):
        self._reactions = []

    reaction = property(_getreaction, _setreaction, _delreaction,
                        "List of reaction KEGG IDs for this Entry.")

    # We make sure that the node ID is an integer
    def _getid(self):
        return self._id

    def _setid(self, value):
        self._id = int(value)

    def _delid(self):
        del self._id

    id = property(_getid, _setid, _delid,
                  "The pathway graph node ID for the Entry.")

    @property
    def element(self):
        """Return the Entry as a valid KGML element."""
        # The root is this Entry element
        entry = ET.Element('entry')
        entry.attrib = {'id': str(self._id),
                        'name': self.name,
                        'link': self.link,
                        'type': self.type
                        }
        # The reaction attribute is only written when reactions are known
        if self._reactions:
            entry.attrib['reaction'] = self.reaction
        for g in self.graphics:
            entry.append(g.element)
        for c in self.components:
            entry.append(c.element)
        return entry

    @property
    def bounds(self):
        """Coordinate bounds for all Graphics elements in the Entry.

        Return the [(xmin, ymin), (xmax, ymax)] co-ordinates for the Entry
        Graphics elements.

        Raises ValueError if the Entry has no Graphics elements.
        """
        xlist, ylist = [], []
        for g in self.graphics:
            b = g.bounds
            xlist.extend([b[0][0], b[1][0]])
            ylist.extend([b[0][1], b[1][1]])
        return [(min(xlist), min(ylist)),
                (max(xlist), max(ylist))]

    @property
    def is_reactant(self):
        """Return true if this Entry participates in any reaction in its parent pathway.

        An Entry that has not been added to a pathway participates in no
        reaction, so False is returned in that case.
        """
        if self._pathway is None:
            return False
        for rxn in self._pathway.reactions:
            if self._id in rxn.reactant_ids:
                return True
        return False


# Component
class Component(object):
    """An Entry subelement used to represents a complex node.

    A subelement of the Entry element, used when the Entry is a complex
    node, as described in release KGML v0.7.1
    (http://www.kegg.jp/kegg/xml/docs/)

    The Component acts as a collection (with type 'group', and typically
    its own Graphics subelement), having only an ID.

    """

    def __init__(self, parent):
        """Initialize the class, recording the parent passed in."""
        self._id = None
        self._parent = parent

    # We make sure that the node ID is an integer
    def _getid(self):
        return self._id

    def _setid(self, value):
        self._id = int(value)

    def _delid(self):
        del self._id

    id = property(_getid, _setid, _delid,
                  "The pathway graph node ID for the Entry")

    @property
    def element(self):
        """Return the Component as a valid KGML element."""
        # The root is this Component element
        component = ET.Element('component')
        component.attrib = {'id': str(self._id)}
        return component


# Graphics
class Graphics(object):
    """An Entry subelement used to represents the visual representation.

    A subelement of Entry, specifying its visual representation, as
    described in release KGML v0.7.1 (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - name         Label for the graphics object
     - x            X-axis position of the object (stored as float)
     - y            Y-axis position of the object (stored as float)
     - coords       polyline co-ordinates, list of (int, int) tuples
     - type         object shape
     - width        object width (stored as float)
     - height       object height (stored as float)
     - fgcolor      object foreground color (hex RGB)
     - bgcolor      object background color (hex RGB)

    Some attributes are present only for specific graphics types.  For
    example, line types do not (typically) have a width.
    We permit non-DTD attributes and attribute settings, such as

    dash         List of ints, describing an on/off pattern for dashes

    """

    def __init__(self, parent):
        """Initialize the class, recording the parent passed in."""
        self.name = ''
        self._x = None
        self._y = None
        self._coords = None
        self.type = ''
        self._width = None
        self._height = None
        self.fgcolor = ''
        self.bgcolor = ''
        self._parent = parent

    # We make sure that the XY coordinates, width and height are numbers
    def _getx(self):
        return self._x

    def _setx(self, value):
        self._x = float(value)

    def _delx(self):
        del self._x

    x = property(_getx, _setx, _delx,
                 "The X coordinate for the graphics element.")

    def _gety(self):
        return self._y

    def _sety(self, value):
        self._y = float(value)

    def _dely(self):
        del self._y

    y = property(_gety, _sety, _dely,
                 "The Y coordinate for the graphics element.")

    def _getwidth(self):
        return self._width

    def _setwidth(self, value):
        self._width = float(value)

    def _delwidth(self):
        del self._width

    width = property(_getwidth, _setwidth, _delwidth,
                     "The width of the graphics element.")

    def _getheight(self):
        return self._height

    def _setheight(self, value):
        self._height = float(value)

    def _delheight(self):
        del self._height

    height = property(_getheight, _setheight, _delheight,
                      "The height of the graphics element.")

    # We make sure that the polyline co-ordinates are integers, too
    def _getcoords(self):
        return self._coords

    def _setcoords(self, value):
        # value is a flat comma-separated string "x1,y1,x2,y2,...";
        # consecutive values are paired up into (x, y) tuples
        clist = [int(e) for e in value.split(',')]
        self._coords = [tuple(clist[i:i + 2]) for i in range(0, len(clist), 2)]

    def _delcoords(self):
        del self._coords

    coords = property(_getcoords, _setcoords, _delcoords,
                      "Polyline coordinates for the graphics element.")

    # Set default colors
    def _getfgcolor(self):
        return self._fgcolor

    def _setfgcolor(self, value):
        if value == 'none':
            self._fgcolor = '#000000'  # this default defined in KGML spec
        else:
            self._fgcolor = value

    def _delfgcolor(self):
        del self._fgcolor

    fgcolor = property(_getfgcolor, _setfgcolor, _delfgcolor,
                       "Foreground color.")

    def _getbgcolor(self):
        return self._bgcolor

    def _setbgcolor(self, value):
        if value == 'none':
            # NOTE(review): 'none' maps to black here, exactly as for
            # fgcolor - confirm against the bgcolor default in the KGML spec
            self._bgcolor = '#000000'  # this default defined in KGML spec
        else:
            self._bgcolor = value

    def _delbgcolor(self):
        del self._bgcolor

    bgcolor = property(_getbgcolor, _setbgcolor, _delbgcolor,
                       "Background color.")

    @staticmethod
    def _color_to_hex(color):
        """Return color as a '#RRGGBB'-style string for KGML output."""
        if isinstance(color, str):  # Assumes that string is hexstring
            return color
        # Assumes ReportLab Color object, whose hexval() carries a
        # two-character prefix that is dropped here
        return '#' + color.hexval()[2:]

    @property
    def element(self):
        """Return the Graphics as a valid KGML element."""
        # The root is this Graphics element
        graphics = ET.Element('graphics')
        graphics.attrib = {'name': self.name,
                           'type': self.type,
                           'fgcolor': self._color_to_hex(self.fgcolor),
                           'bgcolor': self._color_to_hex(self.bgcolor)}
        # Position and size attributes are only written when they were set
        for (n, attr) in [('x', '_x'), ('y', '_y'),
                          ('width', '_width'), ('height', '_height')]:
            attr_value = getattr(self, attr)
            if attr_value is not None:
                graphics.attrib[n] = str(attr_value)
        if self.type == 'line':  # Need to write polycoords
            graphics.attrib['coords'] = \
                ','.join([str(e) for e in chain.from_iterable(self.coords)])
        return graphics

    @property
    def bounds(self):
        """Coordinate bounds for the Graphics element.

        Return the bounds of the Graphics object as an [(xmin, ymin),
        (xmax, ymax)] tuple.  Co-ordinates give the centre of the
        circle, rectangle, roundrectangle elements, so we have to
        adjust for the relevant width/height.
        """
        if self.type == 'line':
            xlist = [x for x, y in self.coords]
            ylist = [y for x, y in self.coords]
            return [(min(xlist), min(ylist)),
                    (max(xlist), max(ylist))]
        half_width = self.width * 0.5
        half_height = self.height * 0.5
        return [(self.x - half_width, self.y - half_height),
                (self.x + half_width, self.y + half_height)]

    @property
    def centre(self):
        """Return the centre of the Graphics object as an (x, y) tuple."""
        # Evaluate bounds once rather than once per co-ordinate
        (xmin, ymin), (xmax, ymax) = self.bounds
        return (0.5 * (xmin + xmax), 0.5 * (ymin + ymax))


# Reaction
class Reaction(object):
    """A specific chemical reaction with substrates and products.

    This describes a specific chemical reaction between one or more
    substrates and one or more products.

    Attributes:
     - id             Pathway graph node ID of the entry
     - names          List of KEGG identifier(s) from the REACTION database
     - type           String: reversible or irreversible
     - substrate      Entry object of the substrate
     - product        Entry object of the product

    """

    def __init__(self):
        """Initialize the class."""
        self._id = None
        self._names = []
        self.type = ''
        self._substrates = set()
        self._products = set()
        self._pathway = None

    def __str__(self):
        """Return an informative human-readable string."""
        outstr = ['Reaction node ID: %s' % self.id,
                  'Reaction KEGG IDs: %s' % self.name,
                  'Type: %s' % self.type,
                  'Substrates: %s' %
                  ','.join([s.name for s in self.substrates]),
                  'Products: %s' %
                  ','.join([s.name for s in self.products]),
                  ]
        return '\n'.join(outstr) + '\n'

    def add_substrate(self, substrate_id):
        """Add a substrate, identified by its node ID, to the reaction.

        The ID is stored as an integer.  If the reaction already belongs
        to a pathway, ValueError is raised when the pathway has no entry
        with that node ID.
        """
        node_id = int(substrate_id)
        if self._pathway is not None:
            if node_id not in self._pathway.entries:
                raise ValueError("Couldn't add substrate, no node ID %d in "
                                 "Pathway" % node_id)
        # Stored as int so that lookups in Pathway.entries (keyed by
        # integer node ID) succeed, as for products
        self._substrates.add(node_id)

    def add_product(self, product_id):
        """Add a product, identified by its node ID, to the reaction.

        The ID is stored as an integer.  If the reaction already belongs
        to a pathway, ValueError is raised when the pathway has no entry
        with that node ID.
        """
        node_id = int(product_id)
        if self._pathway is not None:
            if node_id not in self._pathway.entries:
                raise ValueError("Couldn't add product, no node ID %d in "
                                 "Pathway" % node_id)
        self._products.add(node_id)

    # The node ID is also the node ID of the Entry that corresponds to the
    # reaction; we get the corresponding Entry when there is an associated
    # Pathway
    def _getid(self):
        return self._id

    def _setid(self, value):
        self._id = int(value)

    def _delid(self):
        del self._id

    id = property(_getid, _setid, _delid,
                  "Node ID for the reaction.")

    # Names may show up as a space-separated list of several KEGG identifiers
    def _getnames(self):
        return ' '.join(self._names)

    def _setnames(self, value):
        # NOTE(review): assignment appends to the existing identifiers
        # rather than replacing them (unlike Entry.name) - confirm intent
        self._names.extend(value.split())

    def _delnames(self):
        # Reset to empty, mirroring Entry; there is no 'names' attribute
        # to delete
        self._names = []

    name = property(_getnames, _setnames, _delnames,
                    "List of KEGG identifiers for the reaction.")

    # products and substrates are read-only properties, returning lists
    # of Entry objects
    @property
    def substrates(self):
        """Return list of substrate Entry elements."""
        return [self._pathway.entries[sid] for sid in self._substrates]

    @property
    def products(self):
        """Return list of product Entry elements."""
        return [self._pathway.entries[pid] for pid in self._products]

    @property
    def entry(self):
        """Return the Entry corresponding to this reaction."""
        return self._pathway.entries[self._id]

    @property
    def reactant_ids(self):
        """Return the set of substrate and product reactant IDs."""
        return self._products.union(self._substrates)

    @property
    def element(self):
        """Return KGML element describing the Reaction."""
        # The root is this Reaction element
        reaction = ET.Element('reaction')
        reaction.attrib = {'id': str(self.id),
                           'name': self.name,
                           'type': self.type}
        for s in self._substrates:
            substrate = ET.Element('substrate')
            substrate.attrib['id'] = str(s)
            substrate.attrib['name'] = self._pathway.entries[s].name
            reaction.append(substrate)
        for p in self._products:
            product = ET.Element('product')
            product.attrib['id'] = str(p)
            product.attrib['name'] = self._pathway.entries[p].name
            reaction.append(product)
        return reaction


# Relation
class Relation(object):
    """A relationship between to products, KOs, or protein and compound.

    This describes a relationship between two products, KOs, or protein
    and compound, as described in release KGML v0.7.1
    (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - entry1 - The first Entry object node ID defining the
       relation (int)
     - entry2 - The second Entry object node ID defining the
       relation (int)
     - type - The relation type
     - subtypes - List of subtypes for the relation, as a list of
       (name, value) tuples

    """

    def __init__(self):
        """Initialize the class."""
        self._entry1 = None
        self._entry2 = None
        self.type = ''
        self.subtypes = []
        self._pathway = None

    def __str__(self):
        """Return a useful human-readable string."""
        outstr = ['Relation (subtypes: %d):' % len(self.subtypes),
                  'Entry1:', str(self.entry1),
                  'Entry2:', str(self.entry2)]
        for s in self.subtypes:
            outstr.extend(['Subtype: %s' % s[0], str(s[1])])
        return '\n'.join(outstr)

    # Properties entry1 and entry2
    # Each returns the Entry object once the relation belongs to a pathway,
    # and the bare integer node ID before that
    def _getentry1(self):
        if self._pathway is not None:
            return self._pathway.entries[self._entry1]
        return self._entry1

    def _setentry1(self, value):
        self._entry1 = int(value)

    def _delentry1(self):
        del self._entry1

    entry1 = property(_getentry1, _setentry1, _delentry1,
                      "Entry1 of the relation.")

    def _getentry2(self):
        if self._pathway is not None:
            return self._pathway.entries[self._entry2]
        return self._entry2

    def _setentry2(self, value):
        self._entry2 = int(value)

    def _delentry2(self):
        del self._entry2

    entry2 = property(_getentry2, _setentry2, _delentry2,
                      "Entry2 of the relation.")

    @property
    def element(self):
        """Return KGML element describing the Relation."""
        # The root is this Relation element
        relation = ET.Element('relation')
        relation.attrib = {'entry1': str(self._entry1),
                           'entry2': str(self._entry2),
                           'type': self.type}
        for (name, value) in self.subtypes:
            subtype = ET.Element('subtype')
            # KGML subtypes carry explicit name/value attributes; using the
            # subtype name itself as the attribute name yields malformed XML
            # for names containing spaces or slashes
            subtype.attrib = {'name': name, 'value': str(value)}
            relation.append(subtype)
        return relation