# Copyright 2013 by Leighton Pritchard.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""This module provides classes to represent a KGML Pathway Map.

The KGML definition is as of release KGML v0.7.1
(http://www.kegg.jp/kegg/xml/docs/)

Classes:
 - Pathway    Specifies graph information for the pathway map
 - Relation   Specifies a relationship between two proteins or KOs, or
              protein and compound. There is an implied direction to the
              relationship in some cases.
 - Reaction   A specific chemical reaction between a substrate and a
              product.
 - Entry      A node in the pathway graph
 - Component  Entry subelement identifying a member of a complex node
 - Graphics   Entry subelement describing its visual representation
"""

import time
from itertools import chain
from xml.dom import minidom
import xml.etree.ElementTree as ET

try:
    from Bio._py3k import _is_int_or_long, _as_string
except ImportError:
    # The private compatibility helpers are not importable; provide
    # equivalents so that the module remains usable on its own.
    def _is_int_or_long(value):
        """Return True if value is an integer."""
        return isinstance(value, int)

    def _as_string(value):
        """Return value as text, decoding UTF-8 bytes if necessary."""
        if isinstance(value, str):
            return value
        return value.decode('utf-8')


# Pathway
class Pathway(object):
    """Represents a KGML pathway from KEGG.

    Specifies graph information for the pathway map, as described in
    release KGML v0.7.1 (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - name       KEGGID of the pathway map
     - org        ko/ec/[org prefix]
     - number     map number (integer)
     - title      the map title
     - image      URL of the image map for the pathway
     - link       URL of information about the pathway
     - entries    Dictionary of entries in the pathway, keyed by node ID
     - reactions  List of reactions in the pathway

    The name attribute has a restricted format, so we make it a property
    and enforce the formatting.

    The Pathway object is the only allowed route for adding/removing
    Entry, Reaction, or Relation elements.

    Entries are held in a dictionary and keyed by the node ID for the
    pathway graph - this allows for ready access via the Reaction/Relation
    etc. elements.  Entries must be added before reference by any other
    element.

    Reactions are held in a dictionary, keyed by node ID for the path.
    The elements referred to in the reaction must be added before the
    reaction itself.
    """

    def __init__(self):
        self._name = ''
        self.org = ''
        self._number = None
        self.title = ''
        self.image = ''
        self.link = ''
        self.entries = {}
        self._reactions = {}
        self._relations = set()

    def get_KGML(self):
        """Return the pathway as a string in prettified KGML format."""
        # XML prolog: declaration, DOCTYPE for the KGML release this module
        # targets, and a comment recording when the document was generated
        header = '\n'.join(['<?xml version="1.0"?>',
                            '<!DOCTYPE pathway SYSTEM ' +
                            '"http://www.genome.jp/kegg/xml/' +
                            'KGML_v0.7.1_.dtd">',
                            '<!-- Created by KGML_Pathway.py %s -->' %
                            time.asctime()])
        rough_xml = header + _as_string(ET.tostring(self.element, 'utf-8'))
        # Round-trip through minidom purely to obtain indented output
        reparsed = minidom.parseString(rough_xml)
        return reparsed.toprettyxml(indent=" ")

    def add_entry(self, entry):
        """Add an Entry element to the pathway.

        The entry is stored under its integer node ID, replacing any
        entry already held under that ID.
        """
        # We insist that the node ID is an integer
        assert _is_int_or_long(entry.id), \
            "Node ID must be an integer, got %s (%s)" % (type(entry.id),
                                                         entry.id)
        entry._pathway = self  # Let the entry know about the pathway
        self.entries[entry.id] = entry

    def remove_entry(self, entry):
        """Remove an Entry element from the pathway.

        Raises KeyError if no entry is held under the entry's node ID.
        """
        assert _is_int_or_long(entry.id), \
            "Node ID must be an integer, got %s (%s)" % (type(entry.id),
                                                         entry.id)
        # We need to remove the entry from any other elements that may
        # contain it, which means removing those elements
        # TODO
        del self.entries[entry.id]

    def add_reaction(self, reaction):
        """Add a Reaction element to the pathway.

        The Entry sharing the reaction's node ID must already have been
        added to the pathway.
        """
        # We insist that the node ID is an integer and corresponds to an entry
        assert _is_int_or_long(reaction.id), \
            "Node ID must be an integer, got %s (%s)" % (type(reaction.id),
                                                         reaction.id)
        assert reaction.id in self.entries, \
            "Reaction ID %d has no corresponding entry" % reaction.id
        reaction._pathway = self  # Let the reaction know about the pathway
        self._reactions[reaction.id] = reaction

    def remove_reaction(self, reaction):
        """Remove a Reaction element from the pathway.

        Raises KeyError if no reaction is held under the reaction's node ID.
        """
        assert _is_int_or_long(reaction.id), \
            "Node ID must be an integer, got %s (%s)" % (type(reaction.id),
                                                         reaction.id)
        # We need to remove the reaction from any other elements that may
        # contain it, which means removing those elements
        # TODO
        del self._reactions[reaction.id]

    def add_relation(self, relation):
        """Add a Relation element to the pathway."""
        relation._pathway = self  # Let the relation know about the pathway
        self._relations.add(relation)

    def remove_relation(self, relation):
        """Remove a Relation element from the pathway.

        Raises KeyError if the relation is not part of the pathway.
        """
        self._relations.remove(relation)

    def __str__(self):
        """Return a readable summary description string."""
        outstr = ['Pathway: %s' % self.title,
                  'KEGG ID: %s' % self.name,
                  'Image file: %s' % self.image,
                  'Organism: %s' % self.org,
                  'Entries: %d' % len(self.entries),
                  'Entry types:']
        # Only entry types that actually occur are reported
        for t in ['ortholog', 'enzyme', 'reaction',
                  'gene', 'group', 'compound', 'map']:
            etype = [e for e in self.entries.values() if e.type == t]
            if etype:
                outstr.append('\t%s: %d' % (t, len(etype)))
        return '\n'.join(outstr) + '\n'

    # Assert correct formatting of the pathway name, and other attributes
    def _getname(self):
        return self._name

    def _setname(self, value):
        assert value.startswith('path:'), \
            "Pathway name should begin with 'path:', got %s" % value
        self._name = value

    def _delname(self):
        del self._name

    name = property(_getname, _setname, _delname,
                    "The KEGGID for the pathway map.")

    def _getnumber(self):
        return self._number

    def _setnumber(self, value):
        self._number = int(value)

    def _delnumber(self):
        del self._number

    number = property(_getnumber, _setnumber, _delnumber,
                      "The KEGG map number.")

    @property
    def compounds(self):
        """Get a list of entries of type compound."""
        return [e for e in self.entries.values() if e.type == 'compound']

    @property
    def maps(self):
        """Get a list of entries of type map."""
        return [e for e in self.entries.values() if e.type == 'map']

    @property
    def orthologs(self):
        """Get a list of entries of type ortholog."""
        return [e for e in self.entries.values() if e.type == 'ortholog']

    @property
    def genes(self):
        """Get a list of entries of type gene."""
        return [e for e in self.entries.values() if e.type == 'gene']

    @property
    def reactions(self):
        """Get a list of reactions in the pathway."""
        # Materialise the values so that a real list is returned, as
        # documented, and as the relations property does
        return list(self._reactions.values())

    @property
    def reaction_entries(self):
        """Get a list of entries corresponding to each reaction in the pathway."""
        return [self.entries[i] for i in self._reactions]

    @property
    def relations(self):
        """Get a list of relations in the pathway."""
        return list(self._relations)

    @property
    def element(self):
        """Return the Pathway as a valid KGML element."""
        # The root is this Pathway element
        pathway = ET.Element('pathway')
        pathway.attrib = {'name': self._name,
                          'org': self.org,
                          'number': str(self._number),
                          'title': self.title,
                          'image': self.image,
                          'link': self.link,
                          }
        # We add the Entries in node ID order
        for eid, entry in sorted(self.entries.items()):
            pathway.append(entry.element)
        # Next we add Relations
        for relation in self._relations:
            pathway.append(relation.element)
        # Finally the Reactions, also in node ID order
        for eid, reaction in sorted(self._reactions.items()):
            pathway.append(reaction.element)
        return pathway

    @property
    def bounds(self):
        """Coordinate bounds for all Graphics elements in the Pathway.

        Returns the [(xmin, ymin), (xmax, ymax)] coordinates for all
        Graphics elements in the Pathway.  Raises ValueError if the
        pathway has no entries.
        """
        xlist, ylist = [], []
        for b in [g.bounds for g in self.entries.values()]:
            xlist.extend([b[0][0], b[1][0]])
            ylist.extend([b[0][1], b[1][1]])
        return [(min(xlist), min(ylist)),
                (max(xlist), max(ylist))]


# Entry
class Entry(object):
    """Represent an Entry from KGML.

    Each Entry element is a node in the pathway graph, as described in
    release KGML v0.7.1 (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - id           The ID of the entry in the pathway map (integer)
     - name         Space-separated KEGG IDs for the entry
     - type         The type of the entry
     - link         URL of information about the entry
     - reaction     Space-separated KEGG IDs of the corresponding reactions
     - graphics     List of Graphics objects describing the Entry's visual
                    representation
     - components   Set of component elements for this Entry ('group')
     - alt          List of alternate names for the Entry

    NOTE: The alt attribute represents a subelement of the substrate and
    product elements in the KGML file
    """

    def __init__(self):
        self._id = None
        self._names = []
        self.type = ''
        self.image = ''
        self.link = ''
        self.graphics = []
        self.components = set()
        self.alt = []
        self._pathway = None
        self._reactions = []

    def __str__(self):
        """Return readable descriptive string."""
        outstr = ['Entry node ID: %d' % self.id,
                  'Names: %s' % self.name,
                  'Type: %s' % self.type,
                  'Components: %s' % self.components,
                  'Reactions: %s' % self.reaction,
                  'Graphics elements: %d %s' % (len(self.graphics),
                                                self.graphics)]
        return '\n'.join(outstr) + '\n'

    def add_component(self, element):
        """Add an element to the entry.

        If the Entry is already part of a pathway, make sure
        the component already exists.
        """
        if self._pathway is not None:
            assert element.id in self._pathway.entries, \
                "Component %s is not an entry in the pathway" % element.id
        self.components.add(element)

    def remove_component(self, value):
        """Remove the passed component element from the group.

        Raises KeyError if the element is not a component of this Entry.
        """
        self.components.remove(value)

    def add_graphics(self, entry):
        """Add the Graphics entry."""
        self.graphics.append(entry)

    def remove_graphics(self, entry):
        """Remove the passed Graphics entry from the Entry.

        Raises ValueError if the Graphics object is not present.
        """
        self.graphics.remove(entry)

    # Names may be given as a space-separated list of KEGG identifiers
    def _getname(self):
        return ' '.join(self._names)

    def _setname(self, value):
        self._names = value.split()

    def _delname(self):
        self._names = []

    name = property(_getname, _setname, _delname,
                    "List of KEGG identifiers for the Entry.")

    # Reactions may be given as a space-separated list of KEGG identifiers
    def _getreaction(self):
        return ' '.join(self._reactions)

    def _setreaction(self, value):
        self._reactions = value.split()

    def _delreaction(self):
        self._reactions = []

    reaction = property(_getreaction, _setreaction, _delreaction,
                        "List of reaction KEGG IDs for this Entry.")

    # We make sure that the node ID is an integer
    def _getid(self):
        return self._id

    def _setid(self, value):
        self._id = int(value)

    def _delid(self):
        del self._id

    id = property(_getid, _setid, _delid,
                  "The pathway graph node ID for the Entry.")

    @property
    def element(self):
        """Return the Entry as a valid KGML element."""
        # The root is this Entry element
        entry = ET.Element('entry')
        entry.attrib = {'id': str(self._id),
                        'name': self.name,
                        'link': self.link,
                        'type': self.type
                        }
        # The reaction attribute is only written when reactions are known
        if self._reactions:
            entry.attrib['reaction'] = self.reaction
        for g in self.graphics:
            entry.append(g.element)
        for c in self.components:
            entry.append(c.element)
        return entry

    @property
    def bounds(self):
        """Coordinate bounds for all Graphics elements in the Entry.

        Return the [(xmin, ymin), (xmax, ymax)] co-ordinates for the Entry
        Graphics elements.  Raises ValueError if the Entry has no Graphics
        elements.
        """
        xlist, ylist = [], []
        for b in [g.bounds for g in self.graphics]:
            xlist.extend([b[0][0], b[1][0]])
            ylist.extend([b[0][1], b[1][1]])
        return [(min(xlist), min(ylist)),
                (max(xlist), max(ylist))]

    @property
    def is_reactant(self):
        """Does this Entry participate in any reaction in parent pathway?

        Returns True if the Entry participates in any reaction of its
        parent Pathway
        """
        for rxn in self._pathway.reactions:
            if self._id in rxn.reactant_ids:
                return True
        return False


# Component
class Component(object):
    """An Entry subelement used to represent a complex node.

    A subelement of the Entry element, used when the Entry is a complex
    node, as described in release KGML v0.7.1
    (http://www.kegg.jp/kegg/xml/docs/)

    The Component acts as a collection (with type 'group', and typically
    its own Graphics subelement), having only an ID.
    """

    def __init__(self, parent):
        self._id = None
        self._parent = parent

    # We make sure that the node ID is an integer
    def _getid(self):
        return self._id

    def _setid(self, value):
        self._id = int(value)

    def _delid(self):
        del self._id

    id = property(_getid, _setid, _delid,
                  "The pathway graph node ID for the Entry")

    @property
    def element(self):
        """Return the Component as a valid KGML element."""
        # The root is this Component element
        component = ET.Element('component')
        component.attrib = {'id': str(self._id)}
        return component


# Graphics
class Graphics(object):
    """An Entry subelement used to describe the visual representation.

    A subelement of Entry, specifying its visual representation, as
    described in release KGML v0.7.1 (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - name         Label for the graphics object
     - x            X-axis position of the object (stored as float)
     - y            Y-axis position of the object (stored as float)
     - coords       polyline co-ordinates, list of (int, int) tuples
     - type         object shape
     - width        object width (stored as float)
     - height       object height (stored as float)
     - fgcolor      object foreground color (hex RGB)
     - bgcolor      object background color (hex RGB)

    Some attributes are present only for specific graphics types.  For
    example, line types do not (typically) have a width.
    We permit non-DTD attributes and attribute settings, such as

    dash         List of ints, describing an on/off pattern for dashes
    """

    def __init__(self, parent):
        self.name = ''
        self._x = None
        self._y = None
        self._coords = None
        self.type = ''
        self._width = None
        self._height = None
        self.fgcolor = ''
        self.bgcolor = ''
        self._parent = parent

    # We make sure that the XY coordinates, width and height are numbers
    def _getx(self):
        return self._x

    def _setx(self, value):
        self._x = float(value)

    def _delx(self):
        del self._x

    x = property(_getx, _setx, _delx,
                 "The X coordinate for the graphics element.")

    def _gety(self):
        return self._y

    def _sety(self, value):
        self._y = float(value)

    def _dely(self):
        del self._y

    y = property(_gety, _sety, _dely,
                 "The Y coordinate for the graphics element.")

    def _getwidth(self):
        return self._width

    def _setwidth(self, value):
        self._width = float(value)

    def _delwidth(self):
        del self._width

    width = property(_getwidth, _setwidth, _delwidth,
                     "The width of the graphics element.")

    def _getheight(self):
        return self._height

    def _setheight(self, value):
        self._height = float(value)

    def _delheight(self):
        del self._height

    height = property(_getheight, _setheight, _delheight,
                      "The height of the graphics element.")

    # We make sure that the polyline co-ordinates are integers, too
    def _getcoords(self):
        return self._coords

    def _setcoords(self, value):
        # value is a comma-separated string "x1,y1,x2,y2,..."; consecutive
        # numbers are paired up into (x, y) tuples
        clist = [int(e) for e in value.split(',')]
        self._coords = [tuple(clist[i:i + 2]) for i in range(0, len(clist), 2)]

    def _delcoords(self):
        del self._coords

    coords = property(_getcoords, _setcoords, _delcoords,
                      "Polyline coordinates for the graphics element.")

    # Set default colors
    def _getfgcolor(self):
        return self._fgcolor

    def _setfgcolor(self, value):
        if value == 'none':
            self._fgcolor = '#000000'  # this default defined in KGML spec
        else:
            self._fgcolor = value

    def _delfgcolor(self):
        del self._fgcolor

    fgcolor = property(_getfgcolor, _setfgcolor, _delfgcolor,
                       "Foreground color.")

    def _getbgcolor(self):
        return self._bgcolor

    def _setbgcolor(self, value):
        if value == 'none':
            # NOTE(review): 'none' maps to black here, as for fgcolor; check
            # against the KGML documentation whether the background default
            # should be a different colour before changing this.
            self._bgcolor = '#000000'
        else:
            self._bgcolor = value

    def _delbgcolor(self):
        del self._bgcolor

    bgcolor = property(_getbgcolor, _setbgcolor, _delbgcolor,
                       "Background color.")

    @property
    def element(self):
        """Return the Graphics as a valid KGML element."""
        # The root is this Graphics element
        graphics = ET.Element('graphics')
        if isinstance(self.fgcolor, str):  # Assumes that string is hexstring
            fghex = self.fgcolor
        else:  # Assumes ReportLab Color object
            fghex = '#' + self.fgcolor.hexval()[2:]
        if isinstance(self.bgcolor, str):  # Assumes that string is hexstring
            bghex = self.bgcolor
        else:  # Assumes ReportLab Color object
            bghex = '#' + self.bgcolor.hexval()[2:]
        graphics.attrib = {'name': self.name,
                           'type': self.type,
                           'fgcolor': fghex,
                           'bgcolor': bghex}
        # Positional attributes are only written when they have been set
        for (n, attr) in [('x', '_x'), ('y', '_y'),
                          ('width', '_width'), ('height', '_height')]:
            if getattr(self, attr) is not None:
                graphics.attrib[n] = str(getattr(self, attr))
        if self.type == 'line':  # Need to write polycoords
            graphics.attrib['coords'] = \
                ','.join([str(e) for e in chain.from_iterable(self.coords)])
        return graphics

    @property
    def bounds(self):
        """Coordinate bounds for the Graphics element.

        Return the bounds of the Graphics object as an [(xmin, ymin),
        (xmax, ymax)] tuple.  Co-ordinates give the centre of the
        circle, rectangle, roundrectangle elements, so we have to
        adjust for the relevant width/height.
        """
        if self.type == 'line':
            xlist = [x for x, y in self.coords]
            ylist = [y for x, y in self.coords]
            return [(min(xlist), min(ylist)),
                    (max(xlist), max(ylist))]
        else:
            return [(self.x - self.width * 0.5, self.y - self.height * 0.5),
                    (self.x + self.width * 0.5, self.y + self.height * 0.5)]

    @property
    def centre(self):
        """Return the centre of the Graphics object as an (x, y) tuple."""
        (xmin, ymin), (xmax, ymax) = self.bounds
        return (0.5 * (xmin + xmax), 0.5 * (ymin + ymax))


# Reaction
class Reaction(object):
    """A specific chemical reaction with substrates and products.

    This describes a specific chemical reaction between one or more
    substrates and one or more products.

    Attributes:
     - id             Pathway graph node ID of the entry
     - name           Space-separated KEGG identifier(s) from the REACTION
                      database
     - type           String: reversible or irreversible
     - substrates     List of Entry objects of the substrates
     - products       List of Entry objects of the products
    """

    def __init__(self):
        self._id = None
        self._names = []
        self.type = ''
        self._substrates = set()
        self._products = set()
        self._pathway = None

    def __str__(self):
        """Return an informative human-readable string."""
        outstr = ['Reaction node ID: %s' % self.id,
                  'Reaction KEGG IDs: %s' % self.name,
                  'Type: %s' % self.type,
                  'Substrates: %s' %
                  ','.join([s.name for s in self.substrates]),
                  'Products: %s' %
                  ','.join([s.name for s in self.products]),
                  ]
        return '\n'.join(outstr) + '\n'

    def add_substrate(self, substrate_id):
        """Add a substrate, identified by its node ID, to the reaction."""
        # Node IDs are stored as integers because Pathway.entries is keyed
        # by integer node ID
        substrate_id = int(substrate_id)
        if self._pathway is not None:
            assert substrate_id in self._pathway.entries, \
                "Couldn't add substrate, no node ID %d in Pathway" % \
                substrate_id
        self._substrates.add(substrate_id)

    def add_product(self, product_id):
        """Add a product, identified by its node ID, to the reaction."""
        # Node IDs are stored as integers because Pathway.entries is keyed
        # by integer node ID
        product_id = int(product_id)
        if self._pathway is not None:
            assert product_id in self._pathway.entries, \
                "Couldn't add product, no node ID %d in Pathway" % product_id
        self._products.add(product_id)

    # The node ID is also the node ID of the Entry that corresponds to the
    # reaction; we get the corresponding Entry when there is an associated
    # Pathway
    def _getid(self):
        return self._id

    def _setid(self, value):
        self._id = int(value)

    def _delid(self):
        del self._id

    id = property(_getid, _setid, _delid,
                  "Node ID for the reaction.")

    # Names may show up as a space-separated list of several KEGG identifiers
    def _getnames(self):
        return ' '.join(self._names)

    def _setnames(self, value):
        # Assignment appends to the identifiers already held rather than
        # replacing them
        self._names.extend(value.split())

    def _delnames(self):
        self._names = []

    name = property(_getnames, _setnames, _delnames,
                    "List of KEGG identifiers for the reaction.")

    # products and substrates are read-only properties, returning lists
    # of Entry objects
    @property
    def substrates(self):
        """Return list of substrate Entry elements."""
        return [self._pathway.entries[sid] for sid in self._substrates]

    @property
    def products(self):
        """Return list of product Entry elements."""
        return [self._pathway.entries[pid] for pid in self._products]

    @property
    def entry(self):
        """Return the Entry corresponding to this reaction."""
        return self._pathway.entries[self._id]

    @property
    def reactant_ids(self):
        """Return the set of substrate and product reactant node IDs."""
        return self._products.union(self._substrates)

    @property
    def element(self):
        """Return KGML element describing the Reaction."""
        # The root is this Reaction element
        reaction = ET.Element('reaction')
        reaction.attrib = {'id': str(self.id),
                           'name': self.name,
                           'type': self.type}
        for s in self._substrates:
            substrate = ET.Element('substrate')
            substrate.attrib['id'] = str(s)
            substrate.attrib['name'] = self._pathway.entries[s].name
            reaction.append(substrate)
        for p in self._products:
            product = ET.Element('product')
            product.attrib['id'] = str(p)
            product.attrib['name'] = self._pathway.entries[p].name
            reaction.append(product)
        return reaction


# Relation
class Relation(object):
    """A relationship between two products, KOs, or protein and compound.

    This describes a relationship between two products, KOs, or protein
    and compound, as described in release KGML v0.7.1
    (http://www.kegg.jp/kegg/xml/docs/)

    Attributes:
     - entry1       The first Entry object node ID defining the
                    relation (int)
     - entry2       The second Entry object node ID defining the
                    relation (int)
     - type         The relation type
     - subtypes     List of subtypes for the relation, as a list of
                    (name, value) tuples
    """

    def __init__(self):
        self._entry1 = None
        self._entry2 = None
        self.type = ''
        self.subtypes = []
        self._pathway = None

    def __str__(self):
        """Return a useful human-readable string."""
        outstr = ['Relation (subtypes: %d):' % len(self.subtypes),
                  'Entry1:', str(self.entry1),
                  'Entry2:', str(self.entry2)]
        for s in self.subtypes:
            outstr.extend(['Subtype: %s' % s[0], str(s[1])])
        return '\n'.join(outstr)

    # Properties entry1 and entry2: once the Relation belongs to a Pathway
    # the getters return the Entry object, otherwise the stored node ID
    def _getentry1(self):
        if self._pathway is not None:
            return self._pathway.entries[self._entry1]
        return self._entry1

    def _setentry1(self, value):
        self._entry1 = int(value)

    def _delentry1(self):
        del self._entry1

    entry1 = property(_getentry1, _setentry1, _delentry1,
                      "Entry1 of the relation.")

    def _getentry2(self):
        if self._pathway is not None:
            return self._pathway.entries[self._entry2]
        return self._entry2

    def _setentry2(self, value):
        self._entry2 = int(value)

    def _delentry2(self):
        del self._entry2

    entry2 = property(_getentry2, _setentry2, _delentry2,
                      "Entry2 of the relation.")

    @property
    def element(self):
        """Return KGML element describing the Relation."""
        # The root is this Relation element
        relation = ET.Element('relation')
        relation.attrib = {'entry1': str(self._entry1),
                           'entry2': str(self._entry2),
                           'type': self.type}
        for (name, value) in self.subtypes:
            # Each subtype is written with explicit name and value attributes
            subtype = ET.Element('subtype')
            subtype.attrib = {'name': name, 'value': str(value)}
            relation.append(subtype)
        return relation