# Copyright 1999 by Jeffrey Chang. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Code to parse the keywlist.txt file from SwissProt/UniProt See: http://www.expasy.ch/sprot/sprot-top.html ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/docs/keywlist.txt Classes: Record Stores the information about one keyword or one category in the keywlist.txt file. Functions: parse Parses the keywlist.txt file and returns an iterator to the records it contains. """ class Record(dict): """ This record stores the information of one keyword or category in the keywlist.txt as a Python dictionary. The keys in this dictionary are the line codes that can appear in the keywlist.txt file: --------- --------------------------- ---------------------- Line code Content Occurrence in an entry --------- --------------------------- ---------------------- ID Identifier (keyword) Once; starts a keyword entry IC Identifier (category) Once; starts a category entry AC Accession (KW-xxxx) Once DE Definition Once or more SY Synonyms Optional; once or more GO Gene ontology (GO) mapping Optional; once or more HI Hierarchy Optional; once or more WW Relevant WWW site Optional; once or more CA Category Once per keyword entry; absent in category entries """ def __init__(self): dict.__init__(self) for keyword in ("DE", "SY", "GO", "HI", "WW"): self[keyword] = [] def parse(handle): record = Record() # First, skip the header - look for start of a record for line in handle: if line.startswith("ID "): # Looks like there was no header record["ID"] = line[5:].strip() break if line.startswith("IC "): # Looks like there was no header record["IC"] = line[5:].strip() break # Now parse the records for line in handle: if line.startswith("-------------------------------------"): # We have reached the footer break key = line[:2] if key=="//": record["DE"] = " ".join(record["DE"]) record["SY"] = " ".join(record["SY"]) yield record record = Record() elif line[2:5]==" ": value = line[5:].strip() if key in ("ID", "IC", "AC", "CA"): record[key] = value elif key in ("DE", "SY", "GO", "HI", "WW"): record[key].append(value) else: print "Ignoring: %s" % line.strip() # Read the footer and throw it away for line in handle: pass