/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package	org.biojavax.bio.seq.io;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.ParserConfigurationException;

import org.biojava.bio.proteomics.MassCalc;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.io.ParseException;
import org.biojava.bio.seq.io.SeqIOListener;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Location;
import org.biojava.bio.symbol.SimpleSymbolList;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.SymbolList;
import org.biojava.utils.ChangeVetoException;
import org.biojava.utils.xml.PrettyXMLWriter;
import org.biojava.utils.xml.XMLWriter;
import org.biojavax.Comment;
import org.biojavax.CrossRef;
import org.biojavax.DocRef;
import org.biojavax.DocRefAuthor;
import org.biojavax.Namespace;
import org.biojavax.Note;
import org.biojavax.RankedCrossRef;
import org.biojavax.RankedDocRef;
import org.biojavax.RichAnnotation;
import org.biojavax.RichObjectFactory;
import org.biojavax.SimpleCrossRef;
import org.biojavax.SimpleDocRef;
import org.biojavax.SimpleDocRefAuthor;
import org.biojavax.SimpleNamespace;
import org.biojavax.SimpleNote;
import org.biojavax.SimpleRankedCrossRef;
import org.biojavax.SimpleRankedDocRef;
import org.biojavax.SimpleRichAnnotation;
import org.biojavax.bio.seq.Position;
import org.biojavax.bio.seq.RichFeature;
import org.biojavax.bio.seq.RichLocation;
import org.biojavax.bio.seq.RichSequence;
import org.biojavax.bio.seq.io.UniProtCommentParser.Event;
import org.biojavax.bio.seq.io.UniProtCommentParser.Interaction;
import org.biojavax.bio.seq.io.UniProtCommentParser.Isoform;
import org.biojavax.bio.taxa.NCBITaxon;
import org.biojavax.bio.taxa.SimpleNCBITaxon;
import org.biojavax.ontology.ComparableOntology;
import org.biojavax.ontology.ComparableTerm;
import org.biojavax.ontology.SimpleComparableOntology;
import org.biojavax.utils.CRC64Checksum;
import org.biojavax.utils.StringTools;
import org.biojavax.utils.XMLTools;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Format reader and writer for UniProtXML files. This version of the UniProtXML format
 * generates and writes RichSequence objects. Loosely based on code from the old, deprecated,
 * org.biojava.bio.seq.io.GenbankXmlFormat class.
 *
 * Understands http://www.ebi.uniprot.org/support/docs/uniprot.xsd
 *
 * @author Alan Li (code based on his work)
 * @author Richard Holland
 * @since 1.5
 */
public class UniProtXMLFormat extends RichSequenceFormat.BasicFormat {
                        
    // Register this format with the format auto-guesser.
    // Runs once, when this class is initialised, and hands the class object
    // to RichSequence.IOTools.registerFormat.
    static {
        RichSequence.IOTools.registerFormat(UniProtXMLFormat.class);
    }
    
    /**
     * The name of this format
     */
    public static final String UNIPROTXML_FORMAT = "UniProtXML";
    
    // Names of the root element, the per-record element and the attributes
    // written on each record (see the openTag/attribute calls in the writer).
    protected static final String ENTRY_GROUP_TAG = "uniprot";
    protected static final String ENTRY_TAG = "entry";
    protected static final String ENTRY_VERSION_ATTR = "version";
    protected static final String ENTRY_NAMESPACE_ATTR = "dataset";
    protected static final String ENTRY_CREATED_ATTR = "created";
    protected static final String ENTRY_UPDATED_ATTR = "modified";
    // Element that may follow an entry; readRichSequence consumes it separately.
    protected static final String COPYRIGHT_TAG = "copyright";
    
    // Simple text-bearing elements that are reused in several places.
    protected static final String ACCESSION_TAG = "accession";
    protected static final String NAME_TAG = "name";
    protected static final String TEXT_TAG = "text";
    
    // Attribute names that are shared by several different elements.
    protected static final String REF_ATTR = "ref";
    protected static final String TYPE_ATTR = "type";
    protected static final String KEY_ATTR = "key";
    protected static final String ID_ATTR = "id";
    protected static final String EVIDENCE_ATTR = "evidence";
    protected static final String VALUE_ATTR = "value";
    // Previously a copy-paste duplicate of VALUE_ATTR ("value"); the UniProt
    // schema names this attribute "status".
    protected static final String STATUS_ATTR = "status";
    protected static final String NAME_ATTR = "name";
    
    // Protein description element and its type attribute.
    protected static final String PROTEIN_TAG = "protein";
    protected static final String PROTEIN_TYPE_ATTR = "type";
    
    // DOMAIN_TAG is written for "Contains:" sections of the description and
    // COMPONENT_TAG for "Includes:" sections; the remaining names cover gene,
    // organism, cross-reference and gene-location elements.
    protected static final String DOMAIN_TAG = "domain";
    protected static final String COMPONENT_TAG = "component";
    protected static final String GENE_TAG = "gene";
    protected static final String ORGANISM_TAG = "organism";
    protected static final String DBXREF_TAG = "dbReference";
    protected static final String PROPERTY_TAG = "property";
    protected static final String LINEAGE_TAG = "lineage";
    protected static final String TAXON_TAG = "taxon";
    protected static final String GENELOCATION_TAG = "geneLocation";
    // Same string as NAME_TAG.
    protected static final String GENELOCATION_NAME_TAG = "name";
    
    // Element names used for literature references.
    protected static final String REFERENCE_TAG = "reference";
    protected static final String CITATION_TAG = "citation";
    protected static final String TITLE_TAG = "title";
    protected static final String EDITOR_LIST_TAG = "editorList";
    protected static final String AUTHOR_LIST_TAG = "authorList";
    protected static final String PERSON_TAG = "person";
    protected static final String CONSORTIUM_TAG = "consortium";
    protected static final String LOCATOR_TAG = "locator";
    // NOTE(review): the RP_/RC_ prefixes presumably refer to the RP and RC
    // lines of the UniProt flat-file format - confirm against the handler.
    protected static final String RP_LINE_TAG = "scope";
    protected static final String RC_LINE_TAG = "source";
    protected static final String RC_SPECIES_TAG = "species";
    protected static final String RC_TISSUE_TAG = "tissue";
    protected static final String RC_TRANSP_TAG = "transposon";
    protected static final String RC_STRAIN_TAG = "strain";
    protected static final String RC_PLASMID_TAG = "plasmid";
    
    // The comment element and the attributes it may carry.
    protected static final String COMMENT_TAG = "comment";
    protected static final String COMMENT_MASS_ATTR = "mass";
    protected static final String COMMENT_ERROR_ATTR = "error";
    protected static final String COMMENT_METHOD_ATTR = "method";
    protected static final String COMMENT_LOCTYPE_ATTR = "locationType";
    
    // Child elements (and their attributes) that can appear inside a comment.
    protected static final String COMMENT_ABSORPTION_TAG = "absorption";
    protected static final String COMMENT_ABS_MAX_TAG = "max";
    protected static final String COMMENT_KINETICS_TAG = "kinetics";
    protected static final String COMMENT_KIN_KM_TAG = "KM";
    // NOTE(review): the UniProt schema appears to spell this element "Vmax" - confirm.
    protected static final String COMMENT_KIN_VMAX_TAG = "VMax";
    protected static final String COMMENT_PH_TAG = "phDependence";
    protected static final String COMMENT_REDOX_TAG = "redoxPotential";
    protected static final String COMMENT_TEMPERATURE_TAG = "temperatureDependence";
    protected static final String COMMENT_LINK_TAG = "link";
    protected static final String COMMENT_LINK_URI_ATTR = "uri";
    protected static final String COMMENT_EVENT_TAG = "event";
    protected static final String COMMENT_ISOFORM_TAG = "isoform";
    protected static final String COMMENT_INTERACTANT_TAG = "interactant";
    protected static final String COMMENT_INTERACT_INTACT_ATTR = "intactId";
    protected static final String COMMENT_INTERACT_LABEL_TAG = "label";
    protected static final String COMMENT_ORGANISMS_TAG = "organismsDiffer";
    protected static final String COMMENT_EXPERIMENTS_TAG = "experiments";
    
    // Miscellaneous entry-level elements.
    protected static final String NOTE_TAG = "note";
    protected static final String KEYWORD_TAG = "keyword";
    protected static final String PROTEIN_EXISTS_TAG = "proteinExistence";
    protected static final String ID_TAG = "id";
    
    // Sequence feature element, its description attribute and its
    // original/variation child elements.
    protected static final String FEATURE_TAG = "feature";
    protected static final String FEATURE_DESC_ATTR = "description";
    protected static final String FEATURE_ORIGINAL_TAG = "original";
    protected static final String FEATURE_VARIATION_TAG = "variation";
    
    // Evidence element and its attributes (EVIDENCE_TAG shares its string with EVIDENCE_ATTR).
    protected static final String EVIDENCE_TAG = "evidence";
    protected static final String EVIDENCE_CATEGORY_ATTR = "category";
    protected static final String EVIDENCE_ATTRIBUTE_ATTR = "attribute";
    protected static final String EVIDENCE_DATE_ATTR = "date";
    
    // Location element with its begin/end/position children.
    protected static final String LOCATION_TAG = "location";
    protected static final String LOCATION_SEQ_ATTR = "sequence";
    protected static final String LOCATION_BEGIN_TAG = "begin";
    protected static final String LOCATION_END_TAG = "end";
    // "position" is used both as an attribute name and as an element name.
    protected static final String LOCATION_POSITION_ATTR = "position";
    protected static final String LOCATION_POSITION_TAG = "position";
    
    // Sequence element and its attributes (SEQUENCE_TAG shares its string with LOCATION_SEQ_ATTR).
    protected static final String SEQUENCE_TAG = "sequence";
    protected static final String SEQUENCE_VERSION_ATTR = "version";
    protected static final String SEQUENCE_LENGTH_ATTR = "length";
    protected static final String SEQUENCE_MASS_ATTR = "mass";
    protected static final String SEQUENCE_CHECKSUM_ATTR = "checksum";
    protected static final String SEQUENCE_MODIFIED_ATTR = "modified";
    
    // RP line parser: matches the text "SEQUENCE OF <n>-<m>" and captures the
    // two numbers as groups 1 and 2.
    protected static final Pattern rppat = Pattern.compile("SEQUENCE OF (\\d+)-(\\d+)");
    
    // Used by the canRead methods with Matcher.matches(), which must match the
    // whole line - hence the leading and trailing ".*" around the schema URL.
    protected static final Pattern xmlSchema = Pattern.compile(".*http://www\\.uniprot\\.org/support/docs/uniprot\\.xsd.*");
    
    /**
     * UniProtXML-specific string keys and ontology terms, layered on top of
     * the generic ones provided by {@link RichSequence.Terms}.
     */
    public static class Terms extends RichSequence.Terms {
        // Prefixes that introduce the bracketed sections of a description line.
        public static final String CONTAINS_PREFIX = "Contains:";
        public static final String INCLUDES_PREFIX = "Includes:";

        // Values of the type attribute on gene name elements.
        public static final String GENENAME_KEY = "primary";
        public static final String GENESYNONYM_KEY = "synonym";
        public static final String ORDLOCNAME_KEY = "ordered locus";
        public static final String ORFNAME_KEY = "ORF";

        // Keys relating to organism names and taxonomy.
        public static final String NCBI_TAXON_KEY = "NCBI Taxonomy";
        public static final String COMMON_NAME_KEY = "common";
        public static final String FULL_NAME_KEY = "full";
        public static final String SCIENTIFIC_NAME_KEY = "scientific";
        public static final String SYNONYM_NAME_KEY = "synonym";
        public static final String ABBREV_NAME_KEY = "abbreviation";

        // Keys describing fuzzy location boundaries.
        public static final String LOC_FUZZY_START_KEY = "less than";
        public static final String LOC_FUZZY_END_KEY = "greater than";

        // Ontology for uniprot keywords (because they have identifiers, aaargh...)
        private static ComparableOntology uniprotKWOnto = null;

        /**
         * Fetches (creating it if necessary) the named term from the default
         * ontology supplied by {@link RichObjectFactory}.
         * @param termName the name of the term to look up.
         * @return the matching term.
         */
        private static ComparableTerm lookupDefaultOntologyTerm(String termName) {
            ComparableOntology defaultOntology = RichObjectFactory.getDefaultOntology();
            return defaultOntology.getOrCreateTerm(termName);
        }

        /**
         * Getter for the protein exists term
         * @return The protein exists Term
         */
        public static ComparableTerm getProteinExistsTerm() {
            return lookupDefaultOntologyTerm("UniProt protein exists");
        }

        /**
         * Getter for the ontology holding UniProt keywords, obtained from
         * {@link RichObjectFactory} under the name "uniprot_kw".
         * @return the ontology.
         */
        public static ComparableOntology getUniprotKWOnto() {
            Object[] constructorArgs = new Object[]{"uniprot_kw"};
            Object ontology = RichObjectFactory.getObject(SimpleComparableOntology.class, constructorArgs);
            return (ComparableOntology)ontology;
        }

        /**
         * Getter for the UniProtXML term
         * @return The UniProtXML Term
         */
        public static ComparableTerm getUniProtXMLTerm() {
            return lookupDefaultOntologyTerm("UniProtXML");
        }

        /**
         * Getter for the protein type term
         * @return The protein type Term
         */
        public static ComparableTerm getProteinTypeTerm() {
            return lookupDefaultOntologyTerm("protein_type");
        }

        /**
         * Getter for the evidence category term
         * @return The evidence category Term
         */
        public static ComparableTerm getEvidenceCategoryTerm() {
            return lookupDefaultOntologyTerm("evidence_category");
        }

        /**
         * Getter for the evidence type term
         * @return The evidence type Term
         */
        public static ComparableTerm getEvidenceTypeTerm() {
            return lookupDefaultOntologyTerm("evidence_type");
        }

        /**
         * Getter for the evidence date term
         * @return The evidence date Term
         */
        public static ComparableTerm getEvidenceDateTerm() {
            return lookupDefaultOntologyTerm("evidence_date");
        }

        /**
         * Getter for the evidence attr term
         * @return The evidence attr Term
         */
        public static ComparableTerm getEvidenceAttrTerm() {
            return lookupDefaultOntologyTerm("evidence_attr");
        }

        /**
         * Getter for the feature ref term
         * @return The feature ref Term
         */
        public static ComparableTerm getFeatureRefTerm() {
            return lookupDefaultOntologyTerm("feature_ref");
        }

        /**
         * Getter for the feature status term
         * @return The feature status Term
         */
        public static ComparableTerm getFeatureStatusTerm() {
            return lookupDefaultOntologyTerm("feature_status");
        }

        /**
         * Getter for the feature original term
         * @return The feature original Term
         */
        public static ComparableTerm getFeatureOriginalTerm() {
            return lookupDefaultOntologyTerm("feature_original");
        }

        /**
         * Getter for the feature variation term
         * @return The feature variation Term
         */
        public static ComparableTerm getFeatureVariationTerm() {
            return lookupDefaultOntologyTerm("feature_variation");
        }

        /**
         * Getter for the location seq term
         * @return The location seq Term
         */
        public static ComparableTerm getLocationSequenceTerm() {
            return lookupDefaultOntologyTerm("locseq");
        }
    }
    
    /**
     * {@inheritDoc}
     * A file is in UniProtXML format if the second XML line contains the phrase "http://www.uniprot.org/support/docs/uniprot.xsd".
     * A file with fewer than two lines is reported as not readable.
     *
     * @param file the file to inspect.
     * @return true if the second line of the file mentions the UniProt schema.
     * @throws IOException if the file cannot be opened or read.
     */
    @Override
    public boolean canRead(File file) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            br.readLine(); // skip first line
            String secondLine = br.readLine();
            // check on second line
            return secondLine!=null && xmlSchema.matcher(secondLine).matches();
        } finally {
            // always release the file handle, even if reading failed
            br.close();
        }
    }
    
    /**
     * {@inheritDoc}
     * The file content is not examined: the protein tokenizer is returned unconditionally.
     */
    @Override
    public SymbolTokenization guessSymbolTokenization(File file) throws IOException {
        SymbolTokenization proteinTokenizer = RichSequence.IOTools.getProteinParser();
        return proteinTokenizer;
    }
    
    /**
     * {@inheritDoc}
     * The stream is accepted as UniProtXML when its second line mentions
     * "http://www.uniprot.org/support/docs/uniprot.xsd". The stream is marked
     * before reading and reset afterwards so the caller can re-read it.
     */
    public boolean canRead(BufferedInputStream stream) throws IOException {
        // Remember where we started; some streams may not support this.
        // NOTE(review): the line reader below buffers internally and may pull
        // more than 2000 bytes from the stream - confirm the mark always survives.
        stream.mark(2000);
        BufferedReader lineReader = new BufferedReader(new InputStreamReader(stream));
        lineReader.readLine(); // the first line is ignored
        String candidate = lineReader.readLine();
        boolean mentionsSchema;
        if (candidate == null) {
            mentionsSchema = false;
        } else {
            mentionsSchema = xmlSchema.matcher(candidate).matches();
        }
        // The reader is deliberately left open: closing it would close the
        // caller's stream as well.
        stream.reset();
        return mentionsSchema;
    }
    
    /**
     * {@inheritDoc}
     * The stream is not read: the protein tokenizer is returned unconditionally.
     */
    public SymbolTokenization guessSymbolTokenization(BufferedInputStream stream) throws IOException {
        SymbolTokenization proteinTokenizer = RichSequence.IOTools.getProteinParser();
        return proteinTokenizer;
    }
    
    /**
     * {@inheritDoc}
     * Delegates to {@code readRichSequence} with a null namespace. Only
     * listeners that are {@code RichSeqIOListener} instances are accepted;
     * anything else is rejected with an {@code IllegalArgumentException}.
     */
    public boolean readSequence(BufferedReader reader,
            SymbolTokenization symParser,
            SeqIOListener listener)
            throws IllegalSymbolException, IOException, ParseException {
        if (listener instanceof RichSeqIOListener) {
            RichSeqIOListener richListener = (RichSeqIOListener)listener;
            return this.readRichSequence(reader, symParser, richListener, null);
        }
        throw new IllegalArgumentException("Only accepting RichSeqIOListeners today");
    }
    
    /**
     * {@inheritDoc}
     * If namespace is null, then the namespace of the sequence in the UniProtXML entry is used.
     * If the namespace is null and so is the namespace of the sequence in the entry,
     * then the default namespace is used.
     * <p>
     * Reads one entry chunk from the reader and, if the line that follows
     * opens a copyright element, consumes that chunk too.
     *
     * @param reader the reader to take the XML from; it must support mark/reset.
     * @param symParser the tokenization handed to the XML handler.
     * @param rlistener the listener notified of the start and end of the sequence
     * and passed to the XML handler.
     * @param ns the namespace to use, or null.
     * @return the value reported by the XML chunk reader for the entry chunk.
     * @throws ParseException if the XML parser cannot be configured or the XML is invalid.
     */
    public boolean readRichSequence(BufferedReader reader,
            SymbolTokenization symParser,
            RichSeqIOListener rlistener,
            Namespace ns)
            throws IllegalSymbolException, IOException, ParseException {
        
        Pattern copyright = Pattern.compile(".*<"+COPYRIGHT_TAG+".*");
        
        try {
            rlistener.startSequence();
            DefaultHandler handler = new UniProtXMLHandler(this,symParser,rlistener,ns);
            boolean hasMore=XMLTools.readXMLChunk(reader, handler, ENTRY_TAG);
            // deal with copyright chunk: peek at the next line without consuming it
            reader.mark(10000);
            String line = reader.readLine();
            reader.reset();
            // readLine() returns null at end of input; there is then no
            // copyright chunk to read (previously this caused a NullPointerException)
            if (line!=null && copyright.matcher(line).matches()) {
                XMLTools.readXMLChunk(reader, handler, COPYRIGHT_TAG);
            }
            // all done!
            rlistener.endSequence();
            return hasMore;
        } catch (ParserConfigurationException e) {
            throw new ParseException(e);
        } catch (SAXException e) {
            throw new ParseException(e);
        }
    }
    
    // Writer state shared by the writing methods. Both fields are assigned in
    // beginWriting() and remain null until it has been called.
    // pw wraps the format's print stream and is flushed in finishWriting().
    private PrintWriter pw;
    // xml is the XML writer layered on top of pw; all tags are emitted through it.
    private XMLWriter xml;
    
    /**
     * {@inheritDoc}
     * Creates the XML writer on top of this format's print stream, then emits
     * the XML declaration and opens the root element with its schema attributes.
     * Characters are encoded as UTF-8 so that the bytes written agree with the
     * encoding announced in the XML declaration.
     *
     * @throws IOException if the header cannot be written.
     */
    public void beginWriting() throws IOException {
        // make an XML writer; encode explicitly as UTF-8 rather than relying
        // on the platform default charset, because the declaration below says UTF-8
        pw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(this.getPrintStream(), "UTF-8")));
        xml = new PrettyXMLWriter(pw);
        xml.printRaw("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
        xml.openTag(ENTRY_GROUP_TAG);
        xml.attribute("xmlns","http://uniprot.org/uniprot");
        xml.attribute("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance");
        xml.attribute("xsi:schemaLocation","http://uniprot.org/uniprot http://www.uniprot.org/support/docs/uniprot.xsd");
    }
    
    /**
     * {@inheritDoc}
     * Closes the root element and flushes the underlying print writer.
     */
    public void finishWriting() throws IOException {
        // close the root element opened when writing began
        this.xml.closeTag(ENTRY_GROUP_TAG);
        // push any buffered output through to the print stream
        this.pw.flush();
    }
    
    /**
     * {@inheritDoc}
     * The supplied stream is adopted only when this format has no print stream
     * configured yet. The sequence is then written using the default namespace.
     * The XML writer itself is created in {@code beginWriting()}.
     *
     * @param seq the sequence to write.
     * @param os the stream to adopt if none has been set on this format.
     * @throws IOException if the sequence cannot be written.
     */
    public void	writeSequence(Sequence seq, PrintStream os) throws IOException {
        // previously this re-set the (null) current stream and ignored os
        if (this.getPrintStream()==null) this.setPrintStream(os);
        this.writeSequence(seq, RichObjectFactory.getDefaultNamespace());
    }
    
    /**
     * {@inheritDoc}
     * The supplied stream is adopted only when this format has no print stream
     * configured yet. The sequence is then written using the default namespace.
     * The XML writer itself is created in {@code beginWriting()}.
     *
     * @param seq the sequence to write.
     * @param format the requested format name; must equal this format's default format.
     * @param os the stream to adopt if none has been set on this format.
     * @throws IOException if the sequence cannot be written.
     * @throws IllegalArgumentException if the format name is not the default format.
     */
    public void writeSequence(Sequence seq, String format, PrintStream os) throws IOException {
        // previously this re-set the (null) current stream and ignored os
        if (this.getPrintStream()==null) this.setPrintStream(os);
        if (!format.equals(this.getDefaultFormat())) throw new IllegalArgumentException("Unknown format: "+format);
        this.writeSequence(seq, RichObjectFactory.getDefaultNamespace());
    }
    
    /**
     * {@inheritDoc}
     * If namespace is null, then the sequence's own namespace is used.
     */
    public void writeSequence(Sequence seq, Namespace ns) throws IOException {
        RichSequence rs;
        try {
            if (seq instanceof RichSequence) rs = (RichSequence)seq;
            else rs = RichSequence.Tools.enrich(seq);
        } catch (ChangeVetoException e) {
            IOException e2 = new IOException("Unable to enrich sequence");
            e2.initCause(e);
            throw e2;
        }
        
        int key = 1;
        
        Set notes = rs.getNoteSet();
        List accessions = new ArrayList();
        List kws = new ArrayList();
        String cdat = null;
        String udat = null;
        String arel = null;
        String adat = null;
        String copyright = null;
        String proteinType = null;
        String proteinExists = null;
        Map genenames = new TreeMap();
        Map genesynonyms = new TreeMap();
        Map orfnames = new TreeMap();
        Map ordlocnames = new TreeMap();
        Set evidenceIDs = new TreeSet();
        Set organelles = new TreeSet();
        Map evcats = new TreeMap();
        Map evtypes = new TreeMap();
        Map evdates = new TreeMap();
        Map evattrs = new TreeMap();
        Map speciesRecs = new TreeMap();
        Map strainRecs = new TreeMap();
        Map tissueRecs = new TreeMap();
        Map transpRecs = new TreeMap();
        Map plasmidRecs = new TreeMap();
        for (Iterator i = notes.iterator(); i.hasNext();) {
            Note n = (Note)i.next();
            if (n.getTerm().equals(Terms.getDateCreatedTerm())) cdat=n.getValue();
            else if (n.getTerm().equals(Terms.getDateUpdatedTerm())) udat=n.getValue();
            else if (n.getTerm().equals(Terms.getRelAnnotatedTerm())) arel=n.getValue();
            else if (n.getTerm().equals(Terms.getDateAnnotatedTerm())) adat=n.getValue();
            else if (n.getTerm().equals(Terms.getAdditionalAccessionTerm())) accessions.add(n.getValue());
            else if (n.getTerm().equals(Terms.getOrganelleTerm())) organelles.add(n.getValue());
            else if (n.getTerm().equals(Terms.getKeywordTerm())) {
                ComparableTerm t = Terms.getUniprotKWOnto().getOrCreateTerm(n.getValue());
                try {
                    if (t.getIdentifier()==null || t.getIdentifier().length()==0) t.setIdentifier("UNKNOWN");
                } catch (ChangeVetoException ce) {
                    IOException e = new IOException("Failed to assign keyword identifier");
                    e.initCause(ce);
                    throw e;
                }
                kws.add(t);
            } else if (n.getTerm().equals(Terms.getCopyrightTerm())) copyright=n.getValue();
            else if (n.getTerm().equals(Terms.getProteinTypeTerm())) proteinType=n.getValue();
            else if (n.getTerm().equals(Terms.getProteinExistsTerm())) proteinExists=n.getValue();
            // use the nasty hack to split the reference rank away from the actual value in this field
            else if (n.getTerm().equals(Terms.getGeneNameTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                genenames.put(refID, ref.substring(colon+1)); // map of id -> string as only one name per gene
            } else if (n.getTerm().equals(Terms.getGeneSynonymTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (genesynonyms.get(refID)==null) genesynonyms.put(refID, new ArrayList());
                ((List)genesynonyms.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getOrderedLocusNameTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (ordlocnames.get(refID)==null) ordlocnames.put(refID, new ArrayList());
                ((List)ordlocnames.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getORFNameTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (orfnames.get(refID)==null) orfnames.put(refID, new ArrayList());
                ((List)orfnames.get(refID)).add(ref.substring(colon+1));
            }
            // use the nasty hack to split the reference rank away from the actual value in this field
            else if (n.getTerm().equals(Terms.getEvidenceCategoryTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                evcats.put(refID, ref.substring(colon+1)); // map of id -> string as only one name per gene
                evidenceIDs.add(refID);
            } else if (n.getTerm().equals(Terms.getEvidenceTypeTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                evtypes.put(refID, ref.substring(colon+1)); // map of id -> string as only one name per gene
                evidenceIDs.add(refID);
            } else if (n.getTerm().equals(Terms.getEvidenceDateTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                evdates.put(refID, ref.substring(colon+1)); // map of id -> string as only one name per gene
                evidenceIDs.add(refID);
            } else if (n.getTerm().equals(Terms.getEvidenceAttrTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                evattrs.put(refID, ref.substring(colon+1)); // map of id -> string as only one name per gene
                evidenceIDs.add(refID);
            }
            // use the nasty hack to split the reference rank away from the actual value in this field
            // we'll end up with a bunch in key 0 for those which did not come from us. We ignore these for now.
            else if (n.getTerm().equals(Terms.getSpeciesTerm())) {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (speciesRecs.get(refID)==null) speciesRecs.put(refID, new ArrayList());
                ((List)speciesRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getStrainTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (strainRecs.get(refID)==null) strainRecs.put(refID, new ArrayList());
                ((List)strainRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getTissueTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (tissueRecs.get(refID)==null) tissueRecs.put(refID, new ArrayList());
                ((List)tissueRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getTransposonTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (transpRecs.get(refID)==null) transpRecs.put(refID, new ArrayList());
                ((List)transpRecs.get(refID)).add(ref.substring(colon+1));
            } else if (n.getTerm().equals(Terms.getPlasmidTerm()))  {
                String ref = n.getValue();
                int colon = ref.indexOf(':');
                Integer refID = new Integer(0);
                if (colon>=1) refID = new Integer(ref.substring(0,colon));
                if (plasmidRecs.get(refID)==null) plasmidRecs.put(refID, new ArrayList());
                ((List)plasmidRecs.get(refID)).add(ref.substring(colon+1));
            }
        }
        
        xml.openTag(ENTRY_TAG);
        xml.attribute(ENTRY_VERSION_ATTR,""+(arel==null?""+rs.getVersion():arel));
        xml.attribute(ENTRY_NAMESPACE_ATTR,(ns==null?rs.getNamespace().getName():ns.getName()));
        xml.attribute(ENTRY_CREATED_ATTR,cdat);
        xml.attribute(ENTRY_UPDATED_ATTR,(adat==null?cdat:adat)); // annotation update
        
        xml.openTag(ACCESSION_TAG);
        xml.print(rs.getAccession());
        xml.closeTag(ACCESSION_TAG);
        
        xml.openTag(NAME_TAG);
        xml.print(rs.getName());
        xml.closeTag(NAME_TAG);
        
        xml.openTag(PROTEIN_TAG);
        if (proteinType!=null) xml.attribute(TYPE_ATTR,proteinType);
        String desc = rs.getDescription().trim(); // this is only going to make sense if it was a UniProt seq to start with
        if (desc.endsWith(".")) desc = desc.substring(0, desc.length()-1); // chomp trailing dot
        String[] parts = desc.split("\\[");
        for (int j = 0 ; j < parts.length; j++) {
            if (parts[j].startsWith(Terms.CONTAINS_PREFIX)) {
                // contains section
                String chunk = parts[j].substring(Terms.CONTAINS_PREFIX.length()+1).trim();
                if (chunk.endsWith("]")) chunk = chunk.substring(0, chunk.length()-1); // chomp trailing ]
                String[] moreparts = chunk.split(";");
                for (int k = 0; k < moreparts.length; k++) {
                    xml.openTag(DOMAIN_TAG);
                    String[] names = moreparts[k].split("\\(");
                    for (int l = 0; l < names.length; l++) {
                        String name = names[l].trim();
                        if (name.endsWith(")")) name = name.substring(0,name.length()-1); // chomp trailing )
                        xml.openTag(NAME_TAG);
                        xml.print(name);
                        xml.closeTag(NAME_TAG);
                    }
                    xml.closeTag(DOMAIN_TAG);
                }
            } else if (parts[j].startsWith(Terms.INCLUDES_PREFIX)) {
                // includes section
                String chunk = parts[j].substring(Terms.INCLUDES_PREFIX.length()+1).trim();
                if (chunk.endsWith("]")) chunk = chunk.substring(0, chunk.length()-1); // chomp trailing ]
                String[] moreparts = chunk.split(";");
                for (int k = 0; k < moreparts.length; k++) {
                    xml.openTag(COMPONENT_TAG);
                    String[] names = moreparts[k].split("\\(");
                    for (int l = 0; l < names.length; l++) {
                        String name = names[l].trim();
                        if (name.endsWith(")")) name = name.substring(0,name.length()-1); // chomp trailing )
                        xml.openTag(NAME_TAG);
                        xml.print(name);
                        xml.closeTag(NAME_TAG);
                    }
                    xml.closeTag(COMPONENT_TAG);
                }
            } else {
                // plain names
                String[] names = parts[j].split("\\(");
                for (int l = 0; l < names.length; l++) {
                    String name = names[l].trim();
                    if (name.endsWith(")")) name = name.substring(0,name.length()-1); // chomp trailing )
                    xml.openTag(NAME_TAG);
                    xml.print(name);
                    xml.closeTag(NAME_TAG);
                }
            }
        }
        xml.closeTag(PROTEIN_TAG);
        
        // gene line
        // Emits one gene element per gene id found in genenames. Each element holds the
        // primary gene name followed by any synonyms, ordered locus names and ORF names
        // that were collected for the same gene id. Every name is tagged with a type
        // attribute so that the different kinds of name can be told apart.
        for (Iterator i = genenames.keySet().iterator(); i.hasNext(); ) {
            Integer geneid = (Integer)i.next();
            String genename = (String)genenames.get(geneid);
            List synonyms = (List)genesynonyms.get(geneid);
            List orfs = (List)orfnames.get(geneid);
            List ordlocs = (List)ordlocnames.get(geneid);
            
            xml.openTag(GENE_TAG);
            
            // primary name
            xml.openTag(NAME_TAG);
            xml.attribute(TYPE_ATTR,Terms.GENENAME_KEY);
            xml.print(genename);
            xml.closeTag(NAME_TAG);
            
            // synonyms (list may be absent for this gene id)
            if (synonyms!=null) {
                for (Iterator j = synonyms.iterator(); j.hasNext(); ) {
                    xml.openTag(NAME_TAG);
                    xml.attribute(TYPE_ATTR,Terms.GENESYNONYM_KEY);
                    xml.print((String)j.next());
                    xml.closeTag(NAME_TAG);
                }
            }
            // ordered locus names - must iterate ordlocs itself; iterating the synonyms
            // list here would print the wrong values and fail when there are no synonyms
            if (ordlocs!=null) {
                for (Iterator j = ordlocs.iterator(); j.hasNext(); ) {
                    xml.openTag(NAME_TAG);
                    xml.attribute(TYPE_ATTR,Terms.ORDLOCNAME_KEY);
                    xml.print((String)j.next());
                    xml.closeTag(NAME_TAG);
                }
            }
            // ORF names - likewise iterate the orfs list, not the synonyms
            if (orfs!=null) {
                for (Iterator j = orfs.iterator(); j.hasNext(); ) {
                    xml.openTag(NAME_TAG);
                    xml.attribute(TYPE_ATTR,Terms.ORFNAME_KEY);
                    xml.print((String)j.next());
                    xml.closeTag(NAME_TAG);
                }
            }
            
            xml.closeTag(GENE_TAG);
        }
        
        // source line (from taxon)
        //   organism line
        NCBITaxon tax = rs.getTaxon();
        if (tax!=null) {
            xml.openTag(ORGANISM_TAG);
            xml.attribute(KEY_ATTR,""+(key++));
            
            for (Iterator i = tax.getNameClasses().iterator(); i.hasNext(); ) {
                String nameclass = (String)i.next();
                String ournameclass = Terms.COMMON_NAME_KEY;
                if (nameclass.equalsIgnoreCase(Terms.FULL_NAME_KEY)) ournameclass = NCBITaxon.EQUIVALENT;
                else if (nameclass.equalsIgnoreCase(Terms.SCIENTIFIC_NAME_KEY)) ournameclass = NCBITaxon.SCIENTIFIC;
                else if (nameclass.equalsIgnoreCase(Terms.SYNONYM_NAME_KEY)) ournameclass = NCBITaxon.SYNONYM;
                else if (nameclass.equalsIgnoreCase(Terms.ABBREV_NAME_KEY)) ournameclass = NCBITaxon.ACRONYM;
                for (Iterator j = tax.getNames(nameclass).iterator(); j.hasNext(); ) {
                    xml.openTag(NAME_TAG);
                    xml.attribute(TYPE_ATTR,ournameclass);
                    xml.print((String)j.next());
                    xml.closeTag(NAME_TAG);
                }
            }
            
            xml.openTag(DBXREF_TAG);
            xml.attribute(KEY_ATTR,""+(key++));
            xml.attribute(TYPE_ATTR,Terms.NCBI_TAXON_KEY);
            xml.attribute(ID_ATTR,""+tax.getNCBITaxID());
            xml.closeTag(DBXREF_TAG);
            
            String h = tax.getNameHierarchy();
            h = h.substring(0, h.length()-1); // chomp dot
            String[] hierarch = h.split(";");
            xml.openTag(LINEAGE_TAG);
            for (int j = 0; j < hierarch.length; j++) {
                xml.openTag(TAXON_TAG);
                xml.print(hierarch[j].trim());
                xml.closeTag(TAXON_TAG);
            }
            xml.closeTag(LINEAGE_TAG);
            
            xml.closeTag(ORGANISM_TAG);
        }
        
        // gene location line (organelle)
        // Emits one gene location element per organelle string. Strings starting with
        // "Plasmid" are treated as a comma-separated list of plasmids and produce one
        // name element per plasmid; anything else is written as a lower-cased type only.
        for (Iterator i = organelles.iterator(); i.hasNext(); ) {
            String org = (String)i.next();
            xml.openTag(GENELOCATION_TAG);
            if (org.startsWith("Plasmid")) {
                xml.attribute(TYPE_ATTR,"plasmid");
                String[] subparts = org.split(",");
                // bound the loop by the plasmid fragments themselves, not by the
                // unrelated description parts array that is also in scope here
                for (int j = 0; j < subparts.length; j++) {
                    String plasmid = subparts[j].trim();
                    // the last entry of a list is introduced by "and"
                    if (plasmid.startsWith("and")) plasmid = plasmid.substring(3).trim();
                    // strip the leading "Plasmid" keyword only when it is really there, so
                    // that short or unprefixed fragments are neither mangled nor cause
                    // a StringIndexOutOfBoundsException
                    if (plasmid.startsWith("Plasmid")) plasmid = plasmid.substring("Plasmid".length()).trim();
                    xml.openTag(GENELOCATION_NAME_TAG);
                    xml.attribute(STATUS_ATTR,"known");
                    xml.print(plasmid);
                    xml.closeTag(GENELOCATION_NAME_TAG);
                }
            } else {
                xml.attribute(TYPE_ATTR,org.toLowerCase()); // uniprotxml must have lower case
            }
            xml.closeTag(GENELOCATION_TAG);
        }
        
        // docrefs
        for (Iterator i = rs.getRankedDocRefs().iterator(); i.hasNext(); ) {
            RankedDocRef rdr = (RankedDocRef)i.next();
            DocRef dr = rdr.getDocumentReference();
            
            xml.openTag(REFERENCE_TAG);
            xml.attribute(KEY_ATTR,""+(key++));
            
            xml.openTag(CITATION_TAG);
            xml.attribute(TYPE_ATTR,"journal article"); // faking it i know
            
            if (dr.getTitle()!=null) {
                xml.openTag(TITLE_TAG);
                xml.print(dr.getTitle());
                xml.closeTag(TITLE_TAG);
            }
            
            List auths = new ArrayList(dr.getAuthorList());
            List editors = new ArrayList(auths);
            for (final Iterator j = editors.iterator(); j.hasNext(); ) {
                DocRefAuthor a = (DocRefAuthor)j.next();
                if (!a.isEditor())
                    j.remove();
                else
                    auths.remove(a);
            }
            if (!editors.isEmpty()) {
                xml.openTag(EDITOR_LIST_TAG);
                for (Iterator j = editors.iterator(); j.hasNext(); ) {
                    DocRefAuthor a = (DocRefAuthor)j.next();
                    if (a.isEditor()) {
                        if (a.isConsortium()) {
                            xml.openTag(CONSORTIUM_TAG);
                            xml.attribute(NAME_ATTR,a.getName());
                            xml.closeTag(CONSORTIUM_TAG);
                        } else {
                            xml.openTag(PERSON_TAG);
                            xml.attribute(NAME_ATTR,a.getName());
                            xml.closeTag(PERSON_TAG);
                        }
                    }
                }
                xml.closeTag(EDITOR_LIST_TAG);
            }
            if (!auths.isEmpty()) {
                xml.openTag(AUTHOR_LIST_TAG);
                for (Iterator j = auths.iterator(); j.hasNext(); ) {
                    DocRefAuthor a = (DocRefAuthor)j.next();
                    if (a.isConsortium()) {
                        xml.openTag(CONSORTIUM_TAG);
                        xml.attribute(NAME_ATTR,a.getName());
                        xml.closeTag(CONSORTIUM_TAG);
                    } else {
                        xml.openTag(PERSON_TAG);
                        xml.attribute(NAME_ATTR,a.getName());
                        xml.closeTag(PERSON_TAG);
                    }
                }
                xml.closeTag(AUTHOR_LIST_TAG);
            }
            
            xml.openTag(LOCATOR_TAG);
            xml.print(dr.getLocation());
            xml.closeTag(LOCATOR_TAG);
            
            CrossRef cr = dr.getCrossref();
            if (cr!=null) {
                xml.openTag(DBXREF_TAG);
                xml.attribute(TYPE_ATTR,cr.getDbname());
                xml.attribute(ID_ATTR,cr.getAccession());
                xml.attribute(KEY_ATTR,""+(key++));
                if (!cr.getNoteSet().isEmpty()) {
                    for (Iterator j = cr.getNoteSet().iterator(); j.hasNext(); ) {
                        Note n = (Note)j.next();
                        xml.openTag(PROPERTY_TAG);
                        xml.attribute(TYPE_ATTR,n.getTerm().getName());
                        xml.attribute(VALUE_ATTR,n.getValue());
                        xml.closeTag(PROPERTY_TAG);
                    }
                }
                xml.closeTag(DBXREF_TAG);
            }
            
            xml.closeTag(CITATION_TAG);
            
            // RP
            xml.openTag(RP_LINE_TAG);
            xml.print(dr.getRemark());
            xml.closeTag(RP_LINE_TAG);
            // Print out ref position if present
            if (rdr.getStart()!=null && rdr.getEnd()!=null && !rppat.matcher(dr.getRemark()).matches()) {
                xml.openTag(RP_LINE_TAG);
                xml.print("SEQUENCE OF "+rdr.getStart()+"-"+rdr.getEnd()+".");
                xml.closeTag(RP_LINE_TAG);
            }
            
            // RC
            boolean rcOpened = false;
            Integer rank = new Integer(rdr.getRank());
            if (speciesRecs.get(rank)!=null) {
                if (!rcOpened) {
                    xml.openTag(RC_LINE_TAG);
                    rcOpened = true;
                }
                for (Iterator j = ((List)speciesRecs.get(rank)).iterator(); j.hasNext(); ) {
                    xml.openTag(RC_SPECIES_TAG);
                    xml.print((String)j.next());
                    xml.closeTag(RC_SPECIES_TAG);
                }
            }
            if (strainRecs.get(rank)!=null) {
                if (!rcOpened) {
                    xml.openTag(RC_LINE_TAG);
                    rcOpened = true;
                }
                for (Iterator j = ((List)strainRecs.get(rank)).iterator(); j.hasNext(); ) {
                    xml.openTag(RC_STRAIN_TAG);
                    xml.print((String)j.next());
                    xml.closeTag(RC_STRAIN_TAG);
                }
            }
            if (tissueRecs.get(rank)!=null) {
                if (!rcOpened) {
                    xml.openTag(RC_LINE_TAG);
                    rcOpened = true;
                }
                for (Iterator j = ((List)tissueRecs.get(rank)).iterator(); j.hasNext(); ) {
                    xml.openTag(RC_TISSUE_TAG);
                    xml.print((String)j.next());
                    xml.closeTag(RC_TISSUE_TAG);
                }
            }
            if (transpRecs.get(rank)!=null) {
                if (!rcOpened) {
                    xml.openTag(RC_LINE_TAG);
                    rcOpened = true;
                }
                for (Iterator j = ((List)transpRecs.get(rank)).iterator(); j.hasNext(); ) {
                    xml.openTag(RC_TRANSP_TAG);
                    xml.print((String)j.next());
                    xml.closeTag(RC_TRANSP_TAG);
                }
            }
            if (plasmidRecs.get(rank)!=null) {
                if (!rcOpened) {
                    xml.openTag(RC_LINE_TAG);
                    rcOpened = true;
                }
                for (Iterator j = ((List)plasmidRecs.get(rank)).iterator(); j.hasNext(); ) {
                    xml.openTag(RC_PLASMID_TAG);
                    xml.print((String)j.next());
                    xml.closeTag(RC_PLASMID_TAG);
                }
            }
            if (rcOpened)
                xml.closeTag(RC_LINE_TAG);
            
            xml.closeTag(REFERENCE_TAG);
        }
        
        // comments
        for (Iterator i = rs.getComments().iterator(); i.hasNext(); ) {
            // use UniProtCommentParser to convert each text comment from string to object
            // do not print unconvertible ones (eg. no -!- on text)
            Comment c = (Comment)i.next();
            if (UniProtCommentParser.isParseable(c)) {
                // otherwise parse and display appropriately
                UniProtCommentParser ucp = new UniProtCommentParser();
                try {
                    ucp.parseComment(c);
                } catch (ParseException ce) {
                    IOException e = new IOException("Failed to parse comment when outputting");
                    e.initCause(ce);
                    throw e;
                }
                String type = ucp.getCommentType();
                String xtype = type.toLowerCase(); // uniprotxml requires lower case
                if (type.equalsIgnoreCase(UniProtCommentParser.PTM)) xtype = "posttranslational modification";
                else if (type.equalsIgnoreCase(UniProtCommentParser.DATABASE)) xtype = "online information";
                
                xml.openTag(COMMENT_TAG);
                xml.attribute(TYPE_ATTR,xtype);
                
                // database comment
                if (type.equalsIgnoreCase(UniProtCommentParser.DATABASE)) {
                    xml.attribute(NAME_ATTR,ucp.getDatabaseName());
                    
                    xml.openTag(COMMENT_LINK_TAG);
                    xml.attribute(COMMENT_LINK_URI_ATTR,ucp.getUri());
                    xml.closeTag(COMMENT_LINK_TAG);
                }
                // mass spec
                else if (type.equalsIgnoreCase(UniProtCommentParser.MASS_SPECTROMETRY)) {
                    xml.attribute(COMMENT_MASS_ATTR,""+ucp.getMolecularWeight());
                    if (ucp.getMolWeightError()!=null) xml.attribute(COMMENT_ERROR_ATTR,""+ucp.getMolWeightError());
                    xml.attribute(COMMENT_METHOD_ATTR,""+ucp.getMolWeightMethod());
                    
                    xml.openTag(LOCATION_TAG);
                    xml.openTag(LOCATION_BEGIN_TAG);
                    xml.attribute(LOCATION_POSITION_ATTR,""+ucp.getMolWeightRangeStart());
                    xml.closeTag(LOCATION_BEGIN_TAG);
                    xml.openTag(LOCATION_END_TAG);
                    xml.attribute(LOCATION_POSITION_ATTR,""+ucp.getMolWeightRangeEnd());
                    xml.closeTag(LOCATION_END_TAG);
                    xml.closeTag(LOCATION_TAG);
                }
                // interaction
                else if (type.equalsIgnoreCase(UniProtCommentParser.INTERACTION)) {
                    // UniProt flat allows for multiple interactions per comment, but
                    // UniProtXML only allows for a single one. So, we have to open/close
                    // and write additional comments as necessary.
                    for (Iterator j = ucp.getInteractions().iterator(); j.hasNext(); ) {
                        // process comment
                        Interaction interact = (Interaction)j.next();
                        
                        xml.openTag(COMMENT_INTERACTANT_TAG);
                        xml.attribute(COMMENT_INTERACT_INTACT_ATTR,interact.getFirstIntActID());
                        xml.closeTag(COMMENT_INTERACTANT_TAG);
                        
                        xml.openTag(COMMENT_INTERACTANT_TAG);
                        xml.attribute(COMMENT_INTERACT_INTACT_ATTR,interact.getSecondIntActID());
                        xml.openTag(ID_TAG);
                        xml.print(interact.getID());
                        xml.closeTag(ID_TAG);
                        if (interact.getLabel()!=null) {
                            xml.openTag(COMMENT_INTERACT_LABEL_TAG);
                            xml.print(interact.getLabel());
                            xml.closeTag(COMMENT_INTERACT_LABEL_TAG);
                        }
                        xml.closeTag(COMMENT_INTERACTANT_TAG);
                        
                        xml.openTag(COMMENT_ORGANISMS_TAG);
                        xml.print(interact.isOrganismsDiffer()?"true":"false");
                        xml.closeTag(COMMENT_ORGANISMS_TAG);
                        
                        xml.openTag(COMMENT_EXPERIMENTS_TAG);
                        xml.print(""+interact.getNumberExperiments());
                        xml.closeTag(COMMENT_EXPERIMENTS_TAG);
                        
                        // if has next, close and open next comment tag
                        if (j.hasNext()) {
                            xml.closeTag(COMMENT_TAG);
                            xml.openTag(COMMENT_TAG);
                            xml.attribute(TYPE_ATTR,xtype);
                        }
                    }
                }
                // alternative products
                else if (type.equalsIgnoreCase(UniProtCommentParser.ALTERNATIVE_PRODUCTS)) {
                    for (Iterator j = ucp.getEvents().iterator(); j.hasNext(); ) {
                        Event event = (Event)j.next();
                        xml.openTag(COMMENT_EVENT_TAG);
                        xml.attribute(TYPE_ATTR,event.getType().toLowerCase()); // uniprotxml requires lowercase
                        xml.closeTag(COMMENT_EVENT_TAG);
                    }
                    for (Iterator j = ucp.getIsoforms().iterator(); j.hasNext(); ) {
                        Isoform isoform = (Isoform)j.next();
                        xml.openTag(COMMENT_ISOFORM_TAG);
                        for (Iterator k = isoform.getIsoIDs().iterator(); k.hasNext(); ) {
                            xml.openTag(ID_TAG);
                            xml.print((String)k.next());
                            xml.closeTag(ID_TAG);
                        }
                        for (Iterator k = isoform.getNames().iterator(); k.hasNext(); ) {
                            xml.openTag(NAME_TAG);
                            xml.print((String)k.next());
                            xml.closeTag(NAME_TAG);
                        }
                        xml.openTag(SEQUENCE_TAG);
                        xml.attribute(TYPE_ATTR,isoform.getSequenceType().toLowerCase());
                        if (isoform.getSequenceType().equalsIgnoreCase("Described")) {
                            xml.attribute(REF_ATTR,isoform.getSequenceRef());
                        }
                        xml.closeTag(SEQUENCE_TAG);
                        xml.openTag(NOTE_TAG);
                        xml.print(isoform.getNote());
                        xml.closeTag(NOTE_TAG);
                        xml.closeTag(COMMENT_ISOFORM_TAG);
                    }
                }
                // biophysicoblahblah stuff
                else if (type.equalsIgnoreCase(UniProtCommentParser.BIOPHYSICOCHEMICAL_PROPERTIES)) {
                    if (ucp.getAbsorptionNote()!=null) {
                        xml.openTag(COMMENT_ABSORPTION_TAG);
                        xml.openTag(COMMENT_ABS_MAX_TAG);
                        xml.print(ucp.getAbsorptionMax());
                        xml.closeTag(COMMENT_ABS_MAX_TAG);
                        xml.openTag(TEXT_TAG);
                        xml.print(ucp.getAbsorptionNote());
                        xml.closeTag(TEXT_TAG);
                        xml.closeTag(COMMENT_ABSORPTION_TAG);
                    }
                    if (ucp.getKineticsNote()!=null) {
                        xml.openTag(COMMENT_KINETICS_TAG);
                        for (Iterator j = ucp.getKMs().iterator(); j.hasNext(); ) {
                            xml.openTag(COMMENT_KIN_KM_TAG);
                            xml.print((String)j.next());
                            xml.closeTag(COMMENT_KIN_KM_TAG);
                        }
                        for (Iterator j = ucp.getVMaxes().iterator(); j.hasNext(); ) {
                            xml.openTag(COMMENT_KIN_VMAX_TAG);
                            xml.print((String)j.next());
                            xml.closeTag(COMMENT_KIN_VMAX_TAG);
                        }
                        xml.openTag(TEXT_TAG);
                        xml.print(ucp.getKineticsNote());
                        xml.closeTag(TEXT_TAG);
                        xml.closeTag(COMMENT_KINETICS_TAG);
                    }
                    if (ucp.getPHDependence()!=null) {
                        xml.openTag(COMMENT_PH_TAG);
                        xml.print(ucp.getPHDependence());
                        xml.closeTag(COMMENT_PH_TAG);
                    }
                    if (ucp.getRedoxPotential()!=null) {
                        xml.openTag(COMMENT_REDOX_TAG);
                        xml.print(ucp.getRedoxPotential());
                        xml.closeTag(COMMENT_REDOX_TAG);
                    }
                    if (ucp.getTemperatureDependence()!=null) {
                        xml.openTag(COMMENT_TEMPERATURE_TAG);
                        xml.print(ucp.getTemperatureDependence());
                        xml.closeTag(COMMENT_TEMPERATURE_TAG);
                    }
                }
                // all other comments
                else {
                    xml.openTag(TEXT_TAG);
                    xml.print(ucp.getText());
                    xml.closeTag(TEXT_TAG);
                }
                
                // finish comment up
                if (ucp.getNote()!=null) {
                    xml.openTag(NOTE_TAG);
                    xml.print(ucp.getNote());
                    xml.closeTag(NOTE_TAG);
                }
                
                xml.closeTag(COMMENT_TAG);
            }
        }
        
        // xrefs
        for (Iterator i = rs.getRankedCrossRefs().iterator(); i.hasNext(); ) {
            RankedCrossRef rcr = (RankedCrossRef)i.next();
            CrossRef cr = rcr.getCrossRef();
            
            xml.openTag(DBXREF_TAG);
            String dbname = cr.getDbname();
            xml.attribute(TYPE_ATTR,dbname);
            xml.attribute(ID_ATTR,cr.getAccession());
            xml.attribute(KEY_ATTR,""+(key++));
            if (!cr.getNoteSet().isEmpty()) {
                int acccount = 2;
                for (Iterator j = cr.getNoteSet().iterator(); j.hasNext(); ) {
                    Note n = (Note)j.next();
                    if (n.getTerm().equals(Terms.getAdditionalAccessionTerm()) && !n.getValue().equals("-")) {
                        xml.openTag(PROPERTY_TAG);
                        String name = n.getTerm().getName();
                        if (acccount==2) {
                            // SECONDARY IDENTIFIER
                            if (dbname.equalsIgnoreCase("HIV") ||
                                    dbname.equalsIgnoreCase("INTERPRO") ||
                                    dbname.equalsIgnoreCase("PANTHER") ||
                                    dbname.equalsIgnoreCase("PFAM") ||
                                    dbname.equalsIgnoreCase("PIR") ||
                                    dbname.equalsIgnoreCase("PRINTS") ||
                                    dbname.equalsIgnoreCase("PRODOM") ||
                                    dbname.equalsIgnoreCase("REBASE") ||
                                    dbname.equalsIgnoreCase("SMART") ||
                                    dbname.equalsIgnoreCase("TIGRFAMS")) {
                                // the secondary identifier is the entry name.
                                name = "entry name";
                            } else if (dbname.equalsIgnoreCase("PDB")) {
                                // the secondary identifier is the structure determination method, which is controlled vocabulary that currently includes: X-ray(for X-ray crystallography), NMR(for NMR spectroscopy), EM(for electron microscopy and cryo-electron diffraction), Fiber(for fiber diffraction), IR(for infrared spectroscopy), Model(for predicted models) and Neutron(for neutron diffraction).
                                name = "structure determination method";
                            } else if (dbname.equalsIgnoreCase("DICTYBASE") ||
                                    dbname.equalsIgnoreCase("ECOGENE") ||
                                    dbname.equalsIgnoreCase("FLYBASE") ||
                                    dbname.equalsIgnoreCase("HGNC") ||
                                    dbname.equalsIgnoreCase("MGI") ||
                                    dbname.equalsIgnoreCase("RGD") ||
                                    dbname.equalsIgnoreCase("SGD") ||
                                    dbname.equalsIgnoreCase("STYGENE") ||
                                    dbname.equalsIgnoreCase("SUBTILIST") ||
                                    dbname.equalsIgnoreCase("WORMBASE") ||
                                    dbname.equalsIgnoreCase("ZFIN")) {
                                // the secondary identifier is the gene designation. If the gene designation is not available, a dash('-') is used.
                                name = "gene designation";
                            } else if (dbname.equalsIgnoreCase("GO")) {
                                // the second identifier is a 1-letter abbreviation for one of the 3 ontology aspects, separated from the GO term by a column. If the term is longer than 46 characters, the first 43 characters are indicated followed by 3 dots('...'). The abbreviations for the 3 distinct aspects of the ontology are P(biological Process), F(molecular Function), and C(cellular Component).
                                name = "term";
                            } else if (dbname.equalsIgnoreCase("HAMAP")) {
                                // the secondary identifier indicates if a domain is 'atypical' and/or 'fused', otherwise the field is empty('-').
                                name = "domain";
                            } else if (dbname.equalsIgnoreCase("ECO2DBASE")) {
                                // the secondary identifier is the latest release number or edition of the database that has been used to derive the cross-reference.
                                name = "release number";
                            } else if (dbname.equalsIgnoreCase("SWISS-2DPAGE") ||
                                    dbname.equalsIgnoreCase("HSC-2DPAGE")) {
                                // the secondary identifier is the species or tissue of origin.
                                name = "organism name";
                            } else if (dbname.equalsIgnoreCase("ENSEMBL")) {
                                // the secondary identifier is the species of origin.
                                name = "organism name";
                            } else if (dbname.equalsIgnoreCase("PIRSF")) {
                                // the secondary identifier is the protein family name.
                                name = "protein family name";
                            } else if (dbname.equalsIgnoreCase("AARHUS") ||
                                    dbname.equalsIgnoreCase("GHENT-2DPAGE")) {
                                // the secondary identifier is either 'IEF' (for isoelectric focusing) or 'NEPHGE' (for non-equilibrium pH gradient electrophoresis).
                                name = "secondary identifier";
                            } else if (dbname.equalsIgnoreCase("WORMPEP")) {
                                // the secondary identifier is a number attributed by the C.elegans genome-sequencing project to that protein.
                                name = "C.elegans number";
                            } else if (dbname.equalsIgnoreCase("AGD") ||
                                    dbname.equalsIgnoreCase("ANU-2DPAGE") ||
                                    dbname.equalsIgnoreCase("COMPLUYEAST-2DPAGE") ||
                                    dbname.equalsIgnoreCase("ECHOBASE") ||
                                    dbname.equalsIgnoreCase("GENEDB_SPOMBE") ||
                                    dbname.equalsIgnoreCase("GERMONLINE") ||
                                    dbname.equalsIgnoreCase("GLYCOSUITEDB") ||
                                    dbname.equalsIgnoreCase("GRAMENE") ||
                                    dbname.equalsIgnoreCase("H-INVDB") ||
                                    dbname.equalsIgnoreCase("INTACT") ||
                                    dbname.equalsIgnoreCase("LEGIOLIST") ||
                                    dbname.equalsIgnoreCase("LEPROMA") ||
                                    dbname.equalsIgnoreCase("LISTILIST") ||
                                    dbname.equalsIgnoreCase("MAIZEDB") ||
                                    dbname.equalsIgnoreCase("MEROPS") ||
                                    dbname.equalsIgnoreCase("MIM") ||
                                    dbname.equalsIgnoreCase("MYPULIST") ||
                                    dbname.equalsIgnoreCase("OGP") ||
                                    dbname.equalsIgnoreCase("PHCI-2DPAGE") ||
                                    dbname.equalsIgnoreCase("PHOSSITE") ||
                                    dbname.equalsIgnoreCase("PHOTOLIST") ||
                                    dbname.equalsIgnoreCase("PMMA-2DPAGE") ||
                                    dbname.equalsIgnoreCase("RAT-HEART-2DPAGE") ||
                                    dbname.equalsIgnoreCase("REACTOME") ||
                                    dbname.equalsIgnoreCase("SAGALIST") ||
                                    dbname.equalsIgnoreCase("SIENA-2DPAGE") ||
                                    dbname.equalsIgnoreCase("TAIR") ||
                                    dbname.equalsIgnoreCase("TIGR") ||
                                    dbname.equalsIgnoreCase("TRANSFAC") ||
                                    dbname.equalsIgnoreCase("TUBERCULIST")) {
                                // the secondary identifier is not used and a dash('-') is stored in that field.
                                // should never get here - I hope!
                            } else if (dbname.equalsIgnoreCase("HSSP")) {
                                // the secondary identifier is the entry name of the PDB structure related to that of the entry in which the HSSP cross-reference is present.
                                name = "entry name";
                            } else if (dbname.equalsIgnoreCase("GENEFARM")) {
                                // the secondary identifier is the gene family identifier. If the gene family identifier is not available, a dash('-') is used.
                                name = "gene family";
                            } else if (dbname.equalsIgnoreCase("SMR")) {
                                // the secondary identifier indicates the range(s) relevant to the structure model(s).
                                name = "range";
                            } else if (dbname.equalsIgnoreCase("EMBL") ||
                                    dbname.equalsIgnoreCase("DDBJ") ||
                                    dbname.equalsIgnoreCase("GENBANK")) {
                                // PROTEIN_ID; STATUS_IDENTIFIER; MOLECULE_TYPE
                                name = "protein id";
                            } else if (dbname.equalsIgnoreCase("PROSITE")) {
                                // ENTRY_NAME; STATUS.
                                name = "entry name";
                            }
                        } else if (acccount==3) {
                            // TERTIARY IDENTIFIER
                            if (dbname.equalsIgnoreCase("HAMAP") ||
                                    dbname.equalsIgnoreCase("PANTHER") ||
                                    dbname.equalsIgnoreCase("PFAM") ||
                                    dbname.equalsIgnoreCase("PIRSF") ||
                                    dbname.equalsIgnoreCase("PRODOM") ||
                                    dbname.equalsIgnoreCase("SMART") ||
                                    dbname.equalsIgnoreCase("TIGRFAMS")) {
                                // the tertiary identifier is the number of hits found in the sequence.
                                name = "number of hits";
                            } else if (dbname.equalsIgnoreCase("GO")) {
                                // the tertiary identifier is a 3-character GO evidence code. The meaning of the evidence codes is: IDA=inferred from direct assay, IMP=inferred from mutant phenotype, IGI=inferred from genetic interaction, IPI=inferred from physical interaction, IEP=inferred from expression pattern, TAS=traceable author statement, NAS=non-traceable author statement, IC=inferred by curator, ISS=inferred from sequence or structural similarity.
                                name = "evidence";
                            } else if (dbname.equalsIgnoreCase("PDB")) {
                                // the tertiary identifier indicates the chain(s) and the corresponding range, of which the structure has been determined. If the range is unknown, a dash is given rather than the range positions(e.g. 'A/B=-.'), if the chains and the range is unknown, a dash is used.
                                name = "chains";
                            } else if (dbname.equalsIgnoreCase("EMBL") ||
                                    dbname.equalsIgnoreCase("DDBJ") ||
                                    dbname.equalsIgnoreCase("GENBANK")) {
                                // PROTEIN_ID; STATUS_IDENTIFIER; MOLECULE_TYPE
                                name = "status identifier";
                            } else if (dbname.equalsIgnoreCase("PROSITE")) {
                                // ENTRY_NAME; STATUS.
                                name = "status";
                            }
                        } else {
                            // QUATERNARY AND ADDITIONAL
                            if (dbname.equalsIgnoreCase("EMBL") ||
                                    dbname.equalsIgnoreCase("DDBJ") ||
                                    dbname.equalsIgnoreCase("GENBANK")) {
                                // PROTEIN_ID; STATUS_IDENTIFIER; MOLECULE_TYPE
                                name = "molecule type";
                            }
                        }
                        xml.attribute(TYPE_ATTR,name);
                        xml.attribute(VALUE_ATTR,n.getValue());
                        xml.closeTag(PROPERTY_TAG);
                        acccount++;
                    }
                }
            }
            xml.closeTag(DBXREF_TAG);
        }
        
        // protein exists
        xml.openTag(PROTEIN_EXISTS_TAG);
        xml.attribute(TYPE_ATTR,proteinExists);
        xml.closeTag(PROTEIN_EXISTS_TAG);
        
        // keywords
        for (Iterator j = kws.iterator(); j.hasNext(); ) {
            ComparableTerm t = (ComparableTerm)j.next();
            xml.openTag(KEYWORD_TAG);
            xml.attribute(ID_ATTR,t.getIdentifier());
            xml.print(t.getName());
            xml.closeTag(KEYWORD_TAG);
        }
        
        // features
        for (Iterator i = rs.getFeatureSet().iterator(); i.hasNext(); ) {
            RichFeature f = (RichFeature)i.next();
            String descr = null;
            String ftid = null;
            String ref = null;
            String status = null;
            String original = null;
            String locseq = null;
            List variation = new ArrayList();
            for (Iterator j = f.getNoteSet().iterator(); j.hasNext(); ) {
                Note n = (Note)j.next();
                if (n.getTerm().equals(Terms.getFTIdTerm())) ftid = n.getValue();
                else if (n.getTerm().equals(Terms.getFeatureDescTerm())) descr = n.getValue();
                else if (n.getTerm().equals(Terms.getFeatureStatusTerm())) status = n.getValue();
                else if (n.getTerm().equals(Terms.getFeatureRefTerm())) ref = n.getValue();
                else if (n.getTerm().equals(Terms.getFeatureOriginalTerm())) original = n.getValue();
                else if (n.getTerm().equals(Terms.getFeatureVariationTerm())) variation.add(n.getValue());
                else if (n.getTerm().equals(Terms.getLocationSequenceTerm())) locseq = n.getValue();
            }
            
            xml.openTag(FEATURE_TAG);
            
            xml.attribute(TYPE_ATTR,f.getTypeTerm().getName()); // TODO : need to translate from UniProt flatfile format names?
            if (ftid!=null) xml.attribute(ID_ATTR,ftid);
            if (descr!=null) xml.attribute(FEATURE_DESC_ATTR,descr);
            if (ref!=null) xml.attribute(REF_ATTR,ref);
            if (status!=null) xml.attribute(STATUS_ATTR,status);
            if (original!=null) {
                xml.openTag(FEATURE_ORIGINAL_TAG);
                xml.print(original.trim());
                xml.closeTag(FEATURE_ORIGINAL_TAG);
            }
            for (Iterator j = variation.iterator(); j.hasNext(); ) {
                xml.openTag(FEATURE_VARIATION_TAG);
                xml.print(((String)j.next()).trim());
                xml.closeTag(FEATURE_VARIATION_TAG);
            }
            
            xml.openTag(LOCATION_TAG);
            if (locseq!=null) xml.attribute(LOCATION_SEQ_ATTR,locseq.trim());
            RichLocation rl = (RichLocation)f.getLocation();
            if (rl.getMinPosition().equals(rl.getMaxPosition())) {
                // point position
                xml.openTag(LOCATION_POSITION_TAG);
                if (rl.getMinPosition().getFuzzyStart() || rl.getMaxPosition().getFuzzyStart()) xml.attribute(STATUS_ATTR,"less than");
                else if (rl.getMinPosition().getFuzzyEnd() || rl.getMaxPosition().getFuzzyEnd()) xml.attribute(STATUS_ATTR,"greater than");
                xml.attribute(LOCATION_POSITION_ATTR,""+rl.getMin());
                xml.closeTag(LOCATION_POSITION_TAG);
            } else {
                // range position
                // begin
                xml.openTag(LOCATION_BEGIN_TAG);
                Position begin = rl.getMinPosition();
                if (begin.getFuzzyStart()) xml.attribute(STATUS_ATTR,"less than");
                else if (begin.getFuzzyEnd()) xml.attribute(STATUS_ATTR,"greater than");
                xml.attribute(LOCATION_POSITION_ATTR,""+begin.getStart());
                xml.closeTag(LOCATION_BEGIN_TAG);
                // end
                xml.openTag(LOCATION_END_TAG);
                Position end = rl.getMaxPosition();
                if (end.getFuzzyStart()) xml.attribute(STATUS_ATTR,"less than");
                else if (end.getFuzzyEnd()) xml.attribute(STATUS_ATTR,"greater than");
                xml.attribute(LOCATION_POSITION_ATTR,""+end.getEnd());
                xml.closeTag(LOCATION_END_TAG);
            }
            xml.closeTag(LOCATION_TAG);
            
            xml.closeTag(FEATURE_TAG);
        }
        
        // evidence
        for (Iterator i = evidenceIDs.iterator(); i.hasNext(); ) {
            Integer evidenceID = (Integer)i.next();
            String cat = (String)evcats.get(evidenceID);
            String type = (String)evtypes.get(evidenceID);
            String date = (String)evdates.get(evidenceID);
            String attr = (String)evattrs.get(evidenceID);
            
            xml.openTag(EVIDENCE_TAG);
            xml.attribute(KEY_ATTR,""+(key++));
            xml.attribute(EVIDENCE_CATEGORY_ATTR,cat);
            xml.attribute(EVIDENCE_DATE_ATTR,date);
            xml.attribute(TYPE_ATTR,type);
            if (attr!=null) xml.attribute(EVIDENCE_ATTRIBUTE_ATTR,attr);
            xml.closeTag(EVIDENCE_TAG);
        }
        
        // sequence
        int mw = 0;
        try {
            mw = (int)MassCalc.getMolecularWeight(rs);
        } catch (IllegalSymbolException e) {
            throw new RuntimeException("Found illegal symbol", e);
        }
        CRC64Checksum crc = new CRC64Checksum();
        String seqstr = rs.seqString();
        crc.update(seqstr.getBytes(),0,seqstr.length());
        xml.openTag(SEQUENCE_TAG);
        xml.attribute(SEQUENCE_VERSION_ATTR,""+rs.getVersion());
        xml.attribute(SEQUENCE_LENGTH_ATTR,""+rs.length());
        xml.attribute(SEQUENCE_MASS_ATTR,""+mw);
        xml.attribute(SEQUENCE_CHECKSUM_ATTR,""+crc);
        xml.attribute(SEQUENCE_MODIFIED_ATTR,(udat==null?cdat:udat)); // sequence update
        String[] lines = StringTools.wordWrap(rs.seqString(), "\\s+", this.getLineWidth());
        for (int i = 0; i < lines.length; i ++) xml.println(lines[i]);
        xml.closeTag(SEQUENCE_TAG);
        
        // close entry
        xml.closeTag(ENTRY_TAG);
        
        // copyright (if present)
        if (copyright!=null) {
            xml.openTag(COPYRIGHT_TAG);
            xml.println(copyright);
            xml.closeTag(COPYRIGHT_TAG);
        }
        
        pw.flush();
    }
    
    /**
     * {@inheritDoc}
     *
     * @return the value of the {@code UNIPROTXML_FORMAT} constant
     */
    public String getDefaultFormat() {
        return UNIPROTXML_FORMAT;
    }
    
    // SAX event handler for parsing XML documents that conform to http://www.ebi.uniprot.org/support/docs/uniprot.xsd
    private class UniProtXMLHandler extends DefaultHandler {
        
        // --- collaborators, all assigned by the constructor ---
        // owning format; startElement consults its elide-references / elide-features settings
        private RichSequenceFormat parent;
        private SymbolTokenization symParser;
        // receives the namespace, taxon, version and sequence properties found while parsing
        private RichSeqIOListener rlistener;
        // may be null on construction; startElement then fills it in when it sees ENTRY_TAG
        private Namespace ns;
        // created empty by the constructor; presumably accumulates element text - confirm against characters()/endElement
        private StringBuffer m_currentString;
        
        // --- per-entry parsing state, maintained by the SAX callbacks ---
        // taxon looked up from the organism's DBXREF_TAG id and handed to the listener
        private NCBITaxon tax;
        // feature template rebuilt from scratch at every FEATURE_TAG
        private RichFeature.Template templ;
        // reset at PROTEIN_TAG; DOMAIN_TAG / COMPONENT_TAG append their bracketed prefixes to it
        private StringBuffer proteinDesc;
        // set to true at PROTEIN_TAG; the domain/component flags are cleared once the first such tag is seen
        private boolean firstNameInProteinGroup;
        private boolean firstDomainInProteinGroup;
        private boolean firstComponentInProteinGroup;
        // reset to 0 at ENTRY_TAG and incremented at every GENE_TAG
        private int currGene;
        // TYPE_ATTR of the most recent NAME_TAG seen inside a gene / an organism respectively
        private String geneNameClass;
        private String organismNameClass;
        // name class (String) -> Set of names; copied onto the taxon and cleared at the organism's DBXREF_TAG
        private Map currNames = new TreeMap();
        // reset at GENELOCATION_TAG; receives the capitalised type unless that type is "Plasmid"
        private StringBuffer organelleDesc;
        // CrossRef objects built from non-organism DBXREF_TAGs; PROPERTY_TAG annotates the last one added
        private List currDBXrefs = new ArrayList();
        // cleared, together with currDBXrefs, at every REFERENCE_TAG
        private List currComments = new ArrayList();
        // CITATION_TAG attribute values other than TYPE_ATTR, joined with single spaces
        private String currRefLocation;
        // SimpleDocRefAuthor instances for the current reference; a new list is created at REFERENCE_TAG
        private List currRefAuthors;
        // reset to null at REFERENCE_TAG
        private String currRefTitle;
        // both reset to -999 at REFERENCE_TAG
        private int currRefStart;
        private int currRefEnd;
        // reset to 0 at ENTRY_TAG and incremented at every REFERENCE_TAG
        private int currRefRank;
        // "EDITOR" or "AUTHOR", depending on which list tag was opened last
        private String currPersonIs;
        // reset to 0 at ENTRY_TAG and incremented at every RC_LINE_TAG
        private int currRCID;
        // reset to 0 at ENTRY_TAG and incremented at every EVIDENCE_TAG
        private int currEvID;
        // ID_ATTR of the most recent KEYWORD_TAG
        private String currKWID;
        // comment parser created at COMMENT_TAG, plus the interaction / event / isoform currently being filled in
        private UniProtCommentParser currUCParser;
        private Interaction currUCParserInteract;
        private Event currUCParserEvent;
        private Isoform currUCParserIsoform;
        // context markers: each records which enclosing element (e.g. "ENTRY", "FEATURE", "COMMENT",
        // "ISOFORM") a later location / text / note / sequence / id / name tag belongs to
        private String currLocIsFor;
        private String currTextIsFor;
        private String currNoteIsFor;
        private String currSeqIsFor;
        private String currIDIsFor;
        private String currNameIsFor;
        // reset to 0 at COMMENT_TAG; an odd count starts a new Interaction, an even count completes it
        private int interactantCount;
        // textual location assembled from the begin / end / position tags
        private StringBuffer currLocStr;
        // rank given to the next note on the feature template; reset to 1 at FEATURE_TAG
        private int featNoteRank;
        
        // Builds a handler that reports what it parses to the supplied listener.
        //   parent    - the format that owns this handler; its elide settings are consulted by startElement
        //   symParser - the symbol tokenization, stored on the handler
        //   rlistener - the listener that receives the parsed sequence data
        //   ns        - the namespace for parsed entries; may be null, in which case
        //               startElement chooses one when it encounters the entry tag
        private UniProtXMLHandler(RichSequenceFormat parent, SymbolTokenization symParser,
                RichSeqIOListener rlistener, Namespace ns) {
            // every handler starts out with its own empty text buffer
            m_currentString = new StringBuffer();
            this.ns = ns;
            this.rlistener = rlistener;
            this.symParser = symParser;
            this.parent = parent;
        }
        
        
        // process an opening tag
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            
            if (qName.equals(ENTRY_TAG)) {
                try {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(ENTRY_NAMESPACE_ATTR) && this.ns==null) ns=(Namespace)RichObjectFactory.getObject(SimpleNamespace.class,new Object[]{val});
                        else if (name.equals(ENTRY_VERSION_ATTR)) rlistener.addSequenceProperty(Terms.getRelAnnotatedTerm(), val);
                        else if (name.equals(ENTRY_CREATED_ATTR)) rlistener.addSequenceProperty(Terms.getDateCreatedTerm(), val);
                        else if (name.equals(ENTRY_UPDATED_ATTR)) rlistener.addSequenceProperty(Terms.getDateAnnotatedTerm(), val);
                    }
                    if (this.ns==null) ns=RichObjectFactory.getDefaultNamespace();
                    rlistener.setNamespace(ns);
                } catch (ParseException e) {
                    throw new SAXException(e);
                }
                this.currNameIsFor = "ENTRY";
                this.currSeqIsFor = "ENTRY";
                this.currGene = 0;
                this.currNames.clear();
                this.currRefRank = 0;
                this.currRCID = 0;
                this.currEvID = 0;
            }
            
            else if (qName.equals(PROTEIN_TAG)) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i).trim();
                    String val = attributes.getValue(i).trim();
                    try {
                        if (name.equals(TYPE_ATTR)) rlistener.addSequenceProperty(Terms.getProteinTypeTerm(),val);
                    } catch (ParseException e) {
                        throw new SAXException(e);
                    }
                }
                this.proteinDesc = new StringBuffer();
                this.currNameIsFor = "PROTEIN";
                this.firstNameInProteinGroup = true;
                this.firstDomainInProteinGroup = true;
                this.firstComponentInProteinGroup = true;
            } else if (qName.equals(DOMAIN_TAG)) {
                if (!this.firstComponentInProteinGroup) proteinDesc.append("]");
                if (this.firstDomainInProteinGroup) proteinDesc.append(" ["+Terms.CONTAINS_PREFIX);
                else proteinDesc.append(";");
                this.firstDomainInProteinGroup = false;
                this.firstNameInProteinGroup = true;
            } else if (qName.equals(COMPONENT_TAG)) {
                if (!this.firstDomainInProteinGroup) proteinDesc.append("]");
                if (this.firstComponentInProteinGroup) proteinDesc.append(" ["+Terms.INCLUDES_PREFIX);
                else proteinDesc.append(";");
                this.firstComponentInProteinGroup = false;
                this.firstNameInProteinGroup = true;
            }
            
            else if (qName.equals(GENE_TAG)) {
                this.currGene++;
                this.currNameIsFor="GENE";
            }
            
            else if (qName.equals(NAME_TAG)) {
                if (this.currNameIsFor.equals("GENE")) {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(TYPE_ATTR)) this.geneNameClass=val;
                    }
                }
                
                else if (this.currNameIsFor.equals("ORGANISM")) {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(TYPE_ATTR)) this.organismNameClass=val;
                    }
                }
            }
            
            else if (qName.equals(ORGANISM_TAG)) {
                this.currNameIsFor="ORGANISM";
            }
            
            else if (qName.equals(DBXREF_TAG)) {
                if (this.currNameIsFor.equals("ORGANISM")) {
                    Integer taxID = null;
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(ID_ATTR)) taxID = Integer.valueOf(val);
                    }
                    try {
                        tax = (NCBITaxon)RichObjectFactory.getObject(SimpleNCBITaxon.class, new Object[]{taxID});
                        rlistener.setTaxon(tax);
                        for (Iterator j = currNames.keySet().iterator(); j.hasNext(); ) {
                            String nameClass = (String)j.next();
                            Set nameSet = (Set)this.currNames.get(nameClass);
                            try {
                                for (Iterator k = nameSet.iterator(); k.hasNext(); ) {
                                    String name = (String)k.next();
                                    tax.addName(nameClass,name);
                                }
                            } catch (ChangeVetoException ce) {
                                throw new ParseException(ce);
                            }
                        }
                    } catch (ParseException e) {
                        throw new SAXException(e);
                    }
                    this.currNames.clear();
                }
                
                else {
                    String type = null;
                    String id = null;
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(ID_ATTR)) id = val;
                        else if (name.equals(TYPE_ATTR)) type = val;
                    }
                    CrossRef dbx = (CrossRef)RichObjectFactory.getObject(SimpleCrossRef.class,new Object[]{type, id, new Integer(0)});
                    this.currDBXrefs.add(dbx);
                }
            } else if (qName.equals(PROPERTY_TAG)) {
                String id = null;
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(VALUE_ATTR)) id = val;
                }
                Note note = new SimpleNote(Terms.getAdditionalAccessionTerm(),id,1);
                try {
                    int last = this.currDBXrefs.size();
                    ((CrossRef)this.currDBXrefs.get(last-1)).getRichAnnotation().addNote(note);
                } catch (ChangeVetoException ce) {
                    SAXException pe = new SAXException("Could not annotate identifier terms");
                    pe.initCause(ce);
                    throw pe;
                }
            }
            
            else if (qName.equals(GENELOCATION_TAG)) {
                this.currNameIsFor = "ORGANELLE";
                this.organelleDesc = new StringBuffer();
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(TYPE_ATTR)) {
                        val = val.toUpperCase().charAt(0)+val.substring(1); // init caps for flat format
                        if (!val.equals("Plasmid")) this.organelleDesc.append(val);
                    }
                }
            }
            
            else if (qName.equals(REFERENCE_TAG) && !this.parent.getElideReferences()) {
                this.currRefLocation = null;
                this.currRefAuthors = new ArrayList();
                this.currRefTitle = null;
                this.currDBXrefs.clear();
                this.currComments.clear();
                this.currRefRank++;
                this.currRefStart = -999;
                this.currRefEnd = -999;
            } else if (qName.equals(CITATION_TAG) && !this.parent.getElideReferences()) {
                StringBuffer currRef = new StringBuffer();
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    // combine everything except type into a fake reference to use if locator is a no-show
                    if (!name.equals(TYPE_ATTR)) {
                        if (currRef.length()>0) currRef.append(" ");
                        currRef.append(val);
                    }
                }
                this.currRefLocation = currRef.toString();
            } else if (qName.equals(EDITOR_LIST_TAG)) {
                this.currPersonIs = "EDITOR";
            } else if (qName.equals(AUTHOR_LIST_TAG)) {
                this.currPersonIs = "AUTHOR";
            }  else if (qName.equals(PERSON_TAG)) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(NAME_ATTR)) {
                        if (this.currPersonIs.equals("AUTHOR")) currRefAuthors.add(new SimpleDocRefAuthor(val, false, false));
                        else if (this.currPersonIs.equals("EDITOR")) currRefAuthors.add(new SimpleDocRefAuthor(val, false, true));
                    }
                }
            } else if (qName.equals(CONSORTIUM_TAG)) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(NAME_ATTR)) {
                        if (this.currPersonIs.equals("AUTHOR")) currRefAuthors.add(new SimpleDocRefAuthor(val, true, false));
                        else if (this.currPersonIs.equals("EDITOR")) currRefAuthors.add(new SimpleDocRefAuthor(val, true, true));
                    }
                }
            }  else if (qName.equals(RC_LINE_TAG)) {
                this.currRCID++;
            }
            
            else if (qName.equals(PROTEIN_EXISTS_TAG)) {
                try {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(TYPE_ATTR)) rlistener.addSequenceProperty(Terms.getProteinExistsTerm(),val);
                    }
                } catch (ParseException e) {
                    SAXException pe = new SAXException("Could not annotate protein exists terms");
                    pe.initCause(e);
                    throw pe;
                }
            }
            
            else if (qName.equals(KEYWORD_TAG)) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(ID_ATTR)) this.currKWID = val;
                }
            }
            
            else if (qName.equals(EVIDENCE_TAG)) {
                this.currEvID++;
                try {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(EVIDENCE_CATEGORY_ATTR)) rlistener.addSequenceProperty(Terms.getEvidenceCategoryTerm(),val);
                        else if (name.equals(EVIDENCE_DATE_ATTR)) rlistener.addSequenceProperty(Terms.getEvidenceDateTerm(),val);
                        else if (name.equals(TYPE_ATTR)) rlistener.addSequenceProperty(Terms.getEvidenceTypeTerm(),val);
                        else if (name.equals(EVIDENCE_ATTRIBUTE_ATTR)) rlistener.addSequenceProperty(Terms.getEvidenceAttrTerm(),val);
                    }
                } catch (ParseException e) {
                    SAXException pe = new SAXException("Could not annotate evidence terms");
                    pe.initCause(e);
                    throw pe;
                }
            }
            
            else if (qName.equals(LOCATION_TAG)) {
                this.currLocStr = new StringBuffer();
                if (this.currLocIsFor.equals("FEATURE")) {
                    try {
                        for (int i = 0; i < attributes.getLength(); i++) {
                            String name = attributes.getQName(i);
                            String val = attributes.getValue(i);
                            if (name.equals(LOCATION_SEQ_ATTR)) {
                                Note note = new SimpleNote(Terms.getLocationSequenceTerm(), val, this.featNoteRank++);
                                ((RichAnnotation)templ.annotation).addNote(note);
                            }
                        }
                    } catch (ChangeVetoException e) {
                        SAXException pe = new SAXException("Could not create location terms");
                        pe.initCause(e);
                        throw pe;
                    }
                }
            } else if (qName.equals(LOCATION_BEGIN_TAG) || qName.equals(LOCATION_END_TAG) || qName.equals(LOCATION_POSITION_TAG)) {
                StringBuffer pos = new StringBuffer();
                pos.append(" "); // space between start and end
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(STATUS_ATTR)) {
                        if (val.equals("less than")) pos.append("<");
                        else if (val.equals("greater than")) pos.append(">");
                    } else if (name.equals(LOCATION_POSITION_ATTR)) {
                        pos.append(val);
                    }
                }
                this.currLocStr.append(pos.toString());
                if (qName.equals(LOCATION_POSITION_TAG)) currLocStr.append(pos.toString()); // fake it as begin=end
            }
            
            else if (qName.equals(FEATURE_TAG) && !this.parent.getElideFeatures()) {
                this.featNoteRank = 1;
                templ = new RichFeature.Template();
                templ.annotation = new SimpleRichAnnotation();
                templ.sourceTerm = Terms.getUniProtXMLTerm();
                templ.featureRelationshipSet = new TreeSet();
                templ.rankedCrossRefs = new TreeSet();
                try {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(TYPE_ATTR)) {
                            templ.typeTerm = RichObjectFactory.getDefaultOntology().getOrCreateTerm(val);
                        } else if (name.equals(ID_ATTR)) {
                            Note note = new SimpleNote(Terms.getFTIdTerm(), val, this.featNoteRank++);
                            ((RichAnnotation)templ.annotation).addNote(note);
                        } else if (name.equals(FEATURE_DESC_ATTR)) {
                            Note note = new SimpleNote(Terms.getFeatureDescTerm(), val, this.featNoteRank++);
                            ((RichAnnotation)templ.annotation).addNote(note);
                        } else if (name.equals(STATUS_ATTR)) {
                            Note note = new SimpleNote(Terms.getFeatureStatusTerm(), val, this.featNoteRank++);
                            ((RichAnnotation)templ.annotation).addNote(note);
                        } else if (name.equals(REF_ATTR)) {
                            Note note = new SimpleNote(Terms.getFeatureRefTerm(), val, this.featNoteRank++);
                            ((RichAnnotation)templ.annotation).addNote(note);
                        }
                    }
                } catch (ChangeVetoException e) {
                    SAXException pe = new SAXException("Could not create location terms");
                    pe.initCause(e);
                    throw pe;
                }
                this.currLocStr = new StringBuffer();
                this.currLocIsFor = "FEATURE";
            }
            
            else if (qName.equals(COMMENT_TAG)) {
                this.currUCParser = new UniProtCommentParser();
                this.currUCParser.setInteractions(new ArrayList());
                this.currUCParser.setEvents(new ArrayList());
                this.currUCParser.setIsoforms(new ArrayList());
                this.currUCParser.setKMs(new ArrayList());
                this.currUCParser.setVMaxes(new ArrayList());
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i).trim();
                    String val = attributes.getValue(i).trim();
                    if (name.equals(TYPE_ATTR)) {
                        String type = val.toUpperCase(); // easier to check this way, plus flat uniprot requires it
                        if (type.equals("POSTTRANSLATIONAL MODIFICATION")) type="PTM";
                        else if (type.equals("ONLINE INFORMATION")) type="DATABASE";
                        currUCParser.setCommentType(type);
                    } else if (name.equals(COMMENT_MASS_ATTR)) this.currUCParser.setMolecularWeight(Integer.parseInt(val));
                    else if (name.equals(COMMENT_ERROR_ATTR)) this.currUCParser.setMolWeightError(Integer.valueOf(val));
                    else if (name.equals(COMMENT_METHOD_ATTR)) this.currUCParser.setMolWeightMethod(val);
                    else if (name.equals(NAME_ATTR)) this.currUCParser.setDatabaseName(val);
                }
                this.currLocIsFor="COMMENT";
                this.currTextIsFor="COMMENT";
                this.currNoteIsFor="COMMENT";
                this.interactantCount = 0;
            } else if (qName.equals(COMMENT_ABSORPTION_TAG)) {
                this.currTextIsFor="ABSORPTION";
            } else if (qName.equals(COMMENT_KINETICS_TAG)) {
                this.currTextIsFor="KINETICS";
            } else if (qName.equals(COMMENT_LINK_TAG)) {
                this.currTextIsFor="KINETICS";
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(COMMENT_LINK_URI_ATTR)) this.currUCParser.setUri(val);
                }
            } else if (qName.equals(COMMENT_EVENT_TAG)) {
                this.currUCParserEvent = new Event();
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(TYPE_ATTR)) {
                        val = val.toUpperCase().charAt(0)+val.substring(1); // make first letter upper case for flat uniprot
                        this.currUCParserEvent.setType(val);
                    }
                }
                currUCParser.getEvents().add(currUCParserEvent);
            } else if (qName.equals(COMMENT_ISOFORM_TAG)) {
                this.currUCParserIsoform = new Isoform();
                this.currUCParser.getIsoforms().add(currUCParserIsoform);
                this.currUCParserEvent.setNamedIsoforms(this.currUCParser.getIsoforms().size());
                this.currNameIsFor="ISOFORM";
                this.currNoteIsFor="ISOFORM";
                this.currSeqIsFor="ISOFORM";
                this.currIDIsFor="ISOFORM";
            } else if (qName.equals(COMMENT_INTERACTANT_TAG)) {
                this.currIDIsFor="INTERACTION";
                this.interactantCount++;
                for (int i = 0; i < attributes.getLength(); i++) {
                    String name = attributes.getQName(i);
                    String val = attributes.getValue(i);
                    if (name.equals(COMMENT_INTERACT_INTACT_ATTR)) {
                        if (this.interactantCount%2==1) {
                            this.currUCParserInteract = new Interaction();
                            this.currUCParserInteract.setFirstIntActID(val);
                            this.currUCParser.getInteractions().add(this.currUCParserInteract);
                        }
                        else this.currUCParserInteract.setSecondIntActID(val);
                    }
                }
            }
            
            else if (qName.equals(SEQUENCE_TAG)) {
                if (this.currSeqIsFor.equals("ENTRY")) {
                    try {
                        for (int i = 0; i < attributes.getLength(); i++) {
                            String name = attributes.getQName(i);
                            String val = attributes.getValue(i);
                            if (name.equals(SEQUENCE_MODIFIED_ATTR)) {
                                rlistener.addSequenceProperty(Terms.getDateUpdatedTerm(),val);
                            }
                            else if (name.equals(SEQUENCE_VERSION_ATTR))
                                rlistener.setVersion(Integer.parseInt(val));
                        }
                    } catch (ParseException e) {
                        SAXException pe = new SAXException("Could not set sequence properties");
                        pe.initCause(e);
                        throw pe;
                    }
                }
                
                else if (this.currSeqIsFor.equals("ISOFORM")) {
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getQName(i);
                        String val = attributes.getValue(i);
                        if (name.equals(TYPE_ATTR)) {
                            val = val.toUpperCase().charAt(0)+val.substring(1); // init caps for flat uniprot
                            this.currUCParserIsoform.setSequenceType(val);
                        } else if (name.equals(REF_ATTR)) {
                            this.currUCParserIsoform.setSequenceRef(val);
                        }
                    }
                }
            }
        }
        
        /**
         * Processes a closing tag. By the time this is called the character data of the
         * element has already been accumulated in {@code m_currentString}; its trimmed
         * content is interpreted according to {@code qName} and the current
         * {@code curr...IsFor} context flags, and the result is either passed on to the
         * sequence listener or stored in the state used to build references, features and
         * UniProt comments.
         * <p>
         * The text buffer is always emptied before this method returns, including when it
         * exits with an exception.
         *
         * @param uri the namespace URI of the element (not used)
         * @param localName the local name of the element (not used)
         * @param qName the qualified name of the element being closed
         * @throws SAXException if the listener raises a {@link ParseException} or a
         *         feature note cannot be added to the current feature template
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            String val = this.m_currentString.toString().trim();
            
            try {
                if (qName.equals(COPYRIGHT_TAG)) {
                    rlistener.addSequenceProperty(Terms.getCopyrightTerm(),val);
                }
                
                else if (qName.equals(ACCESSION_TAG)) {
                    rlistener.setAccession(val);
                } else if (qName.equals(NAME_TAG)) {
                    if (this.currNameIsFor.equals("ENTRY")) rlistener.setName(val);
                    
                    else if (this.currNameIsFor.equals("PROTEIN")) {
                        // first name of a group is written plain, later ones in brackets
                        if (this.firstNameInProteinGroup) {
                            proteinDesc.append(" ");
                            proteinDesc.append(val);
                        } else {
                            proteinDesc.append(" (");
                            proteinDesc.append(val);
                            proteinDesc.append(")");
                        }
                        this.firstNameInProteinGroup = false;
                    }
                    
                    else if (this.currNameIsFor.equals("GENE")) {
                        // gene names are stored as "<current gene>:<name>"
                        if (this.geneNameClass.equals(Terms.GENENAME_KEY)) rlistener.addSequenceProperty(Terms.getGeneNameTerm(), this.currGene+":"+val);
                        else if (this.geneNameClass.equals(Terms.GENESYNONYM_KEY)) rlistener.addSequenceProperty(Terms.getGeneSynonymTerm(), this.currGene+":"+val);
                        else if (this.geneNameClass.equals(Terms.ORDLOCNAME_KEY)) rlistener.addSequenceProperty(Terms.getOrderedLocusNameTerm(), this.currGene+":"+val);
                        else if (this.geneNameClass.equals(Terms.ORFNAME_KEY)) rlistener.addSequenceProperty(Terms.getORFNameTerm(), this.currGene+":"+val);
                    }
                    
                    else if (this.currNameIsFor.equals("ORGANISM")) {
                        // map the UniProt name class onto a taxon name class, defaulting to common
                        String ournameclass = NCBITaxon.COMMON;
                        if (this.organismNameClass.equals(Terms.ABBREV_NAME_KEY)) ournameclass = NCBITaxon.ACRONYM;
                        else if (this.organismNameClass.equals(Terms.FULL_NAME_KEY)) ournameclass = NCBITaxon.EQUIVALENT;
                        else if (this.organismNameClass.equals(Terms.SCIENTIFIC_NAME_KEY)) ournameclass = NCBITaxon.SCIENTIFIC;
                        else if (this.organismNameClass.equals(Terms.SYNONYM_NAME_KEY)) ournameclass = NCBITaxon.SYNONYM;
                        if (!this.currNames.containsKey(ournameclass)) this.currNames.put(ournameclass,new TreeSet());
                        ((Set)this.currNames.get(ournameclass)).add(val);
                    }
                    
                    else if (this.currNameIsFor.equals("ORGANELLE")) {
                        this.organelleDesc.append(", Plasmid ");
                        this.organelleDesc.append(val);
                    }
                    
                    else if (this.currNameIsFor.equals("ISOFORM")) {
                        this.currUCParserIsoform.getNames().add(val);
                    }
                }
                
                else if (qName.equals(PROTEIN_TAG)) {
                    // close the bracket opened by a domain or component group, if any
                    if (!this.firstDomainInProteinGroup || !this.firstComponentInProteinGroup) this.proteinDesc.append("]");
                    this.proteinDesc.append(".");
                    rlistener.setDescription(this.proteinDesc.toString());
                }
                
                else if (qName.equals(ORGANISM_TAG)) {
                    this.currNameIsFor="";
                }
                
                else if (qName.equals(GENELOCATION_TAG)) {
                    String total = this.organelleDesc.toString();
                    // chomp the leading ", " separator (two characters) if it is there
                    if (total.startsWith(", ")) total = total.substring(2);
                    // put " and" after the last comma; index and insertion both refer
                    // to the chomped string so the offsets agree
                    int lastComma = total.lastIndexOf(',');
                    if (lastComma>-1) {
                        total = total.substring(0,lastComma+1)+" and"+total.substring(lastComma+1);
                    }
                    // nothing to record if no organelle text was collected
                    if (total.length()>0) rlistener.addSequenceProperty(Terms.getOrganelleTerm(), total);
                }
                
                else if (qName.equals(RC_SPECIES_TAG)) {
                    rlistener.addSequenceProperty(Terms.getSpeciesTerm(), this.currRCID+":"+val);
                } else if (qName.equals(RC_TISSUE_TAG)) {
                    rlistener.addSequenceProperty(Terms.getTissueTerm(), this.currRCID+":"+val);
                } else if (qName.equals(RC_TRANSP_TAG)) {
                    rlistener.addSequenceProperty(Terms.getTransposonTerm(), this.currRCID+":"+val);
                } else if (qName.equals(RC_PLASMID_TAG)) {
                    rlistener.addSequenceProperty(Terms.getPlasmidTerm(), this.currRCID+":"+val);
                }
                
                else if (qName.equals(TITLE_TAG)) {
                    this.currRefTitle = val;
                } else if (qName.equals(LOCATOR_TAG)) {
                    this.currRefLocation = val;
                } else if (qName.equals(RP_LINE_TAG)) {
                    this.currComments.add(val);
                    // Try to use it to find the location of the reference, if we have one.
                    Matcher m = rppat.matcher(val);
                    if (m.matches()) {
                        this.currRefStart = Integer.parseInt(m.group(1));
                        this.currRefEnd = Integer.parseInt(m.group(2));
                    }
                } else if (qName.equals(REFERENCE_TAG) && !this.parent.getElideReferences()) {
                    // do the crossrefs
                    CrossRef useForDocRef = null;
                    for (Iterator j = this.currDBXrefs.iterator(); j.hasNext();) {
                        CrossRef dbx = (CrossRef)j.next();
                        RankedCrossRef rdbx = new SimpleRankedCrossRef(dbx,0);
                        rlistener.setRankedCrossRef(rdbx);
                        if (useForDocRef==null) useForDocRef = dbx;
                        else {
                            // medline gets priority, then pubmed - if multiple, use last
                            if (dbx.getDbname().equalsIgnoreCase(Terms.MEDLINE_KEY) || 
                                    (dbx.getDbname().equalsIgnoreCase(Terms.PUBMED_KEY) && 
                                    !useForDocRef.getDbname().equalsIgnoreCase(Terms.MEDLINE_KEY))) {
                                useForDocRef = dbx;
                            }
                        }
                    }
                    // do the comment - can only be one in this object model
                    String currRefRemark = null;
                    if (currComments.size()>0) currRefRemark = (String)currComments.iterator().next();
                    // create the docref object
                    try {
                        DocRef dr = (DocRef)RichObjectFactory.getObject(SimpleDocRef.class,new Object[]{currRefAuthors,currRefLocation,currRefTitle});
                        // assign the pubmed or medline to the docref - medline gets priority
                        if (useForDocRef!=null) dr.setCrossref(useForDocRef);
                        // assign the remarks
                        dr.setRemark(currRefRemark);
                        // assign the docref to the bioentry; -999 marks "no position known"
                        RankedDocRef rdr = new SimpleRankedDocRef(dr,
                                (currRefStart != -999 ? Integer.valueOf(currRefStart) : null),
                                (currRefEnd != -999 ? Integer.valueOf(currRefEnd) : null),
                                currRefRank);
                        rlistener.setRankedDocRef(rdr);
                    } catch (ChangeVetoException e) {
                        throw new ParseException(e);
                    }
                    currDBXrefs.clear();
                    currComments.clear();
                }
                
                // keywords
                else if (qName.equals(KEYWORD_TAG)) {
                    // create and persist term
                    ComparableTerm t = Terms.getUniprotKWOnto().getOrCreateTerm(val);
                    try {
                        t.setIdentifier(currKWID);
                    } catch (ChangeVetoException e) {
                        throw new ParseException(e);
                    }
                    rlistener.addSequenceProperty(Terms.getKeywordTerm(), val);
                }
                
                else if (qName.equals(LOCATION_TAG)) {
                    if (currLocIsFor.equals("FEATURE")) {
                        templ.location = UniProtLocationParser.parseLocation(currLocStr.toString());
                    } else if (currLocIsFor.equals("COMMENT")) {
                        // a location inside a comment supplies the molecular weight range
                        Location l = UniProtLocationParser.parseLocation(currLocStr.toString());
                        this.currUCParser.setMolWeightRangeStart(l.getMin());
                        this.currUCParser.setMolWeightRangeEnd(l.getMax());
                    }
                }
                
                else if (qName.equals(FEATURE_TAG)) {
                    // start the feature from the template we built
                    rlistener.startFeature(templ);
                    // end the feature
                    rlistener.endFeature();
                } else if (qName.equals(FEATURE_ORIGINAL_TAG)) {
                    try {
                        Note note = new SimpleNote(Terms.getFeatureOriginalTerm(), val, featNoteRank++);
                        ((RichAnnotation)templ.annotation).addNote(note);
                    } catch (ChangeVetoException e) {
                        // pass the cause through the constructor so getCause()/getException() both see it
                        throw new SAXException("Could not create location terms", e);
                    }
                } else if (qName.equals(FEATURE_VARIATION_TAG)) {
                    try {
                        Note note = new SimpleNote(Terms.getFeatureVariationTerm(), val, featNoteRank++);
                        ((RichAnnotation)templ.annotation).addNote(note);
                    } catch (ChangeVetoException e) {
                        // pass the cause through the constructor so getCause()/getException() both see it
                        throw new SAXException("Could not create location terms", e);
                    }
                }
                
                else if (qName.equals(COMMENT_TAG)) {
                    rlistener.setComment(currUCParser.generate());
                } else if (qName.equals(TEXT_TAG)) {
                    if (this.currTextIsFor.equals("COMMENT")) currUCParser.setText(val);
                    else if (this.currTextIsFor.equals("ABSORPTION")) currUCParser.setAbsorptionNote(val);
                    else if (this.currTextIsFor.equals("KINETICS")) currUCParser.setKineticsNote(val);
                } else if (qName.equals(COMMENT_ABS_MAX_TAG)) {
                    currUCParser.setAbsorptionMax(val);
                } else if (qName.equals(COMMENT_KIN_KM_TAG)) {
                    currUCParser.getKMs().add(val);
                } else if (qName.equals(COMMENT_KIN_VMAX_TAG)) {
                    currUCParser.getVMaxes().add(val);
                } else if (qName.equals(COMMENT_PH_TAG)) {
                    currUCParser.setPHDependence(val);
                } else if (qName.equals(COMMENT_REDOX_TAG)) {
                    currUCParser.setRedoxPotential(val);
                } else if (qName.equals(COMMENT_TEMPERATURE_TAG)) {
                    currUCParser.setTemperatureDependence(val);
                } else if (qName.equals(COMMENT_ORGANISMS_TAG)) {
                    if (val.equalsIgnoreCase("true")) currUCParserInteract.setOrganismsDiffer(true);
                    else currUCParserInteract.setOrganismsDiffer(false);
                } else if (qName.equals(COMMENT_EXPERIMENTS_TAG)) {
                    currUCParserInteract.setNumberExperiments(Integer.parseInt(val));
                } else if (qName.equals(NOTE_TAG)) {
                    if (currNoteIsFor.equals("COMMENT")) currUCParser.setNote(val);
                    // a note inside an isoform goes to that isoform, not to the comment as a whole
                    else if (currNoteIsFor.equals("ISOFORM")) currUCParserIsoform.setNote(val);
                } else if (qName.equals(COMMENT_EVENT_TAG)) {
                    currUCParserEvent.setComment(val);
                } else if (qName.equals(COMMENT_ISOFORM_TAG)) {
                    // leaving the isoform: sequence and note tags apply to the outer context again
                    this.currSeqIsFor = "ENTRY";
                    this.currNoteIsFor = "COMMENT";
                } else if (qName.equals(ID_TAG)) {
                    if (currIDIsFor.equals("ISOFORM")) currUCParserIsoform.getIsoIDs().add(val);
                    else if (currIDIsFor.equals("INTERACTION")) currUCParserInteract.setID(val);
                } else if (qName.equals(COMMENT_INTERACT_LABEL_TAG)) {
                    currUCParserInteract.setLabel(val);
                }
                
                else if (qName.equals(SEQUENCE_TAG)) {
                    if (this.currSeqIsFor.equals("ENTRY") && !this.parent.getElideSymbols()) {
                        try {
                            // strip all whitespace and turn '.', '|' and '~' into '-' before tokenizing
                            SymbolList sl = new SimpleSymbolList(symParser,
                                    val.replaceAll("\\s+","").replaceAll("[\\.|~]","-"));
                            rlistener.addSymbols(symParser.getAlphabet(),
                                    (Symbol[])(sl.toList().toArray(new Symbol[0])),
                                    0, sl.length());
                        } catch (Exception e) {
                            throw new ParseException(e);
                        }
                    }
                }
                
                else if (qName.equals(ENTRY_TAG)) {
                    // do the comments
                    for (Iterator j = currComments.iterator(); j.hasNext();) {
                        rlistener.setComment((String)j.next());
                    }
                    // do the crossrefs
                    for (Iterator j = currDBXrefs.iterator(); j.hasNext();) {
                        CrossRef dbx = (CrossRef)j.next();
                        RankedCrossRef rdbx = new SimpleRankedCrossRef(dbx, 0);
                        rlistener.setRankedCrossRef(rdbx);
                    }
                    // end the sequence
                    currComments.clear();
                    currDBXrefs.clear();
                }
                
            } catch (ParseException e) {
                throw new SAXException(e);
            } finally {
                // drop old string, even on failure, so stale text never leaks into the next element
                this.m_currentString.setLength(0);
            }
        }
        
        /**
         * Collects character data found inside the current element. The slice
         * {@code ch[start .. start+length)} is appended to {@code m_currentString},
         * which {@code endElement} reads and then empties.
         *
         * @param ch the character array supplied by the parser
         * @param start index of the first character to take from {@code ch}
         * @param length number of characters to take
         */
        @Override
        public void characters(char[] ch, int start, int length) {
            m_currentString.append(ch, start, length);
        }
    }
}