// PathVisio, // a tool for data visualization and analysis using Biological Pathways // Copyright 2006-2011 BiGCaT Bioinformatics // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // package org.pathvisio.indexer; import java.io.IOException; import java.io.Reader; import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharTokenizer; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Index; import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.bridgedb.IDMapper; import org.bridgedb.IDMapperException; import org.bridgedb.Xref; import org.bridgedb.bio.Organism; import org.pathvisio.core.debug.Logger; import org.pathvisio.core.model.ObjectType; import org.pathvisio.core.model.Pathway; import org.pathvisio.core.model.PathwayElement; /** * Indexes every DataNode, including * it's synonym database references * @author thomas */ public class DataNodeIndexer extends IndexerBase { /** * An identifier of a DataNode xref that is on the pathway */ public static final String FIELD_ID = "id"; /** * An identifier/code combination * of a DataNode xref that is on the pathway. * The combination is of the form: * "identifier:code", where code is the system code * obtained by {@link DataSource#getSystemCode()}. */ public static final String FIELD_ID_CODE = "id.database"; /** * An identifier that was found by looking up all cross references * for a DataNode xref on the pathway. */ public static final String FIELD_XID = "x.id"; /** * An identifier/code combination * of a DataNode xref that was found by looking up all cross references * for a DataNode xref on the pathway. * The combination is of the form: * "identifier:code", where code is the system code * obtained by {@link org.pathvisio.model.DataSource#getSystemCode()}. */ public static final String FIELD_XID_CODE = "x.id.database"; XrefAnalyzer analyzer; public DataNodeIndexer(String source, Pathway pathway, IndexWriter writer) { super(source, pathway, writer); analyzer = new XrefAnalyzer(); } public void indexPathway() throws CorruptIndexException, IOException { for(PathwayElement pe : pathway.getDataObjects()) { if(pe.getObjectType() == ObjectType.DATANODE) { indexDataNode(pe); } } } void indexDataNode(PathwayElement pe) throws CorruptIndexException, IOException { Set addedXrefs = new HashSet(); Document doc = new Document(); if(pe.getGraphId() != null) { doc.add(new Field(FIELD_GRAPHID, pe.getGraphId(), Store.YES, Index.NO)); } Xref xref = pe.getXref(); if(xref == null || xref.getDataSource() == null || xref.getId() == null) { return; } addCrossRef(xref, doc, FIELD_ID, FIELD_ID_CODE); //Add cross references if connected Organism organism = Organism.fromLatinName(pathway.getMappInfo().getOrganism()); if(gdbs != null) { for(IDMapper gdb : gdbs.getGdbs(organism)) { if(gdb != null && gdb.isConnected()) { try { Set crossRefs = gdb.mapID(xref); for(Xref c : crossRefs) { if(!addedXrefs.contains(c)) { addCrossRef(c, doc, FIELD_XID, FIELD_XID_CODE); addedXrefs.add(c); } } } catch(IDMapperException e) { Logger.log.error("Unable to fetch cross references", e); } } } } addDocument(doc, analyzer); } void addCrossRef(Xref xref, Document doc, String field_id, String field_id_code) { if(xref != null) { String id = xref.getId(); doc.add( new Field( field_id, id, Field.Store.YES, Field.Index.UN_TOKENIZED ) ); doc.add( new Field( field_id_code, xref2string(xref), Field.Store.YES, Field.Index.TOKENIZED ) ); //Also add untokenized for exact searches doc.add( new Field( field_id_code, xref2string(xref), Field.Store.YES, Field.Index.UN_TOKENIZED ) ); } } public static String xref2string(Xref xref) { String id = xref.getId(); String code = ""; if(xref.getDataSource() != null) { code = xref.getDataSource().getSystemCode(); } return id + ":" + code; } class XrefAnalyzer extends Analyzer { SimpleAnalyzer simpleAnalyzer = new SimpleAnalyzer(); public TokenStream tokenStream(String field, Reader reader) { if(FIELD_XID_CODE.equals(field) || FIELD_ID_CODE.equals(field)) { return new CharTokenizer(reader) { protected boolean isTokenChar(char c) { return c != ':'; } }; } else { return simpleAnalyzer.tokenStream(field, reader); } } } }