/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.program.ssbind; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.biojava.bio.BioException; import org.biojava.bio.search.SearchContentHandler; import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.StrandedFeature; import org.biojava.bio.seq.StrandedFeature.Strand; import org.biojava.bio.seq.homol.Homology; import org.biojava.bio.seq.homol.HomologyFeature; import org.biojava.bio.seq.homol.SimpleHomology; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.Alignment; import org.biojava.bio.symbol.FiniteAlphabet; import org.biojava.bio.symbol.RangeLocation; import org.biojava.bio.symbol.SimpleAlignment; import org.biojava.bio.symbol.SimpleSymbolList; import org.biojava.utils.ChangeVetoException; /** *

BlastLikeHomologyBuilder populates a * List with Homology instances created from * SAX events supplied via a SeqSimilarityAdapter. The * SAX events should describe elements conforming to the BioJava * BlastLikeDataSetCollection DTD. Suitable sources are * BlastLikeSAXParser or * FastaSAXParser. Annotated ViewSequences * wrapping both query and subject sequences are created and populated * with HomologyFeatures. See the documentation of * Homology and HomologyFeature.

* *

 * As SimpleHomologyFeatures are created on views of the query and
 * subject sequences, both query and subject should be nucleotide
 * sequences (SimpleHomologyFeature extends StrandedFeature). This
 * limits the searches currently handled to BLASTN, TBLASTX and
 * Fasta DNA.

*
* @author Keith James
* @author Greg Cox
* @since 1.2
*/
public class BlastLikeHomologyBuilder extends ViewSequenceFactory
    implements SearchContentHandler
{
    /**
     * <code>HOMOLOGY_FEATURE_TYPE</code> is the value assigned to the
     * <code>type</code> field of every
     * <code>HomologyFeature.Template</code> this builder fills in, and
     * so is the String returned when a resulting
     * <code>HomologyFeature</code>'s <code>getType()</code> method is
     * called.
     */
    public static final String HOMOLOGY_FEATURE_TYPE = "homology";

    // Identifier of the query sequence, used to obtain the query view
    private String queryID;

    // Property maps filled by the add*Property callbacks, one per
    // level of the search result (search, hit, sub-hit)
    private Map resultData;
    private Map hitData;
    private Map subHitData;

    // Tokenization used to parse the aligned sequence strings. It is
    // resolved when the first Homology is built and then reused
    private SymbolTokenization tokenParser;
    // Scratch buffer through which aligned sequence text is passed
    private StringBuffer tokenBuffer;

    // Homologies collected from the search currently being handled.
    // A stream may contain more than one search result
    private List homologies;

    // Whether further search results remain in the stream
    private boolean moreSearchesAvailable = false;

    // Receives the homologies from every search in the stream
    private List target;

    /**
     * Creates a new <code>BlastLikeHomologyBuilder</code> which adds
     * the <code>Homology</code> objects it builds to the supplied
     * <code>List</code> each time a search ends.
     *
     * @param target a <code>List</code> to receive the homologies.
     */
    public BlastLikeHomologyBuilder(List target)
    {
        this.target = target;

        this.resultData = new HashMap();
        this.hitData    = new HashMap();
        this.subHitData = new HashMap();

        // View caches are fields inherited from ViewSequenceFactory
        queryViewCache   = new HashMap();
        subjectViewCache = new HashMap();

        this.tokenBuffer = new StringBuffer(1024);
    }

    /**
     * Records the identifier of the query sequence. It is later passed
     * to <code>makeQueryViewSequence</code> when a homology is built.
     *
     * @param queryID a <code>String</code> identifier.
     */
    public void setQueryID(String queryID)
    {
        this.queryID = queryID;
    }

    /**
     * Reports the value last given to <code>setMoreSearches</code>
     * (initially <code>false</code>).
     *
     * @return the stored flag.
     */
    public boolean getMoreSearches()
    {
        return moreSearchesAvailable;
    }

    /**
     * Stores the flag indicating whether more search results are
     * available in the stream.
     *
     * @param value the new flag value.
     */
    public void setMoreSearches(boolean value)
    {
        moreSearchesAvailable = value;
    }

    /**
     * Prepares for a new search: empties the subject view cache and
     * starts a fresh list of homologies.
     */
    public void startSearch()
    {
        subjectViewCache.clear();
        homologies = new ArrayList();
    }

    /**
     * Finishes a search by appending all homologies collected during
     * it to the target list.
     */
    public void endSearch()
    {
        target.addAll(homologies);
    }

    /**
     * Discards the search-level properties of any previous result.
     */
    public void startHeader()
    {
        resultData.clear();
    }

    /**
     * No action is taken at the end of a header.
     */
    public void endHeader() { }

    /**
     * Discards the hit-level and sub-hit-level properties left over
     * from any previous hit.
     */
    public void startHit()
    {
        hitData.clear();
        subHitData.clear();
    }

    /**
     * No action is taken at the end of a hit.
     */
    public void endHit() { }

    /**
     * No action is taken at the start of a sub-hit.
     */
    public void startSubHit() { }

    /**
     * Builds a <code>Homology</code> from the properties gathered so
     * far and adds it to the current search's list. A
     * <code>BioException</code> raised while building is reported on
     * <code>System.err</code> and the sub-hit is skipped.
     */
    public void endSubHit()
    {
        Homology built;

        try
        {
            built = makeHomology();
        }
        catch (BioException be)
        {
            System.err.println("Failed to build Homology:");
            be.printStackTrace();
            return;
        }

        homologies.add(built);
    }

    /**
     * Stores a search-level property.
     *
     * @param key the property key.
     * @param value the property value.
     */
    public void addSearchProperty(Object key, Object value)
    {
        resultData.put(key, value);
    }

    /**
     * Stores a hit-level property.
     *
     * @param key the property key.
     * @param value the property value.
     */
    public void addHitProperty(Object key, Object value)
    {
        hitData.put(key, value);
    }

    /**
     * Stores a sub-hit-level property.
     *
     * @param key the property key.
     * @param value the property value.
     */
    public void addSubHitProperty(Object key, Object value)
    {
        subHitData.put(key, value);
    }

    /**
     * <code>makeHomology</code> creates a new
     * <code>SimpleHomology</code> describing the similarity between
     * the query and subject sequences. One
     * <code>HomologyFeature</code> is created on each of the view
     * sequences obtained for the query and the subject, and the two
     * features label the rows of the homology's alignment.
     *
     * @return an <code>Homology</code>.
     *
     * @exception BioException if an error occurs.
     */
    private Homology makeHomology() throws BioException
    {
        // Fold search-level and hit-level properties into the sub-hit
        // map so that every lookup below can use a single map
        subHitData.putAll(resultData);
        subHitData.putAll(hitData);

        if (tokenParser == null)
        {
            tokenParser = resolveTokenParser();
        }

        // An explicit strand may be given (FASTA DNA, BLASTN)
        boolean queryReversed   = isMinusStrand("queryStrand");
        boolean subjectReversed = isMinusStrand("subjectStrand");

        // A frame, where given, also carries strand information
        // (TBLASTN for hit, TBLASTX for both query and hit)
        queryReversed   |= isMinusFrame("queryFrame");
        subjectReversed |= isMinusFrame("subjectFrame");

        int qStart = intProperty("querySequenceStart");
        int qEnd   = intProperty("querySequenceEnd");
        int sStart = intProperty("subjectSequenceStart");
        int sEnd   = intProperty("subjectSequenceEnd");

        // The start/end coordinates from BioJava XML don't follow the
        // BioJava paradigm of start < end, with orientation given by
        // the strand property. Rather, they present start/end as
        // displayed in BLAST output, with the coordinates being
        // inverted on the reverse strand. We account for this here by
        // exchanging the two ends whenever the strand is negative.
        int queryMin   = queryReversed   ? qEnd   : qStart;
        int queryMax   = queryReversed   ? qStart : qEnd;
        int subjectMin = subjectReversed ? sEnd   : sStart;
        int subjectMax = subjectReversed ? sStart : sEnd;

        Strand queryStrand = queryReversed
            ? StrandedFeature.NEGATIVE : StrandedFeature.POSITIVE;
        Strand subjectStrand = subjectReversed
            ? StrandedFeature.NEGATIVE : StrandedFeature.POSITIVE;

        String subjectID = (String) hitData.get("subjectId");

        Sequence queryView   = makeQueryViewSequence(queryID);
        Sequence subjectView = makeSubjectViewSequence(subjectID);

        SimpleHomology homology = new SimpleHomology();

        // Maps each HomologyFeature to its row of the alignment
        Map rowsByFeature = new HashMap();

        try
        {
            String source = subHitData.containsKey("program")
                ? (String) subHitData.get("program")
                : "unknown";

            addHomologyFeature(rowsByFeature, queryView,
                               "querySequence", source,
                               queryMin, queryMax, queryStrand,
                               homology);
            addHomologyFeature(rowsByFeature, subjectView,
                               "subjectSequence", source,
                               subjectMin, subjectMax, subjectStrand,
                               homology);

            homology.setAlignment(new SimpleAlignment(rowsByFeature));

            return homology;
        }
        catch (ChangeVetoException cve)
        {
            throw new BioException("Failed to create HomologyFeature", cve);
        }
    }

    /**
     * Determines the tokenization for the aligned sequences from the
     * "hitSequenceType" property or, failing that, from the "program"
     * property.
     *
     * @return a <code>SymbolTokenization</code>.
     *
     * @exception BioException if neither property is present or the
     * alphabet or tokenization cannot be obtained.
     */
    private SymbolTokenization resolveTokenParser() throws BioException
    {
        String identifier;

        if (subHitData.containsKey("hitSequenceType"))
        {
            // An explicit sequence type takes precedence
            identifier = (String) subHitData.get("hitSequenceType");
        }
        else if (subHitData.containsKey("program"))
        {
            // Otherwise fall back on the program name (only works
            // for Blast)
            identifier = (String) subHitData.get("program");
        }
        else
        {
            throw new BioException("Failed to determine sequence type");
        }

        FiniteAlphabet alpha = AlphabetResolver.resolveAlphabet(identifier);
        return alpha.getTokenization("token");
    }

    /**
     * Tests whether the named strand property is present and equal to
     * "minus".
     *
     * @param key the property key.
     *
     * @return <code>true</code> if the property indicates the minus
     * strand.
     */
    private boolean isMinusStrand(String key)
    {
        return subHitData.containsKey(key)
            && subHitData.get(key).equals("minus");
    }

    /**
     * Tests whether the named frame property is present and begins
     * with "minus".
     *
     * @param key the property key.
     *
     * @return <code>true</code> if the property indicates a minus
     * frame.
     */
    private boolean isMinusFrame(String key)
    {
        return subHitData.containsKey(key)
            && ((String) subHitData.get(key)).startsWith("minus");
    }

    /**
     * Reads the named property as a <code>String</code> and parses it
     * as a decimal integer.
     *
     * @param key the property key.
     *
     * @return the parsed value.
     */
    private int intProperty(String key)
    {
        return Integer.parseInt((String) subHitData.get(key));
    }

    /**
     * Creates one <code>HomologyFeature</code> on a view sequence and
     * maps it to the symbols parsed from the named aligned-sequence
     * property.
     *
     * @param rowsByFeature the map receiving the feature and symbols.
     * @param view the sequence on which to create the feature.
     * @param sequenceKey the property holding the aligned sequence
     * text.
     * @param source the feature source.
     * @param min the lower coordinate of the feature location.
     * @param max the upper coordinate of the feature location.
     * @param strand the feature strand.
     * @param homology the homology to which the feature belongs.
     *
     * @exception BioException if the feature or symbol list cannot be
     * created.
     * @exception ChangeVetoException if creation of the feature is
     * vetoed.
     */
    private void addHomologyFeature(Map      rowsByFeature,
                                    Sequence view,
                                    String   sequenceKey,
                                    String   source,
                                    int      min,
                                    int      max,
                                    Strand   strand,
                                    Homology homology)
        throws BioException, ChangeVetoException
    {
        tokenBuffer.setLength(0);
        tokenBuffer.append((String) subHitData.get(sequenceKey));

        HomologyFeature.Template template = new HomologyFeature.Template();
        template.type       = HOMOLOGY_FEATURE_TYPE;
        template.source     = source;
        template.location   = new RangeLocation(min, max);
        template.strand     = strand;
        template.annotation = AnnotationFactory.makeAnnotation(subHitData);
        template.homology   = homology;

        // The feature is created before the symbols are parsed
        rowsByFeature.put(view.createFeature(template),
                          new SimpleSymbolList(tokenParser,
                                               tokenBuffer.toString()));
    }
}