/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.program.hmmer; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.util.Iterator; import java.util.StringTokenizer; import org.biojava.bio.dist.Distribution; import org.biojava.bio.dist.DistributionFactory; import org.biojava.bio.dp.DotState; import org.biojava.bio.dp.EmissionState; import org.biojava.bio.dp.State; import org.biojava.bio.seq.ProteinTools; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.Alphabet; import org.biojava.bio.symbol.FiniteAlphabet; import org.biojava.bio.symbol.Symbol; /** A class for parsing in Hmmer markov models from HMM_ls files generated by HMMER training * note that this class is still currently experimental. * @author Lachlan Coin */ public class HmmerProfileParser{ protected Alphabet alph = ProteinTools.getAlphabet(); /** Returns a profile HMM representing the core HMMER hmm * @param inputfile the file which contains the Profile HMM data, as output by HMMER - e.g. HMM_ls */ public static HmmerProfileHMM parse(File inputfile){ HmmerProfileParser hmmP = new HmmerProfileParser(inputfile.toString()); hmmP.parseModel(inputfile); hmmP.setProfileHMM(); return hmmP.getModel(); } /** Returns the full markov model - including the core model + J,C,N loop states. * @param inputfile the file which contains the Profile HMM data, as output by HMMER - e.g. HMM_ls */ public static FullHmmerProfileHMM parseFull(File inputfile){ HmmerProfileParser hmmP = new HmmerProfileParser(inputfile.toString()); hmmP.parseModel(inputfile); hmmP.setProfileHMM(); hmmP.initialiseFullProfileHMM(); hmmP.setFullProfileHMM(); return hmmP.getFullModel(); } protected String domain1; protected HmmerProfileParser(String domain){ this.domain1 = domain; } protected HmmerProfileHMM initialiseProfileHMM(int len){ try{ DistributionFactory matchFactory = DistributionFactory.DEFAULT; DistributionFactory insertFactory = DistributionFactory.DEFAULT; return new HmmerProfileHMM(alph, len, matchFactory, insertFactory, domain1); } catch(Throwable t){ t.printStackTrace(); return null; } } protected HmmerModel hmm; private static final double sumCheckThreshold = 0.001; public HmmerProfileHMM getModel(){ return hmm.hmm; } FullHmmerProfileHMM getFullModel(){ return hmm.hmm_full; } void initialiseFullProfileHMM(){ hmm.initialiseFullProfileHMM(); } public void setProfileHMM(){ hmm.setProfileHMM(); } void setFullProfileHMM(){ hmm.setFullProfileHMM(); } public void parseModel(File inputFile){ System.out.println("Parsing model "+inputFile); try{ BufferedReader in = new BufferedReader(new FileReader(inputFile)); boolean inModel=false; int seq_pos=1; int rel_pos=0; String s = new String(); while((s = in.readLine())!= null){ if(s.startsWith("//")) break; if(!inModel){ if(s.startsWith("LENG")) { int[] a = parseString(s.substring(5),1); hmm = new HmmerModel(a[0]); } else if(s.startsWith("NULE")) hmm.setNullEmissions(s.substring(5)); else if(s.startsWith("NULT")) hmm.setNullTransitions(s.substring(5)); else if(s.startsWith("XT")) hmm.setSpecialTransitions(s.substring(5)); else if(s.startsWith("HMM ")){ inModel=true; hmm.setAlphList(s.substring(7)); in.readLine(); hmm.setBeginTransition(in.readLine()); } } else{ if(rel_pos==0){ hmm.setEmissions(s.substring(7), seq_pos); } else if(rel_pos==1 && seq_pos==1){ hmm.setInsertEmissions(s.substring(7)); } else if(rel_pos==2){ hmm.setTransitions(s.substring(7), seq_pos); } rel_pos++; if(rel_pos==3){ rel_pos=0; seq_pos++; } } } in.close(); } catch(Throwable t){ t.printStackTrace(); } } static int[] parseString(String s, int len){ String[] s1 = parseStringA(s,len); int[] s2 = new int[len]; for(int i=0; i0 && isumCheckThreshold) throw new Exception("Distribution does not sum to 1. Sums to "+sum); } /** Modifies HMM search for partial hits, by dividing probability by 2 at * each point and adding transition to end state */ void addProfileHMMTransitions() throws Exception{ for(int i=1; i<=hmm.columns(); i++){ if(i>1){ hmm.createTransition(hmm.magicalState(), hmm.getMatch(i)); } if(i1){ addProbability(hmm.getWeights(hmm.magicalState()), hmm.getMatch(i), 0.5); } if(i=1){ dist.setWeight(hmm.getInsert(i), convertToProb(transitions[i][1])); } dist.setWeight(hmm.getDelete(i+1), convertToProb(transitions[i][2])); } else{ dist.setWeight(hmm.magicalState(),1.0); } } for(int i=1; i1 && i