/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package dist; import java.util.*; import org.biojava.utils.*; import org.biojava.bio.symbol.*; import org.biojava.bio.dist.*; import org.biojava.bio.seq.*; import org.biojava.bio.seq.io.*; /** * Demonstration of using training to obtain the distribution of an n-mer. *

* This program generates a random DNA sequence. It then constructs
 * views of this sequence of the required order and collates the frequencies
 * of given n-mers.
 *
 * Two integer command-line arguments are required: the value handed to
 * Tools.createSymbolList (presumably the length of the sequence to
 * generate -- confirm against Tools) and the order n of the n-mers.
 *
 * @author David Huen, who cobbled it together from code by all and sundry.
 */
public class TestOrderNAlphabet
{
    /** Message printed when the wrong number of arguments is supplied. */
    private static final String USAGE =
        "Usage: java dist.TestOrderNAlphabet <length> <order>";

    /**
     * Counts every order-n symbol of a generated sequence, trains a
     * distribution over the matching cross-product alphabet and prints one
     * line per symbol of that alphabet: its "name" token, a tab, and its
     * trained weight.
     *
     * Any failure is reported with a stack trace and exit status 1.
     *
     * @param args args[0] is parsed as an int and passed to
     *             Tools.createSymbolList; args[1] is parsed as the
     *             order of the n-mer view.
     */
    public static void main(String [] args)
    {
        try {
            // verify arguments
            if (args.length != 2) {
                System.out.println(USAGE);
                System.exit(1);
            }

            SymbolList res = Tools.createSymbolList(Integer.parseInt(args[0]));
            int order = Integer.parseInt(args[1]);

            // generate the Nth order view of this sequence
            SymbolList view = SymbolListViews.orderNSymbolList(res, order);

            // create a cross product Alphabet of required order
            List alphas = Collections.nCopies(order, DNATools.getDNA());
            FiniteAlphabet orderNAlfa =
                (FiniteAlphabet) AlphabetManager.getCrossProductAlphabet(alphas);

            // create a distribution training context for this job
            // and register the distribution with it for training
            DistributionTrainerContext dtc = new SimpleDistributionTrainerContext();
            Distribution orderNDistribution =
                DistributionFactory.DEFAULT.createDistribution(orderNAlfa);
            dtc.registerDistribution(orderNDistribution);
            dtc.clearCounts();

            // now iterate thru' the order n symbol list view and accumulate
            // counts; positions are addressed from 1 to length() inclusive
            for (int i = 1; i <= view.length(); i++) {
                dtc.addCount(orderNDistribution, view.symbolAt(i), 1.0);
            }

            // go normalise the whole shebang!
            try {
                dtc.train();
            }
            catch (ChangeVetoException cve) {
                // keep the veto as the cause so the stack trace shows why
                // training was refused
                throw new AssertionFailure("couldn't train distribution", cve);
            }

            // we have to be able to tokenise the symbols!
            SymbolTokenization tokenizer = orderNAlfa.getTokenization("name");

            // now print out the observed distribution
            for (Iterator i = orderNAlfa.iterator(); i.hasNext();) {
                Symbol s = (Symbol) i.next();

                // print the weights
                System.out.println(
                    tokenizer.tokenizeSymbol(s) + "\t" + orderNDistribution.getWeight(s)
                );
            }
        }
        catch (Throwable t) {
            // top-level boundary of a command-line demo: report and fail
            t.printStackTrace();
            System.exit(1);
        }
    }
}