/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package seq; import java.io.BufferedReader; import java.io.FileReader; import org.biojava.bio.seq.DNATools; import org.biojava.bio.seq.Sequence; import org.biojava.bio.symbol.Symbol; import org.biojavax.RichObjectFactory; import org.biojavax.bio.seq.RichSequence; import org.biojavax.bio.seq.RichSequenceIterator; /** * Counts the GC content of a sequence in fasta format * @author Mark Schreiber */ public class GCContent { /** * Run the program * @param args a fasta file */ public static void main(String[] args) throws Exception { if (args.length != 1) throw new Exception("usage: java seq.GCContent filename.fa"); String fileName = args[0]; BufferedReader br = new BufferedReader(new FileReader(fileName)); RichSequenceIterator it = RichSequence.IOTools.readFastaDNA(br, RichObjectFactory.getDefaultNamespace()); // Iterate over all sequences in the stream while (it.hasNext()) { Sequence seq = it.nextRichSequence(); System.out.println("Length: " + seq.length()); int gc = 0; for (int pos = 1; pos <= seq.length(); ++pos) { Symbol sym = seq.symbolAt(pos); if (sym == DNATools.g() || sym == DNATools.c()) ++gc; } System.out.println(seq.getName() + ": " + ((gc * 100.0) / seq.length()) + " %"); } } }