/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ /* * AAindexStreamReader.java */ package org.biojava.bio.proteomics.aaindex; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.util.Map; import java.util.NoSuchElementException; import org.biojava.bio.BioException; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.Symbol; import org.biojava.bio.symbol.SymbolPropertyTable; /** * Iterator over {@link org.biojava.bio.proteomics.aaindex.AAindex} objects that * are stored in a stream in the AAindex1 file format. The format * of such an Amino Acid Index Database file is described in the * AAindex manual * . The {@link #nextTable()} method returns objects of type * {@link org.biojava.bio.proteomics.aaindex.AAindex}. See this class also for * further informations. To hold an AAindex1 file in memory for random access * use the {@link org.biojava.bio.proteomics.aaindex.SimpleSymbolPropertyTableDB} * class: *

 * SimpleSymbolPropertyTableDB db = new SimpleSymbolPropertyTableDB(
 *         new AAindexStreamReader(new FileReader("aaindex1")));
 * AAindex hydrophobicity = (AAindex) db.table("CIDH920105");
 * SymbolList symbols = ProteinTools.createProtein(
 *     "ARNDCEQGHILKMFPSTWYV");
 * double hp = 0.0;
 * for (int i = 1; i <= symbols.length(); i++) {
 *     hp += hydrophobicity.getDoubleValue(symbols.symbolAt(i));
 * }
 * System.out.println("Average hydrophobicity: " + Double.toString(
 *         hp / symbols.length()));
 *

 * <p>References:</p>
 *
 * <p>Kawashima, S. and Kanehisa, M.; AAindex: amino acid index database.
 * Nucleic Acids Res. 28, 374 (2000).</p>
 *
 * <p>Tomii, K. and Kanehisa, M.; Analysis of amino acid indices and mutation
 * matrices for sequence comparison and structure prediction of proteins.
 * Protein Eng. 9, 27-36 (1996).</p>
 *
 * <p>Nakai, K., Kidera, A., and Kanehisa, M.; Cluster analysis of amino acid
 * indices for prediction of protein structure and function.
 * Protein Eng. 2, 93-100 (1988).</p>

* @author Martin Szugat
 * @version $Revision: 3601 $
 */
public class AAindexStreamReader implements SymbolPropertyTableIterator {

    /* PRIVATE CONSTANTS */

    /**
     * Name of the tokenizer.
     */
    private static final String TOKENIZER = "token";

    /**
     * One-letter amino acid codes, in the order in which the values of an
     * 'I' section are assigned to symbols.
     */
    private static final String AMINO_ACID_CODES = "ARNDCQEGHILKMFPSTWYV";

    /**
     * Position of the first index value among the whitespace separated
     * tokens of an 'I' section. The tokens in front of it are presumably an
     * empty leading token followed by the ten column headers of the section
     * (to be confirmed against the AAindex manual).
     */
    private static final int FIRST_INDEX_TOKEN = 11;

    /* STATIC FIELDS */

    /**
     * List of amino acid symbols, or <code>null</code> if the symbols could
     * not be parsed while the class was loaded.
     */
    private static final Symbol[] aa = loadAminoAcids();

    /* PRIVATE FIELDS */

    /**
     * The internal reader.
     */
    private BufferedReader reader = null;

    /**
     * The current read line.
     */
    private String line = null;

    /**
     * The key char of the current read section.
     */
    private char keyChar;

    /**
     * The value of the current read section.
     */
    private String stringValue;

    /* PUBLIC CONSTRUCTORS */

    /**
     * Initializes the iterator.
     * @param reader reader over a stream in the AAindex file format.
     * @throws IOException if the stream could not be read.
     * @throws NullPointerException if <code>reader</code> is
     * <code>null</code>.
     */
    public AAindexStreamReader(Reader reader) throws IOException,
            NullPointerException {
        this(new BufferedReader(reader));
    }

    /**
     * Initializes the iterator and reads the first line of the stream.
     * @param reader buffered reader over a stream in the AAindex file format.
     * @throws IOException if the stream could not be read.
     * @throws NullPointerException if <code>reader</code> is
     * <code>null</code>.
     */
    public AAindexStreamReader(BufferedReader reader) throws IOException,
            NullPointerException {
        if (reader == null) {
            throw new NullPointerException("reader is null.");
        }
        this.reader = reader;
        line = reader.readLine();
    }

    /* PUBLIC METHODS */

    /**
     * Checks if the end of the file or stream is reached. Empty lines are
     * skipped as a side effect, so after this method returns
     * <code>false</code> the current line is guaranteed to be non-empty.
     * An I/O error while skipping empty lines is reported as end of stream
     * because this method cannot throw a checked exception.
     * @return <code>true</code> if the end of the file is reached,
     * <code>false</code> otherwise.
     */
    public boolean eof() {
        while (line != null && line.length() == 0) {
            try {
                line = reader.readLine();
            } catch (IOException e) {
                return true;
            }
        }
        return (line == null);
    }

    /* PRIVATE METHODS */

    /**
     * Parses the twenty amino acid symbols listed in
     * {@link #AMINO_ACID_CODES}.
     * @return the symbols in code order, or <code>null</code> if they could
     * not be parsed; in that case the stack trace is printed and the failure
     * is reported later, when an 'I' section has to be read.
     */
    private static Symbol[] loadAminoAcids() {
        try {
            SymbolTokenization tokenizer =
                    AAindex.PROTEIN_ALPHABET.getTokenization(TOKENIZER);
            Symbol[] symbols = new Symbol[AMINO_ACID_CODES.length()];
            for (int i = 0; i < symbols.length; i++) {
                symbols[i] = tokenizer.parseToken(
                        AMINO_ACID_CODES.substring(i, i + 1));
            }
            return symbols;
        } catch (BioException e) {
            e.printStackTrace();
        } catch (IndexOutOfBoundsException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads a AAindex section. The first character of the current line
     * becomes the key char; the remainder of that line (from the third
     * character on) and of every following line that starts with a blank is
     * concatenated, separated by single blanks, into the section value.
     * The current line must be non-empty, which holds after {@link #eof()}
     * returned <code>false</code>.
     * @throws BioException if the section could not be read.
     */
    private void readSection() throws BioException {
        keyChar = line.charAt(0);
        StringBuffer stringBuffer = new StringBuffer();
        do {
            // Lines of at most two characters carry a key only, no content.
            if (line.length() > 2) {
                stringBuffer.append(line.substring(2));
                if (!line.endsWith(" ")) {
                    stringBuffer.append(" ");
                }
            }
            try {
                line = reader.readLine();
            } catch (IOException e) {
                throw new BioException(e);
            }
        } while (!eof() && line.charAt(0) == ' ');
        stringValue = stringBuffer.toString();
    }

    /**
     * Stores the values of the current 'I' section in the given table.
     * Values that are not parseable as numbers are stored as "NaN".
     * @param aaIndex the table that receives the values.
     * @throws BioException if the amino acid symbols are not available, if
     * the section holds fewer tokens than required, or if the table rejects
     * a value.
     */
    private void readIndices(AAindex aaIndex) throws BioException {
        if (aa == null) {
            throw new BioException(
                    "Amino acid symbols could not be initialized.");
        }
        String[] headersAndIndices = stringValue.split("\\s+");
        int required = FIRST_INDEX_TOKEN + aa.length;
        // Report a truncated section as a parse error instead of letting an
        // ArrayIndexOutOfBoundsException escape.
        if (headersAndIndices.length < required) {
            throw new BioException("Incomplete 'I' section: expected at least "
                    + required + " tokens but found "
                    + headersAndIndices.length + ".");
        }
        for (int i = 0; i < aa.length; i++) {
            try {
                aaIndex.setDoubleProperty(aa[i],
                        headersAndIndices[FIRST_INDEX_TOKEN + i]);
            } catch (NumberFormatException e) {
                aaIndex.setDoubleProperty(aa[i], "NaN");
            }
        }
    }

    /* INTERFACE SymbolPropertyTableIterator */

    /**
     * {@inheritDoc}
     */
    public boolean hasNext() {
        return (!eof());
    }

    /**
     * {@inheritDoc}
     * <p>Reads sections until the end of the stream or a section with the
     * key char '/' is reached. The first section must have the key char
     * 'H'; its trimmed value is passed to the new
     * {@link org.biojava.bio.proteomics.aaindex.AAindex}.</p>
     * @throws BioException if the first section is not an 'H' section, if a
     * section with an unknown key char is found, or if a section could not
     * be read.
     * @throws NoSuchElementException if the end of the stream is reached.
     */
    public SymbolPropertyTable nextTable() throws BioException {
        if (eof()) {
            throw new NoSuchElementException();
        }
        readSection();
        if (keyChar != 'H') {
            throw new BioException("Expected 'H' but found: '" + keyChar
                    + "'.");
        }
        AAindex aaIndex = new AAindex(stringValue.trim());
        readSections:
        while (!eof()) {
            readSection();
            switch (keyChar) {
                case 'D':
                    aaIndex.setDescription(stringValue);
                    break;
                case 'R':
                    aaIndex.setLITDBEntryNumbers(stringValue.split("\\s+"));
                    break;
                case 'A':
                    aaIndex.setArticleAuthors(stringValue);
                    break;
                case 'T':
                    aaIndex.setArticleTitle(stringValue);
                    break;
                case 'J':
                    aaIndex.setJournalReference(stringValue);
                    break;
                case 'C':
                    // Tokens alternate between entry key and numeric value;
                    // a trailing unpaired token is ignored.
                    String[] keyValuePairs = stringValue.split("\\s+");
                    Map similarEntries = aaIndex.similarEntries();
                    for (int i = 0; i < keyValuePairs.length - 1; i += 2) {
                        similarEntries.put(keyValuePairs[i],
                                Double.valueOf(keyValuePairs[i + 1]));
                    }
                    break;
                case 'I':
                    readIndices(aaIndex);
                    break;
                case '*':
                    aaIndex.setComment(stringValue);
                    break;
                case '/':
                    // End of the current entry.
                    break readSections;
                default:
                    throw new BioException("Invalid key char found: '"
                            + keyChar + "'.");
            }
        }
        return aaIndex;
    }
}