/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ /* * AAindexStreamReader.java */ package org.biojava.bio.proteomics.aaindex; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.util.Map; import java.util.NoSuchElementException; import org.biojava.bio.BioException; import org.biojava.bio.seq.io.SymbolTokenization; import org.biojava.bio.symbol.Symbol; import org.biojava.bio.symbol.SymbolPropertyTable; /** * Iterator over {@link org.biojava.bio.proteomics.aaindex.AAindex} objects that * are stored in a stream in the AAindex1 file format. The format * of such an Amino Acid Index Database file is described in the * AAindex manual * . The {@link #nextTable()} method returns objects of type * {@link org.biojava.bio.proteomics.aaindex.AAindex}. See this class also for * further informations. To hold an AAindex1 file in memory for random access * use the {@link org.biojava.bio.proteomics.aaindex.SimpleSymbolPropertyTableDB} * class: *
* <pre>
* SimpleSymbolPropertyTableDB db = new SimpleSymbolPropertyTableDB(
*         new AAindexStreamReader(new FileReader("aaindex1")));
* AAindex hydrophobicity = (AAindex) db.table("CIDH920105");
* SymbolList symbols = ProteinTools.createProtein(
*         "ARNDCEQGHILKMFPSTWYV");
* double hp = 0.0;
* for (int i = 1; i &lt;= symbols.length(); i++) {
*     hp += hydrophobicity.getDoubleValue(symbols.symbolAt(i));
* }
* System.out.println("Average hydrophobicity: " + Double.toString(
*         hp / symbols.length()));
* </pre>
* <p>References:</p>
* <ul>
* <li>Kawashima, S. and Kanehisa, M.; AAindex: amino acid index database.
* Nucleic Acids Res. 28, 374 (2000).</li>
* <li>Tomii, K. and Kanehisa, M.; Analysis of amino acid indices and mutation
* matrices for sequence comparison and structure prediction of proteins.
* Protein Eng. 9, 27-36 (1996).</li>
* <li>Nakai, K., Kidera, A., and Kanehisa, M.; Cluster analysis of amino acid
* indices for prediction of protein structure and function.
* Protein Eng. 2, 93-100 (1988).</li>
* </ul>
* @author Martin Szugat
* @version $Revision: 3601 $
*/
public class AAindexStreamReader implements SymbolPropertyTableIterator {

    /* PRIVATE CONSTANTS */

    /**
     * Name of the tokenization that is requested from the protein alphabet.
     */
    private static final String TOKENIZER = "token";

    /* STATIC FIELDS */

    /**
     * Amino acid symbols in the order A, R, N, D, C, Q, E, G, H, I, L, K, M,
     * F, P, S, T, W, Y, V. Remains <code>null</code> if the static
     * initializer below fails.
     */
    private static Symbol[] aa;

    /* STATIC CONSTRUCTOR */

    static {
        try {
            SymbolTokenization parser =
                    AAindex.PROTEIN_ALPHABET.getTokenization(TOKENIZER);
            String codes = "ARNDCQEGHILKMFPSTWYV";
            Symbol[] parsed = new Symbol[codes.length()];
            for (int i = 0; i < parsed.length; i++) {
                parsed[i] = parser.parseToken(codes.substring(i, i + 1));
            }
            // Publish the array only once every token has been parsed, so a
            // failure part-way through leaves the field untouched.
            aa = parsed;
        } catch (BioException e) {
            e.printStackTrace();
        } catch (IndexOutOfBoundsException e) {
            e.printStackTrace();
        }
    }

    /* PRIVATE FIELDS */

    /**
     * Source of the lines that are parsed.
     */
    private BufferedReader reader;

    /**
     * The line read most recently from the reader that has not been consumed
     * yet.
     */
    private String line;

    /**
     * First character of the first line of the section read last.
     */
    private char keyChar;

    /**
     * Concatenated content of the section read last.
     */
    private String stringValue;

    /* PUBLIC CONSTRUCTORS */

    /**
     * Initializes the iterator.
     * @param reader reader over a stream in the AAindex file format.
     * @throws IOException if the stream could not be read.
     * @throws NullPointerException if <code>reader</code> is
     * <code>null</code>.
     */
    public AAindexStreamReader(Reader reader) throws IOException,
            NullPointerException {
        // Wrap the reader and delegate to the BufferedReader constructor.
        this(new BufferedReader(reader));
    }
/**
* Initializes the iterator.
* @param reader buffered reader over a stream in the AAindex file format.
* @throws IOException if the stream could not be read.
* @throws NullPointerException if <code>reader</code> is <code>null</code>.
*/
public AAindexStreamReader(BufferedReader reader) throws IOException,
        NullPointerException {
    // Reject a missing reader up front with an explicit message.
    if (null == reader) {
        throw new NullPointerException("reader is null.");
    }
    this.reader = reader;
    // Read the first line right away; the other methods always work on the
    // line that is currently held in the "line" field.
    this.line = this.reader.readLine();
}
/* PUBLIC METHODS */
/**
* Checks if the end of the file or stream is reached.
* @return <code>true</code> if the end of the file is reached,
* <code>false</code> otherwise.
*/
public boolean eof() {
    // Skip over empty lines so that the current line is either null or
    // holds at least one character.
    while (line != null && line.length() == 0) {
        try {
            line = reader.readLine();
        } catch (IOException e) {
            // A read failure is reported to the caller as end of input.
            return true;
        }
    }
    return line == null;
}
/**
* Reads an AAindex section.
* @throws BioException if the section could not be read.
*/
private void readSection() throws BioException {
    // The first character of the section's first line identifies it.
    keyChar = line.charAt(0);
    StringBuffer content = new StringBuffer();
    for (;;) {
        // The first two characters of every line are skipped; only what
        // follows them is collected.
        if (line.length() > 2) {
            content.append(line.substring(2));
            // Make sure the text of consecutive lines stays separated.
            if (!line.endsWith(" ")) {
                content.append(" ");
            }
        }
        try {
            line = reader.readLine();
        } catch (IOException e) {
            throw new BioException(e);
        }
        // The section continues only while further lines start with a blank.
        if (eof() || line.charAt(0) != ' ') {
            break;
        }
    }
    stringValue = content.toString();
}
/* INTERFACE SymbolPropertyTableIterator */
/**
* {@inheritDoc}
*/
public boolean hasNext() {
    // Another table is available as long as the input is not exhausted.
    boolean exhausted = eof();
    return !exhausted;
}
/**
* {@inheritDoc}
*/
public SymbolPropertyTable nextTable() throws BioException {
    // Position of the first index value among the tokens of an 'I' section
    // and the number of values that are read from it. Presumably token 0 is
    // the empty string produced by the leading blanks and tokens 1..10 are
    // the column headers -- confirm against the AAindex manual.
    final int firstValue = 11;
    final int valueCount = 20;

    if (eof()) {
        throw new NoSuchElementException();
    }

    // Every entry has to start with an 'H' section; its trimmed value is
    // handed to the AAindex constructor.
    readSection();
    if (keyChar != 'H') {
        throw new BioException("Expected 'H' but found: '" + keyChar
                + "'.");
    }
    AAindex aaIndex = new AAindex(stringValue.trim());

    // Consume the following sections until a section starting with '/' or
    // the end of the input is reached.
    readSections: while (!eof()) {
        readSection();
        switch (keyChar) {
            case 'D':
                aaIndex.setDescription(stringValue);
                break;
            case 'R':
                aaIndex.setLITDBEntryNumbers(stringValue.split("\\s+"));
                break;
            case 'A':
                aaIndex.setArticleAuthors(stringValue);
                break;
            case 'T':
                aaIndex.setArticleTitle(stringValue);
                break;
            case 'J':
                aaIndex.setJournalReference(stringValue);
                break;
            case 'C':
                // Tokens are consumed pairwise as (key, numeric value); a
                // trailing unpaired token is ignored.
                String[] keyValuePairs = stringValue.split("\\s+");
                Map similarEntries = aaIndex.similarEntries();
                for (int i = 0; i < keyValuePairs.length - 1; i += 2) {
                    similarEntries.put(keyValuePairs[i], Double
                            .valueOf(keyValuePairs[i + 1]));
                }
                break;
            case 'I':
                String[] headersAndIndices = stringValue.split("\\s+");
                // Report a truncated section through the declared exception
                // type instead of an ArrayIndexOutOfBoundsException.
                if (headersAndIndices.length < firstValue + valueCount) {
                    throw new BioException("Expected " + valueCount
                            + " index values in 'I' section but found: "
                            + Math.max(0, headersAndIndices.length
                                    - firstValue) + ".");
                }
                for (int i = 0; i < valueCount; i++) {
                    try {
                        aaIndex.setDoubleProperty(aa[i],
                                headersAndIndices[firstValue + i]);
                    } catch (NumberFormatException e) {
                        // Values that are not numbers are stored as NaN.
                        aaIndex.setDoubleProperty(aa[i], "NaN");
                    }
                }
                break;
            case '*':
                aaIndex.setComment(stringValue);
                break;
            case '/':
                // End of the current entry.
                break readSections;
            default:
                throw new BioException("Invalid key char found: '" + keyChar
                        + "'.");
        }
    }
    return aaIndex;
}
}