/*
* BioJava development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public Licence. This should
* be distributed with the code. If you do not have a copy,
* see:
*
* http://www.gnu.org/copyleft/lesser.html
*
* Copyright for this code is held jointly by the individual
* authors. These should be listed in @author doc comments.
*
* For more information on the BioJava project and its aims,
* or to join the biojava-l mailing list, visit the home page
* at:
*
* http://www.biojava.org/
*
*/
package org.biojava.bio.proteomics;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.biojava.bio.BioError;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.AtomicSymbol;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;
import org.biojava.bio.symbol.SymbolList;
import org.biojava.bio.symbol.SymbolPropertyTable;
/**
* <code>MassCalc</code> calculates the mass of peptides which for our
* purposes are <code>SymbolList</code>s which contain
* <code>Symbol</code>s from the protein <code>Alphabet</code>. It uses
* the mono-isotopic and average-isotopic masses identical to those
* specified at www.micromass.co.uk
* Note: This class does not handle selenocysteine and pyrrolysine.
*
* @author M. Jones sdfsd
* @author Keith James (minor changes)
* @author Mark Schreiber
* @author George Waldon - getMolecularWeight
*/
public class MassCalc {
//Possibly these should be put in a configurable table somewhere
/**
* Constant value of Carbon monoisotopic mass
*/
public static final double Cmono = 12.00000;
/**
* Constant value of Hydrogen monoisotopic mass
*/
public static final double Hmono = 1.0078250;
/**
* Constant value of Nitrogen monoisotopic mass
*/
public static final double Nmono = 14.0030740;
/**
* Constant value of Oxygen monoisotopic mass
*/
public static final double Omono = 15.9949146;
/**
* Constant value of Carbon average mass
*/
public static final double Cavg = 12.011;
/**
* Constant value of Hydrogen average mass
*/
public static final double Havg = 1.00794;
/**
* Constant value of Nitrogen average mass
*/
public static final double Navg = 14.00674;
/**
* Constant value of Oxygen average mass
*/
public static final double Oavg = 15.9994;
//Save values here so that modifications are not global.
//Maps each residue Symbol to its mass (stored as a Double). Filled by the
//constructor and overwritten per residue by setSymbolModification.
private HashMap mSymbolPropertyHash;
//Created empty by the constructor. Presumably the map behind
//getVariableModMap() (Symbol -> List of alternative Double masses) -- TODO confirm.
private HashMap mVariableModPropertyHash;
/*
* If instance methods are being used this will store the
* isotopically and MH_PLUS correct terminal mass to be created.
* It is assigned in the constructor from calcTermMass(isotopicType, MH_PLUS)
* and returned by getTermMass().
* Need to be able to handle N and C term mods in the future.
*/
private double termMass;
/*
* Creates new MassCalc.
* @param isotopicType The type of isotopes to calculate. Either
* mono isotopic or average isotopic. It really is the name of a
* SymbolPropertyTable, e.g. SymbolPropertyTable.AVG_MASS or
* SymbolPropertyTable.MONO_MASS. */
/**
 * Creates a new <code>MassCalc</code>.
 *
 * <p>The terminal mass is computed once from the arguments, and the residue
 * masses of the requested property table are copied into a map owned by this
 * instance so that later modifications are not global. Selenocysteine and
 * pyrrolysine are skipped, as is any residue whose table lookup fails with a
 * <code>NullPointerException</code>.</p>
 *
 * @param isotopicType a <code>String</code>. The type of isotopes
 * to calculate. Either mono isotopic or average
 * isotopic. Acceptable values are
 * <code>SymbolPropertyTable.AVG_MASS</code> or
 * <code>SymbolPropertyTable.MONO_MASS</code>.
 * @param MH_PLUS a <code>boolean</code>, true if MH+ masses are
 * required; it is forwarded to the terminal mass calculation.
 */
public MassCalc(String isotopicType, boolean MH_PLUS) {
    // Terminal mass matching the requested isotope type and MH+ setting
    termMass = calcTermMass(isotopicType, MH_PLUS);

    mSymbolPropertyHash = new HashMap();
    SymbolPropertyTable symbolPropertyTable =
        ProteinTools.getSymbolPropertyTable(isotopicType);
    mVariableModPropertyHash = new HashMap();

    Iterator symbols = ProteinTools.getAlphabet().iterator();
    while (symbols.hasNext()) {
        Symbol sym = (Symbol) symbols.next();

        // Selenocysteine and pyrrolysine are not handled by this class
        if (sym == ProteinTools.o() || sym == ProteinTools.u()) {
            continue;
        }

        try {
            mSymbolPropertyHash.put(
                sym, Double.valueOf(symbolPropertyTable.getDoubleValue(sym)));
        } catch (NullPointerException ignored) {
            // Deliberately ignored: this seems to happen when an amino acid
            // is in the property table but has no residue value. Such a
            // residue simply gets no entry in the mass map.
        } catch (IllegalSymbolException ise) {
            throw new BioError("Error setting properties for Symbol " + sym, ise);
        }
    }
}
/**
 * Use this to set a post translational modification for the
 * <code>Symbol</code> represented by this character. It will only
 * affect the current <code>MassCalc</code> instance and will not
 * affect the static method.
 *
 * @param symbolToken a <code>char</code> representing a
 * <code>Symbol</code>.
 * @param mass a <code>double</code> to be the new mass of the
 * residue.
 *
 * @exception IllegalSymbolException if the <code>Symbol</code> is
 * not recognised.
 */
public void setSymbolModification(char symbolToken, double mass)
    throws IllegalSymbolException
{
    SymbolTokenization tokenization;
    try {
        tokenization = ProteinTools.getAlphabet().getTokenization("token");
    } catch (BioException ex) {
        // The protein alphabet is expected to provide a "token" tokenization
        throw new BioError("Expected a tokenization", ex);
    }

    // Resolve the one-letter code and replace the stored mass for that residue
    Symbol sym = tokenization.parseToken(String.valueOf(symbolToken));
    mSymbolPropertyHash.put(sym, Double.valueOf(mass));
}
/**
 * Add variable modifications for the residue identified by a one-letter
 * code. If multiple masses are set by this method, more than one mass will
 * be returned for a mass calculation. For example, if a peptide contains two
 * Mets and the user sets the native and oxidized mass for the Met, then the
 * masses returned will be of the peptide with 0, 1 and 2 modified Mets.
 *
 * @param residue The one char id for this residue
 * @param masses the masses to register for this residue
 * @throws IllegalSymbolException propagated from the symbol lookup or from
 * the <code>Symbol</code> overload this method delegates to
 * @see #getVariableMasses(SymbolList peptide)
 * @see #addVariableModification(Symbol residue,double[] masses)
 */
public void addVariableModification(char residue,
                                    double[] masses)
    throws IllegalSymbolException {
    // Translate the character to its Symbol and hand over to the Symbol overload
    addVariableModification(getSymbolForChar(residue), masses);
}
/** Add Variable modifications. If multiple masses are
* set by this method more then one mass will be returned for a mass
* calculation. For example if a peptide contains two Mets and the user sets
* the native and oxidized mass for the Met then the masses returned will be
* of the peptide with 0, 1 and 2 modified Mets.
*/
public void addVariableModification(Symbol residue,
double[] masses)
throws IllegalSymbolException{
List massList = new LinkedList();
for(int i=0; igetMass calculates the mass of this peptide. This
* only works for the values in the ResidueProperties.xml
* configuration file. It is probably slightly faster than the
* instance method, but it does not handle post-translational
* modifications.
*
* @param proteinSeq a SymbolList
whose mass is to be
* calculated. This should use the protein alphabet.
* @param isotopicType a String
The type of isotopes
* to calculate. Either mono isotopic or average
* isotopic. Acceptable values are
* SymbolPropertyTable.AVG_MASS
or
* SymbolPropertyTable.MONO_MASS
.
* @param MH_PLUS a boolean
true if the value needed
* is the MH+ mass.
*
* @return a double
mass of the peptide.
*
* @exception IllegalSymbolException if the
* SymbolList
contains illegal
* Symbol
s.
*/
public static final double getMass(SymbolList proteinSeq,
                                   String isotopicType,
                                   boolean MH_PLUS)
    throws IllegalSymbolException
{
    SymbolPropertyTable massTable =
        ProteinTools.getSymbolPropertyTable(isotopicType);

    // Sum the table mass of every residue; non-atomic symbols are rejected
    double total = 0.0;
    Iterator residues = proteinSeq.iterator();
    while (residues.hasNext()) {
        Symbol residue = (Symbol) residues.next();
        if (residue instanceof AtomicSymbol) {
            total += massTable.getDoubleValue(residue);
        } else {
            throw new IllegalSymbolException(
                "Symbol " + residue.getName() + " is not atomic");
        }
    }

    // The terminal mass is only added when the residues contributed some mass
    double terminal = calcTermMass(isotopicType, MH_PLUS);
    return (total != 0.0) ? total + terminal : total;
}
/**
 * Get the mass of this peptide using the residue masses held by this
 * instance. Use this if you want to set fixed modifications and have
 * created an instance of MassCalc. The value is calculated using the
 * value of MH_PLUS defined in the constructor. The static method may be
 * faster. The terminal mass is always added, including for an empty
 * sequence.
 *
 * @param proteinSeq The sequence for mass calculation
 *
 * @return The mass of the sequence
 *
 * @throws IllegalSymbolException if the sequence contains a symbol for
 * which this instance holds no mass
 */
public double getMass(SymbolList proteinSeq)
    throws IllegalSymbolException
{
    HashMap residueMasses = getSymbolPropertyMap();

    double total = 0.0;
    Iterator residues = proteinSeq.iterator();
    while (residues.hasNext()) {
        Symbol residue = (Symbol) residues.next();
        if (!residueMasses.containsKey(residue)) {
            throw new IllegalSymbolException(residue, "The mass of the symbol "+residue.getName()+" is unknown");
        }
        total += ((Double) residueMasses.get(residue)).doubleValue();
    }

    return total + getTermMass();
}
/**
* Get all masses including the variable mass.
* Allgorythm
*
* 1 Get the first residue of the sequence
* create a list of all the standard and non-standard massses for this reidue
* for each residue mass goto 1 with the sequence of all residues after the current residue
* add the residue mass to each mass from 1 to the list
*
*
* @see #addVariableModification
*/
public double[] getVariableMasses(SymbolList peptide) throws IllegalSymbolException {
double[] vMasses = getVMasses(peptide);
for(int i=0; igetTermMass returns the terminal mass being used
* by the instance methods.
*
* @return a double
mass.
*/
public double getTermMass() {
    // The constructor assigns this from calcTermMass(isotopicType, MH_PLUS)
    return this.termMass;
}
/**
*
*/
private double[] getVMasses(SymbolList peptide) throws IllegalSymbolException {
Set allMassList = new HashSet();
Symbol sym = peptide.symbolAt(1);
if(!getSymbolPropertyMap().containsKey(sym)){
String msg = "No mass Set for Symbol " + sym.getName();
throw new IllegalSymbolException(msg);
}
//Create a list for all masses of the current residue
List curResMasses = null;
if(getVariableModMap().containsKey(sym)){
curResMasses = new LinkedList((List) getVariableModMap().get(sym));
}else{
curResMasses = new LinkedList();
}
curResMasses.add(getSymbolPropertyMap().get(sym));
//Move through all masses and calculate the masses of all of the sub peptides
Iterator it = curResMasses.iterator();
while(it.hasNext()){
double resMass = ((Double)it.next()).doubleValue();
if(peptide.length() == 1){
allMassList.add(new Double(resMass));
}else{
//Get all masses of remaining peptide
double[] subMasses = getVMasses(peptide.subList(2, peptide.length()));
//Get next modified mass for symbol
for(int i=0; i