/****************************************************************************** ** @source AJAX nucleic acid functions ** ** @author Copyright (C) 1999 Alan Bleasby ** @version 1.0 ** @modified Feb 25 ajb First version ** @@ ** ** This library is free software; you can redistribute it and/or ** modify it under the terms of the GNU Library General Public ** License as published by the Free Software Foundation; either ** version 2 of the License, or (at your option) any later version. ** ** This library is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ** Library General Public License for more details. ** ** You should have received a copy of the GNU Library General Public ** License along with this library; if not, write to the ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, ** Boston, MA 02111-1307, USA. ******************************************************************************/ #include "ajax.h" #include #include #include #define DNAMELTFILE "Edna.melt" #define RNAMELTFILE "Erna.melt" #define MAXMELTSAVE 10000 static AjOMelt meltTable[256]; static AjBool meltInitDone = AJFALSE; static ajint aj_melt_savesize = 0; static AjBool aj_melt_saveinit = 0; static AjBool aj_melt_saveshift = 1; static float meltProbScore(const AjPStr seq1, const AjPStr seq2, ajint len); /* @func ajMeltInit ********************************************************** ** ** Initialises melt entropies, enthalpies and energies. Different data ** files are read for DNA or RNA heteroduplex. Also sets optional flag ** for array saving of the above. ** ** @param [r] isdna [AjBool] true for DNA, false for RNA ** @param [r] savesize [ajint] Size of array to save, or zero if none ** @return [void] Number of energies to save ******************************************************************************/ void ajMeltInit(AjBool isdna, ajint savesize) { AjPFile mfptr; AjPStr mfname = NULL; AjPStr pair = NULL; AjPStr pair1 = NULL; AjPStr pair2 = NULL; AjPStr acgt = NULL; AjPStr line = NULL; ajint i; ajint j; ajint k; char *p; const char *q; float enthalpy; float entropy; float energy; ajuint iline = 0; AjBool got1; AjBool got2; aj_melt_savesize = savesize; aj_melt_saveinit = ajFalse; if(meltInitDone) return; mfname = ajStrNew(); if(isdna) ajStrAssignEmptyC(&mfname,DNAMELTFILE); else ajStrAssignEmptyC(&mfname,RNAMELTFILE); mfptr = ajDatafileNewInNameS(mfname); if(!mfptr) ajFatal("Entropy/enthalpy/energy file '%S' not found\n", mfname); pair1 = ajStrNew(); pair2 = ajStrNew(); line = ajStrNew(); ajStrAssignC(&pair,"AA"); ajStrAssignC(&acgt,"ACGT"); p = ajStrGetuniquePtr(&pair); q = ajStrGetPtr(acgt); for(i=0,k=0;i<4;++i) { *p = *(q+i); for(j=0;j<4;++j) { *(p+1) = *(q+j); meltTable[k++].pair = ajStrNewC(p); } } iline = 0; while(ajReadline(mfptr, &line)) { ajStrRemoveWhiteExcess(&line); iline++; p = ajStrGetuniquePtr(&line); if(*p=='#' || *p=='!' || !*p) continue; p = ajSysFuncStrtok(p," "); if(!p) ajDie("Bad melt data file '%F' line %d '%S'", mfptr, iline, line); ajStrAssignC(&pair1,p); p = ajSysFuncStrtok(NULL," "); if(!p) ajDie("Bad melt data file '%F' line %d '%S'", mfptr, iline, line); ajStrAssignC(&pair2,p); p = ajSysFuncStrtok(NULL," "); if(!p) ajDie("Bad melt data file '%F' line %d '%S'", mfptr, iline, line); if(sscanf(p,"%f",&enthalpy)!=1) ajDie("Bad melt data file '%F' line %d '%S'", mfptr, iline, line); p = ajSysFuncStrtok(NULL," "); if(sscanf(p,"%f",&entropy)!=1) ajDie("Bad melt data file '%F' line %d '%S'", mfptr, iline, line); p = ajSysFuncStrtok(NULL," "); if(sscanf(p,"%f",&energy)!=1) ajDie("Bad melt data file '%F' line %d '%S'", mfptr, iline, line); got1 = got2 = ajFalse; for(k=0;k<16;++k) if(!ajStrCmpS(meltTable[k].pair,pair1)) { meltTable[k].enthalpy = enthalpy; meltTable[k].entropy = entropy; meltTable[k].energy = energy; got1 = ajTrue; } for(k=0;k<16;++k) if(!ajStrCmpS(meltTable[k].pair,pair2)) { meltTable[k].enthalpy = enthalpy; meltTable[k].entropy = entropy; meltTable[k].energy = energy; got2 = ajTrue; } if(!got1 || !got2) ajDie("Bad melt data file '%F' line %d '%S' duplicate pair", mfptr, iline, line); } ajStrDel(&mfname); ajStrDel(&pair); ajStrDel(&pair1); ajStrDel(&pair2); ajStrDel(&acgt); ajStrDel(&line); ajFileClose(&mfptr); meltInitDone = ajTrue; return; } /* @funcstatic meltProbScore ************************************************** ** ** Gives a score for the probability of two sequences being the same. ** The sequences are the same length. ** ** Uses IUB ambiguity codes. The result is the sum of the probabilities ** at each position. ** ** @param [r] seq1 [const AjPStr] Pointer to a sequence string ** @param [r] seq2 [const AjPStr] Pointer to a another sequence ** @param [r] len [ajint] Length of sequences ** @return [float] Match probability ******************************************************************************/ static float meltProbScore(const AjPStr seq1, const AjPStr seq2, ajint len) { ajint mlen; float score; ajint i; ajint x; ajint y; const char *p; const char *q; mlen = (ajStrGetLen(seq1) < ajStrGetLen(seq2)) ? ajStrGetLen(seq1) : ajStrGetLen(seq2); if(len > 0) mlen = (mlen < len) ? mlen : len; score = 0.0; if(!mlen) return score; score = 1.0; p = ajStrGetPtr(seq1); q = ajStrGetPtr(seq2); for(i=0; i 0) ? ajTrue : ajFalse; ipos = 0; if(doShift) { if(!aj_melt_saveinit) { ipos = 0; for(i=0;i0.9) { if(doShift) { saveEnergy[ipos] += (ident * meltTable[j].energy); saveEntropy[ipos] += (ident * meltTable[j].entropy); saveEnthalpy[ipos] += (ident * meltTable[j].enthalpy); } else { energy += (ident * meltTable[j].energy); *entropy += (ident * meltTable[j].entropy); *enthalpy += (ident * meltTable[j].enthalpy); } } } if(doShift) { energy += saveEnergy[ipos]; *entropy += saveEntropy[ipos]; *enthalpy += saveEnthalpy[ipos]; } ++ipos; } ajStrDel(&line); return energy; } /* @func ajMeltTemp *********************************************************** ** ** Calculates melt temperature of DNA or RNA ** An optional shift is given for stepping along the sequence and loading ** up energy arrays. ** ** @param [r] strand [const AjPStr] Pointer to a sequence string ** @param [r] len [ajint] Length of sequence ** @param [r] shift [ajint] Stepping value ** @param [r] saltconc [float] mM salt concentration ** @param [r] DNAconc [float] nM DNA concentration ** @param [r] isDNA [AjBool] DNA or RNA ** ** @return [float] Melt temperature ******************************************************************************/ float ajMeltTemp(const AjPStr strand, ajint len, ajint shift, float saltconc, float DNAconc, AjBool isDNA) { double entropy; double enthalpy; double dTm; float Tm; static float sumEntropy; static float sumEnthalpy; float To; float R; double LogDNA; R = (float) 1.987; /* molar gas constant (cal/c * mol) */ LogDNA = R * (float)log((double)(DNAconc/4000000000.0)); /* nM */ To = (float) 273.15; ajMeltEnergy(strand, len, shift, isDNA, ajFalse, &sumEnthalpy, &sumEntropy); entropy = -10.8 - sumEntropy; entropy += (len-1) * (log10((double) (saltconc/1000.0))) * (float) 0.368; enthalpy = -sumEnthalpy; dTm = ((enthalpy*1000.0) / (entropy+LogDNA)) - To; Tm = (float) dTm; return Tm; } /* @obsolete ajTm ** @rename ajMeltTemp */ __deprecated float ajTm(const AjPStr strand, ajint len, ajint shift, float saltconc, float DNAconc, AjBool isDNA) { return ajMeltTemp(strand, len, shift, saltconc, DNAconc, isDNA); } /* @func ajMeltGC ************************************************************* ** ** Calculates GC fraction of a sequence allowing for ambiguity ** ** @param [r] strand [const AjPStr] Pointer to a sequence string ** @param [r] len [ajint] Length of sequence ** ** @return [float] GC fraction ******************************************************************************/ float ajMeltGC(const AjPStr strand, ajint len) { ajint t; ajint i; const char *p; double count; p=ajStrGetPtr(strand); count = 0.0; for(i=0;i.9) { (*saveentr)[i] += (ident * meltTable[j].entropy); (*saveenth)[i] += (ident * meltTable[j].enthalpy); (*saveener)[i] += (ident * meltTable[j].energy); } } } ajStrDel(&line); energy = *enthalpy = *entropy = 0.0; for(i=0;i