/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "primerType.h" #include "matchScores.h" #include "assert.h" #include "bIntersect.h" #include "consedParameters.h" static int nScoreOfPairOfBases_[ 26 ][ 26 ]; static bool bScoreOfPairOfBasesIsSet = false; static void setScoreOfPairOfBases() { // set it to zero initially for( int nBase1 = 0; nBase1 < 26; ++nBase1 ) { for( int nBase2 = 0; nBase2 < 26; ++nBase2 ) { nScoreOfPairOfBases_[ nBase1 ][ nBase2 ] = 0; } } int nA = 'a' - 'a'; int nC = 'c' - 'a'; int nG = 'g' - 'a'; int nT = 't' - 'a'; nScoreOfPairOfBases_[ nA ][ nA ] = nMismatch; nScoreOfPairOfBases_[ nA ][ nC ] = nMismatch; nScoreOfPairOfBases_[ nA ][ nG ] = nMismatch; nScoreOfPairOfBases_[ nA ][ nT ] = nATMatch; nScoreOfPairOfBases_[ nC ][ nA ] = nMismatch; nScoreOfPairOfBases_[ nC ][ nC ] = nMismatch; nScoreOfPairOfBases_[ nC ][ nG ] = nGCMatch; nScoreOfPairOfBases_[ nC ][ nT ] = nMismatch; nScoreOfPairOfBases_[ nG ][ nA ] = nMismatch; nScoreOfPairOfBases_[ nG ][ nC ] = nGCMatch; nScoreOfPairOfBases_[ nG ][ nG ] = nMismatch; nScoreOfPairOfBases_[ nG ][ nT ] = nMismatch; nScoreOfPairOfBases_[ nT ][ nA ] = nATMatch; nScoreOfPairOfBases_[ nT ][ nC ] = nMismatch; nScoreOfPairOfBases_[ nT ][ nG ] = nMismatch; nScoreOfPairOfBases_[ nT ][ nT ] = nMismatch; } bool bDoPCRPrimersStickToEachOther( primerType* pFirstPCRPrimer, primerType* pSecondPCRPrimer ) { if ( !bScoreOfPairOfBasesIsSet ) { bScoreOfPairOfBasesIsSet = true; setScoreOfPairOfBases(); } // -----------> 3' (first primer) // 3' <---- 5' (second primer ) // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> // <---- // -----------> 3' (first primer) // 3' <---- 5' (second primer ) // ^ // 0 // ^ // pSecondPCRPrimer->nUnpaddedLength_ - 1 // ^ // pSecondPCRPrimer->nUnpaddedLength_ - 1 + // pFirstPCRPrimer->nUnpaddedLength_ - 1 // Notice that the 3' end of the first primer moves from // position 0 in the coordinates of the reversed 2nd primer to position int nMaxScore = -66666; int nMax3PrimeEndOfFirstPCRPrimer = -2; int nMaxSubOligoEnd = -2; bool bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer; int nSecondPCRPrimerRight = pSecondPCRPrimer->nUnpaddedLength_ - 1; int nSecondPCRPrimerLeft = 0; // nFirstPCRPrimerRight and nFirstPCRPrimerLeft are in coordinates // of the 2nd PCR primer. That is, // // xxxxxxxxxx // yyyyyyy then nFirstPCRPrimerRight is 0 // and nFirstPCRPrimerLeft -9 // xxxxxxxxxx // yyyyyyy then nFirstPCRPrimerRight is 1 // and nFirstPCRPrimerLeft -8 // xxxxxxxxxx // yyyyyyy then nFirstPCRPrimerRight is 2 // and nFirstPCRPrimerLeft -7 for( int nFirstPCRPrimerRight = 0; nFirstPCRPrimerRight <= pSecondPCRPrimer->nUnpaddedLength_ - 1 + pFirstPCRPrimer->nUnpaddedLength_ - 1; ++nFirstPCRPrimerRight ) { int nFirstPCRPrimerLeft = nFirstPCRPrimerRight - pFirstPCRPrimer->nUnpaddedLength_ + 1; // The primers are like this: // ----------> // <-------- // with the 3' ends exposed // or like this: // ------------> // <------------- // with the 3' ends covered. // Our score system is an indication of how much the priming // efficiency is inhibited, and the greater the score, the // worse things are. // Thus we need to separately calculate the inhibition for // each primer. First we do it for the 2nd primer and then // for the 1st primer. // For the second primer, we calculate the score starting at // the 3' end of the second primer--this is the left end. // If the 3' end is exposed (not covered by the first primer), // that is good--the score is reduced by a mismatch score for // every 3' end base that is exposed. int nScoreFrom3PrimeEndOfSecondPrimer = 0; if ( nFirstPCRPrimerLeft > 0 ) nScoreFrom3PrimeEndOfSecondPrimer = nFirstPCRPrimerLeft * nMismatch; int nOverlapRegionLeft; int nOverlapRegionRight; assert( bIntersect( nFirstPCRPrimerLeft, nFirstPCRPrimerRight, nSecondPCRPrimerLeft, nSecondPCRPrimerRight, nOverlapRegionLeft, nOverlapRegionRight ) ); // nFirstPCRPrimerLeft, // nFirstPCRPrimerRight, // nSecondPCRPrimerLeft, // nSecondPCRPrimerRight, // are in the 0-based coordinates of the 2nd pcr primer, hence // nOverlapRegionLeft, // nOverlapRegionRight // are, too. // Hence nSubOligoEnd are in the coordinate system of the // 2nd PCR primer. int nSubOligoEnd; for( nSubOligoEnd = nOverlapRegionLeft; nSubOligoEnd <= nOverlapRegionRight; ++nSubOligoEnd ) { // convert nSubOligoEnd to the coordinate system of the // 1st primer by subtracting the offset nFirstPCRPrimerLeft char cFirstPrimerBase = pFirstPCRPrimer->szPrimer_[ nSubOligoEnd - nFirstPCRPrimerLeft ]; char cSecondPrimerBase = pSecondPCRPrimer->szPrimer_[ pSecondPCRPrimer->nUnpaddedLength_ - 1 - nSubOligoEnd ]; nScoreFrom3PrimeEndOfSecondPrimer += nScoreOfPairOfBases_[ cSecondPrimerBase - 'a' ][ cFirstPrimerBase - 'a' ]; if ( nScoreFrom3PrimeEndOfSecondPrimer > nMaxScore ) { nMaxScore = nScoreFrom3PrimeEndOfSecondPrimer; nMax3PrimeEndOfFirstPCRPrimer = nFirstPCRPrimerRight; nMaxSubOligoEnd = nSubOligoEnd; bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer = false; } } // now consider the same pair in the same orientations, but consider // suboligos from the 3' end of the 1st primer, hence suboligos // from the right of the first primer // -----------> (first PCR primer) // <---- (second PCR primer ) int nScoreFrom3PrimeEndOfFirstPrimer = 0; if ( nFirstPCRPrimerRight > nSecondPCRPrimerRight ) nScoreFrom3PrimeEndOfFirstPrimer = ( nFirstPCRPrimerRight - nSecondPCRPrimerRight ) * nMismatch; for( nSubOligoEnd = nOverlapRegionRight; nSubOligoEnd >= nOverlapRegionLeft; --nSubOligoEnd ) { char cFirstPrimerBase = pFirstPCRPrimer->szPrimer_[ nSubOligoEnd - nFirstPCRPrimerLeft ]; char cSecondPrimerBase = pSecondPCRPrimer->szPrimer_[ pSecondPCRPrimer->nUnpaddedLength_ - 1 - nSubOligoEnd ]; nScoreFrom3PrimeEndOfFirstPrimer += nScoreOfPairOfBases_[ cSecondPrimerBase - 'a' ][ cFirstPrimerBase - 'a' ]; if ( nScoreFrom3PrimeEndOfFirstPrimer > nMaxScore ) { nMaxScore = nScoreFrom3PrimeEndOfFirstPrimer; nMax3PrimeEndOfFirstPCRPrimer = nFirstPCRPrimerRight; nMaxSubOligoEnd = nSubOligoEnd; bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer = true; } } } // for( int nFirstPCRPrimerRight = 0; ... // int nMaxFirstPCRPrimerLeft = nMax3PrimeEndOfFirstPCRPrimer - // pFirstPCRPrimer->nUnpaddedLength_ + 1; // int nBias = MIN( nMaxFirstPCRPrimerLeft, nSecondPCRPrimerLeft ); // // print 1st PCR primer // int nNumberOfBlanks = nMaxFirstPCRPrimerLeft - nBias; // for( int n = 0; n < nNumberOfBlanks; ++n ) // printf( " " ); // for( n = 0; n < pFirstPCRPrimer->nUnpaddedLength_; ++n ) // printf( "%c", pFirstPCRPrimer->szPrimer_[n] ); // printf( "\n" ); // nNumberOfBlanks = nSecondPCRPrimerLeft - nBias; // for( n = 0; n < nNumberOfBlanks; ++n ) // printf( " " ); // for( n = 0; n < pSecondPCRPrimer->nUnpaddedLength_; ++n ) // printf( "%c", // pSecondPCRPrimer->szPrimer_[ pSecondPCRPrimer->nUnpaddedLength_ - 1 - n ] ); // printf( "\n" ); // nNumberOfBlanks = nMaxSubOligoEnd - nBias; // for( n = 0; n < nNumberOfBlanks; ++n ) // printf( " " ); // printf( "^\n" ); // printf( "maxscore so far = %d with nMax3PrimeEndOfFirstPCRPrimer = %d and nMaxSubOligoEnd = %d %s\n", // nMaxScore, // nMax3PrimeEndOfFirstPCRPrimer, // nMaxSubOligoEnd, // ( bMaxIsFrom3PrimeEndOfFirstNotSecondPrimer ? "from 3' end of first primer" : // "from 3' end of second primer" ) ); // if ( nMaxScore > pCP->nPrimersMaxPrimerDimerScoreForPCR_ ) { // printf( "%d ", nMaxScore ); // } if ( nMaxScore > pCP->nPrimersMaxPrimerDimerScoreForPCR_ ) return( true ); else return( false ); }