/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "pcrPrimerrPair.h" #include "assert.h" #include "fileDefines.h" #include #include "consedParameters.h" #include "bIsPCRPrimerPairOK.h" #include "contig.h" #include "abs.h" #include "whyIsPrimerNotAcceptableTypes.h" #include "szWhichStrand.h" bool pcrPrimerrPair :: bDoesThisPassChecks() { // this should come first since it may eliminate one of the // pcr primers and thus will eliminate many other pairs if ( bDoesThisFormProductWithinWithoutDesiredProduct() ) { ucWhatIsWrong_ = BAD_PRIMER_PAIR_FORMS_PRODUCT_WITHIN_OR_ENCLOSING_DESIRED_PRODUCT; return( false ); } if ( bDoesThisFormFalseProductSomewhereElse() ) { ucWhatIsWrong_ = BAD_PRIMER_PAIR_FORMS_PRODUCT_SOMEWHERE_ELSE; return( false ); } bool bTempDiffTooGreat; bool bPrimersStickToEachOther; if ( bIsPCRPrimerPairOK( &pPCR1_->primer_, &pPCR2_->primer_, bTempDiffTooGreat, bPrimersStickToEachOther ) ) { return( true ); } else { if ( bTempDiffTooGreat ) { ucWhatIsWrong_ = BAD_PRIMER_PAIR_TEMP_DIFF_TOO_GREAT; } else if ( bPrimersStickToEachOther ) { ucWhatIsWrong_ = BAD_PRIMER_PAIR_FORMS_PRIMER_DIMER; } return( false ); } } bool pcrPrimerrPair :: bDoesThisFormProductWithinWithoutDesiredProduct() { if ( pPCR1_->primer_.pContig_ != pPCR2_->primer_.pContig_ ) { return( bDoesThisFormProductWithinWithoutDesiredProductDifferentContigs() ); } else { return( bDoesThisFormProductWithinWithoutDesiredProductSameContig() ); } } bool pcrPrimerrPair :: bDoesThisFormProductWithinWithoutDesiredProductSameContig() { // there are 4 cases to check: // 1) primer 1 at its site with some opposite strand // primer 1 match closer than primer 2's site // 2) primer 1 at its site with some opposite strand // primer 2 match closer than primer 2's site // 3) primer 2 at its site with some opposite strand // primer 1 match closer than primer 1's site // 4) primer 2 at its site with some opposite strand // primer 2 match closer than primer 1's site // ----> <---- // primer 1's site primer 2's site // case 1: <---- another primer 1 match // case 2: <---- another primer 2 match // case 3: another primer 1 match // ----> // case 4: another primer 2 match // ----> // the product includes the primers, so it looks like this: // ----------------------------------- // ----> <---- // primer 1's site primer 2's site // if the 5' end of a misprimer site is off the product, don't // worry about it: // ----------------------------------- // <-------- misprime site // I think I disagree (March 2004): all that matters is that the 3' // end is on the product. In such case, I could extend. // if the 3' end of a misprime site is off the product, // don't worry about it since it can't make a product: // ----------------------------------- // -------> // so the only case we need to worry about is when both the 5' and 3' // ends of the misprime site are within the product: // ----------------------------------- // -----> // since this will preferentially make a shorter product: // --------------------------------- // currently, I am only testing for false sub-products when both // primers are in the same contig Contig* pContigOfProduct = pPCR1_->primer_.pContig_; // what is the position of the product? // If any region is to be amplified, the primers must point towards // each other. assert( pPCR1_->primer_.bTopStrandNotBottomStrand_ != pPCR2_->primer_.bTopStrandNotBottomStrand_ ); primerType* pTopStrandPrimer; primerType* pBottomStrandPrimer; if ( pPCR1_->primer_.bTopStrandNotBottomStrand_ ) { pTopStrandPrimer = &( pPCR1_->primer_ ); pBottomStrandPrimer = &( pPCR2_->primer_ ); } else { pTopStrandPrimer = &( pPCR2_->primer_ ); pBottomStrandPrimer = &( pPCR1_->primer_ ); } int nProductUnpaddedLeft = pTopStrandPrimer->nUnpaddedStart_; int nProductUnpaddedRight = pBottomStrandPrimer->nUnpaddedEnd_; int nBoundaryUnpaddedLeft = nProductUnpaddedLeft - pCP->nAutoPCRAmplifyMaybeRejectPrimerIfThisCloseToDesiredProduct_; int nBoundaryUnpaddedRight = nProductUnpaddedRight + pCP->nAutoPCRAmplifyMaybeRejectPrimerIfThisCloseToDesiredProduct_; // See if any false priming site has both its 5' and 3' ends within // the product. // check for any product that forms between the real pPCR2_ and some // false annealing site of pPCR1_ (that is smaller than the real product) for( int nPrimer1 = 0; nPrimer1 < pPCR1_->aFalseMatchLocations_.length(); ++nPrimer1 ) { if ( nProductUnpaddedLeft <= pPCR1_->aFalseMatchLocations_[ nPrimer1 ] && pPCR1_->aFalseMatchLocations_[ nPrimer1 ] <= nProductUnpaddedRight && pPCR1_->aFalseMatchContigsOfLocations_[ nPrimer1 ] == pContigOfProduct ) { // this will probably form a false product, but is not quite // sufficient. We have concluded the following: // -------------------- product we want // ^ 3' end of a match to primer1 // if it looks like this: // <-- then a false subproduct will be made // if it looks like this: // <------- // then a false subproduct will not be made. // The difference is where the 5' end of the false priming // site is. // DG 2004: I completely disagree, and Brent does also. // // In the situation below, I now believe that there will // be extension and we will get a new product // -------------------- product we want // <------- // ----------------------- // that will certainly be amplified in the other direction: // ----------------------- // ----> // so we will end up with a mixture of the 2 products. // Thus I think the code below should be removed and // should just return( true ) int n5PrimeEndOfFalseMatch; if ( pPCR1_->aFalseMatchStrandsOfLocations_[ nPrimer1 ] ) { n5PrimeEndOfFalseMatch = pPCR1_->aFalseMatchLocations_[ nPrimer1 ] - pPCR1_->primer_.nUnpaddedLength_ + 1; } else { n5PrimeEndOfFalseMatch = pPCR1_->aFalseMatchLocations_[ nPrimer1 ] + pPCR1_->primer_.nUnpaddedLength_ - 1; } // now let's see if the 5' end of the false match lies // within the desired product: if ( nProductUnpaddedLeft <= n5PrimeEndOfFalseMatch && n5PrimeEndOfFalseMatch <= nProductUnpaddedRight ) { // found a false match return( true ); } } // check for the following situations: // ---> --->----------------------<---- <--- // a c desired product d b // false match false match // if either "a" or "b" are too close to the desired product, // they will mess up the sequencing reaction so this combination // of primers should be disallowed. // In such a case, if a == c, we should blackball this primer. // If a == d however, might this primer be ok with another // primer? No, since it will always create a bad sequencing // reaction. if ( nBoundaryUnpaddedLeft <= pPCR1_->aFalseMatchLocations_[ nPrimer1 ] && pPCR1_->aFalseMatchLocations_[ nPrimer1 ] <= nBoundaryUnpaddedRight ) { if ( pPCR1_->aFalseMatchLocations_[ nPrimer1 ] < nProductUnpaddedLeft && pPCR1_->aFalseMatchStrandsOfLocations_[ nPrimer1 ] ) { // this is case "a" above. The primer should be eliminated // from all future pairs. pPCR1_->bEspeciallyBadFalseMatch_ = true; pPCR1_->cTypeOfEspeciallyBadFalseMatch_ = 'e'; fprintf( pAO, "eliminating pcr primer %s from %d to %d reason e false match %s at %d\n", szWhichStrand( pPCR1_->primer_.bTopStrandNotBottomStrand_ ), pPCR1_->primer_.nUnpaddedStart_, pPCR1_->primer_.nUnpaddedEnd_, szWhichStrand( pPCR1_->aFalseMatchStrandsOfLocations_[ nPrimer1 ] ), pPCR1_->aFalseMatchLocations_[ nPrimer1 ] ); return( true ); } if ( nProductUnpaddedRight < pPCR1_->aFalseMatchLocations_[ nPrimer1 ] && ! pPCR1_->aFalseMatchStrandsOfLocations_[ nPrimer1 ] ) { // this is case "b" above. The primer should be eliminated // from all primer pairs. pPCR1_->bEspeciallyBadFalseMatch_ = true; pPCR1_->cTypeOfEspeciallyBadFalseMatch_ = 'f'; fprintf( pAO, "eliminating pcr primer from %d to %d reason: f\n", pPCR1_->primer_.nUnpaddedStart_, pPCR1_->primer_.nUnpaddedEnd_ ); return( true ); } } } // for( int nPrimer1 = 0; nPrimer1 ... // check for any product that forms due to // some false annealing site of pPCR2_ // (that is smaller than the real product) for( int nPrimer2 = 0; nPrimer2 < pPCR2_->aFalseMatchLocations_.length(); ++nPrimer2 ) { if ( nProductUnpaddedLeft <= pPCR2_->aFalseMatchLocations_[ nPrimer2 ] && pPCR2_->aFalseMatchLocations_[ nPrimer2 ] <= nProductUnpaddedRight && pPCR2_->aFalseMatchContigsOfLocations_[ nPrimer2 ] == pContigOfProduct ) { // we have found that the 3' end of the false match lies within // the desired product. See if the 5' end of the false match // also lies within the desired product. int n5PrimeEndOfFalseMatch; if ( pPCR2_->aFalseMatchStrandsOfLocations_[ nPrimer2 ] ) { n5PrimeEndOfFalseMatch = pPCR2_->aFalseMatchLocations_[ nPrimer2 ] - pPCR2_->primer_.nUnpaddedLength_ + 1; } else { n5PrimeEndOfFalseMatch = pPCR2_->aFalseMatchLocations_[ nPrimer2 ] + pPCR2_->primer_.nUnpaddedLength_ - 1; } // let's see if the 5' end of the false match lies // within the desired product: if ( nProductUnpaddedLeft <= n5PrimeEndOfFalseMatch && n5PrimeEndOfFalseMatch <= nProductUnpaddedRight ) { return( true ); } } // check for the following situations: // ---> --->----------------------<---- <--- // a c desired product d b // false match false match // if either "a" or "b" are too close to the desired product, // they will mess up the sequencing reaction so this combination // of primers should be disallowed. // In such a case, if a == c, we should blackball this primer. // If a == d however, might this primer be ok with another // primer? No, since it will always create a bad sequencing // reaction. if ( nBoundaryUnpaddedLeft <= pPCR2_->aFalseMatchLocations_[ nPrimer2 ] && pPCR2_->aFalseMatchLocations_[ nPrimer2 ] <= nBoundaryUnpaddedRight ) { if ( pPCR2_->aFalseMatchLocations_[ nPrimer2 ] < nProductUnpaddedLeft && pPCR2_->aFalseMatchStrandsOfLocations_[ nPrimer2 ] ) { // this is case "a" above. The primer should be eliminated // from all primer pairs. pPCR2_->bEspeciallyBadFalseMatch_ = true; pPCR2_->cTypeOfEspeciallyBadFalseMatch_ = 'g'; fprintf( pAO, "eliminating pcr primer from %d to %d reason: g\n", pPCR2_->primer_.nUnpaddedStart_, pPCR2_->primer_.nUnpaddedEnd_ ); return( true ); } if ( nProductUnpaddedRight < pPCR2_->aFalseMatchLocations_[ nPrimer2 ] && ! pPCR2_->aFalseMatchStrandsOfLocations_[ nPrimer2 ] ) { // this is case "b" above. The primer should be eliminated // from all primer pairs. pPCR2_->bEspeciallyBadFalseMatch_ = true; pPCR2_->cTypeOfEspeciallyBadFalseMatch_ = 'h'; fprintf( pAO, "eliminating pcr primer from %d to %d reason: h\n", pPCR2_->primer_.nUnpaddedStart_, pPCR2_->primer_.nUnpaddedEnd_ ); return( true ); } } } // for( int nPrimer2 = 0; nPrimer2 < pPCR2_->aFalseMatchLocations_ return( false ); } bool pcrPrimerrPair :: bDoesThisFormFalseProductSomewhereElse() { // first let's check that there is no false product // from somewhere else in the assembly if ( bDoesThisFormFalseProductSomewhereElse2( pPCR1_, pPCR2_ ) ) return( true ); if ( bDoesThisFormFalseProductSomewhereElse2( pPCR1_, pPCR1_ ) ) return( true ); if ( bDoesThisFormFalseProductSomewhereElse2( pPCR2_, pPCR2_ ) ) return( true ); return( false ); } bool pcrPrimerrPair :: bDoesThisFormFalseProductSomewhereElse2( pcrPrimer* pPCRA, pcrPrimer* pPCRB ) { int nPrimerA; int nPrimerB; for( nPrimerA = 0; nPrimerA < pPCRA->aFalseMatchLocations_.length(); ++nPrimerA ) { for( nPrimerB = 0; nPrimerB < pPCRB->aFalseMatchLocations_.length(); ++nPrimerB ) { // the primers must be on opposite strands to give a false // product if ( pPCRA->aFalseMatchStrandsOfLocations_[ nPrimerA ] == pPCRB->aFalseMatchStrandsOfLocations_[ nPrimerB ] ) continue; // the primers must be in the same contigs to give a false // product if ( pPCRA->aFalseMatchContigsOfLocations_[ nPrimerA ] != pPCRB->aFalseMatchContigsOfLocations_[ nPrimerB ] ) continue; int nSizeOfProduct; if ( pPCRA->aFalseMatchStrandsOfLocations_[ nPrimerA ] ) { // -----> <----- // pPCRA pPCRB // The primers can actually overlap each other: // -----> // <----- // but they cannot extend beyond each other's 5' end: // -----> // <---- // The 5' end depends on the length of the primer. So // to be safe, eliminate the primer with the longest // length makes the not make a product. Thus if they // primer has a shorter length, it will also not make // a product. if ( ( pPCRA->aFalseMatchLocations_[ nPrimerA ] - pCP->nPrimersMaximumLengthOfAPrimerForPCR_ ) > pPCRB->aFalseMatchLocations_[ nPrimerB ] ) continue; nSizeOfProduct = pPCRB->aFalseMatchLocations_[ nPrimerB ] - pPCRA->aFalseMatchLocations_[ nPrimerA ] + 2 * pCP->nPrimersMinimumLengthOfAPrimerForPCR_; } else { // -----> <----- // pPCRB pPCRA if ( ( pPCRB->aFalseMatchLocations_[ nPrimerB ] - pCP->nPrimersMaximumLengthOfAPrimerForPCR_ ) > pPCRA->aFalseMatchLocations_[ nPrimerA ] ) continue; nSizeOfProduct = pPCRA->aFalseMatchLocations_[ nPrimerA ] - pPCRB->aFalseMatchLocations_[ nPrimerB ] + 2 * pCP->nPrimersMinimumLengthOfAPrimerForPCR_; } // let's look at the size of this false product if ( nSizeOfProduct <= pCP->nAutoPCRAmplifyFalseProductsOKIfLargerThanThis_ ) { fprintf( pAO, "primer pair %d-%d (%s) and %d-%d (%s) has false match at %s %d of %s and %s %d of %s product size %d\n", pPCRA->primer_.nUnpaddedStart_, pPCRA->primer_.nUnpaddedEnd_, ( pPCRA->primer_.bTopStrandNotBottomStrand_ ? "top strand" : "bottom strand" ), pPCRB->primer_.nUnpaddedStart_, pPCRB->primer_.nUnpaddedEnd_, ( pPCRB->primer_.bTopStrandNotBottomStrand_ ? "top strand" : "bottom strand" ), ( pPCRA->aFalseMatchStrandsOfLocations_[ nPrimerA ] ? "top strand" : "bottom strand" ), pPCRA->aFalseMatchLocations_[ nPrimerA ], pPCRA->aFalseMatchContigsOfLocations_[ nPrimerA ]->soGetName().data(), ( pPCRB->aFalseMatchStrandsOfLocations_[ nPrimerB ] ? "top strand" : "bottom strand" ), pPCRB->aFalseMatchLocations_[ nPrimerB ], pPCRB->aFalseMatchContigsOfLocations_[ nPrimerB ]->soGetName().data(), ABS( pPCRB->aFalseMatchLocations_[ nPrimerB ] - pPCRA->aFalseMatchLocations_[ nPrimerA ] ) ); return( true ); } } // for( nPrimerB = 0; } // for( nPrimerA = 0; return( false ); } void pcrPrimerrPair :: printThyself( const RWCString& soRegionID ) { // print the primers primerType* pLeftPrimer; primerType* pRightPrimer; if ( pPCR1_->primer_.bTopStrandNotBottomStrand_ && !pPCR2_->primer_.bTopStrandNotBottomStrand_ ) { pLeftPrimer = &pPCR1_->primer_; pRightPrimer = &pPCR2_->primer_; } else if ( !pPCR1_->primer_.bTopStrandNotBottomStrand_ && pPCR2_->primer_.bTopStrandNotBottomStrand_ ) { pLeftPrimer = &pPCR2_->primer_; pRightPrimer = &pPCR1_->primer_; } int nSizeOfProduct = pRightPrimer->nUnpaddedEnd_ - pLeftPrimer->nUnpaddedStart_ + 1; RWCString soDescription = "id: " + soRegionID; soDescription += " product_size: "; soDescription += RWCString( (long) nSizeOfProduct ); soDescription += "\n"; soDescription += RWCString( pLeftPrimer->szPrimer_, (size_t ) pLeftPrimer->nUnpaddedLength_ ); soDescription += " temp: "; soDescription += RWCString( (long) ( pLeftPrimer->dMeltingTemperature_ + .5 ) ); soDescription += " "; soDescription += RWCString( (long) pLeftPrimer->nUnpaddedStart_ ); soDescription += "-"; soDescription += RWCString( (long) pLeftPrimer->nUnpaddedEnd_ ); soDescription += " in "; soDescription += pLeftPrimer->pContig_->soGetName(); soDescription += "\n"; soDescription+= RWCString( pRightPrimer->szPrimer_, (size_t) pRightPrimer->nUnpaddedLength_ ); soDescription += " temp: "; soDescription += RWCString( (long) ( pRightPrimer->dMeltingTemperature_ + .5 ) ); soDescription += " "; soDescription += RWCString( (long) pRightPrimer->nUnpaddedStart_ ); soDescription += "-"; soDescription += RWCString( (long) pRightPrimer->nUnpaddedEnd_ ); soDescription += " in "; soDescription += pRightPrimer->pContig_->soGetName(); fprintf( pAO, "\nPRIMER_PAIR {\n" ); fprintf( pAO, "%s\n", soDescription.data() ); fprintf( pAO, "}\n\n" ); } bool pcrPrimerrPair :: bDoesThisFormProductWithinWithoutDesiredProductDifferentContigs() { // There are 2 regions to be concerned about. The region about the // 1st primer and the region about the 2nd primer. All false matches // need to be checked against these 2 regions. If a primer lies // within one of these regions, then check whether it is in between // the primers. If so, there is a problem, so the primer pair must // be thrown out. It is even possible, that all primer pairs // involving that one primer must be thrown out. for( int nRegion = 0; nRegion <= 1; ++nRegion ) { pcrPrimer* pPCRPrimer = ( nRegion == 0 ? pPCR1_ : pPCR2_ ); primerType& myPrimer = pPCRPrimer->primer_; bool bTopStrandNotBottomStrand = myPrimer.bTopStrandNotBottomStrand_; Contig* pContigOfRegion = myPrimer.pContig_; int nUnpaddedConsPosBoundary; if ( bTopStrandNotBottomStrand ) { // gap is to the right // (contig) // ----------------------------------- gap // | ----> // nUnpaddedConsPosBoundary // pcr primer nUnpaddedConsPosBoundary = myPrimer.nUnpaddedStart_ - pCP->nAutoPCRAmplifyMaybeRejectPrimerIfThisCloseToDesiredProduct_; } else { // gap ------------------------------------- (contig) // <---- | // pcr primer nUnpaddedConsPosBoundary nUnpaddedConsPosBoundary = myPrimer.nUnpaddedEnd_ + pCP->nAutoPCRAmplifyMaybeRejectPrimerIfThisCloseToDesiredProduct_; } // note that it doesn't matter so much which pcr primer is connected // with region1 and which with region2: At first we are just // looking to see if the false matches fall within one of these // 2 regions. If they do (given some more about their orientation), // the pcr primer pair is eliminated. Only when we start worrying // about whether to eliminate the primer in *every* pair does it // matter which pcr primer is associated with which region. for( int nFalseMatch = 0; nFalseMatch < ( pPCR1_->aFalseMatchLocations_.length() + pPCR2_->aFalseMatchLocations_.length() ); ++nFalseMatch ) { Contig* pContigOfFalseMatch; int nUnpaddedOfFalseMatch; bool bFalseMatchTopNotBottomStrand; bool bFalseMatchToPCRPrimer1NotPCRPrimer2; if ( nFalseMatch < pPCR1_->aFalseMatchLocations_.length() ) { pContigOfFalseMatch = pPCR1_->aFalseMatchContigsOfLocations_[ nFalseMatch ]; if ( pContigOfFalseMatch != pContigOfRegion ) continue; nUnpaddedOfFalseMatch = pPCR1_->aFalseMatchLocations_[ nFalseMatch ]; bFalseMatchTopNotBottomStrand = pPCR1_->aFalseMatchStrandsOfLocations_[ nFalseMatch ]; bFalseMatchToPCRPrimer1NotPCRPrimer2 = true; } else { int nFalseMatch2 = nFalseMatch - pPCR1_->aFalseMatchLocations_.length(); pContigOfFalseMatch = pPCR2_->aFalseMatchContigsOfLocations_[ nFalseMatch2 ]; if ( pContigOfFalseMatch != pContigOfRegion ) continue; nUnpaddedOfFalseMatch = pPCR2_->aFalseMatchLocations_[ nFalseMatch2 ]; bFalseMatchTopNotBottomStrand = pPCR2_->aFalseMatchStrandsOfLocations_[ nFalseMatch2 ]; bFalseMatchToPCRPrimer1NotPCRPrimer2 = false; } if ( bTopStrandNotBottomStrand && nUnpaddedConsPosBoundary <= nUnpaddedOfFalseMatch ) { // oh, oh. Case like this: // gap is to the right // (contig) // ----------------------------------- gap // | ----> // nUnpaddedConsPosBoundary // pcr primer // <--- or ---> (false match) if ( bFalseMatchTopNotBottomStrand ) { // ----------------------------------- gap // | ----> // ----> (false match) or // ----> (false match) // In either case, we don't want this primer to be // used in combination with *any* other primers. // It will create bad sequence (a mixed signal). // Note that this is true even if: // ----------------------------------- gap // | ----> primer1 // ----> (false match) // primer2 // gap--------------------------- // <---- primer2 // since in this case primer2 will always sequence // both ends of the product and give a mixed signal. pcrPrimer* pBadPCRPrimer = ( bFalseMatchToPCRPrimer1NotPCRPrimer2 ? pPCR1_ : pPCR2_ ); pBadPCRPrimer->bEspeciallyBadFalseMatch_ = true; pBadPCRPrimer->cTypeOfEspeciallyBadFalseMatch_ = 'b'; fprintf( pAO, "eliminating pcr primer from %d to %d %s contig %s reason: b false match at %s of %s %d\n", pBadPCRPrimer->primer_.nUnpaddedStart_, pBadPCRPrimer->primer_.nUnpaddedEnd_, szWhichStrand( pBadPCRPrimer->primer_.bTopStrandNotBottomStrand_ ), pBadPCRPrimer->primer_.pContig_->soGetName().data(), szWhichStrand( bFalseMatchTopNotBottomStrand ), pContigOfFalseMatch->soGetName().data(), nUnpaddedOfFalseMatch ); return( true ); } else { // ----------------------------------- gap // | ----> // <---- (false match) or // a <---- (false match) // b // If the case is "a", then no problem. If the case // if "b", this primer pair won't work. If in the "b" // case, the false match is pointing at the same primer, // then we need to eliminate this primer in all other // reactions as well. if ( nUnpaddedOfFalseMatch < myPrimer.nUnpaddedStart_ ) { continue; } else { if ( ( nRegion == 0 && bFalseMatchToPCRPrimer1NotPCRPrimer2 ) || ( nRegion == 1 && !bFalseMatchToPCRPrimer1NotPCRPrimer2 ) ) { // this case: // ----------------------------------- gap // | ----> <---- (false match) // (true match) b // In such a case, we want to blacklist the // primer--not just the primer pair. pcrPrimer* pBadPCRPrimer = ( bFalseMatchToPCRPrimer1NotPCRPrimer2 ? pPCR1_ : pPCR2_ ); pBadPCRPrimer->bEspeciallyBadFalseMatch_ = true; pBadPCRPrimer->cTypeOfEspeciallyBadFalseMatch_ = 'a'; fprintf( pAO, "eliminating pcr primer from %d to %d %s of %s reason: a false match at %s of %s %d\n", pBadPCRPrimer->primer_.nUnpaddedStart_, pBadPCRPrimer->primer_.nUnpaddedEnd_, szWhichStrand( pBadPCRPrimer->primer_.bTopStrandNotBottomStrand_ ), pBadPCRPrimer->primer_.pContig_->soGetName().data(), szWhichStrand( bFalseMatchTopNotBottomStrand ), pContigOfFalseMatch->soGetName().data(), nUnpaddedOfFalseMatch ); // The alternative case is: // ------------------- gap gap----------- // <--- false match <---true match // In this case, there is not necessarily a // problem with the primer, although most // primer pairs won't work. } else { // this case: // ----------------gap ------------------- // contig1 contig2 // ---> <--- <---- primer 1 // false match to primer 1 // primer2 // In such a case, just eliminate the primer pair fprintf( pAO, "eliminating primer pair %s of %s %d-%d and %s of %s %d-%d due to false match %s of %s at %d\n", szWhichStrand( pPCR1_->primer_.bTopStrandNotBottomStrand_ ), pPCR1_->primer_.pContig_->soGetName().data(), pPCR1_->primer_.nUnpaddedStart_, pPCR1_->primer_.nUnpaddedEnd_, szWhichStrand( pPCR2_->primer_.bTopStrandNotBottomStrand_ ), pPCR2_->primer_.pContig_->soGetName().data(), pPCR2_->primer_.nUnpaddedStart_, pPCR2_->primer_.nUnpaddedEnd_, szWhichStrand( bFalseMatchTopNotBottomStrand ), pContigOfFalseMatch->soGetName().data(), nUnpaddedOfFalseMatch ); } return( true ); } } // if ( bFalseMatchTopNotBottomStrand ) { else } // if ( bTopStrandNotBottomStrand ... else if ( !bTopStrandNotBottomStrand && ( nUnpaddedOfFalseMatch < nUnpaddedConsPosBoundary ) ) { // case like this: // gap---------------------------------(contig) // <----primer | (boundary) // <---> <---> // false match somewhere in here if ( !bFalseMatchTopNotBottomStrand ) { // case like this: // gap------------------------------------(contig) // <-----primer1 // <--- <---- // a b // false match in regions either a or b // If false match is to primer1, then never want this // primer. If false match is to primer on other side // of gap, then never want this primer either. pcrPrimer* pBadPCRPrimer = ( bFalseMatchToPCRPrimer1NotPCRPrimer2 ? pPCR1_ : pPCR2_ ); pBadPCRPrimer->bEspeciallyBadFalseMatch_ = true; pBadPCRPrimer->cTypeOfEspeciallyBadFalseMatch_ = 'c'; fprintf( pAO, "eliminating pcr primer %s of %s from %d to %d reason: c false match %s of %s at %d\n", szWhichStrand( pBadPCRPrimer->primer_.bTopStrandNotBottomStrand_ ), pBadPCRPrimer->primer_.pContig_->soGetName().data(), pBadPCRPrimer->primer_.nUnpaddedStart_, pBadPCRPrimer->primer_.nUnpaddedEnd_, szWhichStrand( bFalseMatchTopNotBottomStrand ), pContigOfFalseMatch->soGetName().data(), nUnpaddedOfFalseMatch ); return( true ); } else { // case like this: // gap------------------------------------(contig) // <-----primer1 // ---> ----> // a b // false match in regions either a or b // If the case is "b", then no problem. if ( myPrimer.nUnpaddedEnd_ < nUnpaddedOfFalseMatch ) { continue; } else { // this is case "a" (above). This primer pair will // not do. Shall I also eliminate the primer // for all future pairs? That depends if the // primer "a" and "primer1" are the same primer. if ( ( nRegion == 0 && bFalseMatchToPCRPrimer1NotPCRPrimer2 ) || ( nRegion == 1 && !bFalseMatchToPCRPrimer1NotPCRPrimer2 ) ) { pcrPrimer* pBadPCRPrimer = ( bFalseMatchToPCRPrimer1NotPCRPrimer2 ? pPCR1_ : pPCR2_ ); pBadPCRPrimer->bEspeciallyBadFalseMatch_ = true; pBadPCRPrimer->cTypeOfEspeciallyBadFalseMatch_ = 'd'; fprintf( pAO, "eliminating pcr primer from %d to %d %s of %s reason: d false match at %d %s of %s\n", pBadPCRPrimer->primer_.nUnpaddedStart_, pBadPCRPrimer->primer_.nUnpaddedEnd_, szWhichStrand( pBadPCRPrimer->primer_.bTopStrandNotBottomStrand_ ), pBadPCRPrimer->primer_.pContig_->soGetName().data(), nUnpaddedOfFalseMatch, szWhichStrand( bFalseMatchTopNotBottomStrand ), pContigOfFalseMatch->soGetName().data() ); } return( true ); } } // if ( !bFalseMatchTopNotBottomStrand ) { else } // else if ( !bTopStrandNotBottomStrand && ... } // for( int nFalseMatch = 0; nFalseMatch < ( } // for( int nRegion ... // if made it here, all false matches must be fine return( false ); }