/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include #include "regionToAmplify.h" #include "regionOfContig.h" #include "assembly.h" #include "consed.h" #include "rwtvalorderedvector.h" #include "consedParameters.h" #include "createPrimerCandidate.h" #include "setPrimerFalseMatchScoreArray.h" #include "pcrPrimerrPair.h" #include "fileDefines.h" #include "createPrimerCandidatesWith3PrimeEnd.h" #include "whichPrimersHaveAcceptableMeltingTemp.h" #include "whichPrimersHaveAcceptableSelfMatch.h" #include "checkPrimersForMononucleotideRepeats.h" #include "checkPrimersForACGT.h" #include "whyIsPrimerNotAcceptableTypes.h" #include "exclusive_or.h" #include "oligoTag.h" #include "dGetPrimerMeltingTemperature.h" regionToAmplify :: regionToAmplify() { aPCRPrimersRegion1_.soName_ = "regionToAmplify :: aPCRPrimersRegion1_"; aPCRPrimersRegion2_.soName_ = "regionToAmplify :: aPCRPrimersRegion2_"; nProductSizeOfBestPair_ = -1; bFoundAPair_ = false; } void regionToAmplify :: findPCRPrimerPair() { fprintf( pAO, "looking for pcr primer pair for transcript %s from regions %d to %d of %s (%s) and %d to %d of %s (%s)\n", soRegionID_.data(), region1_.nUnpaddedLeft_, region1_.nUnpaddedRight_, region1_.pContig_->soGetName().data(), ( bPrimersInRegion1PointRightNotLeft_ ? "->" : "<-" ), region2_.nUnpaddedLeft_, region2_.nUnpaddedRight_, region2_.pContig_->soGetName().data(), ( bPrimersInRegion2PointRightNotLeft_ ? "->" : "<-" ) ); if ( pCP->bAutoPCRAmplifyMakePrimerOutOfFirstRegion_ ) { makeOnePrimerOutOfRegion( region1_, bPrimersInRegion1PointRightNotLeft_, true ); // bRegion1NotRegion2 } else { // normal case time_t time1 = time( NULL ); cerr << "working on " << soRegionID_ << endl; cerr << "checkRegionForAcceptablePrimersWithNotTooManyFalseMatches 1..."; cerr.flush(); checkRegionForAcceptablePrimersWithNotTooManyFalseMatches( region1_, bPrimersInRegion1PointRightNotLeft_, true ); // bRegion1NotRegion2 fflush( pAO ); time_t time2 = time( NULL ); double dTimeToCheck1 = difftime( time2, time1 ); cerr << "done in " << dTimeToCheck1 << " seconds" << endl; } time_t time25 = time( NULL ); cerr << "checkRegionForAcceptablePrimersWithNotTooManyFalseMatches 2..." ; cerr.flush(); checkRegionForAcceptablePrimersWithNotTooManyFalseMatches( region2_, bPrimersInRegion2PointRightNotLeft_, false ); // bRegion1NotRegion2 fflush( pAO ); time_t time3 = time( NULL ); double dTimeToCheck2 = difftime( time3, time25 ); cerr << "done in " << dTimeToCheck2 << " seconds" << endl; cerr << "finding best acceptable primer pair..."; cerr.flush(); pcrPrimerrPair* pBestPrimerPair = NULL; findBestAcceptablePCRPrimerPair( pBestPrimerPair ); time_t time4 = time( NULL ); double dTimeToSort = difftime( time4, time3 ); cerr << "done in " << dTimeToSort << " seconds" << endl; cerr.flush(); if ( pBestPrimerPair ) { pBestPrimerPair->printThyself( soRegionID_ ); } else { fprintf( pAO, "found no acceptable pcr primer pair\n" ); } fflush( pAO ); } void regionToAmplify :: checkRegionForAcceptablePrimersWithNotTooManyFalseMatches( const regionOfContig& region, const bool bPrimersPointRightNotLeft, const bool bRegion1NotRegion2 ) { int nPrimersStickToSequenceInFiles = 0; int nPrimersStickToSomewhereElse = 0; int nPrimersStickToSelf = 0; int nPrimersMeltingTemperatureTooLow = 0; int nPrimersMeltingTemperatureTooHigh = 0; int nPrimersTooLowQuality = 0; int nPrimersWithMononucleotideRepeat = 0; int nPrimersWithNoTemplate = 0; int nPrimersWithNotEnoughTemplates = 0; int nPrimersInSingleSubcloneRegion = 0; int nPrimersWhereHighQualityDiscrepancies = 0; int nPrimersWhereUnalignedHighQualityRegion = 0; int nPrimersWithNotAllACGT = 0; int nPrimersWithTooManyFalseMatches = 0; int nPrimersWithMeltingTemperatureTooLow = 0; int nPrimersWithMeltingTemperatureTooHigh = 0; int nPrimersProtrudingOutOfRegion = 0; int nPrimersWithOtherProblem = 0; int nPrimersWithNoProblems = 0; int nTotalPrimersTried = 0; int nLeftMostBaseLeft = region.nUnpaddedLeft_; int nLeftMostBaseRight = region.nUnpaddedRight_ - pCP->nPrimersMinimumLengthOfAPrimerForPCR_ + 1; for( int nLeftMostBase = nLeftMostBaseLeft; nLeftMostBase <= nLeftMostBaseRight; ++nLeftMostBase ) { primerType primer; // strikes me as strange: primer is created just to make a 3' end // for bIsThereAnAcceptablePrimerWithThis3PrimeEnd and then it // is thrown away (DG, March 2004) createPrimerCandidate( &primer, nLeftMostBase, pCP->nPrimersMinimumLengthOfAPrimerForPCR_, bPrimersPointRightNotLeft, region.pContig_, nLeftMostBase ); // nUnpaddedPosOfCursor ++nTotalPrimersTried; int nWhyIsPrimerNotAcceptable = -1; primerType* pShortestAcceptablePrimer = NULL; if ( bIsThereAnAcceptablePrimerWithThis3PrimeEnd( &primer, pShortestAcceptablePrimer, nWhyIsPrimerNotAcceptable ) ) { // the shortest primer might still be so long that it // sticks out beyond region.nUnpaddedRight_ // check it. If it isn't within the range, a longer // primer will certainly also not be within the range. if ( pShortestAcceptablePrimer->nUnpaddedStart_ < region.nUnpaddedLeft_ || region.nUnpaddedRight_ < pShortestAcceptablePrimer->nUnpaddedEnd_ ) { // can't use any primer that has this 3' end. // I don't know all of the reasons this primer failed, // but clearly one of the reasons is a shorter primer // with the same 3' end to fit into the region would // have the wrong melting temperature. Or would it? // Well, we really don't know. So let's create another category // of failures: out_of_region ++nPrimersProtrudingOutOfRegion; continue; } pcrPrimer* pPCRPrimer = new pcrPrimer( pShortestAcceptablePrimer ); if ( bRegion1NotRegion2 ) aPCRPrimersRegion1_.insert( pPCRPrimer ); else aPCRPrimersRegion2_.insert( pPCRPrimer ); ++nPrimersWithNoProblems; } else { switch( nWhyIsPrimerNotAcceptable ) { case BAD_PRIMER_STICKS_TO_SELF: ++nPrimersStickToSelf; break; case BAD_PRIMER_TOO_LOW_QUALITY: ++nPrimersTooLowQuality; break; case BAD_PRIMER_MONONUCLEOTIDE_REPEAT: ++nPrimersWithMononucleotideRepeat; break; case BAD_PRIMER_IN_SINGLE_SUBCLONE_REGION: ++nPrimersInSingleSubcloneRegion; break; case BAD_PRIMER_WHERE_HIGH_QUALITY_DISCREPANCIES: ++nPrimersWhereHighQualityDiscrepancies; break; case BAD_PRIMER_WHERE_UNALIGNED_HIGH_QUALITY_REGION: ++nPrimersWhereUnalignedHighQualityRegion; break; case BAD_PRIMER_NOT_JUST_ACGT: ++nPrimersWithNotAllACGT; break; case BAD_PRIMER_MELTING_TEMPERATURE_TOO_LOW: ++nPrimersWithMeltingTemperatureTooLow; break; case BAD_PRIMER_MELTING_TEMPERATURE_TOO_HIGH: ++nPrimersWithMeltingTemperatureTooHigh; break; default: ++nPrimersWithOtherProblem; break; } } } if ( bRegion1NotRegion2 ) { fprintf( pAO, "%d acceptable primers for region 1\n", aPCRPrimersRegion1_.length() ); } else { fprintf( pAO, "%d acceptable primers for region 2\n", aPCRPrimersRegion2_.length() ); } if ( nTotalPrimersTried != 0 ) { fprintf( pAO, "acceptable primers: %6.2f%%\n", (float) nPrimersWithNoProblems * 100 / nTotalPrimersTried ); fprintf( pAO, "primers with too many false matches: %6.2f%%\n", (float) nPrimersWithTooManyFalseMatches * 100 / nTotalPrimersTried ); fprintf( pAO, "primers anneal to self: %6.2f%%\n", (float) nPrimersStickToSelf * 100 / nTotalPrimersTried ); fprintf( pAO, "primers too low quality: %6.2f%%\n", (float) nPrimersTooLowQuality * 100 / nTotalPrimersTried ); fprintf( pAO, "primers with mononucleotide repeats: %6.2f%%\n", (float) nPrimersWithMononucleotideRepeat * 100 / nTotalPrimersTried ); fprintf( pAO, "primers in single subclone region: %6.2f%%\n", (float) nPrimersInSingleSubcloneRegion * 100 / nTotalPrimersTried ); fprintf( pAO, "primers where high quality discrepancies: %6.2f%%\n", (float) nPrimersWhereHighQualityDiscrepancies * 100 / nTotalPrimersTried ); fprintf( pAO, "primers where high quality unaligned region: %6.2f%%\n", (float) nPrimersWhereUnalignedHighQualityRegion * 100 / nTotalPrimersTried ); fprintf( pAO, "primers with not all acgt: %6.2f%%\n", (float) nPrimersWithNotAllACGT * 100 / nTotalPrimersTried ); fprintf( pAO, "primers melting temp too low: %6.2f%%\n", (float) nPrimersWithMeltingTemperatureTooLow * 100 / nTotalPrimersTried ); fprintf( pAO, "primers melting temp too high: %6.2f%%\n", (float) nPrimersWithMeltingTemperatureTooHigh * 100 / nTotalPrimersTried ); fprintf( pAO, "primer protruding beyond region: %6.2f%%\n", (float) nPrimersProtrudingOutOfRegion * 100 / nTotalPrimersTried ); fprintf( pAO, "primers with some other problem: %6.2f%%\n", (float) nPrimersWithOtherProblem * 100 / nTotalPrimersTried ); } fprintf( pAO, "total primers tried: %d by category: %d\n", nTotalPrimersTried, nPrimersWithNoProblems + nPrimersWithTooManyFalseMatches + nPrimersStickToSelf + nPrimersTooLowQuality + nPrimersWithMononucleotideRepeat + nPrimersInSingleSubcloneRegion + nPrimersWhereHighQualityDiscrepancies + nPrimersWhereUnalignedHighQualityRegion + nPrimersWithNotAllACGT + nPrimersWithMeltingTemperatureTooLow + nPrimersWithMeltingTemperatureTooHigh + nPrimersProtrudingOutOfRegion + nPrimersWithOtherProblem ); } bool regionToAmplify :: bIsThereAnAcceptablePrimerWithThis3PrimeEnd( primerType* pSamplePrimer, primerType*& pShortestAcceptablePrimer, int& nWhyIsPrimerNotAcceptable ) { primerType* pPrimerArray; int nNumberOfPrimers; bool bSomeAcceptablePrimers; int n3PrimeEnd; if ( pSamplePrimer->bTopStrandNotBottomStrand_ ) n3PrimeEnd = pSamplePrimer->nUnpaddedEnd_; else n3PrimeEnd = pSamplePrimer->nUnpaddedStart_; createPrimerCandidatesWith3PrimeEnd( pSamplePrimer->pContig_, n3PrimeEnd, pSamplePrimer->bTopStrandNotBottomStrand_, pPrimerArray, nNumberOfPrimers, bSomeAcceptablePrimers ); if ( !bSomeAcceptablePrimers ) { nWhyIsPrimerNotAcceptable = BAD_PRIMER_TOO_LOW_QUALITY; // due to poor quality return( false ); } whichPrimersHaveAcceptableMeltingTemp( pPrimerArray, nNumberOfPrimers, bSomeAcceptablePrimers ); if ( !bSomeAcceptablePrimers ) { nWhyIsPrimerNotAcceptable = pPrimerArray->nWhyIsPrimerNotAcceptable_; return( false ); } whichPrimersHaveAcceptableSelfMatch( pPrimerArray, nNumberOfPrimers, bSomeAcceptablePrimers ); if ( !bSomeAcceptablePrimers ) { nWhyIsPrimerNotAcceptable = BAD_PRIMER_STICKS_TO_SELF; return( false ); } int nNumberOfAcceptablePrimers; checkPrimersForMononucleotideRepeats( pPrimerArray, nNumberOfPrimers, nNumberOfAcceptablePrimers ); if ( nNumberOfAcceptablePrimers == 0 ) { nWhyIsPrimerNotAcceptable = BAD_PRIMER_MONONUCLEOTIDE_REPEAT; return( false) ; } checkPrimersForACGT( pPrimerArray, nNumberOfPrimers, nNumberOfAcceptablePrimers ); if ( nNumberOfAcceptablePrimers == 0 ) { nWhyIsPrimerNotAcceptable = BAD_PRIMER_NOT_JUST_ACGT; return( false ); } pShortestAcceptablePrimer = NULL; for( int nPrimer = 0; nPrimer < nNumberOfPrimers; ++nPrimer ) { primerType* pPrimer = pPrimerArray + nPrimer; if ( pPrimer->bAcceptable_ ) { if ( !pShortestAcceptablePrimer ) pShortestAcceptablePrimer = pPrimer; else { if ( pPrimer->nUnpaddedLength_ < pShortestAcceptablePrimer->nUnpaddedLength_ ) pShortestAcceptablePrimer = pPrimer; } } } // createPrimerCandidatesWith3PrimeEnd will reuse the memory it // creates primers in. Thus we must copy *pShortestAcceptabler // somewhere before createPrimerCandidatesWith3PrimeEnd is called again assert( pShortestAcceptablePrimer ); return( true ); } static int nComparePCRPrimerPairs( const pcrPrimerrPair* pPCRPrimerPair1, const pcrPrimerrPair* pPCRPrimerPair2 ) { if ( pPCRPrimerPair1->nProductSize_ < pPCRPrimerPair2->nProductSize_ ) return( 1 ); else if ( pPCRPrimerPair1->nProductSize_ > pPCRPrimerPair2->nProductSize_ ) return( -1 ); else return( 0 ); } void regionToAmplify :: findBestAcceptablePCRPrimerPair( pcrPrimerrPair*& pBestPCRPrimerPair) { RWTValOrderedVector aPairs( (size_t) ( aPCRPrimersRegion1_.length() * aPCRPrimersRegion2_.length() ) ); time_t time1 = time( NULL ); // create huge list of primer pairs for( int nPrimer1 = 0; nPrimer1 < aPCRPrimersRegion1_.length(); ++nPrimer1 ) { pcrPrimer* pPrimer1 = aPCRPrimersRegion1_[ nPrimer1 ]; for( int nPrimer2 = 0; nPrimer2 < aPCRPrimersRegion2_.length(); ++nPrimer2 ) { pcrPrimer* pPrimer2 = aPCRPrimersRegion2_[ nPrimer2 ]; int nProductSize; assert( pPrimer1->primer_.bTopStrandNotBottomStrand_ != pPrimer2->primer_.bTopStrandNotBottomStrand_ ); if ( pPrimer1->primer_.bTopStrandNotBottomStrand_ ) { nProductSize = pPrimer2->primer_.nUnpaddedEnd_ - pPrimer1->primer_.nUnpaddedStart_ + 1; aPairs.insert( pcrPrimerrPair( pPrimer1, pPrimer2, nProductSize ) ); } else { nProductSize = pPrimer1->primer_.nUnpaddedEnd_ - pPrimer2->primer_.nUnpaddedStart_ + 1; aPairs.insert( pcrPrimerrPair( pPrimer2, pPrimer1, nProductSize ) ); } } } cerr << "found " << aPairs.length() << " primer pairs. Now try to find one that is acceptable..." << endl; // find largest and smallest acceptable primer pair bool bFoundAnAcceptablePair = false; while( !bFoundAnAcceptablePair ) { // find an untested pair with the largest product size pBestPCRPrimerPair = NULL; int nBestProductSize = 0; for( int nPair = 0; nPair < aPairs.length(); ++nPair ) { pcrPrimerrPair& primerPair = aPairs[ nPair ]; if ( primerPair.bAlreadyTestedAndNotOK_ ) continue; if ( primerPair.pPCR1_->bEspeciallyBadFalseMatch_ ) continue; if ( primerPair.pPCR2_->bEspeciallyBadFalseMatch_ ) continue; if ( !pBestPCRPrimerPair ) { nBestProductSize = primerPair.nProductSize_; pBestPCRPrimerPair = &aPairs[ nPair ]; } else if ( bBiggestNotSmallest_ ) { if ( primerPair.nProductSize_ > nBestProductSize ) { nBestProductSize = primerPair.nProductSize_; pBestPCRPrimerPair = &aPairs[ nPair ]; } } else { if ( primerPair.nProductSize_ < nBestProductSize ) { nBestProductSize = primerPair.nProductSize_; pBestPCRPrimerPair = &aPairs[ nPair ]; } } } // for( int nPair = 0; ... if ( !pBestPCRPrimerPair ) { // we've already tried all pairs and none worked break; } // so pBestPCRPrimerPair is the pair with the largest (or smallest) // product size time_t time2 = time( NULL ); double dTimeToCheck = difftime( time2, time1 ); // in the case of making a primer out of the first region, do // not do any checking that would eliminate it. if ( !pCP->bAutoPCRAmplifyMakePrimerOutOfFirstRegion_ ) { if ( !pBestPCRPrimerPair->pPCR1_->bCheckedForFalseMatches_ ) { // check this primer for too many false matches pBestPCRPrimerPair->pPCR1_->findAllSeriousFalseMatchesInAssembly(); } if ( pBestPCRPrimerPair->pPCR1_->aFalseMatchLocations_.length() > pCP->nAutoPCRAmplifyTooManySeriousFalseMatches_ ) { cerr << " too many first matches " << pBestPCRPrimerPair->pPCR1_->aFalseMatchLocations_.length() << endl; pBestPCRPrimerPair->bAlreadyTestedAndNotOK_ = true; continue; } } if ( !pBestPCRPrimerPair->pPCR2_->bCheckedForFalseMatches_ ) { // check this primer for too many false matches pBestPCRPrimerPair->pPCR2_->findAllSeriousFalseMatchesInAssembly(); } if ( pBestPCRPrimerPair->pPCR2_->aFalseMatchLocations_.length() > pCP->nAutoPCRAmplifyTooManySeriousFalseMatches_ ) { cerr << " too many second matches " << pBestPCRPrimerPair->pPCR2_->aFalseMatchLocations_.length() << endl; pBestPCRPrimerPair->bAlreadyTestedAndNotOK_ = true; continue; } /*********************** here is the work horse ********************/ if ( pBestPCRPrimerPair->bDoesThisPassChecks() ) { bFoundAnAcceptablePair = true; fprintf( pAO, "product: %d from %d-%d to %d-%d passes checks\n", pBestPCRPrimerPair->nProductSize_, pBestPCRPrimerPair->pPCR1_->primer_.nUnpaddedStart_, pBestPCRPrimerPair->pPCR1_->primer_.nUnpaddedEnd_, pBestPCRPrimerPair->pPCR2_->primer_.nUnpaddedStart_, pBestPCRPrimerPair->pPCR2_->primer_.nUnpaddedEnd_ ); } else { cerr << " failed other compatibilities " << endl; pBestPCRPrimerPair->bAlreadyTestedAndNotOK_ = true; fprintf( pAO, "product: %d from %d-%d to %d-%d fails check because %s\n", pBestPCRPrimerPair->nProductSize_, pBestPCRPrimerPair->pPCR1_->primer_.nUnpaddedStart_, pBestPCRPrimerPair->pPCR1_->primer_.nUnpaddedEnd_, pBestPCRPrimerPair->pPCR2_->primer_.nUnpaddedStart_, pBestPCRPrimerPair->pPCR2_->primer_.nUnpaddedEnd_, soWhyIsPrimerNotAcceptable( pBestPCRPrimerPair->ucWhatIsWrong_ ).data() ); } } // while( !bFoundAnAcceptablePair ) { if ( !bFoundAnAcceptablePair ) { fprintf( pAO, "could not find an acceptable primer pair\n" ); pBestPCRPrimerPair = NULL; } else { // the memory that pBestPCRPrimerPair points to is about to be // deallocated so better make it permanent: pcrPrimerrPair* pPCRPrimerrPair = new pcrPrimerrPair(); *pPCRPrimerrPair = *pBestPCRPrimerPair; pBestPCRPrimerPair = pPCRPrimerrPair; } } regionToAmplify :: ~regionToAmplify() { aPCRPrimersRegion1_.clearAndDestroy(); aPCRPrimersRegion2_.clearAndDestroy(); } // below here is all for autoFinish to close gaps // main routine for autoFinish to close gaps void regionToAmplify :: autoFinishPCRToCloseAGap( Contig* pContig1, const int nWhichEnd1, Contig* pContig2, const int nWhichEnd2 ) { soRegionID_ = szWhichGap( nWhichEnd1 ); soRegionID_ += " of "; soRegionID_ += ( pContig1->soGetName() + " to " + szWhichGap( nWhichEnd2 ) + " of " + pContig2->soGetName() ); cerr << "picking pcr primer pairs to span gap from " << soRegionID_ << endl; cerr.flush(); bool bProblems; setRegion( 1, // nWhichRegion pContig1, nWhichEnd1, bProblems ); if ( bProblems ) return; setRegion( 2, // nWhichRegion pContig2, nWhichEnd2, bProblems ); if ( bProblems ) return; bBiggestNotSmallest_ = false; findPCRPrimerPairToCloseGap(); } static int nComparePCRPrimersByDistanceToGap( const pcrPrimer** ppPCRPrimer1, const pcrPrimer** ppPCRPrimer2 ) { if ( (*ppPCRPrimer1)->nDistanceToGap_ < (*ppPCRPrimer2)->nDistanceToGap_ ) return( -1 ); else if ( (*ppPCRPrimer1)->nDistanceToGap_ > (*ppPCRPrimer2)->nDistanceToGap_ ) return( 1 ); else return( 0 ); } void regionToAmplify :: findPCRPrimerPairToCloseGap() { checkRegionForAcceptablePrimersWithNotTooManyFalseMatches( region1_, bPrimersInRegion1PointRightNotLeft_, true ); // bRegion1NotRegion2 if ( aPCRPrimersRegion1_.length() == 0 ) { fprintf( pAO, "could find no pcr primers for %s of %s so cannot close this gap with pcr\n", ( bPrimersInRegion1PointRightNotLeft_ ? "right end" : "left end" ), region1_.pContig_->soGetName().data() ); return; } checkRegionForAcceptablePrimersWithNotTooManyFalseMatches( region2_, bPrimersInRegion2PointRightNotLeft_, false ); // bRegion1NotRegion2 if ( aPCRPrimersRegion2_.length() == 0 ) { fprintf( pAO, "could find no pcr primer for %s of %s so cannot close this gap with pcr\n", ( bPrimersInRegion2PointRightNotLeft_ ? "right end" : "left end" ), region2_.pContig_->soGetName().data() ); return; } // sort the list of pcr primers based on distance to gap calculateDistancesToGap(); qsort( aPCRPrimersRegion1_.data(), aPCRPrimersRegion1_.length(), sizeof( pcrPrimer*), ( (int(*) ( const void*, const void*) ) nComparePCRPrimersByDistanceToGap ) ); qsort( aPCRPrimersRegion2_.data(), aPCRPrimersRegion2_.length(), sizeof( pcrPrimer*), ( (int(*) ( const void*, const void*) ) nComparePCRPrimersByDistanceToGap ) ); checkSorted( aPCRPrimersRegion1_ ); checkSorted( aPCRPrimersRegion2_ ); // the primers in the A list can be up to pCP->nPrimersWindowSizeInLookingToUse_ back from the gap // the primers in the B list can also be up to pCP->nPrimersWindowSizeInLookingToUse_ back from the gap // So: int nMaxMaxProductSize = 2 * pCP->nPrimersWindowSizeInLookingToUse_; const int nIncrement = 50; // set up for nGetIndexFromProductSize nLastUsedIndexA_ = 0; nLastUsedIndexB_ = 0; int nBestA = -1; int nBestB = -1; int nSmallestProductSize = -666; int nLastMaxAIndex = -1; int nLastMaxBIndex = -1; bool bFirstTime = true; bFoundAPair_ = false; for( nMaxProductSize_ = nIncrement; ( nMaxProductSize_ < nMaxMaxProductSize ) && !bFoundAPair_; nMaxProductSize_ += nIncrement ) { int nMaxAIndex = nGetIndexFromProductSize( nMaxProductSize_, true ); int nMaxBIndex = nGetIndexFromProductSize( nMaxProductSize_, false ); // if we haven't found any primers yet on either side, // keep incrementing the max product size until we have // primers on both ends if ( nMaxAIndex == -1 || nMaxBIndex == -1 ) continue; // not first time. From the way bFirstTime = false is set, // we must have already tried some primer pairs and thus // our new region is not a square, but has a square cut out // of it: // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx dimension // x x x B // x old x x | // x x x v // xxxxxxx 1 x // x x x // x x x // x x x // x x x // x x x // x 2 x x // x x x // x x x // x x x // x x x // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx // dimension A -> // where regions 1 and 2 are the ones that we will // examine now. If !bMorePrimersInDimensionA, then region // 1 will be empty. If !bMorePrimersInDimensionB, then // region 2 will be empty. // If regions 1 and 2 are both empty, then there are no // primer pairs to consider, so increase nMaxProductSize // until they are not both empty. bool bMorePrimersInDimensionA = ( nMaxAIndex > nLastMaxAIndex ? true : false ); bool bMorePrimersInDimensionB = ( nMaxBIndex > nLastMaxBIndex ? true : false ); // no--even though there are no more primer pairs in the regions 1 and // 2, there might be some in "old" that have this new nMaxProductSize_ // area. So commented this out: //if ( !bMorePrimersInDimensionA && //!bMorePrimersInDimensionB ) //continue; int nMinAIndex = nLastMaxAIndex + 1; int nMinBIndex = nLastMaxBIndex + 1; // set up for next time nLastMaxAIndex = nMaxAIndex; nLastMaxBIndex = nMaxBIndex; if ( bFirstTime ) { bFirstTime = false; for( int nA = 0; nA <= nMaxAIndex; ++nA ) { for( int nB = 0; nB <= nMaxBIndex; ++nB ) { processPrimerPair( nA, nB ); } // int ( nB } // int ( nA } // if ( bFirstTime else { // first check if any of the aAcceptablePrimers_ array // is ok to use because it has a product size of <= // nMaxProductSize for( int nPair = 0; nPair < aAcceptablePCRPrimerPairs_.length(); ++nPair ) { pcrPrimerPairSubscripts& pcrPP = aAcceptablePCRPrimerPairs_[ nPair ]; if ( pcrPP.bAlreadyChecked_ ) continue; if ( pcrPP.nProductSize_ <= nMaxProductSize_ ) { pcrPP.bAlreadyChecked_ = true; replaceBestPrimerPairIfNecessary( pcrPP.nA_, pcrPP.nB_, pcrPP.nProductSize_ ); } } // not first time. From the way bFirstTime = false is set, // we must have already tried some primer pairs and thus // our new region is not a square, but has a square cut out // of it: // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx dimension // x x x B // x old x x | // x x x v // xxxxxxx 1 x // x x x // x x x // x x x // x x x // x x x // x 2 x x // x x x // x x x // x x x // x x x // xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx // dimension A -> // where regions 1 and 2 are the ones that we will // examine now. If !bMorePrimersInDimensionA, then region // 1 will be empty. If !bMorePrimersInDimensionB, then // region 2 will be empty. if ( bMorePrimersInDimensionA ) { // This is region 1 for( int nA = nMinAIndex; nA <= nMaxAIndex; ++nA ) { for( int nB = 0; nB <= nMaxBIndex; ++nB ) { processPrimerPair( nA, nB ); } } } if ( bMorePrimersInDimensionB ) { // notice "< nMinAIndex" not "<= nMinAIndex" for( int nA = 0; nA < nMinAIndex; ++nA ) { for( int nB = nMinBIndex; nB <= nMaxBIndex; ++nB ) { processPrimerPair( nA, nB ); } } } } // if ( bFirstTime ) { else } // for( int nMaxProductSize = nIncrement; ... // at this point, we have either found pcr primers making the smallest // product we can, or we have found no pcr primer pair if ( !bFoundAPair_ ) { fprintf( pAO, "Could not find an acceptable combination of primers that works for both contigs at once.\n" ); } else { RWTValVector aOligoNames( 2, "" ); // initial name aOligoNames.soName_ = "regionToAmplify::aOligoNames"; RWCString soPCRInstructions( (size_t) 3000 ); soPCRInstructions.appendFormat( "Please make the following PCR primers:\n" ); int n; for( n = 1; n <= 2; ++n ) { primerType* pPrimer = NULL; if ( n == 1 ) { pPrimer = &( aPCRPrimersRegion1_[ nBestAIndex_ ]->primer_ ); } else { pPrimer = &( aPCRPrimersRegion2_[ nBestBIndex_ ]->primer_ ); } RWCString soOligoNameOfChosenPrimer = ConsEd::pGetAssembly()->soGetNextOligoName(); aOligoNames[ n - 1 ] = soOligoNameOfChosenPrimer; soPCRInstructions.increaseMaxLengthIfNecessary( (size_t) 500 ); soPCRInstructions.appendFormat( " %s: ", soOligoNameOfChosenPrimer.data() ); for( int nBase = 0; nBase < pPrimer->nUnpaddedLength_; ++nBase ) { soPCRInstructions.append( pPrimer->szPrimer_[ nBase ] ); } soPCRInstructions.increaseMaxLengthIfNecessary( (size_t) 2000 ); soPCRInstructions.appendFormat( " %d to %d (%s) for %s of %s melt: %5.1f\n", pPrimer->nUnpaddedStart_, pPrimer->nUnpaddedEnd_, ( pPrimer->bTopStrandNotBottomStrand_ ? "top strand" : "bottom strand" ), szWhichGap( ( pPrimer->bTopStrandNotBottomStrand_ ? nRightGap : nLeftGap ) ), pPrimer->pContig_->soGetName().data(), pPrimer->dMeltingTemperature_ ); } // for( n = 1 ... soPCRInstructions += "est. product size: "; soPCRInstructions += RWCString( (long) nSmallestProductSize_ ); soPCRInstructions += "\n"; fprintf( pAO, "%s", soPCRInstructions.data() ); fflush( pAO ); assert( nSmallestProductSize_ == ( aPCRPrimersRegion1_[ nBestAIndex_ ]->nDistanceToGap_ + aPCRPrimersRegion2_[ nBestBIndex_ ]->nDistanceToGap_ ) ); if ( pCP->bWillDoExperiments_ && pCP->bAutoFinishTagOligosWhenDoExperiments_ ) { // need to create oligo tags for these primers for( n = 1 ; n <= 2; ++n ) { primerType* pPrimer = NULL; if ( n == 1 ) { pPrimer = &( aPCRPrimersRegion1_[ nBestAIndex_ ]->primer_ ); } else { pPrimer = &( aPCRPrimersRegion2_[ nBestBIndex_ ]->primer_ ); } bool bComplementedWithRespectToWayPhrapCreatedContig = exclusive_or( pPrimer->pContig_->bComplementedFromWayPhrapCreated_, !pPrimer->bTopStrandNotBottomStrand_ ); oligoTag* pOligoTag = new oligoTag( pPrimer, true, // clone template, not subclone template bComplementedWithRespectToWayPhrapCreatedContig, 666, // primer id--just prevent ctor from creating new name soPCRInstructions ); // fill in real name pOligoTag->soName_ = aOligoNames[ n - 1 ]; ConsEd::pGetAssembly()->pChosenPrimersForAutofinishPCR_->insert( pOligoTag ); } } // if ( pCP->bWillDoExperiment_ } // if ( !bFoundAPair_ ) else... } int regionToAmplify :: nGetIndexFromProductSize( const int nProductSize, const bool bANotB ) { RWTPtrOrderedVector& aPrimer = ( bANotB ? aPCRPrimersRegion1_ : aPCRPrimersRegion2_ ); int nLastUsedIndex = ( bANotB ? nLastUsedIndexA_ : nLastUsedIndexB_ ); // small desired big // s d b // ^will give this // special cases: // desired // d 0th item // ^ will give this // last item desired // l d // ^ give this int nIndexToReturn = -1; // special cases if ( aPrimer.length() == 0 ) return( -1 ); // not sure if this is correct... Yes, it is because I want // to keep increasing the product size in the main loop in // suggestPCRForPairOfContigEnds until it is larger than // the 0th primer rather than have to store the found // primer pairs in aAcceptablePCRPrimerPairs_ and then have to // revisit them later. if ( nProductSize < aPrimer[0]->nDistanceToGap_ ) return( -1 ); for( int nIndex = nLastUsedIndex; nIndex < aPrimer.length(); ++nIndex ) { int nGapSizeAtIndex = aPrimer[ nIndex ]->nDistanceToGap_; // the product size for any pair with this primer will be // at least the nGapSizeAtIndex if ( nGapSizeAtIndex >= nProductSize ) { nIndexToReturn = nIndex; break; } } // case in which the desired product size is greater than // all the primers. In this case, return the last index. if ( nIndexToReturn == -1 ) nIndexToReturn = aPrimer.length() - 1; if ( bANotB ) nLastUsedIndexA_ = nIndexToReturn; else nLastUsedIndexB_ = nIndexToReturn; return( nIndexToReturn ); } void regionToAmplify :: calculateDistancesToGap() { calculateDistanceToGap( aPCRPrimersRegion1_, region1_.pContig_, bPrimersInRegion1PointRightNotLeft_ ); calculateDistanceToGap( aPCRPrimersRegion2_, region2_.pContig_, bPrimersInRegion2PointRightNotLeft_ ); } void regionToAmplify :: calculateDistanceToGap( RWTPtrOrderedVector& aPrimer, Contig* pContig, const bool bRightEndNotLeftEnd ) { if ( !bRightEndNotLeftEnd ) { // llllllllHHHHHHHHHHHHHHHHHHHHH (l, low quality consensus, H, high ) // <----- primer // a b distance = b - a int nUnpaddedHighQualitySegmentStart = pContig->nGetHighQualitySegmentStart(); for( int nPrimer = 0; nPrimer < aPrimer.length(); ++nPrimer ) { pcrPrimer* pPCRPrimerToGap = aPrimer[ nPrimer ]; pPCRPrimerToGap->nDistanceToGap_ = pPCRPrimerToGap->primer_.nUnpaddedStart_ - nUnpaddedHighQualitySegmentStart; } } else { // HHHHHHHHHHHHHHHHHHHHHHHHHHHllllllll // primer -----> // a b distance = b - a int nUnpaddedHighQualitySegmentEnd = pContig->nGetHighQualitySegmentEnd(); for( int nPrimer = 0; nPrimer < aPrimer.length(); ++nPrimer ) { pcrPrimer* pPCRPrimerToGap = aPrimer[ nPrimer ]; pPCRPrimerToGap->nDistanceToGap_ = nUnpaddedHighQualitySegmentEnd - pPCRPrimerToGap->primer_.nUnpaddedEnd_; } } } void regionToAmplify :: checkSorted( const RWTPtrOrderedVector& aPrimer ) { bool bSorted = true; for( int n = 1; n < aPrimer.length(); ++n ) { pcrPrimer* pPCRPrimerToGap1 = aPrimer[ n - 1 ]; pcrPrimer* pPCRPrimerToGap2 = aPrimer[ n ]; if ( pPCRPrimerToGap1->nDistanceToGap_ > pPCRPrimerToGap2->nDistanceToGap_ ) { cerr << "primer at " << n -1 << " value " << pPCRPrimerToGap1->nDistanceToGap_ << " and " << n << " value " << pPCRPrimerToGap2->nDistanceToGap_ << " are out of order" << endl; bSorted = false; } } if ( !bSorted ) { THROW_ERROR( "primers are not sorted by distance to gap--see xterm" ); } } static pcrPrimerrPair tempPCRPrimerPair; // used over and over again // by assigning to its data members. This saves lots of // malloc/free/malloc/free... void regionToAmplify :: processPrimerPair( const int nA, const int nB ) { // first should check that this pair doesn't make a product somewhere else pcrPrimer* pPrimerToGapA = aPCRPrimersRegion1_[ nA ]; pcrPrimer* pPrimerToGapB = aPCRPrimersRegion2_[ nB ]; if ( pPrimerToGapA->bEspeciallyBadFalseMatch_ ) return; if ( pPrimerToGapB->bEspeciallyBadFalseMatch_ ) return; if ( !pPrimerToGapA->bCheckedForFalseMatches_ ) pPrimerToGapA->findAllSeriousFalseMatchesInAssembly(); if ( pPrimerToGapA->aFalseMatchLocations_.length() > pCP->nAutoPCRAmplifyTooManySeriousFalseMatches_ ) { return; } if ( !pPrimerToGapB->bCheckedForFalseMatches_ ) pPrimerToGapB->findAllSeriousFalseMatchesInAssembly(); if ( pPrimerToGapB->aFalseMatchLocations_.length() > pCP->nAutoPCRAmplifyTooManySeriousFalseMatches_ ) { return; } tempPCRPrimerPair.pPCR1_ = pPrimerToGapA; tempPCRPrimerPair.pPCR2_ = pPrimerToGapB; tempPCRPrimerPair.nProductSize_ = pPrimerToGapA->nDistanceToGap_ + pPrimerToGapB->nDistanceToGap_; tempPCRPrimerPair.bAlreadyTestedAndNotOK_ = false; if ( tempPCRPrimerPair.bDoesThisPassChecks() ) { // check product size if ( tempPCRPrimerPair.nProductSize_ > nMaxProductSize_ ) { aAcceptablePCRPrimerPairs_.insert( pcrPrimerPairSubscripts( nA, nB, tempPCRPrimerPair.nProductSize_ ) ); } else { replaceBestPrimerPairIfNecessary( nA, nB, tempPCRPrimerPair.nProductSize_ ); } } } void regionToAmplify :: setRegion( const int nWhichRegion, Contig* pContig, const int nWhichEnd, bool& bProblems ) { assert( nWhichRegion == 1 || nWhichRegion == 2 ); int nUnpaddedLeft; int nUnpaddedRight; pContig->getUnpaddedRangeForMakingPCRPrimers( nWhichEnd, nUnpaddedLeft, nUnpaddedRight, bProblems ); if ( bProblems ) { fprintf( pAO, "cannot get range in %s sufficiently back from end of contig probably since the contig is too small\n", pContig->soGetName().data() ); return; } bool bTopStrandPCRPrimer = ( nWhichEnd == nRightGap ? true : false ); if ( nWhichRegion == 1 ) { bPrimersInRegion1PointRightNotLeft_ = bTopStrandPCRPrimer; region1_.pContig_ = pContig; region1_.nUnpaddedLeft_ = nUnpaddedLeft; region1_.nUnpaddedRight_ = nUnpaddedRight; } else { bPrimersInRegion2PointRightNotLeft_ = bTopStrandPCRPrimer; region2_.pContig_ = pContig; region2_.nUnpaddedLeft_ = nUnpaddedLeft; region2_.nUnpaddedRight_ = nUnpaddedRight; } } void regionToAmplify :: replaceBestPrimerPairIfNecessary( const int nA, const int nB, const int nProductSize ) { if ( !bFoundAPair_ ) { bFoundAPair_ = true; nSmallestProductSize_ = nProductSize; nBestAIndex_ = nA; nBestBIndex_ = nB; } else { // check if this is the best one so far for this if ( nProductSize < nSmallestProductSize_ ) { // we have a better one nSmallestProductSize_ = nProductSize; nBestAIndex_ = nA; nBestBIndex_ = nB; } } } void regionToAmplify :: makeOnePrimerOutOfRegion( const regionOfContig& region, const bool bPrimersPointRightNotLeft, const bool bRegion1NotRegion2 ) { primerType primer; int nLengthOfPrimer = region.nUnpaddedRight_ - region.nUnpaddedLeft_ + 1; createPrimerCandidate( &primer, region.nUnpaddedLeft_, nLengthOfPrimer, bPrimersPointRightNotLeft, region.pContig_, 1 ); // nUnpaddedPosOfCursor--ignored primer.dMeltingTemperature_ = dGetPrimerMeltingTemperature( primer.szPrimer_, primer.nUnpaddedLength_ ); cerr << "melting temp = " << primer.dMeltingTemperature_ << endl; primer.bAcceptable_ = true; // override whatever createPrimerCandidate // set this to pcrPrimer* pPCRPrimer = new pcrPrimer( &primer ); if ( bRegion1NotRegion2 ) aPCRPrimersRegion1_.insert( pPCRPrimer ); else aPCRPrimersRegion2_.insert( pPCRPrimer ); }