/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "pickCombinationsOfPCRPrimers.h" #include "primerType.h" #include "contig.h" #include "assembly.h" #include "consed.h" #include "fileDefines.h" #include "consedParameters.h" #include "bIsPCRPrimerPairOK.h" #include "exclusive_or.h" #include "oligoTag.h" #include "pickPCRPrimersForOneEndOfContig.h" #include "regionToAmplify.h" void pickCombinationsOfPCRPrimers :: suggestPCRIfNecessaryForClosingGaps( RWTPtrOrderedVector* pContigEndsForMultiplexPCR, RWTValOrderedVector* pContigEndsForMultiplexPCRWhichEnd ) { Assembly* pAssembly = ConsEd::pGetAssembly(); // put these contig ends into the class used by pickCombinationsOfPCRPrimers assert( pContigEndsForMultiplexPCR->length() == pContigEndsForMultiplexPCRWhichEnd->length() ); int nContig; for( nContig = 0; nContig < pContigEndsForMultiplexPCR->length(); ++nContig ) { Contig* pContig = (*pContigEndsForMultiplexPCR)[ nContig ]; int nWhichEnd = (*pContigEndsForMultiplexPCRWhichEnd)[ nContig ]; contigEndd* pCE = new contigEndd(); pCE->pContig_ = pContig; pCE->nWhichEnd_ = nWhichEnd; pCE->bOtherEndIsCloneEnd_ = pContig->bIsThisEndTheEndOfTheClone( nOppositeGap( nWhichEnd ) ); aContigEnds_.insert( pCE ); } // now let's find pcr primers for each contig end // If there is some contigEnd for which Consed cannot find any primers, // skip it and consider pcr amongst the remaining contigEnds. int nEnd; for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) { contigEndd* pCE = aContigEnds_[ nEnd ]; cerr << "picking pcr primer candidates for " << szWhichGap( pCE->nWhichEnd_ ) << " of " << pCE->pContig_->soGetName() << "..." << endl; pickPCRPrimersForOneEndOfContig( pCE->pPrimerArray_, pCE->nPrimersInPrimerArray_, pCE->pContig_, pCE->nWhichEnd_, pCE->nAcceptablePrimers_ ); } // remove any contig-ends for which we could find no primers: for( nEnd = aContigEnds_.length() - 1; nEnd >= 0; --nEnd ) { contigEndd* pCE = aContigEnds_[ nEnd ]; if ( pCE->nAcceptablePrimers_ == 0 ) { // there are no acceptable primers for this contig end // so just don't try to do pcr off this contig end fprintf( pAO, "could find no pcr primers for %s of %s so excluding this contig end from pcr\n", szWhichGap( pCE->nWhichEnd_ ), pCE->pContig_->soGetName().data() ); aContigEnds_.removeAt( nEnd ); } } // now we need a list of *pairs* of ends, since not every end can // go with every other end. For example, we are not interested // in doing pcr between the 2 ends of the same contig. Nor are we // interested in doing pcr between these contigs: // (clone end)---------- ------ --------(clone end) // contigA contigB contigC aPairsOfEnds_.resize( aContigEnds_.length() * aContigEnds_.length() + 1 ); // fixed a very interesting bug: if aContigEnds_.length() == 0, then // aContigEnds_.length() - 1 == 4294967296 (or something like that) // since this is unsigned and this loop runs for hours for( int nEnd1 = 0; nEnd1 < ( (int) aContigEnds_.length() - 1 ); ++nEnd1 ) { for( int nEnd2 = nEnd1 + 1; nEnd2 < aContigEnds_.length(); ++nEnd2 ) { contigEndd* pCE1 = aContigEnds_[ nEnd1 ]; contigEndd* pCE2 = aContigEnds_[ nEnd2 ]; if ( pCE1->pContig_ == pCE2->pContig_ ) continue; // case in which there are more than 2 contigs so we don't // want to do pcr between the clone ends if ( pCE1->bOtherEndIsCloneEnd_ && pCE2->bOtherEndIsCloneEnd_ && ( aContigEnds_.length() > 2 ) ) continue; // Consider such a case: // ----- ------ ------- contigs // A B C // ---- ---- (subclones fwd/rev pairs linking) // in such a case, we would not want to do pcr between the // left end of contig A and the right end of contig C if ( pCE1->pContig_->bPlacedInAScaffold_ && pCE2->pContig_->bPlacedInAScaffold_ ) { if ( pAssembly->nGetScaffoldForContig( pCE1->pContig_ ) == pAssembly->nGetScaffoldForContig( pCE2->pContig_ ) ) { // they are in the same scaffold (presumably the // ends of the scaffold) so do not make a pair out of them fprintf( pAO, "contig-ends %s of %s and %s of %s are in the same scaffold so not considering pcr for them\n", szWhichGap( pCE1->nWhichEnd_ ), pCE1->pContig_->soGetName().data(), szWhichGap( pCE2->nWhichEnd_ ), pCE2->pContig_->soGetName().data() ); continue; } } pairOfEnds* pPOE = new pairOfEnds(); pPOE->nEnd1_ = nEnd1; pPOE->nEnd2_ = nEnd2; aPairsOfEnds_.insert( pPOE ); } // for( int nEnd2 = nEn... } // for( int nEnd1 = 0; nEn... if ( aPairsOfEnds_.length() == 0 ) { fprintf( pAO, "There were no pairs of ends available for pcr.\n" ); return; } if ( aPairsOfEnds_.length() >= pCP->nAutoFinishDoNotDoUnorientedPCRIfThisManyOrMoreUnorientedPCRReactions_ ) { fprintf( pAO, "Autofinish could have done %d unoriented pcr reactions, but\nconsed.autoFinishDoNotDoUnorientedPCRIfThisManyOrMoreUnorientedPCRReactions: %d\nso not doing any\n", aPairsOfEnds_.length(), pCP->nAutoFinishDoNotDoUnorientedPCRIfThisManyOrMoreUnorientedPCRReactions_ ); fprintf( pAO, "In case you are wondering, Autofinish was considering the following unoriented pcr reactions:\n" ); for( int nPair = 0; nPair < aPairsOfEnds_.length(); ++nPair ) { pairOfEnds* pPOE = aPairsOfEnds_[ nPair ]; contigEndd* pCE1 = aContigEnds_[ pPOE->nEnd1_ ]; contigEndd* pCE2 = aContigEnds_[ pPOE->nEnd2_ ]; fprintf( pAO, " between %s of %s and %s of %s\n", szWhichGap( pCE1->nWhichEnd_ ), pCE1->pContig_->soGetName().data(), szWhichGap( pCE2->nWhichEnd_ ), pCE2->pContig_->soGetName().data() ); } return; } if ( aPairsOfEnds_.length() == 1 ) { pairOfEnds* pPairOfEnds = aPairsOfEnds_[ 0 ]; contigEndd* pContigEndA = aContigEnds_[ pPairOfEnds->nEnd1_ ]; contigEndd* pContigEndB = aContigEnds_[ pPairOfEnds->nEnd2_ ]; fprintf( pAO, "contig ends were unoriented, but only 2 of them so using method of finding pcr primers for oriented pair of contigs:\n" ); regionToAmplify regionGap; regionGap.autoFinishPCRToCloseAGap( pContigEndA->pContig_, pContigEndA->nWhichEnd_, pContigEndB->pContig_, pContigEndB->nWhichEnd_ ); return; } // try combinations of primers for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) { aContigEnds_[ nEnd ]->nCurrentlyTryingThisPrimer_ = 0; } bool bFoundSetOfPrimers; try { bFoundSetOfPrimers = bConsiderPrimerForOneContigEndWithPrecedingContigEnds( 0 ); } catch(InputDataError ide ) { fprintf( pAO, "could not find compatible set of pcr primers\n" ); fprintf( pAO, "%s\n", ide.szGetDesc() ); return; } if ( !bFoundSetOfPrimers ) { fprintf( pAO, "could not find an acceptable combination of primers that works for all contigs ends at once.\n" ); } else { fprintf( pAO, "Please make the following PCR primers:\n" ); int nEnd; for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) { contigEndd* pCE = aContigEnds_[ nEnd ]; primerType* pPrimer = pCE->pPrimerArray_ + pCE->nCurrentlyTryingThisPrimer_; pCE->soOligoNameOfChosenPrimer_ = pAssembly->soGetNextOligoName(); fprintf( pAO, " %s: ", pCE->soOligoNameOfChosenPrimer_.data() ); for( int nBase = 0; nBase < pPrimer->nUnpaddedLength_; ++nBase ) { putc( pPrimer->szPrimer_[ nBase ], pAO ); } fprintf( pAO, " %d to %d (%s) for %s of %s melt: %5.1f\n", pPrimer->nUnpaddedStart_, pPrimer->nUnpaddedEnd_, ( pPrimer->bTopStrandNotBottomStrand_ ? "top strand" : "bottom strand" ), szWhichGap( pCE->nWhichEnd_ ), pPrimer->pContig_->soGetName().data(), pPrimer->dMeltingTemperature_ ); } // now list combinations of primers RWCString soPCRInstructions( (size_t) 3000 ); soPCRInstructions += "Do PCR reactions with the following pairs of primers:\n"; for( int nPair = 0; nPair < aPairsOfEnds_.length(); ++nPair ) { pairOfEnds* pPOE = aPairsOfEnds_[ nPair ]; contigEndd* pCE1 = aContigEnds_[ pPOE->nEnd1_ ]; contigEndd* pCE2 = aContigEnds_[ pPOE->nEnd2_ ]; primerType* pPrimer1 = pCE1->pPrimerArray_ + pCE1->nCurrentlyTryingThisPrimer_; primerType* pPrimer2 = pCE2->pPrimerArray_ + pCE2->nCurrentlyTryingThisPrimer_; soPCRInstructions += " "; soPCRInstructions += pCE1->soOligoNameOfChosenPrimer_; soPCRInstructions += ": "; int nBase; for( nBase = 0; nBase < pPrimer1->nUnpaddedLength_; ++nBase ) { soPCRInstructions.append( pPrimer1->szPrimer_[ nBase ] ); } soPCRInstructions += " "; soPCRInstructions += pCE2->soOligoNameOfChosenPrimer_; soPCRInstructions += ": "; for( nBase = 0; nBase < pPrimer2->nUnpaddedLength_; ++nBase ) { soPCRInstructions.append( pPrimer2->szPrimer_[ nBase ] ); } soPCRInstructions.append( '\n' ); } // for( int nPair = 0; nPair ... fprintf( pAO, "%s", soPCRInstructions.data() ); if ( pCP->bWillDoExperiments_ && pCP->bAutoFinishTagOligosWhenDoExperiments_ ) { // need to create oligo tags for all these for( nEnd = 0; nEnd < aContigEnds_.length(); ++nEnd ) { contigEndd* pCE = aContigEnds_[ nEnd ]; primerType* pPrimer = pCE->pPrimerArray_ + pCE->nCurrentlyTryingThisPrimer_; bool bComplementedWithRespectToWayPhrapCreatedContig = exclusive_or( pPrimer->pContig_->bComplementedFromWayPhrapCreated_, !pPrimer->bTopStrandNotBottomStrand_ ); oligoTag* pOligoTag = new oligoTag( pPrimer, true, // clone template, not subclone template bComplementedWithRespectToWayPhrapCreatedContig, 666, // primer id--just prevent ctor from creating new name soPCRInstructions ); // fill in real name pOligoTag->soName_ = pCE->soOligoNameOfChosenPrimer_; pAssembly->pChosenPrimersForAutofinishPCR_->insert( pOligoTag ); } } // if ( pCP->bWillDoExperiments_ && ... } // if ( !bFoundSetOfPrimers ) { else ... } // recursive bool pickCombinationsOfPCRPrimers :: bConsiderPrimerForOneContigEndWithPrecedingContigEnds( const int nContigEnd ) { // try to find a primer on contig end nContigEnd that is compatible with // set of primers already chosen. If cannot find *any*, then return false. // If we *can* find such a primer, then set pCE->nCurrentlyTryingThisPrimer_ contigEndd* pCE = aContigEnds_[ nContigEnd ]; for( int nPrimer = 0; nPrimer < pCE->nPrimersInPrimerArray_; ++nPrimer ) { primerType* pPrimer = pCE->pPrimerArray_ + nPrimer; if ( !pPrimer->bAcceptable_ ) continue; if ( bIsThisPrimerCompatibleWithSetOfPrimersChosenSoFar( pPrimer, nContigEnd ) ) { pCE->nCurrentlyTryingThisPrimer_ = nPrimer; // great! Now consider the next contigEnd and see if we // can find a primer that works with the set we've found // so far. // check if we already have considered all contig ends if ( nContigEnd == ( aContigEnds_.length() - 1 ) ) return( true ); else if ( bConsiderPrimerForOneContigEndWithPrecedingContigEnds( nContigEnd + 1 ) ) return( true ); // if bConsiderPrimerForOneContigEndWithPrecedingContigEnds( // nContigEnd + 1 ) returns false, that means that there was // no full set of primers that worked with the ones already // chosen, including the nPrimer in this routine. Thus we // must try ++nPrimer and see if we can make it all work with // that primer. } } // if reached here, then we were unable to find a set of primers that // included the ones chosen so far. Thus we should return false so // that the calling program can increment nPrimer and thus try a different // set return( false ); } bool pickCombinationsOfPCRPrimers :: bIsThisPrimerCompatibleWithSetOfPrimersChosenSoFar( primerType* pPrimerToTry, const int nContigEndOfCurrentPrimer ) { // note that if nContigEndOfCurrentPrimer == 0, this returns true for( int nContigEnd = 0; nContigEnd < nContigEndOfCurrentPrimer; ++nContigEnd ) { ++dNumberOfPrimerPairsCompared_; if ( dNumberOfPrimerPairsCompared_ > pCP->dAutoFinishDoNotComparePCRPrimersMoreThanThisManyTimes_ ) { THROW_ERROR( "could not find a compatible set of PCR primers after trying consed.autoFinishDoNotComparePCRPrimersMoreThanThisManyTimes" ); } contigEndd* pCE = aContigEnds_[ nContigEnd ]; primerType* pAlreadyChosenPrimer = pCE->pPrimerArray_ + pCE->nCurrentlyTryingThisPrimer_; bool bTempDiffTooGreat = false; bool bPrimersStickToEachOther = false; if ( !bIsPCRPrimerPairOK( pAlreadyChosenPrimer, pPrimerToTry, bTempDiffTooGreat, bPrimersStickToEachOther ) ) { return( false ); } } return( true ); }