/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "contig.h" #include "primerType.h" #include "experiment.h" #include "locatedFragment.h" #include "mbtValVector.h" #include "consedParameters.h" #include "min.h" #include "max.h" #include "subcloneTTemplate.h" #include "consed.h" #include "fileDefines.h" static int nMaxSubcloneCoverage = 10000; // some big number // This file contains the parts of autofinish for covering single // subclone regions. void Contig :: coverSingleSubcloneRegions() { // now this is already done in autoFinish.cpp // so that evalExpCluster can take advantage of where the // single subclone regions are. // autoFinishFindSingleSubcloneBases(); if ( nCountSingleSubcloneBases() == 0 ) { fprintf( pAO, "No single subclone regions\n" ); return; } autoFinishFindSingleSubcloneRegionsFromSuggestedExperiments(); autoFinishSetCumulativeSingleSubcloneArray(); autoFinishCreateCandidateExperimentsToCoverSingleSubcloneRegions(); autoFinishChooseSingleSubcloneExperiments(); } bool Contig :: bGetLikelyAlignedRegionOnContig( experiment* pExp, int& nConsPosLeftUnpadded, int& nConsPosRightUnpadded ) { if ( pExp->nReadType_ == nWalk ) return( bGetLikelyAlignedRegionOnContig2( pExp->nFirstBaseOfReadUnpadded_, pExp->bTopStrandNotBottomStrand_, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ); else { // clearly vector bases are not going to fix single subclone bases // if the read is a universal primer read, // nUnpaddedLeft_ and nUnpaddedRight_ have already excluded // vector bases. The average aligned portion of reads // may not do this, hence the intersection. if ( bGetLikelyAlignedRegionOnContig2( pExp->nFirstBaseOfReadUnpadded_, pExp->bTopStrandNotBottomStrand_, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) { // now must exclude vector bases return( bIntersect( nConsPosLeftUnpadded, nConsPosRightUnpadded, pExp->nUnpaddedLeft_, pExp->nUnpaddedRight_, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ); } else return( false ); } } // I think this is only valid for walks since it doesn't // consider vector bases at the beginning bool Contig :: bGetLikelyAlignedRegionOnContig2( const int nFirstBaseOfReadUnpadded, const bool bTopStrandNotBottomStrand, int& nConsPosLeftUnpadded, int& nConsPosRightUnpadded ) { if ( bTopStrandNotBottomStrand ) { nConsPosLeftUnpadded = nFirstBaseOfReadUnpadded + ConsEd::pGetAssembly()->nModelReadAlignedSegmentStart_ - 1; nConsPosRightUnpadded = nFirstBaseOfReadUnpadded + ConsEd::pGetAssembly()->nModelReadAlignedSegmentEnd_ - 1; } else { nConsPosRightUnpadded = nFirstBaseOfReadUnpadded - ConsEd::pGetAssembly()->nModelReadAlignedSegmentStart_ + 1; nConsPosLeftUnpadded = nFirstBaseOfReadUnpadded - ConsEd::pGetAssembly()->nModelReadAlignedSegmentEnd_ + 1; } if ( bIntersect( nConsPosLeftUnpadded, nConsPosRightUnpadded, nGetUnpaddedStartIndex(), nGetUnpaddedEndIndex(), nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) { return( true ); } else return( false ); } void Contig :: sortTemplatesByHowManySingleSubcloneBasesEachFixes( primerType* pPrimer, int nSingleSubcloneBasesFixedByThisTemplate[ nNUMBER_OF_TEMPLATES] ) { int nTemplate; for( nTemplate = 0; nTemplate < ( nNUMBER_OF_TEMPLATES - 1); ++nTemplate ) { for( int nTemplate2 = nTemplate + 1; nTemplate2 < nNUMBER_OF_TEMPLATES; ++nTemplate2 ) { if ( nSingleSubcloneBasesFixedByThisTemplate[ nTemplate2 ] > nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ] ) { int nTemp = nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ]; subcloneTTemplate* pSubTemp = pPrimer->pSubcloneTemplate_[ nTemplate ]; nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ] = nSingleSubcloneBasesFixedByThisTemplate[ nTemplate2 ]; pPrimer->pSubcloneTemplate_[ nTemplate ] = pPrimer->pSubcloneTemplate_[ nTemplate2 ]; nSingleSubcloneBasesFixedByThisTemplate[ nTemplate2 ] = nTemp; pPrimer->pSubcloneTemplate_[ nTemplate2 ] = pSubTemp; } } } // check above sort for( nTemplate = 1; nTemplate < nNUMBER_OF_TEMPLATES; ++nTemplate ) { assert( nSingleSubcloneBasesFixedByThisTemplate[ nTemplate - 1 ] >= nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ] ); } } void Contig :: eliminateTemplatesThatDoNotSolveAnySingleSubcloneBases( primerType* pPrimer, int nSingleSubcloneBasesFixedByThisTemplate[ nNUMBER_OF_TEMPLATES] ) { for( int nTemplate = 0; nTemplate < nNUMBER_OF_TEMPLATES; ++nTemplate ) { if ( pPrimer->pSubcloneTemplate_[ nTemplate ] && nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ] == 0 ) { pPrimer->pSubcloneTemplate_[ nTemplate ] = NULL; } } } void Contig :: autoFinishSetSingleSubcloneArrays( const bool bObserveDoNotFinishTags ) { int nUnpaddedLength = nGetUnpaddedSeqLength(); pWhichSingleSubcloneIsCoveringEachBase_ = new mbtPtrVector( nUnpaddedLength, nGetUnpaddedStartIndex() ); pNumberOfSubclonesCoveringEachBase_ = new mbtValVector( nUnpaddedLength, nGetUnpaddedStartIndex(), 0 ); pNumberOfSubclonesCoveringEachBaseWithoutSuggestedReads_ = new mbtValVector( nUnpaddedLength, nGetUnpaddedStartIndex(), 0 ); autoFinishFindSingleSubcloneRegionsFromExistingReads(); if ( bObserveDoNotFinishTags ) autoFinishExamineDoNotFinishTagsForSingleSubcloneRegions(); saveCopyOfSingleSubcloneArray(); } void Contig :: saveCopyOfSingleSubcloneArray() { for( int nUnpadded = pNumberOfSubclonesCoveringEachBase_->nGetStartIndex(); nUnpadded <= pNumberOfSubclonesCoveringEachBase_->nGetEndIndex(); ++nUnpadded ) { (*pNumberOfSubclonesCoveringEachBaseWithoutSuggestedReads_)[ nUnpadded ] = (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ]; } } void Contig :: autoFinishSetCumulativeSingleSubcloneArray() { // set cumulative subclone array. This will just be used // once--to decide which experiment candidates to create int nUnpaddedLength = nGetUnpaddedSeqLength(); // the offset is nGetUnpaddedStartIndex() - 1 instead of // nGetUnpaddedStartIndex() // This allows us to take the part of the experiment that is on the // consensus (nLeft, nRight), and then just use nLeft - 1 and nRight // as subscripts to the array below without worrying about subscript // errors. If we had used nGetUnpaddedStartIndex() as the offset, // then if the experiment hung out over the left edge of the consensus, // nLeft - 1 would give a subscript error. Furthermore, we need to // include the number of single subclone bases at left-most base of the // consensus. pCumNumberOfSingleSubcloneBases_ = new mbtValVector( nUnpaddedLength + 1, nGetUnpaddedStartIndex() - 1, 0 ); // set the element at nGetUnpaddedStartIndex() - 1 to 0 so that // every other element is indeed cumulative from this base (*pCumNumberOfSingleSubcloneBases_)[ pCumNumberOfSingleSubcloneBases_->nGetStartIndex() ] = 0; for( int nUnpaddedConsPos = pCumNumberOfSingleSubcloneBases_->nGetStartIndex() + 1; nUnpaddedConsPos <= pCumNumberOfSingleSubcloneBases_->nGetEndIndex(); ++nUnpaddedConsPos ) { int nSingleSubcloneOnThisBase = ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpaddedConsPos ] < 2 ) ? 1 : 0; (*pCumNumberOfSingleSubcloneBases_)[ nUnpaddedConsPos ] = (*pCumNumberOfSingleSubcloneBases_)[ nUnpaddedConsPos - 1 ] + nSingleSubcloneOnThisBase; } } void Contig :: autoFinishCreateCandidateExperimentsToCoverSingleSubcloneRegions() { // just throw away previous list of experiment candidates // (If consed were to stay up, rather than terminate, this would // be a memory leak. In that case, we would need to find the // experiment candidates that are not also on the experiments list // and delete them.) pCandidateExperiments_ = new mbtPtrOrderedVector; autoFinishCreateTopStrandCandidateExperimentsForSingleSubcloneRegions(); autoFinishCreateBottomStrandCandidateExperimentsForSingleSubcloneRegions(); } void Contig :: autoFinishCreateTopStrandCandidateExperimentsForSingleSubcloneRegions() { pCP->setParametersForSequencingPrimersNotPCRPrimers( true ); int nFirstExperimentPrimer3PrimeEnd = nGetUnpaddedStartIndex() + consedParameters::pGetConsedParameters()->nPrimersMaximumLengthOfAPrimerToUse_ - 1; int nLastExperimentPrimer3PrimeEnd = nGetUnpaddedEndIndex(); for( int nPrimer3PrimeEnd = nFirstExperimentPrimer3PrimeEnd; nPrimer3PrimeEnd <= nLastExperimentPrimer3PrimeEnd; ++nPrimer3PrimeEnd ) { int nFirstBaseOfReadUnpadded = nPrimer3PrimeEnd + 1; int nLikelyAlignedLeftEndUnpadded; int nLikelyAlignedRightEndUnpadded; // this likely aligned portion of the read may not be completely // on the contig. In fact, it might not be on at all. // case in which the high quality portion is completely off // the contig so no single subclone bases are fixed if ( !bGetLikelyAlignedRegionOnContig2( nFirstBaseOfReadUnpadded, true, // top strand not bottom nLikelyAlignedLeftEndUnpadded, nLikelyAlignedRightEndUnpadded ) ) continue; // This will give a rough idea of how many single subclone // bases will be fixed by this experiment // (A precise number must take into account which template // is already covering each base.) int nMaxNumberOfSingleSubcloneBasesFixedByThisRead = (*pCumNumberOfSingleSubcloneBases_)[ nLikelyAlignedRightEndUnpadded ] - (*pCumNumberOfSingleSubcloneBases_)[ nLikelyAlignedLeftEndUnpadded - 1 ]; if ( nMaxNumberOfSingleSubcloneBasesFixedByThisRead < consedParameters::pGetConsedParameters()->nAutoFinishMinNumberOfSingleSubcloneBasesFixedByAnExp_ ) continue; // check that the primer is high enough quality if ( !bCustomPrimerQualityTest( nFirstBaseOfReadUnpadded, true ) ) continue; if ( consedParameters::pGetConsedParameters()->bAutoFinishAllowWholeCloneReads_ ) { maybeCreateExperimentWithCustomPrimer( TOP_STRAND_TERMINATOR_READS, true, // bTopStrandNotBottonStrand, nFirstBaseOfReadUnpadded, true, // bCloneTemplateNotSubcloneTemplate true ); // for single subclone regions } if ( pCP->bAutoFinishAllowCustomPrimerSubcloneReads_ ) { maybeCreateExperimentWithCustomPrimer( TOP_STRAND_TERMINATOR_READS, true, // bTopStrandNotBottomStrand, nFirstBaseOfReadUnpadded, false, // bCloneTemplateNotSubcloneTemplate true ); // for single subclone regions } } } void Contig :: autoFinishCreateBottomStrandCandidateExperimentsForSingleSubcloneRegions() { pCP->setParametersForSequencingPrimersNotPCRPrimers( true ); int nFirstExperimentPrimer3PrimeEnd = nGetUnpaddedEndIndex() - consedParameters::pGetConsedParameters()->nPrimersMaximumLengthOfAPrimerToUse_ + 1; int nLastExperimentPrimer3PrimeEnd = nGetUnpaddedStartIndex(); for( int nPrimer3PrimeEnd = nFirstExperimentPrimer3PrimeEnd; nPrimer3PrimeEnd >= nLastExperimentPrimer3PrimeEnd; --nPrimer3PrimeEnd ) { int nFirstBaseOfReadUnpadded = nPrimer3PrimeEnd - 1; int nLikelyAlignedLeftEndUnpadded; int nLikelyAlignedRightEndUnpadded; if ( !bGetLikelyAlignedRegionOnContig2( nFirstBaseOfReadUnpadded, false, // top strand not bottom nLikelyAlignedLeftEndUnpadded, nLikelyAlignedRightEndUnpadded ) ) continue; // This will give a rough idea of how many single subclone // bases will be fixed by this experiment. (A precise number // must take into account which template is already covering // each base.) int nMaxNumberOfSingleSubcloneBasesFixedByThisRead = (*pCumNumberOfSingleSubcloneBases_)[ nLikelyAlignedRightEndUnpadded ] - (*pCumNumberOfSingleSubcloneBases_)[ nLikelyAlignedLeftEndUnpadded - 1 ]; if ( nMaxNumberOfSingleSubcloneBasesFixedByThisRead < consedParameters::pGetConsedParameters()->nAutoFinishMinNumberOfSingleSubcloneBasesFixedByAnExp_ ) continue; // check that the primer is high enough quality to be made if ( !bCustomPrimerQualityTest( nFirstBaseOfReadUnpadded, false ) // bTopStrandNotBottomStrand ) continue; if ( consedParameters::pGetConsedParameters()->bAutoFinishAllowWholeCloneReads_ ) { maybeCreateExperimentWithCustomPrimer( BOTTOM_STRAND_TERMINATOR_READS, false, // bTopStrandNotBottomStrand, nFirstBaseOfReadUnpadded, true, // bCloneTemplateNotSubcloneTemplate true ); // for single subclone regions } if ( pCP->bAutoFinishAllowCustomPrimerSubcloneReads_ ) { maybeCreateExperimentWithCustomPrimer( BOTTOM_STRAND_TERMINATOR_READS, false, // bTopStrandNotBottomStrand, nFirstBaseOfReadUnpadded, false, // bCloneTemplateNotSubcloneTemplate true ); // for single subclone regions } } // for( int nPrimer3PrimeEnd = nFirstExperiment3PrimeEnd; } void Contig :: autoFinishFindSingleSubcloneRegionsFromExistingReads() { for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead ); if ( pLocFrag->bIsWholeReadUnaligned() ) continue; int nUnpaddedConsPosStart = nUnpaddedIndex( pLocFrag->nGetAlignClipStart() ); int nUnpaddedConsPosEnd = nUnpaddedIndex( pLocFrag->nGetAlignClipEnd() ); nUnpaddedConsPosStart = MAX( nUnpaddedConsPosStart, nGetUnpaddedStartIndex() ); nUnpaddedConsPosEnd = MIN( nUnpaddedConsPosEnd, nGetUnpaddedEndIndex() ); // this takes care of reads that are totally off the contig if ( nUnpaddedConsPosEnd < nUnpaddedConsPosStart ) continue; RWCString soSubcloneName = pLocFrag->soGetTemplateName(); for( int nConsPosUnpadded = nUnpaddedConsPosStart; nConsPosUnpadded <= nUnpaddedConsPosEnd; ++nConsPosUnpadded ) { // if *any* of the reads covering // this region are whole clone reads, then I should // consider the region to be covered by more than one // subclone since, if there is a deletion in the whole clone, // additional reads through the region won't help if ( pLocFrag->bIsWholeCloneTemplateNotSubcloneTemplate() ) { (*pNumberOfSubclonesCoveringEachBase_)[ nConsPosUnpadded ] = nMaxSubcloneCoverage; // in case someone wants more than double subclone coverage } else { if ( !(*pNumberOfSubclonesCoveringEachBase_)[ nConsPosUnpadded ] ) { // there is no read so far covering this position (*pNumberOfSubclonesCoveringEachBase_)[ nConsPosUnpadded ] = 1; (*pWhichSingleSubcloneIsCoveringEachBase_)[ nConsPosUnpadded ] = pLocFrag; } else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nConsPosUnpadded ] == 1 ) { // there is some other read covering this base or // this base *will* be covered by some read that // will be suggested by autofinish // Let's see if it is the same subclone. LocatedFragment* pLocFrag2 = (*pWhichSingleSubcloneIsCoveringEachBase_)[ nConsPosUnpadded ]; if ( pLocFrag2->soGetTemplateName() != soSubcloneName ) { (*pNumberOfSubclonesCoveringEachBase_)[ nConsPosUnpadded ] = 2; } } // else if ( (*pNumberOfSubclonesCoveringEachBase_)... // if there are already 2 subclones covering this region, // don't bother checking for more } // if ( pLocFrag->bIsWholeCloneTemplate() ) ... else } // for( int nConsPosUnpadded = nConsPosStart... } // for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) // Now we have found all the regions covered by existing reads. } void Contig :: autoFinishFindSingleSubcloneRegionsFromSuggestedExperiments() { // Next, add to that the regions that will be covered by the // potential high quality portion of suggested experiments // Suggested experiments are of the following 3 types: // 1) universal primer reads, which won't help at all // 2) custom primer whole clone reads, which will solve every base // regardless of what else covers those bases // 3) custom primer subclone reads, which may or may not solve // a base, depending on what other subclone covers that base for( int nExp = 0; nExp < pChosenExperiments_->length(); ++nExp ) { experiment* pExp = (*pChosenExperiments_)[ nExp ]; // if ( !pExp->bCustomPrimerNotUniversalPrimer_ ) continue; if ( pExp->bIsResequencingRead() ) continue; int nConsPosLeftUnpadded; int nConsPosRightUnpadded; // handle case in which the high quality portion of the // read is completely off the contig if (!bGetLikelyAlignedRegionOnContig( pExp, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) continue; if ( pExp->bIsUniversalPrimerRead() ) { subcloneTTemplate* pSub = pExp->pSubcloneTemplate_; LocatedFragment* pLocFragOfSub = pSub->aReads_[0]; for( int nUnpadded = nConsPosLeftUnpadded; nUnpadded <= nConsPosRightUnpadded; ++nUnpadded ) { if ( !(*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] ) { // there is no read so far covering this position (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = 1; (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ] = pLocFragOfSub; } else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 1 ) { // there is some other read covering this base or // this base is covered by some other read that has been // suggested by Autofinish in this round. // is it the same subclone? LocatedFragment* pLocFrag2 = (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ]; if ( pLocFrag2->soGetTemplateName() != pSub->soTemplateName_ ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = 2; } } // if ( !(*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] ) else } // for( int nUnpadded ... } // if ( pExp->bIsUniveralPrimerRead() ) { else if ( pExp->bCloneTemplateNotSubcloneTemplate_ ) { for( int nUnpadded = nConsPosLeftUnpadded; nUnpadded <= nConsPosRightUnpadded; ++nUnpadded ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = nMaxSubcloneCoverage; } } else { // subclone template // Let's find which subclone it is from primerType* pPrimer = pExp->pCustomPrimer_; for( int nTemplate = 0; ( nTemplate < nNUMBER_OF_TEMPLATES ) && ( nTemplate < consedParameters::pGetConsedParameters()->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ ) && ( pPrimer->pSubcloneTemplate_[ nTemplate ] != NULL ); ++nTemplate ) { subcloneTTemplate* pSubcloneTemplate = pPrimer->pSubcloneTemplate_[ nTemplate ]; RWCString soSubcloneName = pSubcloneTemplate->soTemplateName_; // fix bug found by Darren Grafham, Sanger: // ssss TTTTTTTTTTTT where sss is single subclone and TTT is // a template. In making a bottom strand read, Autofinish used // to ignore the left end of the TTT and just assume that a read // length of bases would be made, thus fixing this single // subclone region int nUnpaddedLeftOnTemplate; int nUnpaddedRightOnTemplate; if (!bIntersect( nConsPosLeftUnpadded, nConsPosRightUnpadded, pSubcloneTemplate->nUnpaddedStart_, pSubcloneTemplate->nUnpaddedEnd_, nUnpaddedLeftOnTemplate, nUnpaddedRightOnTemplate ) ) continue; for( int nUnpadded = nUnpaddedLeftOnTemplate; nUnpadded <= nUnpaddedRightOnTemplate; ++nUnpadded ) { if ( !(*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] ) { // there is no read so far covering this position (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = 1; (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ] = pSubcloneTemplate->aReads_[0]; // I just picked an arbitrary read from the subclone // template as a stand-in for it } else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 1 ) { // there is some other read covering this base // or this base is covered by some read that has // been suggested by autofinish // Let's see if it is the same subclone LocatedFragment* pLocFrag2 = (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ]; if ( pLocFrag2->soGetTemplateName() != soSubcloneName ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = 2; } } // ignore case in which there are already 2 or more // subclones covering this base }// for( nUnpadded ... } // for( int nTemplate = 0; ( nTemplate < nNUMBER_OF_TEMPLATES ) && } // if ( pExp->bCloneTemplateNotSubcloneTemplate_ ) else } // for( int nExp ... } void Contig :: autoFinishChooseSingleSubcloneExperiments() { // should continue to find experiments until there are // absolutely no single subclone regions left in this contig bool bMoreSingleSubcloneRegions = true; while( bMoreSingleSubcloneRegions ) { experiment* pBestExp = pGetBestSingleSubcloneExperiment(); if ( !pBestExp ) { bMoreSingleSubcloneRegions = false; } else { pBestExp->nPurpose_ = nCoverSingleSubcloneBases; findWindowOfMostSingleSubcloneBasesFixed( pBestExp ); pBestExp->chooseExperiment(); pBestExp->print(); pChosenExperiments_->insert( pBestExp ); ConsEd::pGetAssembly()->pAllChosenExperiments_->insert( pBestExp ); adjustSingleSubcloneRegionsInRemainingExperiments( pBestExp ); // the reason this should be called is so that, when we are done // with single subclone regions, we can find any // remaining weak regions and print them out. adjustConsensusDistributionsAndArrays( pBestExp ); } } int nRemainingSingleSubcloneBases = nCountSingleSubcloneBases(); fprintf( pAO, "Number of remaining single subclone bases: %d\n", nCountSingleSubcloneBases() ); if ( nRemainingSingleSubcloneBases > 0 ) { fprintf( pAO, "Could find no experiment to resolve these remaining regions {\n" ); autoFinishPrintSingleSubcloneRegions(); fprintf( pAO, "}\n" ); } } void Contig :: adjustSingleSubcloneRegionsInRemainingExperiments( experiment* pChosenExperiment ) { autoFinishAdjustSingleSubcloneArrays( pChosenExperiment ); for( int nExp = 0; nExp < pCandidateExperiments_->length(); ++nExp ) { experiment* pExpToBeAdjusted = (*pCandidateExperiments_)[ nExp ]; // universal primer reads will almost never help if ( pExpToBeAdjusted->nReadType_ != nWalk ) continue; // if ( !pExpToBeAdjusted->bCustomPrimerNotUniversalPrimer_ ) continue; if ( pExpToBeAdjusted->nState_ != nAvailableExperiment ) continue; if ( ! pChosenExperiment->bOverlaps( pExpToBeAdjusted ) ) continue; if ( pExpToBeAdjusted == pChosenExperiment ) continue; // if reached here, the pExpToBeAdjusted overlaps the experiment // just chosen. Recompute the number of single subclone bases // it will fix. // If the pExpToBeAdjusted already doesn't fix any single subclone bases, // don't bother recomputing anything if ( pExpToBeAdjusted->nSingleSubcloneBasesFixed_ == 0 ) continue; adjustSingleSubcloneBasesFixedByExperiment( pExpToBeAdjusted ); } } void Contig :: adjustSingleSubcloneBasesFixedByExperiment( experiment* pExpToBeAdjusted ) { int nConsPosLeftUnpadded; int nConsPosRightUnpadded; if ( !bGetLikelyAlignedRegionOnContig( pExpToBeAdjusted, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) return; if ( pExpToBeAdjusted->bCloneTemplateNotSubcloneTemplate_ ) { // every single subclone base will be fixed by a whole clone // experiment adjustSingleSubcloneBasesFixedByWholeCloneExperiment( pExpToBeAdjusted, nConsPosLeftUnpadded, nConsPosRightUnpadded ); } // if ( pExpToBeAdjusted->bCloneTemplateNotSubcloneTemplate_ ) { else { // Case of subclone template for primer adjustSingleSubcloneBasesFixedBySubcloneExperiment( pExpToBeAdjusted, nConsPosLeftUnpadded, nConsPosRightUnpadded ); } } void Contig :: adjustSingleSubcloneBasesFixedBySubcloneExperiment( experiment* pExpToBeAdjusted, const int nConsPosLeftUnpadded, const int nConsPosRightUnpadded ) { primerType* pPrimer = pExpToBeAdjusted->pCustomPrimer_; int nSingleSubcloneBasesFixedByThisTemplate[ nNUMBER_OF_TEMPLATES ]; int nTemplate; for( nTemplate = 0; nTemplate < nNUMBER_OF_TEMPLATES; ++nTemplate ) { nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ] = 0; } for( nTemplate = 0; nTemplate < nNUMBER_OF_TEMPLATES; ++nTemplate ) { // There is a problem here--each different template may fix a // different number of errors. In effect, each template is a // different experiment. More and more I am thinking that // single subclone coverage should be done in a separate // pass. The way I would do it differently is that I would // split out the different templates. I am concerned about // the case in which the top most quality templates do no // good for single subclone coverage. Thus it is a waste of // money to do them. Perhaps we could handle this as a // special case--if a template does no good, it isn't done. // But that would still leave the possibility of doing some // templates that only fix a few single subclone regions due // to some template of lower quality that fixes a great // number of them. Perhaps for single subclone regions I // could split the experiment list into a much longer list // (up to 5x as long) in which each experiment only has 1 // template. This has the disadvantage in that several // templates may each work to solve a single subclone region // and you might want to pick more than one in order to be // sure of having one that works. Alternatively, I could // score the toal experiment based on how well the best // template works. This has the problem that the group might // not do that experiment, especially if it is near the end // of the list. How about if I score the experiment based on // the best (or all) templates within the list of templates // that are in the top range of templates that the user // specified as making? What about if the last template is // the one that will be most useful in solving a single // subclone region? Perhaps the best thing is to asterisk // the template that is the one that is best. Alternatively, // we could keep track of a score for each of the templates. // Each experiment could have up to 5 different counts of // single subclone bases fixed. And thus there should be up // to 5 different single subclone bases per cost. There are // competing objectives with picking templates--the one with // the most single subclone bases fixed vs the one that has // the highest quality. I think that we should go with Pat's // quality cut-off idea--just don't use a template at all if // it has too poor a quality. If 2 templates are both above // the quality cut-off, then use the one that fixes the most // single subclone bases. OK, so we could reorder the // subcloneTemplates array. (We may need to reorder it more // than once. Shall we remove a template entirely if it // fixes less than a minimum number of subclone bases? // Certainly if it fixes *no* single subclone bases, it // should be removed. Shall the finishers be informed if the // 2nd fixes al subcloneTTemplate* pSubcloneTemplate = pPrimer->pSubcloneTemplate_[ nTemplate ]; if ( ! pSubcloneTemplate ) continue; RWCString soSubcloneName = pSubcloneTemplate->soTemplateName_; // fix bug found by Darren Grafham, Sanger: // ssss TTTTTTTTTTTT where sss is single subclone and TTT is // a template. In making a bottom strand read, Autofinish used // to ignore the left end of the TTT and just assume that a read // length of bases would be made, thus fixing this single // subclone region int nUnpaddedLeftOnTemplate; int nUnpaddedRightOnTemplate; if (!bIntersect( nConsPosLeftUnpadded, nConsPosRightUnpadded, pSubcloneTemplate->nUnpaddedStart_, pSubcloneTemplate->nUnpaddedEnd_, nUnpaddedLeftOnTemplate, nUnpaddedRightOnTemplate ) ) continue; for( int nUnpadded = nUnpaddedLeftOnTemplate; nUnpadded <= nUnpaddedRightOnTemplate; ++nUnpadded ) { if ( !(*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] ) ++nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ]; else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 1 ) { LocatedFragment* pLocFrag = (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ]; if ( pLocFrag->soGetTemplateName() != soSubcloneName ) { ++nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ]; } } } } // for( int nTemplate = ... // sort the templates based on how many single subclone bases // each solves sortTemplatesByHowManySingleSubcloneBasesEachFixes( pPrimer, nSingleSubcloneBasesFixedByThisTemplate ); // make a pass through the array eliminating any template // that doesn't solve any single subclone bases eliminateTemplatesThatDoNotSolveAnySingleSubcloneBases( pPrimer, nSingleSubcloneBasesFixedByThisTemplate ); // the # of ssb's fixed by the experiment is defined to be the // greatest # of ssb's fixed by any template of the experiment pExpToBeAdjusted->nSingleSubcloneBasesFixed_ = nSingleSubcloneBasesFixedByThisTemplate[ 0 ]; pExpToBeAdjusted->dSingleSubcloneBasesFixedPerDollar_ = ( (double) pExpToBeAdjusted->nSingleSubcloneBasesFixed_ ) / ( (double) pExpToBeAdjusted->fCost_ ); } // if ( pExpToBeAdjusted->bCloneTemplateNotSubcloneTemplate_ ) else void Contig :: adjustSingleSubcloneBasesFixedByWholeCloneExperiment( experiment* pExpToBeAdjusted, const int nConsPosLeftUnpadded, const int nConsPosRightUnpadded ) { int nNumberOfSingleSubcloneBasesFixed = 0; for( int nUnpadded = nConsPosLeftUnpadded; nUnpadded <= nConsPosRightUnpadded; ++nUnpadded ) { if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] < 2 ) ++nNumberOfSingleSubcloneBasesFixed; } pExpToBeAdjusted->nSingleSubcloneBasesFixed_ = nNumberOfSingleSubcloneBasesFixed; pExpToBeAdjusted->dSingleSubcloneBasesFixedPerDollar_ = ( (double) pExpToBeAdjusted->nSingleSubcloneBasesFixed_ ) / ( (double) pExpToBeAdjusted->fCost_ ); } experiment* Contig :: pGetBestSingleSubcloneExperiment() { experiment* pBestExperiment = NULL; bool bTryAgain = false; do { pBestExperiment = pGetBestSingleSubcloneExperimentButMightHaveBeenTriedBefore(); if ( pBestExperiment ) { if ( pBestExperiment->bHasThisExperimentOrOneLikeItBeenTriedBefore() ) { pBestExperiment->nState_ = nRejectedExperiment; bTryAgain = true; } else bTryAgain = false; } else // case in which there are no more available experiments bTryAgain = false; } while ( bTryAgain ); return( pBestExperiment ); } experiment* Contig :: pGetBestSingleSubcloneExperimentButMightHaveBeenTriedBefore() { experiment* pBestSoFar = NULL; double dSingleSubcloneBasesFixedPerDollarBestSoFar = 0.0; for( int nCan = 0; nCan < pCandidateExperiments_->length(); ++nCan ) { experiment* pExp = (*pCandidateExperiments_)[ nCan ]; if ( pExp->nState_ == nChosenExperiment ) continue; if ( pExp->nState_ == nRejectedExperiment ) continue; if ( pExp->nSingleSubcloneBasesFixed_ == 0 ) continue; if ( pExp->dSingleSubcloneBasesFixedPerDollar_ > dSingleSubcloneBasesFixedPerDollarBestSoFar ) { // check that, if the read is a subclone custom primer read, // it isn't too close to another subclone custom primer read // on the same strand that has already been chosen if (pCP->bAutoFinishDoNotAllowSubcloneCustomPrimerReadsCloseTogether_ ) { if ( pExp->bIsCustomPrimerSubcloneRead() && bThereIsAnotherSubcloneCustomPrimerReadTooCloseOnTheSameStrand( pExp ) ) { // this experiment can never be considered again in this // round of autofinish, so reject it pExp->nState_ = nRejectedExperiment; continue; } } // if (pCP->bAutoFinishDoNotAllowSubcloneCustomPrimer... pBestSoFar = pExp; dSingleSubcloneBasesFixedPerDollarBestSoFar = pExp->dSingleSubcloneBasesFixedPerDollar_; } } // if ( pBestSoFar == NULL ) // printf( "no best single subclone experiment\n" ); // Note: This might return NULL return( pBestSoFar ); } void Contig :: autoFinishAdjustSingleSubcloneArrays( experiment* pChosenExperiment ) { int nConsPosLeftUnpadded; int nConsPosRightUnpadded; if ( !bGetLikelyAlignedRegionOnContig( pChosenExperiment, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) return; // case in which the high quality region is not on the contig if ( pChosenExperiment->bCloneTemplateNotSubcloneTemplate_ ) autoFinishAdjustSingleSubcloneArraysDueToPickingWholeCloneExperiment( pChosenExperiment ); else autoFinishAdjustSingleSubcloneArraysDueToPickingSubcloneExperiment( pChosenExperiment ); } void Contig :: autoFinishAdjustSingleSubcloneArraysDueToPickingSubcloneExperiment( experiment* pChosenExperiment ) { int nConsPosLeftUnpadded; int nConsPosRightUnpadded; if (!bGetLikelyAlignedRegionOnContig( pChosenExperiment, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) return; // I'm going to assume that the lab will only do some of the templates primerType* pPrimer = pChosenExperiment->pCustomPrimer_; for( int nTemplate = 0; ( nTemplate < nNUMBER_OF_TEMPLATES ) && ( nTemplate < consedParameters::pGetConsedParameters()->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ ) && ( pPrimer->pSubcloneTemplate_[ nTemplate ] != NULL ); ++nTemplate ) { subcloneTTemplate* pSubcloneTemplate = pPrimer->pSubcloneTemplate_[ nTemplate ]; RWCString soSubcloneName = pSubcloneTemplate->soTemplateName_; // fix bug found by Darren Grafham, Sanger: // ssss TTTTTTTTTTTT where sss is single subclone and TTT is // a template. In making a bottom strand read, Autofinish used // to ignore the left end of the TTT and just assume that a read // length of bases would be made, thus fixing this single // subclone region int nUnpaddedLeftOnTemplate; int nUnpaddedRightOnTemplate; if (!bIntersect( nConsPosLeftUnpadded, nConsPosRightUnpadded, pSubcloneTemplate->nUnpaddedStart_, pSubcloneTemplate->nUnpaddedEnd_, nUnpaddedLeftOnTemplate, nUnpaddedRightOnTemplate ) ) continue; for( int nUnpadded = nUnpaddedLeftOnTemplate; nUnpadded <= nUnpaddedRightOnTemplate; ++nUnpadded ) { if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 0 ) { // there is no aligned read so far covering this position (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = 1; (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ] = pSubcloneTemplate->aReads_[0]; // just picked an arbitrary read to represent the template } else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 1 ) { // there is some other read covering this base // or this base is covered by some read that has // been suggested by autofinish // Let's see if it is the same subclone LocatedFragment* pLocFrag2 = (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ]; if ( pLocFrag2->soGetTemplateName() != soSubcloneName ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = 2; } } // ignore case in which there are already 2 or more subclones // covering this base } // for( int nUnpadded ... } // for( int nTemplate... } void Contig :: autoFinishAdjustSingleSubcloneArraysDueToPickingWholeCloneExperiment( experiment* pChosenExperiment ) { int nConsPosLeftUnpadded; int nConsPosRightUnpadded; if (!bGetLikelyAlignedRegionOnContig( pChosenExperiment, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) return; for( int nUnpadded = nConsPosLeftUnpadded; nUnpadded <= nConsPosRightUnpadded; ++nUnpadded ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = nMaxSubcloneCoverage; } } int Contig :: nCountSingleSubcloneBases() { int nNumberOfSingleSubcloneBases = 0; for( int nUnpadded = nGetUnpaddedStartIndex(); nUnpadded <= nGetUnpaddedEndIndex(); ++nUnpadded ) { if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] < 2 ) ++nNumberOfSingleSubcloneBases; } return( nNumberOfSingleSubcloneBases ); } void Contig :: autoFinishPrintSingleSubcloneRegions() { bool bInSingleSubcloneRegion = false; int nSingleSubcloneRegionStart; for( int nUnpadded = nGetUnpaddedStartIndex(); nUnpadded <= nGetUnpaddedEndIndex(); ++nUnpadded ) { if ( bInSingleSubcloneRegion ) { if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] >= 2 ) { bInSingleSubcloneRegion = false; int nSingleSubcloneRegionEnd = nUnpadded - 1; fprintf( pAO, " Single subclone region from %d to %d (%d bp)\n", nSingleSubcloneRegionStart, nSingleSubcloneRegionEnd, nSingleSubcloneRegionEnd - nSingleSubcloneRegionStart + 1 ); } } else { if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] < 2 ) { bInSingleSubcloneRegion = true; nSingleSubcloneRegionStart = nUnpadded; } } } // handle case in which single subclone region goes to the end if ( bInSingleSubcloneRegion ) { int nSingleSubcloneRegionEnd = nGetUnpaddedEndIndex(); fprintf( pAO, " Single subclone region from %d to %d (%d bp)\n", nSingleSubcloneRegionStart, nSingleSubcloneRegionEnd, nSingleSubcloneRegionEnd - nSingleSubcloneRegionStart + 1 ); } } void Contig :: orderTemplatesAndSetSingleSubcloneValues( experiment* pExp ) { int nConsPosLeftUnpadded; int nConsPosRightUnpadded; if ( !bGetLikelyAlignedRegionOnContig( pExp, nConsPosLeftUnpadded, nConsPosRightUnpadded ) ) { pExp->nSingleSubcloneBasesFixed_ = 0; pExp->dSingleSubcloneBasesFixedPerDollar_ = 0.0; return; } if ( pExp->bCloneTemplateNotSubcloneTemplate_ ) { // note that when counting single subclone bases this way, // if there is a single subclone base that has no aligned // read covering it (Phil on 12/23/98 said this is possible), // then this is counted as fixing a single base even though // it would take 2 M13 reads to cover the same base. // Thus there is a slight bias in favor of M13 reads. pExp->nSingleSubcloneBasesFixed_ = (*pCumNumberOfSingleSubcloneBases_)[ nConsPosRightUnpadded ] - (*pCumNumberOfSingleSubcloneBases_)[ nConsPosLeftUnpadded - 1 ]; pExp->dSingleSubcloneBasesFixedPerDollar_ = ( (double) pExp->nSingleSubcloneBasesFixed_ ) / ( (double) pExp->fCost_ ); } else { primerType* pPrimer = pExp->pCustomPrimer_; int nSingleSubcloneBasesFixedByThisTemplate[ nNUMBER_OF_TEMPLATES ]; int nTemplate; for( nTemplate = 0; nTemplate < nNUMBER_OF_TEMPLATES; ++nTemplate ) { nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ] = 0; } for( nTemplate = 0; nTemplate < nNUMBER_OF_TEMPLATES; ++nTemplate ) { subcloneTTemplate* pSubcloneTemplate = pPrimer->pSubcloneTemplate_[ nTemplate ]; if ( ! pSubcloneTemplate ) continue; RWCString soSubcloneName = pSubcloneTemplate->soTemplateName_; // fix bug found by Darren Grafham, Sanger: // ssss TTTTTTTTTTTT where sss is single subclone and TTT is // a template. In making a bottom strand read, Autofinish used // to ignore the left end of the TTT and just assume that a read // length of bases would be made, thus fixing this single // subclone region int nUnpaddedLeftOnTemplate; int nUnpaddedRightOnTemplate; if (!bIntersect( nConsPosLeftUnpadded, nConsPosRightUnpadded, pSubcloneTemplate->nUnpaddedStart_, pSubcloneTemplate->nUnpaddedEnd_, nUnpaddedLeftOnTemplate, nUnpaddedRightOnTemplate ) ) continue; for( int nUnpadded = nUnpaddedLeftOnTemplate; nUnpadded <= nUnpaddedRightOnTemplate; ++nUnpadded ) { if ( !(*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] ) { // just an approximation since it will take 2 M13 reads to // completely fix the single subclone base if ( nUnpadded % 2 == 0 ) ++nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ]; } else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 1 ) { LocatedFragment* pLocFrag = (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ]; if ( pLocFrag->soGetTemplateName() != soSubcloneName ) { ++nSingleSubcloneBasesFixedByThisTemplate[ nTemplate ]; } } } // for( int nUnpadded ... } // for( int nTemplate ... sortTemplatesByHowManySingleSubcloneBasesEachFixes( pPrimer, nSingleSubcloneBasesFixedByThisTemplate ); eliminateTemplatesThatDoNotSolveAnySingleSubcloneBases( pPrimer, nSingleSubcloneBasesFixedByThisTemplate ); // use the template that fixes the most single subclone bases pExp->nSingleSubcloneBasesFixed_ = nSingleSubcloneBasesFixedByThisTemplate[0]; pExp->dSingleSubcloneBasesFixedPerDollar_ = ( (double) pExp->nSingleSubcloneBasesFixed_ ) / ( (double) pExp->fCost_ ); } // if ( pExp->bCloneTemplateNotSubcloneTemplate_ ) { else } void Contig :: autoFinishExamineDoNotFinishTagsForSingleSubcloneRegions() { // look for do-not-finish tags in both reads and in the consensus // First look in the reads. for( int nRead = 0; nRead < nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead ); for( int nTag = 0; nTag < pLocFrag->nGetNumberOfTags(); ++nTag ) { tag* pTag = pLocFrag->pGetTag( nTag ); if ( pTag->soGetTagType() == "cloneEnd" ) dealWithCloneEndTagForSingleSubcloneRegions( pTag ); else if ( consedParameters::pGetConsedParameters()->aAutoFinishTagsToNotFinish_.index( pTag->soGetTagType() ) == RW_NPOS ) continue; int nUnpaddedConsPosStart = nUnpaddedIndex( pTag->nPaddedConsPosStart_ ); int nUnpaddedConsPosEnd = nUnpaddedIndex( pTag->nPaddedConsPosEnd_ ); // read tags might be on a read that protrudes beyond the // end of the consensus int nOverlapLeft; int nOverlapRight; if ( bIntersect( nUnpaddedConsPosStart, nUnpaddedConsPosEnd, pNumberOfSubclonesCoveringEachBase_->nGetStartIndex(), pNumberOfSubclonesCoveringEachBase_->nGetEndIndex(), nOverlapLeft, nOverlapRight ) ) { for( int nUnpadded = nOverlapLeft; nUnpadded <= nOverlapRight; ++nUnpadded ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = nMaxSubcloneCoverage; } } } } // now look for consensus tags for( int nConsensusTag = 0; nConsensusTag < nGetNumberOfTags(); ++nConsensusTag ) { tag* pTag = pGetTag( nConsensusTag ); if ( pTag->soGetTagType() == "cloneEnd" ) dealWithCloneEndTagForSingleSubcloneRegions( pTag ); else if ( consedParameters::pGetConsedParameters()->aAutoFinishTagsToNotFinish_.index( pTag->soGetTagType() ) == RW_NPOS ) continue; int nUnpaddedConsPosStart = nUnpaddedIndex( pTag->nPaddedConsPosStart_ ); int nUnpaddedConsPosEnd = nUnpaddedIndex( pTag->nPaddedConsPosEnd_ ); for( int nUnpadded = nUnpaddedConsPosStart; nUnpadded <= nUnpaddedConsPosEnd; ++nUnpadded ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = nMaxSubcloneCoverage; } } } void Contig :: dealWithCloneEndTagForSingleSubcloneRegions( tag* pTag ) { int nUnpaddedLeft; int nUnpaddedRight; if ( pTag->soMiscData_ == "->" ) { nUnpaddedLeft = pNumberOfSubclonesCoveringEachBase_->nGetStartIndex(); nUnpaddedRight = nUnpaddedIndex( pTag->nPaddedConsPosEnd_ ); } else if ( pTag->soMiscData_ == "<-" ) { nUnpaddedLeft = nUnpaddedIndex( pTag->nPaddedConsPosStart_ ); nUnpaddedRight = pNumberOfSubclonesCoveringEachBase_->nGetEndIndex(); } else return; // corrupted cloneEnd tag for( int nUnpadded = nUnpaddedLeft; nUnpadded <= nUnpaddedRight; ++nUnpadded ) { (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] = nMaxSubcloneCoverage; } } static int nWindowOfSingleSubcloneBasesFixed[10]; void Contig :: findWindowOfMostSingleSubcloneBasesFixed( experiment* pExp ) { int nSingleSubcloneBasesFixedInWindowOfMostFixed = 0; int nSingleSubcloneBasesFixedInCurrentWindow = 0; int nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight; int nPointerToLastAddedBase = -1; int nUnpaddedLeftOnContig; int nUnpaddedRightOnContig; if ( !bIntersect( pNumberOfSubclonesCoveringEachBase_->nGetStartIndex(), pNumberOfSubclonesCoveringEachBase_->nGetEndIndex(), pExp->nUnpaddedLeft_, pExp->nUnpaddedRight_, nUnpaddedLeftOnContig, nUnpaddedRightOnContig ) ) return; // for the case of using a subclone walk, we will just consider the // number of single subclone bases fixed by the first template. I'm // being a little sloppy here, but this window is only used for people // looking at the results, and has no effect on which reads are chosen. RWCString soSubcloneName; if ( pExp->bIsCustomPrimerSubcloneRead() ) { subcloneTTemplate* pSub = pExp->pCustomPrimer_->pSubcloneTemplate_[0]; if (!bIntersect( nUnpaddedLeftOnContig, nUnpaddedRightOnContig, pSub->nUnpaddedStart_, pSub->nUnpaddedEnd_, nUnpaddedLeftOnContig, nUnpaddedRightOnContig ) ) return; soSubcloneName = pSub->soTemplateName_; } else soSubcloneName = ""; int n10thBaseUnpadded = nUnpaddedLeftOnContig + 9; for( int nUnpadded = nUnpaddedLeftOnContig; nUnpadded <= nUnpaddedRightOnContig; ++nUnpadded ) { int nNumberOfSingleSubcloneBasesFixedAtBase = 0; if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 0 ) nNumberOfSingleSubcloneBasesFixedAtBase = 1; else if ( (*pNumberOfSubclonesCoveringEachBase_)[ nUnpadded ] == 1 ) { if ( (*pWhichSingleSubcloneIsCoveringEachBase_)[ nUnpadded ]->soGetTemplateName() != soSubcloneName ) nNumberOfSingleSubcloneBasesFixedAtBase = 1; } if ( nUnpadded <= n10thBaseUnpadded ) { ++nPointerToLastAddedBase; nWindowOfSingleSubcloneBasesFixed[ nPointerToLastAddedBase ] = nNumberOfSingleSubcloneBasesFixedAtBase; nSingleSubcloneBasesFixedInCurrentWindow += nNumberOfSingleSubcloneBasesFixedAtBase; if ( nUnpadded == n10thBaseUnpadded ) { nSingleSubcloneBasesFixedInWindowOfMostFixed = nSingleSubcloneBasesFixedInCurrentWindow; nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight = nUnpadded; } } else { ++nPointerToLastAddedBase; if ( nPointerToLastAddedBase > 9 ) nPointerToLastAddedBase = 0; nSingleSubcloneBasesFixedInCurrentWindow -= nWindowOfSingleSubcloneBasesFixed[ nPointerToLastAddedBase ]; nSingleSubcloneBasesFixedInCurrentWindow += nNumberOfSingleSubcloneBasesFixedAtBase; nWindowOfSingleSubcloneBasesFixed[ nPointerToLastAddedBase ] = nNumberOfSingleSubcloneBasesFixedAtBase; if ( nSingleSubcloneBasesFixedInCurrentWindow > nSingleSubcloneBasesFixedInWindowOfMostFixed ) { nSingleSubcloneBasesFixedInWindowOfMostFixed = nSingleSubcloneBasesFixedInCurrentWindow; nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight = nUnpadded; } } } pExp->nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight_ = nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight; pExp->nSingleSubcloneBasesFixedInWindow_ = nSingleSubcloneBasesFixedInWindowOfMostFixed; }