/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "experiment.h" #include "primerType.h" #include using namespace std; #include "consedParameters.h" #include "bIsDyePrimerNotDyeTerminator.h" #include "contig.h" #include #include "autoFinishExpTag.h" #include "abs.h" #include "consed.h" #include "fileDefines.h" bool experiment :: bOverlaps( experiment* pExp ) { return( bIntervalsIntersect( nUnpaddedLeft_, nUnpaddedRight_, pExp->nUnpaddedLeft_, pExp->nUnpaddedRight_ ) ); } void experiment :: print() { bool bAddedDummyExpID = false; // to allow experiments to be printed out during debugging if ( aExpID_.length() == 0 ) { aExpID_.insert( -1 ); bAddedDummyExpID = true; } fprintf( pAO, "EXPERIMENT {\n" ); if ( ( nReadType_ == nUniversalReverse ) && (nPurpose_ == nFlankGap ) ) { fprintf( pAO, "reverse univeral primer read\n" ); fprintf( pAO, "for flanking gap on %s end of contig\n", ( nExtendsIntoWhichGap_ == nLeftGap ) ? "left" : "right" ); assert( aExpID_.length() == 1 ); fprintf( pAO, "expID_and_template: %d %s\n", aExpID_[ 0 ], (char*) pSubcloneTemplate_->soTemplateName_.data() ); fprintf( pAO, "existing reads: "); for( int nRead = 0; nRead < pSubcloneTemplate_->aReads_.entries(); ++nRead ) { LocatedFragment* pLocFrag = pSubcloneTemplate_->aReads_[ nRead ]; if ( nRead > 0 ) fprintf( pAO, " " ); fprintf( pAO, "%s (%s in %s) from %d to %d\n", (char*) pLocFrag->soGetName().data(), ( pLocFrag->bComp() ? "bottom strand" : "top strand" ), (char*) pLocFrag->pContig_->soGetName().data(), pLocFrag->nGetAlignStartUnpadded(), pLocFrag->nGetAlignEndUnpadded() ); if ( pLocFrag->pGetContig() == pContig_ ) { int nNumberOfUnpaddedBasesFromEndOfConsensus; if ( nExtendsIntoWhichGap_ == nLeftGap ) { nNumberOfUnpaddedBasesFromEndOfConsensus = pLocFrag->nGetAlignEndUnpadded() - pContig_->nGetUnpaddedStartIndex(); } else { nNumberOfUnpaddedBasesFromEndOfConsensus = pContig_->nGetUnpaddedEndIndex() - pLocFrag->nGetAlignStartUnpadded(); } fprintf( pAO, "read starts %d bases from %s end of consensus\n", nNumberOfUnpaddedBasesFromEndOfConsensus, ( nExtendsIntoWhichGap_ == nLeftGap ) ? "left" : "right" ); } } // for( int nRead ... fprintf( pAO, "}\n\n" ); return; } fprintf( pAO, "%s %s ", ( bTopStrandNotBottomStrand_ ? "topStrand" : "bottomStrand" ), ( bIsDyePrimerNotDyeTerminator( ucStrandAndChemistry_ ) ? "dyePrimer" : "dyeTerm" ) ); if ( bIsUniversalPrimerRead() ) { if ( nReadType_ == nUniversalForward ) fprintf( pAO, "universalPrimer fwd " ); else if ( nReadType_ == nUniversalReverse ) fprintf( pAO, "universalPrimer rev " ); if ( bDeNovoUniversalPrimerRead_ ) fprintf( pAO, "de_novo\n" ); else fprintf( pAO, "resequence\n" ); } else fprintf( pAO, "customPrimer\n" ); if ( nPurpose_ == nCoverSingleSubcloneBases ) fprintf( pAO, "%s %d single subclone bases fixed in region from %d to %d fixing %d single subclone bases from %d to %d", (char*) pContig_->soGetName().data(), nSingleSubcloneBasesFixed_, nUnpaddedLeft_, nUnpaddedRight_, nSingleSubcloneBasesFixedInWindow_, nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight_ - 9, nWindowOfMostSingleSubcloneBasesFixedUnpaddedRight_ ); else fprintf( pAO, "%s %.2f errors fixed in region from %d to %d fixing %.2f errors from %d to %d", (char*) pContig_->soGetName().data(), dErrorsFixed_, nUnpaddedLeft_, nUnpaddedRight_, dErrorsFixedInWindow_, nWindowOfMostErrorsFixedUnpaddedRight_ - 9, nWindowOfMostErrorsFixedUnpaddedRight_ ); if ( nExtendsIntoWhichGap_ == nLeftGap ) { fprintf( pAO, " (extends %d into gap on left)", pContig_->nGetUnpaddedStartIndex() - nUnpaddedLeft_ ); } else if ( nExtendsIntoWhichGap_ == nRightGap ) { fprintf( pAO, " (extends %d into gap on right)", nUnpaddedRight_ - pContig_->nGetUnpaddedEndIndex() ); } fprintf( pAO, "\n" ); if ( nPurpose_ == nCoverSingleSubcloneBases ) { fprintf( pAO, "Single subclone bases fixed per dollar: %.2f\n", dSingleSubcloneBasesFixedPerDollar_ ); } else { fprintf( pAO, "Errors fixed per dollar: %.2f\n", dErrorsFixedPerDollar_ ); fprintf( pAO, "Errors fixed just in consensus: %.2f\n", dErrorsFixedJustInConsensus_ ); } if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { fprintf( pAO, "primer: " ); for( int n = 0; n < pCustomPrimer_->nUnpaddedLength_; ++n ) putc( pCustomPrimer_->szPrimer_[n], pAO ); fprintf( pAO, " from %d to %d id: %s\n", pCustomPrimer_->nUnpaddedStart_, pCustomPrimer_->nUnpaddedEnd_, (const char*) ConsEd::pGetAssembly()->soGetOligoNameFromOligoNumber( nPrimerID_ ) ); if ( bCloneTemplateNotSubcloneTemplate_ ) { assert( aExpID_.length() == 1 ); fprintf( pAO, "expID_and_template: %d whole clone\n", aExpID_[ 0 ] ); } else { fprintf( pAO, "expID_and_template:" ); RWCString soExpIDsAndTemplates; for( int n = 0; n < aExpID_.length(); ++n ) { if ( n > 0 ) soExpIDsAndTemplates += ", "; else soExpIDsAndTemplates += " "; soExpIDsAndTemplates += RWCString( (long) aExpID_[ n ] ); soExpIDsAndTemplates += " "; soExpIDsAndTemplates += pCustomPrimer_->pSubcloneTemplate_[n]->soTemplateName_; if ( pCP->bAutoFinishPrintForwardOrReverseStrandWhenPrintingSubcloneTemplatesForCustomPrimerReads_ ) { bool bForwardNotReverseStrand = pCustomPrimer_->pSubcloneTemplate_[ n ]->bIsForwardNotReverseStrand( bTopStrandNotBottomStrand_, pContig_ ); soExpIDsAndTemplates += ( bForwardNotReverseStrand ? " (fwd) " : " (rev) " ); } } // for( int n = 0; n < aExpID_.length(); ++n ) { fprintf( pAO, "%s\n", (char*) soExpIDsAndTemplates.data() ); } fprintf( pAO, "melting temp: %d\n", (int) (pCustomPrimer_->dMeltingTemperature_ + .5 ) ); } // if ( nReadType_ == nWalk || nReadType_ == nPCREND ) { ... else { // so read is a resequencing univ primer or a de novo univ primer read assert( aExpID_.length() == 1 ); fprintf( pAO, "expID_and_template: %d %s\n", aExpID_[ 0 ], (char*) pSubcloneTemplate_->soTemplateName_.data() ); // this is primarily for the benefit of calling reverses, // but may be useful when calling forwards as well fprintf( pAO, "existing reads: "); for( int nRead = 0; nRead < pSubcloneTemplate_->aReads_.entries(); ++nRead ) { LocatedFragment* pLocFrag = pSubcloneTemplate_->aReads_[ nRead ]; if ( nRead > 0 ) fprintf( pAO, " " ); fprintf( pAO, "%s (%s in %s) from %d to %d\n", (char*) pLocFrag->soGetName().data(), ( pLocFrag->bComp() ? "bottom strand" : "top strand" ), (char*) pLocFrag->pContig_->soGetName().data(), pLocFrag->nGetAlignStartUnpadded(), pLocFrag->nGetAlignEndUnpadded() ); // when we integrate the flanking gaps use of reverses // into this code, then we should also check for // nPurpose_ == nFlankGap if ( nPurpose_ == nExtendContig ) { if ( pLocFrag->pGetContig() == pContig_ ) { int nNumberOfUnpaddedBasesFromEndOfConsensus; if ( nExtendsIntoWhichGap_ == nLeftGap ) { nNumberOfUnpaddedBasesFromEndOfConsensus = pLocFrag->nGetAlignEndUnpadded() - pContig_->nGetUnpaddedStartIndex(); } else { nNumberOfUnpaddedBasesFromEndOfConsensus = pContig_->nGetUnpaddedEndIndex() - pLocFrag->nGetAlignStartUnpadded(); } fprintf( pAO, "read starts %d bases from %s end of consensus\n", nNumberOfUnpaddedBasesFromEndOfConsensus, ( nExtendsIntoWhichGap_ == nLeftGap ) ? "left" : "right" ); } } // if ( nPurpose_ ... } // for( int nRead ... } // if ( bCustomPrimerNotUniversalPrimer_ ) else if ( pCP->bAutoFinishSuggestSpecialChemistryForRunsAndStops_ ) { if ( bCrossesRun_ || bCrossesStop_ ) { fprintf( pAO, "special chem:" ); if ( bCrossesRun_ ) fprintf( pAO, " run" ); if ( bCrossesStop_ ) fprintf( pAO, " stop" ); fprintf( pAO, "\n" ); } } fprintf( pAO, "}\n\n" ); if ( bAddedDummyExpID ) aExpID_.removeAt( aExpID_.length() - 1 ); } void experiment :: setGotoItemDescription( char* szDescription ) { char* szState; if ( nState_ == nAvailableExperiment ) szState = "avl"; else if ( nState_ == nChosenExperiment ) szState = "sel"; else if ( nState_ == nRejectedExperiment ) szState = "rej"; else assert( false ); sprintf( szDescription, "%5d %3s %6.1f %7d %7d %s %7d ", nPriorityRanking_, szState, dErrorsFixed_, nUnpaddedLeft_, nUnpaddedRight_, ( bTopStrandNotBottomStrand_ ? "top" : "bot" ), nFirstBaseOfReadUnpadded_ ); strncat( szDescription, pCustomPrimer_->szPrimer_, pCustomPrimer_->nUnpaddedLength_ ); } static RWCString soTagData( (RWSize_2T) 1000 ); void experiment :: writeAutoFinishExpTagToAceFile( ofstream& ofsAceFile ) { // I think the way to do this is to convert the experiment to // an autoFinishExpTag and then to have that tag write itself. // That way we only use 1 code to write. int nConsPosOfTag; int nUnpaddedOffsetOfBeginningOfReadFromTag; if ( pContig_->nGetUnpaddedStartIndex() <= nFirstBaseOfReadUnpadded_ && nFirstBaseOfReadUnpadded_ <= pContig_->nGetUnpaddedEndIndex() ) { // normal case in which the beginning of the read is on the contig so // everything is fine. nConsPosOfTag = pContig_->nPaddedIndexFast( nFirstBaseOfReadUnpadded_ ); nUnpaddedOffsetOfBeginningOfReadFromTag = 0; } else { if ( nFirstBaseOfReadUnpadded_ < pContig_->nGetUnpaddedStartIndex() ) { // notice that nConsPosOfTag is given in padded coordinates int nUnpaddedConsPosOfTag; if ( pContig_->bIsThereAHighQualitySegment() ) nUnpaddedConsPosOfTag = pContig_->nUnpaddedHighQualitySegmentStart_; else nUnpaddedConsPosOfTag = pContig_->nGetUnpaddedStartIndex(); nConsPosOfTag = pContig_->nPaddedIndexFast( nUnpaddedConsPosOfTag ); // // ----------------------- // ^ ^ ^ // exp start tag // of contig // // <-- offset -> // suppose // -3 -2 -1 0 1 2 3 // ^ ^ // exp tag // offset = 6 nUnpaddedOffsetOfBeginningOfReadFromTag = nUnpaddedConsPosOfTag - nFirstBaseOfReadUnpadded_; } else if ( pContig_->nGetUnpaddedEndIndex() < nFirstBaseOfReadUnpadded_ ) { int nUnpaddedConsPosOfTag; // notice that nConsPosOfTag is given in padded coordinates if ( pContig_->bIsThereAHighQualitySegment() ) nUnpaddedConsPosOfTag = pContig_->nUnpaddedHighQualitySegmentEnd_; else nUnpaddedConsPosOfTag = pContig_->nGetUnpaddedEndIndex(); nConsPosOfTag = pContig_->nPaddedIndexFast( nUnpaddedConsPosOfTag ); nUnpaddedOffsetOfBeginningOfReadFromTag = nFirstBaseOfReadUnpadded_ - nUnpaddedConsPosOfTag; } } soTagData = ""; if ( ( pContig_->bComplementedFromWayPhrapCreated_ && bTopStrandNotBottomStrand_ ) || ( !pContig_->bComplementedFromWayPhrapCreated_ && !bTopStrandNotBottomStrand_ ) ) { soTagData = "C\n"; } else { soTagData = "U\n"; } soTagData += "purpose: "; if ( nPurpose_ == nExtendContig ) { soTagData += "extend contig "; int nBasesExtended; if ( nExtendsIntoWhichGap_ == nLeftGap ) { nBasesExtended = pContig_->nGetUnpaddedStartIndex() - nUnpaddedLeft_; } else if ( nExtendsIntoWhichGap_ == nRightGap ) { nBasesExtended = nUnpaddedRight_ - pContig_->nGetUnpaddedEndIndex(); } else { assert( false ); } soTagData += RWCString( (long) nBasesExtended ); soTagData += " bases\n"; } else if ( nPurpose_ == nFixWeakRegion ) { soTagData += "weak\n"; } else if ( nPurpose_ == nCoverSingleSubcloneBases ) { soTagData += "single subclone bases fixed: "; soTagData += RWCString( (long) nSingleSubcloneBasesFixed_ ); soTagData += "\n"; } else if ( nPurpose_ == nFlankGap ) { soTagData += "flank gap\n"; } else assert( false ); // now for high quality region left and right int nUnpaddedStartFromFirstBase = ABS( nUnpaddedLeft_ - nFirstBaseOfReadUnpadded_ ); int nUnpaddedEndFromFirstBase = ABS( nUnpaddedRight_ - nFirstBaseOfReadUnpadded_ ); if ( nUnpaddedStartFromFirstBase > nUnpaddedEndFromFirstBase ) { int nTemp = nUnpaddedStartFromFirstBase; nUnpaddedStartFromFirstBase = nUnpaddedEndFromFirstBase; nUnpaddedEndFromFirstBase = nTemp; } soTagData = soTagData + RWCString( (long) nUnpaddedStartFromFirstBase ) + " " + RWCString( (long) nUnpaddedEndFromFirstBase ) + " " + RWCString( (long) nUnpaddedOffsetOfBeginningOfReadFromTag ) + "\n"; if ( ucStrandAndChemistry_ == TOP_STRAND_PRIMER_READS || ucStrandAndChemistry_ == BOTTOM_STRAND_PRIMER_READS ) soTagData += "dyePrimer "; else soTagData += "dyeTerm "; if ( nReadType_ == nUniversalReverse ) { soTagData += "univ rev\n"; } else if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { soTagData += "customPrimer\n"; } else { // note: in the future when we are calling reverses for // reasons other than flanking gaps, these will not all be univ fwd soTagData += "univ fwd\n"; } if ( nPurpose_ == nExtendContig || nPurpose_ == nFixWeakRegion ) { soTagData += "fix cons errors: "; soTagData += RWCString( dErrorsFixedJustInConsensus_, 2 ); double dOriginalConsensusErrors = pContig_->dGetOriginalConsensusErrors( nUnpaddedLeft_, nUnpaddedRight_ ); soTagData += " original cons errors: "; soTagData += RWCString( dOriginalConsensusErrors, 2 ); soTagData += "\n"; } if ( nPurpose_ != nFlankGap ) { // note: this says "original" because, when this ace file is later // run after additional reads are added, this value will be "original" // and the nGetCurrentSingleSubcloneBases will then be "current" soTagData += "original single subclone bases: "; // case of the high quality region of the read completely off the // consensus is handled within this routine // so how many single subclone bases were there? // I guess every base is a single subclone base since, if a join // is made and there are a few single subclone bases in the joined // region, this read should be considered a success--not a failure // for increasing the number of single subclone bases. soTagData += RWCString( (long) pContig_->nGetCurrentSingleSubcloneBases( nUnpaddedLeft_, nUnpaddedRight_ ) ); soTagData += "\n"; } if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { soTagData += "primer: "; char szTemp[2]; szTemp[1] = '\0'; for( int n = 0; n < pCustomPrimer_->nUnpaddedLength_; ++n ) { szTemp[0] = pCustomPrimer_->szPrimer_[n]; soTagData += szTemp; } soTagData += " temp: "; soTagData += RWCString( (long) ( pCustomPrimer_->dMeltingTemperature_ + .5 ) ); soTagData += " id: "; soTagData += ConsEd::pGetAssembly()->soGetOligoNameFromOligoNumber( nPrimerID_ ); soTagData += "\n"; } // now for the template line soTagData += "expID_and_template:"; if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { if ( bCloneTemplateNotSubcloneTemplate_ ) { assert( aExpID_.length() == 1 ); soTagData += " "; soTagData += RWCString( (long) aExpID_[0] ); soTagData += " whole clone\n"; } else { for( int n = 0; n < MIN( nNUMBER_OF_TEMPLATES, pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ ); ++n ) { if ( pCustomPrimer_->pSubcloneTemplate_[n] ) { if ( n > 0 ) soTagData += ", "; else soTagData += " "; soTagData += RWCString( (long) aExpID_[n] ); soTagData += " "; soTagData += pCustomPrimer_->pSubcloneTemplate_[n]->soTemplateName_; } } soTagData += "\n"; } } else { // universal primer read assert( aExpID_.length() == 1 ); soTagData += " "; soTagData += RWCString( (long) aExpID_[ 0 ] ); soTagData += " "; soTagData += pSubcloneTemplate_->soTemplateName_; soTagData += "\n"; } if ( bIsUniversalPrimerRead() && nPurpose_ == nFlankGap ) { soTagData += "existing reads:"; for( int nRead = 0; nRead < pSubcloneTemplate_->aReads_.entries(); ++nRead ) { LocatedFragment* pLocFrag = pSubcloneTemplate_->aReads_[ nRead ]; soTagData += " "; soTagData += pLocFrag->soGetName(); } } autoFinishExpTag* pAutoFinishExpTag = new autoFinishExpTag( pContig_, nConsPosOfTag, nConsPosOfTag, "autofinish", soEmptyString, soTagData ); pAutoFinishExpTag->writeTagToAceFile( ofsAceFile ); // free up memory for next tag delete pAutoFinishExpTag; } void experiment :: setPrimerIDIfNecessary() { if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { nPrimerID_ = ConsEd::pGetAssembly()->nGetNextOligoNumber(); } } void experiment :: chooseExperiment() { setPrimerIDIfNecessary(); nState_ = nChosenExperiment; if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { if ( bCloneTemplateNotSubcloneTemplate_ ) { ++( consedParameters::pGetConsedParameters()->nLastUsedExpID_ ); aExpID_.insert( consedParameters::pGetConsedParameters()->nLastUsedExpID_ ); } else { for( int n = 0; n < MIN( nNUMBER_OF_TEMPLATES, pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ ); ++n ) { if ( pCustomPrimer_->pSubcloneTemplate_[ n ] ) { ++( consedParameters::pGetConsedParameters()->nLastUsedExpID_ ); aExpID_.insert( consedParameters::pGetConsedParameters()->nLastUsedExpID_ ); // printing these experiments depends on the same // index finding the experiment id and the template // so make sure they are in sync (even though they // are in different structures) assert( aExpID_.length() == ( n + 1 ) ); } } } // if ( bCloneTemplateNotSubcloneTemplate_ ) { else } else { // if the experiment is not with a custom primer (it is either a // reverse or a universal primer forward), then it only has // one expID ++( consedParameters::pGetConsedParameters()->nLastUsedExpID_ ); aExpID_.insert( consedParameters::pGetConsedParameters()->nLastUsedExpID_ ); } // if ( bCustomPrimerNotUniversalPrimer_ ) { else if ( pCP->bAutoFinishSuggestSpecialChemistryForRunsAndStops_ ) { pContig_->determineWhetherThisExperimentNeedsSpecialChemistry( this ); } } bool experiment :: bHasThisExperimentOrOneLikeItBeenTriedBefore() { if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { return( bHasCloseCustomPrimerExperimentBeenTriedBefore() ); } else { if ( nReadType_ == nUniversalReverse ) { // reverse universal primer return( bHasUniversalPrimerExperimentBeenTriedBefore( false ) ); } else { // forward universal primer return( bHasUniversalPrimerExperimentBeenTriedBefore( true ) ); } } } bool experiment :: bHasCloseCustomPrimerExperimentBeenTriedBefore() { int nFirstBaseOfReadConsPos = pContig_->nPaddedIndexFast( nFirstBaseOfReadUnpadded_ ); int nConsPosLeft = nFirstBaseOfReadConsPos - consedParameters::pGetConsedParameters()->nAutoFinishNewCustomPrimerReadThisFarFromOldCustomPrimerRead_; int nConsPosRight = nFirstBaseOfReadConsPos + consedParameters::pGetConsedParameters()->nAutoFinishNewCustomPrimerReadThisFarFromOldCustomPrimerRead_; for( int nTag = 0; nTag < pContig_->nGetNumberOfTags(); ++nTag ) { tag* pTag = pContig_->pGetTag( nTag ); if ( pTag->soType_ != "autoFinishExp" ) continue; if ( nConsPosLeft <= pTag->nPaddedConsPosStart_ && pTag->nPaddedConsPosEnd_ <= nConsPosRight ) { // found an autoFinishExp tag "close" to the beginning of the // read that we think we want to make autoFinishExpTag* pAutoFinishExp = (autoFinishExpTag*) pTag; // check if it is in the correct orientation bool bTopStrandTag; if ( ( pContig_->bComplementedFromWayPhrapCreated_ && pAutoFinishExp->bComplementedFromWayPhrapCreatedContig_ ) || ( !pContig_->bComplementedFromWayPhrapCreated_ && !pAutoFinishExp->bComplementedFromWayPhrapCreatedContig_ ) ) bTopStrandTag = true; else bTopStrandTag = false; if ( bTopStrandTag != bTopStrandNotBottomStrand_ ) continue; // if reached here, they are in the correct orientation if ( pAutoFinishExp->nReadType_ != nWalk && pAutoFinishExp->nReadType_ != nPCREnd ) continue; // I think at this point we don't want to make this experiment // because there is another that has already been called close by. // Thus, even if it worked, we don't want to do it. The only // exception might be a read used for extending a contig. // In that case, the acceptable distance that a previously called // experiment can be away might be less. printRejectExperiment( pAutoFinishExp ); return( true ); // if a read of the same strand failed close by, then don't // make this read // if ( !pAutoFinishExp->bExperimentWorked() ) return( true ); // experiment did work // just one more check--we don't want to repeat the precise // same experiment, even if it worked before // if ( memcmp( pCustomPrimer_->szPrimer_, // (char*) pAutoFinishExp->soPrimerBases_.data(), // nUnpaddedLength_ ) == 0 ) { // // this is precisely the same primer // // don't make a primer over again! Something must have // // failed to have this happen. // return( true ); // } } } // for( int nTag = 0; nTag < pContig_->nGetNumberOfTags(); ++nTag ) { // if reached here, have tried all the tags and it doesn't appear that // this read has been tried before nor is any similar read (custom primer // on same strand) close to this one return( false ); } bool experiment :: bHasUniversalPrimerExperimentBeenTriedBefore( const bool bForwardNotReverse ) { // do not allow a resequence of an Autofinish read if ( bIsResequencingRead() && !pCP->bAutoFinishAllowResequencingAUniversalPrimerAutofinishRead_ ) { LocatedFragment* pExistingLocFrag = NULL; if ( pSubcloneTemplate_->bThereIsAlreadySuchAnAutofinishUniversalPrimerRead( bForwardNotReverse, pExistingLocFrag ) ) { printRejectResequencingUniversalPrimerAutofinishExperiment( pExistingLocFrag ); return( true ); } } int nFirstBaseOfReadConsPos = pContig_->nPaddedIndexFast( nFirstBaseOfReadUnpadded_ ); if ( ConsEd::pGetAssembly()->bHasUniversalPrimerReadBeenTriedBeforeByAutoFinish( pSubcloneTemplate_, nReadType_ ) ) { fprintf( pAO, "%s read from template %s was tried before by Autofinish so not trying it again\n", szGetReadType2( nReadType_ ), pSubcloneTemplate_->soGetName().data() ); return( true ); } // could not find any previously called experiment with this same // universal primer read return( false ); } void experiment :: printRejectExperiment( autoFinishExpTag* pAutoFinishExp ) { fprintf( pAO, "rejecting experiment: " ); if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { fprintf( pAO, "custom primer read starts at %d\n", nFirstBaseOfReadUnpadded_ ); for( int n = 0; n < pCustomPrimer_->nUnpaddedLength_; ++n ) putc( pCustomPrimer_->szPrimer_[n], pAO ); fprintf( pAO, " from %d to %d\n", pCustomPrimer_->nUnpaddedStart_, pCustomPrimer_->nUnpaddedEnd_ ); } else { // universal primer read if ( nReadType_ == nUniversalReverse ) { fprintf( pAO, "reverse universal primer read with template %s\n", (char*) pSubcloneTemplate_->soTemplateName_.data() ); } else { // forward read fprintf( pAO, "forward universal primer read with template %s\n", (char*) pSubcloneTemplate_->soTemplateName_.data() ); fprintf( pAO, "%s %s read to was to start at %d\n", ( bTopStrandNotBottomStrand_ ? "topStrand" : "bottomStrand" ), ( bIsDyePrimerNotDyeTerminator( ucStrandAndChemistry_ ) ? "dyePrimer" : "dyeTerm" ), nFirstBaseOfReadUnpadded_ ); } } RWCString soReadType; if ( pAutoFinishExp->nReadType_ == nUniversalForward ) soReadType = "universal forward"; else if ( pAutoFinishExp->nReadType_ == nUniversalReverse ) soReadType = "universal reverse"; else if ( pAutoFinishExp->nReadType_ == nWalk ) soReadType = "custom primer walk"; else if ( pAutoFinishExp->nReadType_ == nPCREnd ) soReadType = "custom primer pcr end"; else assert( false ); fprintf( pAO, "because %s read was called at %d in an earlier autofinish round with\n", (char*) soReadType.data(), pAutoFinishExp->pContig_->nUnpaddedIndex( pAutoFinishExp->nPaddedConsPosStart_ ) ); for( int nExpID = 0; nExpID < pAutoFinishExp->aExpID_.length(); ++nExpID ) { fprintf( pAO, " expid: %d template: %s\n", pAutoFinishExp->aExpID_[ nExpID ], (char*) pAutoFinishExp->aTemplates_[ nExpID ].data() ); } } void experiment :: printToSummaryFile( FILE* pFile ) { if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { for( int n = 0; n < pCustomPrimer_->nUnpaddedLength_; ++n ) putc( pCustomPrimer_->szPrimer_[n], pFile ); fprintf( pFile, ",%d,%s,%s,%d,%d,%d,%s,", ( (int) ( pCustomPrimer_->dMeltingTemperature_ + .5 ) ), (const char*) ConsEd::pGetAssembly()->soGetOligoNameFromOligoNumber( nPrimerID_ ), ( bTopStrandNotBottomStrand_ ? "->" : "<-" ), nFirstBaseOfReadUnpadded_, nUnpaddedLeft_, nUnpaddedRight_, (char*) pContig_->soGetName().data() ); if ( bCloneTemplateNotSubcloneTemplate_ ) { fprintf( pFile, "%d,whole clone", aExpID_[ 0 ] ); } else { // case of subclone templates bool bFirstOne = true; for( int nTemplate = 0; nTemplate < MIN( nNUMBER_OF_TEMPLATES, pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ ); ++nTemplate ) { if ( pCustomPrimer_->pSubcloneTemplate_[ nTemplate ] ) { if ( bFirstOne ) bFirstOne = false; else putc( ',', pFile ); fprintf( pFile, "%d,%s", aExpID_[ nTemplate ], (char*) pCustomPrimer_->pSubcloneTemplate_[ nTemplate ]->soTemplateName_.data() ); if ( pCP->bAutoFinishPrintForwardOrReverseStrandWhenPrintingSubcloneTemplatesForCustomPrimerReads_ ) { bool bForwardNotReverseStrand = pCustomPrimer_->pSubcloneTemplate_[ nTemplate ]->bIsForwardNotReverseStrand( bTopStrandNotBottomStrand_, pContig_ ); fprintf( pFile, " (%s)", ( bForwardNotReverseStrand ? "fwd" : "rev" ) ); } } } } } // if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { else { assert( nReadType_ == nUniversalForward || nReadType_ == nUniversalReverse ); fprintf( pFile, "%s,,,%s,%d,%d,%d,%s,%d,%s", ( ( nReadType_ == nUniversalForward ) ? "univ fwd" : "univ rev" ), ( bTopStrandNotBottomStrand_ ? "->" : "<-" ), nFirstBaseOfReadUnpadded_, nUnpaddedLeft_, nUnpaddedRight_, (char*) pContig_->soGetName().data(), aExpID_[ 0 ], (char*) pSubcloneTemplate_->soTemplateName_.data() ); } // if if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) else if ( pCP->bAutoFinishSuggestSpecialChemistryForRunsAndStops_ ) { if ( bCrossesRun_ || bCrossesStop_ ) { fprintf( pFile, " (sp chem:" ); if ( bCrossesRun_ ) fprintf( pFile, " run" ); if ( bCrossesStop_ ) fprintf( pFile, " stop" ); putc( ')', pFile ); } } putc( '\n', pFile ); } void experiment :: printToSummaryFile2( FILE* pFile ) { fprintf( pFile, "%-10s %7d %7d ", pContig_->soGetName().data(), nUnpaddedLeft_, nUnpaddedRight_); if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { for( int n = 0; n < pCustomPrimer_->nUnpaddedLength_; ++n ) putc( pCustomPrimer_->szPrimer_[n], pFile ); fprintf( pFile, ",%d,%s,%s,%d,", ( (int) ( pCustomPrimer_->dMeltingTemperature_ + .5 ) ), (const char*) ConsEd::pGetAssembly()->soGetOligoNameFromOligoNumber( nPrimerID_ ), ( bTopStrandNotBottomStrand_ ? "->" : "<-" ), nFirstBaseOfReadUnpadded_ ); if ( bCloneTemplateNotSubcloneTemplate_ ) { fprintf( pFile, "%d,whole clone", aExpID_[ 0 ] ); } else { // case of subclone templates bool bFirstOne = true; for( int nTemplate = 0; nTemplate < MIN( nNUMBER_OF_TEMPLATES, pCP->nAutoFinishHowManyTemplatesYouIntendToUseForCustomPrimerSubcloneReactions_ ); ++nTemplate ) { if ( pCustomPrimer_->pSubcloneTemplate_[ nTemplate ] ) { if ( bFirstOne ) bFirstOne = false; else putc( ',', pFile ); fprintf( pFile, "%d,%s", aExpID_[ nTemplate ], (char*) pCustomPrimer_->pSubcloneTemplate_[ nTemplate ]->soTemplateName_.data() ); if ( pCP->bAutoFinishPrintForwardOrReverseStrandWhenPrintingSubcloneTemplatesForCustomPrimerReads_ ) { bool bForwardNotReverseStrand = pCustomPrimer_->pSubcloneTemplate_[ nTemplate ]->bIsForwardNotReverseStrand( bTopStrandNotBottomStrand_, pContig_); fprintf( pFile, " (%s)", ( bForwardNotReverseStrand ? "fwd" : "rev" ) ); } } } } } // if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) { else { assert( nReadType_ == nUniversalForward || nReadType_ == nUniversalReverse ); fprintf( pFile, "%s (%s),%s,%d,%d,%s", ( ( nReadType_ == nUniversalForward ) ? "univ fwd" : "univ rev" ), ( bDeNovoUniversalPrimerRead_ ? "denovo" : "reseq" ), ( bTopStrandNotBottomStrand_ ? "->" : "<-" ), nFirstBaseOfReadUnpadded_, aExpID_[ 0 ], (char*) pSubcloneTemplate_->soTemplateName_.data() ); } // if if ( nReadType_ == nWalk || nReadType_ == nPCREnd ) else if ( pCP->bAutoFinishSuggestSpecialChemistryForRunsAndStops_ ) { if ( bCrossesRun_ || bCrossesStop_ ) { fprintf( pFile, " (sp chem:" ); if ( bCrossesRun_ ) fprintf( pFile, " run" ); if ( bCrossesStop_ ) fprintf( pFile, " stop" ); putc( ')', pFile ); } } putc( '\n', pFile ); } // The reason I want to use nExtendsIntoWhichGap instead of // nExtendsIntoWhichGap_ is that I want to know whether the read is // pointing in depending on whether it is being *considered* for // extending the left or right gap. It is theoretically possible for // a read to extend into both gaps (for tiny contigs.) bool experiment :: bIsPointingIn( const int nExtendsIntoWhichGap ) { if ( nExtendsIntoWhichGap == nLeftGap && bTopStrandNotBottomStrand_ ) return( true ); else if ( nExtendsIntoWhichGap == nRightGap && !bTopStrandNotBottomStrand_ ) return( true ); else return( false ); } void experiment :: printToCustomNavigationFile( FILE* pFile ) { fprintf( pFile, "BEGIN_REGION\n" ); fprintf( pFile, "TYPE: CONSENSUS\n" ); fprintf( pFile, "CONTIG: %s\n", pContig_->soGetName().data() ); // must make the read fit on the consensus int nIntersectLeft; int nIntersectRight; if ( !bIntersect( nUnpaddedLeft_, nUnpaddedRight_, pContig_->nGetUnpaddedStartIndex(), pContig_->nGetUnpaddedEndIndex(), nIntersectLeft, nIntersectRight ) ) { // case in which the entire read is off the consensus // Which end of the consensus is it off? if ( nUnpaddedLeft_ < pContig_->nGetUnpaddedStartIndex() ) { nIntersectLeft = pContig_->nGetUnpaddedStartIndex(); nIntersectRight = pContig_->nGetUnpaddedStartIndex(); } else { nIntersectLeft = pContig_->nGetUnpaddedEndIndex(); nIntersectRight = pContig_->nGetUnpaddedEndIndex(); } } fprintf( pFile, "UNPADDED_CONS_POS: %d %d\n", nIntersectLeft, nIntersectRight ); fprintf( pFile, "COMMENT: " ); printToSummaryFile2( pFile ); fprintf( pFile, "END_REGION\n\n" ); } void experiment :: printRejectResequencingUniversalPrimerAutofinishExperiment( LocatedFragment* pExistingLocFrag ) { assert( !pExistingLocFrag->bIsWalkingNotUniversalPrimerRead() ); fprintf( pAO, "rejecting experiment that would have been a resequence of\nread: %s (%s) %s %d to %d \nsince this read is already an Autofinish read\n\n", pExistingLocFrag->soGetName().data(), ( pExistingLocFrag->bIsForwardNotReverseRead() ? "fwd" : "rev" ), ( pExistingLocFrag->bComp() ? "bottom strand" : "top strand" ), pExistingLocFrag->nGetAlignStartUnpadded(), pExistingLocFrag->nGetAlignEndUnpadded() ); }