/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "autoFinishExpTag.h" #include "min.h" #include "contig.h" #include "bIntersect.h" #include "fileDefines.h" #include "consedParameters.h" RWCString autoFinishExpTag :: soGetExtraInformationForNavigator( const int nMaxLength ) { RWCString soData = soMiscData_; // this improves the readability so there aren't all these little // vt's all over for( int n = 0; n < soData.length(); ++n ) { if ( soData[n] == '\n' ) soData[n] = ' '; } return( soData ); } void autoFinishExpTag :: getUnpaddedHighQualityRegion( int& nUnpaddedHighQualityRegionLeft, int& nUnpaddedHighQualityRegionRight, int& nUnpaddedHighQualityRegionLeftOnConsensus, int& nUnpaddedHighQualityRegionRightOnConsensus, bool& bCompletelyOffConsensus ) { // find the potential high quality region bool bTopStrandRead; if ( ( pContig_->bComplementedFromWayPhrapCreated_ && bComplementedFromWayPhrapCreatedContig_ ) || ( !pContig_->bComplementedFromWayPhrapCreated_ && !bComplementedFromWayPhrapCreatedContig_ ) ) bTopStrandRead = true; else bTopStrandRead = false; int nUnpaddedStartOfRead = pContig_->nUnpaddedIndex( nPaddedConsPosStart_ ); if ( nUnpaddedOffsetOfBeginningOfReadFromTag_ != 0 ) { if ( bTopStrandRead ) // this means the read is probably off the left end of the // consensus and pointing right nUnpaddedStartOfRead -= nUnpaddedOffsetOfBeginningOfReadFromTag_; else // this means the read is probably off the right end of the // consensus and pointing left nUnpaddedStartOfRead += nUnpaddedOffsetOfBeginningOfReadFromTag_; } if ( bTopStrandRead ) { nUnpaddedHighQualityRegionLeft = nUnpaddedStartOfRead + nUnpaddedOffsetToStartOfHighQualityRegion_; nUnpaddedHighQualityRegionRight = nUnpaddedStartOfRead + nUnpaddedOffsetToEndOfHighQualityRegion_; } else { nUnpaddedHighQualityRegionLeft = nUnpaddedStartOfRead - nUnpaddedOffsetToEndOfHighQualityRegion_; nUnpaddedHighQualityRegionRight = nUnpaddedStartOfRead - nUnpaddedOffsetToStartOfHighQualityRegion_; } // Note: these positions are not necessarily on the consensus // So let's reduce them to being on the consensus if ( !bIntersect( nUnpaddedHighQualityRegionLeft, nUnpaddedHighQualityRegionRight, pContig_->nGetUnpaddedStartIndex(), pContig_->nGetUnpaddedEndIndex(), nUnpaddedHighQualityRegionLeftOnConsensus, nUnpaddedHighQualityRegionRightOnConsensus ) ) { // the read that was supposed to be made must have // failed miserably--went in the wrong location, blank lane, // or didn't go into the assembly. That is because it didn't // extend the consensus at all. bCompletelyOffConsensus = true; } else bCompletelyOffConsensus = false; } void autoFinishExpTag :: howWellDidExpClusterExtendContig() { if ( nExtendingIntoGapThisManyBases_ == 0 ) return; // we need to try to figure out which gap this experiment cluster // was supposed to extend. I think we can do that by seeing which // end of the contig it was nearest. This would only fail for tiny // contigs. int nUnpaddedHighQualityRegionLeft; int nUnpaddedHighQualityRegionRight; int nUnpaddedHighQualityRegionLeftOnConsensus; int nUnpaddedHighQualityRegionRightOnConsensus; bool bCompletelyOffConsensus; getUnpaddedHighQualityRegion( nUnpaddedHighQualityRegionLeft, nUnpaddedHighQualityRegionRight, nUnpaddedHighQualityRegionLeftOnConsensus, nUnpaddedHighQualityRegionRightOnConsensus, bCompletelyOffConsensus ); if ( pContig_->nGetUnpaddedStartIndex() <= nUnpaddedHighQualityRegionLeft && nUnpaddedHighQualityRegionRight <= pContig_->nGetUnpaddedEndIndex() ) { fprintf( pAO, "The high quality portion of this read is now completely contained within the consensus. It was supposed to extend the consensus by %d bases. It must have done so.\n", nExtendingIntoGapThisManyBases_ ); return; } int nExtendedThisManyBases; bool bExtendToRight; if ( nUnpaddedHighQualityRegionLeft < pContig_->nGetUnpaddedStartIndex() ) { // purpose probably was to extend to left, but didn't completely succeed int nProbableOldLeftPosition = nUnpaddedHighQualityRegionLeft + nExtendingIntoGapThisManyBases_; nExtendedThisManyBases = nProbableOldLeftPosition - pContig_->nGetUnpaddedStartIndex(); bExtendToRight = false; } else { // purpose probably was to extend to the right, but didn't completely succeed // --------------> (old contig) // // ---------- (high quality portion of suggested read) //-------------------> (new contig) // ^ (nUnpaddedHighQualityRegionRight) // < > (nExtendingIntoGapThisManyBases_ ) // ^ (nProbableOldRightPosition ) // int nProbableOldRightPosition = nUnpaddedHighQualityRegionRight - nExtendingIntoGapThisManyBases_; nExtendedThisManyBases = pContig_->nGetUnpaddedEndIndex() - nProbableOldRightPosition; bExtendToRight = true; } fprintf( pAO, "Purpose: extend consensus %d bp to %s but did only %d bp\n", nExtendingIntoGapThisManyBases_, ( bExtendToRight ? "right" : "left" ), nExtendedThisManyBases ); } void autoFinishExpTag :: howWellDidExpClusterFixErrors() { // figure out how many errors are in the location of the experiment // To do this, we must figure out where these reads were supposed to go. int nUnpaddedHighQualityRegionLeft; int nUnpaddedHighQualityRegionRight; int nUnpaddedHighQualityRegionLeftOnConsensus; int nUnpaddedHighQualityRegionRightOnConsensus; bool bCompletelyOffConsensus; getUnpaddedHighQualityRegion( nUnpaddedHighQualityRegionLeft, nUnpaddedHighQualityRegionRight, nUnpaddedHighQualityRegionLeftOnConsensus, nUnpaddedHighQualityRegionRightOnConsensus, bCompletelyOffConsensus ); if ( bCompletelyOffConsensus ) { // what do we do with current errors? fprintf( pAO, "this read should have been completely off the consensus\n" ); fprintf( pAO, "original errors: %.2f supposed to fix: %.2f\n", dOriginalNumberOfErrors_, dErrorsFixed_ ); } else { // figure out how many errors are currently in this high quality region double dCurrentErrors = 0.0; for( int nUnpadded = nUnpaddedHighQualityRegionLeftOnConsensus; nUnpadded <= nUnpaddedHighQualityRegionRightOnConsensus; ++nUnpadded ) { dCurrentErrors += (*pContig_->pUnpaddedErrorProbabilities_)[ nUnpadded ]; } fprintf( pAO, "original errors: %.2f current errors: %.2f supposed to fix: %.2f actually fixed: %.2f\n", dOriginalNumberOfErrors_, dCurrentErrors, dErrorsFixed_, ( dOriginalNumberOfErrors_ - dCurrentErrors ) ); } } void autoFinishExpTag :: evaluateExperimentCluster() { fprintf( pAO, "\n\neval exp. supposed to start: %d contig %s purpose: %s\n", pContig_->nUnpaddedIndex( nPaddedConsPosStart_ ), (char*) pContig_->soGetName().data(), szGetPurpose( nPurpose_ ) ); if ( nPurpose_ == nFixWeakRegion || nPurpose_ == nExtendContig ) howWellDidExpClusterFixErrors(); // If the contig was supposed to be extended, see if it was, and by how // much. The way to do this is to figure out where the old end of the // consensus was. Then see where the current end of the consensus is. if ( nPurpose_ == nFixWeakRegion || nPurpose_ == nExtendContig ) howWellDidExpClusterExtendContig(); if ( nPurpose_ != nFlankGap ) howWellDidExpClusterCoverSingleSubcloneBases(); // we need to add a method to check whether the gap-flanking read // when into a different contig (or, better yet, if the gap was // closed) if ( nPurpose_ == nFlankGap ) howWellDidExpClusterFlankGap(); } void autoFinishExpTag :: howWellDidExpClusterCoverSingleSubcloneBases() { int nUnpaddedHighQualityRegionLeft; int nUnpaddedHighQualityRegionRight; int nUnpaddedHighQualityRegionLeftOnConsensus; int nUnpaddedHighQualityRegionRightOnConsensus; bool bCompletelyOffConsensus; getUnpaddedHighQualityRegion( nUnpaddedHighQualityRegionLeft, nUnpaddedHighQualityRegionRight, nUnpaddedHighQualityRegionLeftOnConsensus, nUnpaddedHighQualityRegionRightOnConsensus, bCompletelyOffConsensus ); if ( bCompletelyOffConsensus ) { char szTemp[100]; if ( nSingleSubcloneBasesFixed_ == -1 ) strcpy( szTemp, "?" ); else sprintf( szTemp, "%d", nSingleSubcloneBasesFixed_ ); fprintf( pAO, "this read should have been completely off the consensus in this assembly\n" ); fprintf( pAO, "but was supposed to fix %s single subclone bases\n", szTemp ); fprintf( pAO, "Thus the previous and current assemblies are not consistent with each other.\n" ); return; } int nCurrentSingleSubcloneBases = pContig_->nGetCurrentSingleSubcloneBases( nUnpaddedHighQualityRegionLeft, nUnpaddedHighQualityRegionRight ); char szTemp[100]; if ( nSingleSubcloneBasesFixed_ == -1 ) strcpy( szTemp, "?" ); else sprintf( szTemp, "%d", nSingleSubcloneBasesFixed_ ); fprintf( pAO, "single subclone bases: original: %d current: %d supposed to fix: %s actually fixed: %d\n", nOriginalNumberOfSingleSubcloneBases_, nCurrentSingleSubcloneBases, szTemp, ( nOriginalNumberOfSingleSubcloneBases_ - nCurrentSingleSubcloneBases ) ); } void autoFinishExpTag :: howWellDidExpClusterFlankGap() { // we really need to know where the reads went--so perhaps we can't // decide how well this experiment cluster worked based just on // looking at the new consensus. In fact, the consensus could not // change at all, but we could get the information we want from the // new reads. }