/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "contigview.h" #include using namespace std; #include "locatedFragment.h" #include "consedParameters.h" #include "contig.h" #include "cGetProtein.h" #include "bIntervalsIntersect.h" #include "cComplementBase.h" #include "abs.h" #include "editMakeHighQuality.h" #include "popupErrorMessage2.h" #include "guicontigwin.h" const int nDefaultMaxAminoAcidsDisplayedPerLine = 50; ContigView :: ContigView( Contig* pContig, const int nLeftEdgeConsensusPosition, const int nEnd) : nStartPos_(nLeftEdgeConsensusPosition), nEndPos_(nEnd), nForFragsInView_(0), nRevFragsInView_(0), dapLocatedFragment_(0), nSizeOfLargestReadPrefix_( 0 ), pContig_( pContig ), cHowAreReadsSorted_( cUNKNOWN ) { if ( consedParameters::pGetConsedParameters()->nShowProteinTranslation_ != nDontShowProteinTranslation ) { // always create these arrays, even if there are no bases // on the screen for( int nReadingFrame = 1; nReadingFrame <= 3; ++nReadingFrame ) { pProteinTranslation_[ nReadingFrame ] = new proteinTranslationArrayType( nDefaultMaxAminoAcidsDisplayedPerLine ); pProteinTranslationScreenCharX_[ nReadingFrame ] = new proteinTranslationScreenCharXArrayType( nDefaultMaxAminoAcidsDisplayedPerLine ); } // case in which there are no consensus bases on the screen if ( !bIntervalsIntersect( nLeftEdgeConsensusPosition, nEnd, pContig->nGetStartIndex(), pContig->nGetEndIndex() ) ) return; // so if were got to here, there are some bases on the screen // However, they may not start at nLeftEdgetConsensusPosition int nConsPos = nLeftEdgeConsensusPosition - 1; if ( nConsPos < pContig->nGetStartIndex() ) nConsPos = pContig->nGetStartIndex(); int nEndConsPos = nEnd; if ( nEndConsPos > pContig->nGetEndIndex() ) nEndConsPos = pContig->nGetEndIndex(); assert( nConsPos <= nEndConsPos ); // backup to previous non-pad if there is a previous non-pad bool bFoundBase = false; while( !bFoundBase && pContig->nGetStartIndex() <= nConsPos ) { if ( !pContig->ntGetCons( nConsPos ).bIsPad() ) bFoundBase = true; else --nConsPos; } RWCString soBases; RWTValOrderedVector aPaddedConsPos_( (size_t) (3 * nDefaultMaxAminoAcidsDisplayedPerLine ) ); soBases = ""; if ( bFoundBase ) { soBases.append( pContig->ntGetCons( nConsPos ).cGetBase() ); aPaddedConsPos_.append( nConsPos ); } ++nConsPos; while( nConsPos <= nEndConsPos ) { if ( !pContig->ntGetCons( nConsPos ).bIsPad() ) { soBases.append( pContig->ntGetCons( nConsPos ).cGetBase() ); aPaddedConsPos_.append( nConsPos ); } ++nConsPos; } // now find one more non-pad base, if there is one bFoundBase = false; while( !bFoundBase && nConsPos <= pContig->nGetEndIndex() ) { if ( !pContig->ntGetCons( nConsPos ).bIsPad() ) { bFoundBase = true; soBases.append( pContig->ntGetCons( nConsPos ).cGetBase() ); aPaddedConsPos_.append( nConsPos ); } else ++nConsPos; } // now we have an array of unpadded bases and their padded consensus // positions. Convert these to amino acids and screen positions. if ( aPaddedConsPos_.length() < 3 ) // got problems return; // find the unpadded position of the first base of the codon // for top stand and last base of the codon for bottom strand int nUnpaddedConsPos = ( consedParameters::pGetConsedParameters()->nShowProteinTranslation_ == nShowProteinTranslationTopStrand ) ? pContig->nUnpaddedIndex( aPaddedConsPos_[0] ) - 1 : pContig->nUnpaddedIndex( aPaddedConsPos_[0] ) + 1; int nUnpaddedLength = pContig->nGetUnpaddedSeqLength(); for( int nIndex = 1; nIndex < ( aPaddedConsPos_.length() - 1 ); ++nIndex ) { ++nUnpaddedConsPos; // nUnpaddedConsPos is the unpadded cons pos of the // first base of the codon // reading frames are given by the unpadded position // of the first base of a codon int nReadingFrame; char cProtein; if ( consedParameters::pGetConsedParameters()->nShowProteinTranslation_ == nShowProteinTranslationTopStrand ) { nReadingFrame = nUnpaddedConsPos % 3; if ( nReadingFrame == 0 ) nReadingFrame = 3; cProtein = cGetProtein( soBases[nIndex - 1], soBases[nIndex], soBases[nIndex + 1 ] ); } else { nReadingFrame = ( nUnpaddedLength + 1 - nUnpaddedConsPos ) % 3; if ( nReadingFrame == 0 ) nReadingFrame = -3; else if ( nReadingFrame == 1 ) nReadingFrame = -1; else if ( nReadingFrame == 2 ) nReadingFrame = -2; cProtein = cGetProtein( cComplementBase( soBases[ nIndex + 1 ] ), cComplementBase( soBases[ nIndex ] ), cComplementBase( soBases[ nIndex - 1 ] ) ); } int nScreenCharX = aPaddedConsPos_[ nIndex ] - nLeftEdgeConsensusPosition; (pProteinTranslation_[ ABS(nReadingFrame) ])->append( cProtein ); (pProteinTranslationScreenCharX_[ ABS(nReadingFrame) ])->append( nScreenCharX ); } } // if ( consedParameters::pGetConsedParameters()->bShowProteinTranslation_ ) } void ContigView :: operator=( const ContigView& contigView ) { nStartPos_ = contigView.nStartPos_; nEndPos_ = contigView.nEndPos_; for( int nRead = 0; nRead < contigView.dapLocatedFragment_.length(); ++nRead ) { dapLocatedFragment_.insert( contigView.dapLocatedFragment_[ nRead ] ); } pContig_ = contigView.pContig_; for( int nReadingFrame = 0; nReadingFrame < 4; ++nReadingFrame ) { pProteinTranslation_[ nReadingFrame ] = contigView.pProteinTranslation_[ nReadingFrame ]; pProteinTranslationScreenCharX_[ nReadingFrame ] = contigView.pProteinTranslationScreenCharX_[ nReadingFrame ]; } } ContigView :: ContigView( const ContigView& contigView ) { /* can I use the assignment operator? */ this->operator=( contigView ); } static int nCmpLocatedFragmentAlphabetically( const LocatedFragment** ppLocFrag1, const LocatedFragment** ppLocFrag2 ) { if ( (*ppLocFrag1)->soSequenceName_ < (*ppLocFrag2)->soSequenceName_ ) return( -1 ); else if ( (*ppLocFrag2)->soSequenceName_ < (*ppLocFrag1)->soSequenceName_ ) return( 1 ); else return( 0 ); } bool ContigView :: bIsSortedAlphabetically() { size_t nNumberOfReadsInContigView = dapLocatedFragment_.entries(); bool bSorted = true; if ( nNumberOfReadsInContigView >= 2 ) { for( int nRead = 0; nRead <= nNumberOfReadsInContigView - 2; ++nRead ) { if ( dapLocatedFragment_[ nRead ]->soSequenceName_ > dapLocatedFragment_[ nRead + 1 ]->soSequenceName_ ) { bSorted = false; cout << "Reads " << nRead << " " << dapLocatedFragment_[ nRead ]->soGetName() << " and " << nRead + 1 << " " << dapLocatedFragment_[ nRead + 1 ]->soGetName() << " are out of order." << endl; } } } return( bSorted ); } static int nCmpLocatedFragmentByStrandAndLeftEnd( const LocatedFragment** ppLocFrag1, const LocatedFragment** ppLocFrag2 ) { if ( !(*ppLocFrag1)->bIsComplemented_ && (*ppLocFrag2)->bIsComplemented_ ) return( -1 ); else if ( (*ppLocFrag1)->bIsComplemented_ && !(*ppLocFrag2)->bIsComplemented_ ) return( 1 ); else { // both are top strand or both are bottom strand if ( (*ppLocFrag1)->nAlignStartPos_ < (*ppLocFrag2)->nAlignStartPos_ ) return( -1 ); else if ( (*ppLocFrag2)->nAlignStartPos_ < (*ppLocFrag1)->nAlignStartPos_ ) return( 1 ); else return( 0 ); } } void ContigView :: sortReads() { if ( pCP->bShowReadsInAlignedReadsWindowOrderedByFile_ ) { sortReadsByFile(); return; } void* pArray = (void*) dapLocatedFragment_.data(); size_t nNumberOfReadsInContigView = dapLocatedFragment_.entries(); size_t nSizeOfAnElement = sizeof( LocatedFragment* ); if ( pCP->bShowReadsAlphabetically_ ) { qsort( pArray, nNumberOfReadsInContigView, nSizeOfAnElement, ( (int(*) ( const void*, const void*) ) nCmpLocatedFragmentAlphabetically )); } else { qsort( pArray, nNumberOfReadsInContigView, nSizeOfAnElement, ( (int(*) ( const void*, const void*) ) nCmpLocatedFragmentByStrandAndLeftEnd )); } bool bOK; if ( pCP->bShowReadsAlphabetically_ ) { bOK = bIsSortedAlphabetically(); cHowAreReadsSorted_ = cALPHABETICALLY; } else { bOK = bIsSortedByStrandAndLeftEnd(); cHowAreReadsSorted_ = cSTRAND_AND_POSITION; } if ( !bOK ) { RWCString soError = RWCString( "ContigView::sortReads didn't work. pCP->bShowReadsAlphabetically_ = ") + szPrintBool( pCP->bShowReadsAlphabetically_ ); THROW_ERROR( soError ); } } bool ContigView :: bIsSortedByStrandAndLeftEnd() { size_t nNumberOfReadsInContigView = dapLocatedFragment_.entries(); if ( nNumberOfReadsInContigView >= 2 ) { for( int nRead = 0; nRead <= nNumberOfReadsInContigView - 2; ++nRead ) { if ( ! ( ( !dapLocatedFragment_[ nRead ]->bIsComplemented_ && dapLocatedFragment_[ nRead + 1 ]->bIsComplemented_ ) || ( ( dapLocatedFragment_[ nRead ]->bIsComplemented_ == dapLocatedFragment_[ nRead + 1 ]->bIsComplemented_ ) && ( dapLocatedFragment_[ nRead ]->nAlignStartPos_ <= dapLocatedFragment_[ nRead + 1 ]->nAlignStartPos_ ) ) ) ) { cout << "Reads " << nRead << " " << dapLocatedFragment_[ nRead ]->soGetName() << " and " << nRead + 1 << " " << dapLocatedFragment_[ nRead + 1 ]->soGetName() << " are out of order." << endl; int nReadStart = nRead; int nReadEnd = nRead + 1; if ( nReadStart >= 1 ) nReadStart = 0; if ( nReadEnd < nNumberOfReadsInContigView - 1 ) ++nReadEnd; for( int nReadTemp = nReadStart; nReadTemp <= nReadEnd; ++nReadTemp ) { cout << "Read " << nReadTemp << " " << dapLocatedFragment_[ nReadTemp ]->soGetName() << " nAlignStartPos_ = " << dapLocatedFragment_[ nReadTemp ]->nAlignStartPos_ << ( dapLocatedFragment_[ nReadTemp ]->bComp() ? " comp " : " not comp " ) << endl; } return( false ); } } } return( true ); } static int nCmpLocatedFragmentByFileOrder( const LocatedFragment** ppLocFrag1, const LocatedFragment** ppLocFrag2 ) { if ( (*ppLocFrag1)->nReadOrderInAlignedReadsWindow_ < (*ppLocFrag2)->nReadOrderInAlignedReadsWindow_ ) return( -1 ); else if ( (*ppLocFrag1)->nReadOrderInAlignedReadsWindow_ > (*ppLocFrag2)->nReadOrderInAlignedReadsWindow_ ) return( 1 ); else { // if there are 2 reads of the same level by file (or // both not in the file), sort them alphabetically if ( (*ppLocFrag1)->soSequenceName_ < (*ppLocFrag2)->soSequenceName_ ) return( -1 ); else if ( (*ppLocFrag2)->soSequenceName_ < (*ppLocFrag1)->soSequenceName_ ) return( 1 ); else return( 0 ); } } void ContigView :: sortReadsByFile() { for( int nRead = 0; nRead < dapLocatedFragment_.length(); ++nRead ) { LocatedFragment* pLocFrag = dapLocatedFragment_[ nRead ]; if ( pLocFrag->nReadOrderInAlignedReadsWindow_ == nReadOrderInAlignedReadsWindowNotYetSet ) { pLocFrag->findReadOrderInAlignedReadsWindowFromFile(); } assert( pLocFrag->nReadOrderInAlignedReadsWindow_ != nReadOrderInAlignedReadsWindowNotYetSet ); } void* pArray = (void*) dapLocatedFragment_.data(); size_t nNumberOfReadsInContigView = dapLocatedFragment_.entries(); size_t nSizeOfAnElement = sizeof( LocatedFragment*); qsort( pArray, nNumberOfReadsInContigView, nSizeOfAnElement, ( (int(*) (const void*, const void* ) ) nCmpLocatedFragmentByFileOrder ) ); if ( !bIsSortedByFileOrder() ) { THROW_ERROR( "ContigView::sortReadsByFile didn't work" ); } cHowAreReadsSorted_ = cFILE; } bool ContigView :: bIsSortedByFileOrder() { for( int nRead = 0; nRead < (int) dapLocatedFragment_.length() - 1; ++nRead ) { LocatedFragment* pLocFrag1 = dapLocatedFragment_[ nRead ]; LocatedFragment* pLocFrag2 = dapLocatedFragment_[ nRead + 1 ]; assert( pLocFrag1->nReadOrderInAlignedReadsWindow_ != nReadOrderInAlignedReadsWindowNotYetSet ); assert( pLocFrag2->nReadOrderInAlignedReadsWindow_ != nReadOrderInAlignedReadsWindowNotYetSet ); if ( ! ( pLocFrag1->nReadOrderInAlignedReadsWindow_ <= pLocFrag2->nReadOrderInAlignedReadsWindow_ ) ) { return false; } } return true; } const unsigned char qualAtLeastThis = 20; // typedef szArrayOfMotifsType[][5]; #define pAddressOfNthMotif( N ) (pArrayOfMotifs + nNumberOfBasesToMakeHighQuality * (N)) void ContigView :: tellPhrapNotToOverlapReadsDiscrepantAtThisLocation( const int nConsPosWhereToSplit, const bool bUsingGui, ContigWin* pContigWin ) { int nConsPosMin = nConsPosWhereToSplit - nWINDOW_SIZE / 2; int nConsPosMax = nConsPosWhereToSplit + nWINDOW_SIZE / 2; // find each motif const int nNumberOfBasesToMakeHighQuality = 5; char* pArrayOfMotifs = (char*) malloc( sizeof( char ) * nNumberOfBasesToMakeHighQuality * nGetNumberOfFragments() ); // szArrayOfMotifsType szArrayOfMotifs = (szArrayOfMotifsType) malloc( sizeof( char ) * nNumberOfBasesToMakeHighQuality * nGetNumberOfFragments() ); int nNumberOfMotifsFoundSoFar = 0; // notice starting from 0 so including all reads, including // those that are possibly scrolled off the screen int nReadLine; for( nReadLine = 0; nReadLine < nGetNumberOfFragments(); ++nReadLine) { LocatedFragment* pLocFrag = pLocatedFragmentGet(nReadLine); // to avoid Rogue Wave subscript errors, check that the window // of positions lies within the read if ( !pLocFrag->bIsInRead( nConsPosMin ) ) continue; if ( !pLocFrag->bIsInRead( nConsPosMax ) ) continue; if ( pLocFrag->bIsWholeReadUnaligned() ) continue; if ( ! ( pLocFrag->nGetAlignClipStart() - nWINDOW_SIZE <= nConsPosMin ) && ( nConsPosMax <= pLocFrag->nGetAlignClipEnd() + nWINDOW_SIZE ) ) // skip this read--its unaligned region is too close to // the place to prevent overlap continue; bool bOKSoFar = true; int nConsPos; for( nConsPos = nConsPosMin; bOKSoFar && ( nConsPos <= nConsPosMax); ++nConsPos ) { if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() < qualAtLeastThis ) bOKSoFar = false; } if ( !bOKSoFar ) continue; // if made it here, then this read is a candidate for a new motif. // First see if it matches any of the motifs we have already found. char szMotif[4]; assert( nConsPosMax - nConsPosMin <= nNumberOfBasesToMakeHighQuality ); for( nConsPos = nConsPosMin; nConsPos <= nConsPosMax; ++nConsPos ) { szMotif[ nConsPos - nConsPosMin ] = pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase(); } bool bFoundThisMotif = false; for( int nMotif = 0; nMotif < nNumberOfMotifsFoundSoFar && !bFoundThisMotif; ++nMotif ) { if ( strncmp( szMotif, pAddressOfNthMotif( nMotif ), nNumberOfBasesToMakeHighQuality ) == 0 ) bFoundThisMotif = true; } if (! bFoundThisMotif ) { // this is a new motif, so add it to the list strncpy( pAddressOfNthMotif( nNumberOfMotifsFoundSoFar ), szMotif, nNumberOfBasesToMakeHighQuality ); ++nNumberOfMotifsFoundSoFar; } } int nNumberOfMotifs = nNumberOfMotifsFoundSoFar; // for( int nMotif = 0; nMotif < nNumberOfMotifsFoundSoFar; ++nMotif ) { // cout << "Motif: " ; // for( int nBase = 0; nBase < nNumberOfBasesToMakeHighQuality; ++nBase ) // cout << *(pAddressOfNthMotif( nMotif ) + nBase); // cout << endl; // } // cout << endl; if ( nNumberOfMotifsFoundSoFar <= 1 && bUsingGui ) { tryToHelpUserToTellPhrapToNotOverlapDiscrepantReads( nConsPosWhereToSplit, pContigWin->pGcw_-> widGetGuiContigWinTopLevel() ); return; } for( nReadLine = 0; nReadLine < nGetNumberOfFragments(); ++nReadLine) { LocatedFragment* pLocFrag = pLocatedFragmentGet(nReadLine); // to avoid Rogue Wave subscript errors, check that the window // of positions lies within the read if ( !pLocFrag->bIsInRead( nConsPosMin ) ) continue; if ( !pLocFrag->bIsInRead( nConsPosMax ) ) continue; // check that the read is aligned within this region for this read // (or at least the region is close to the aligned region ) if ( pLocFrag->bIsWholeReadUnaligned() ) continue; if ( !( pLocFrag->nGetAlignClipStart() - nWINDOW_SIZE <= nConsPosMin ) ) continue; if ( !( nConsPosMax <= pLocFrag->nGetAlignClipEnd() + nWINDOW_SIZE ) ) continue; // check that the bases match one of the motifs char szMotif[5]; for( int nConsPos = nConsPosMin; nConsPos <= nConsPosMax; ++nConsPos ) szMotif[ nConsPos - nConsPosMin ] = pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase(); // now compare to each motif bool bFoundMotif = false; for( int nMotif = 0; ( nMotif < nNumberOfMotifs ) && !bFoundMotif; ++nMotif ) { if ( strncmp( szMotif, pAddressOfNthMotif( nMotif ), nNumberOfBasesToMakeHighQuality ) == 0 ) bFoundMotif = true; } if ( bFoundMotif ) { // will this work even if there is no Gui ? EditAction* pEditAction = new editMakeHighQuality( pLocFrag, nConsPosMin, nConsPosMax ); ConsEd::pGetConsEd()->doEditAction( pEditAction, true ); // write to edit history file } } // for( nReadLine ... free( pArrayOfMotifs ); } void ContigView :: tryToHelpUserToTellPhrapToNotOverlapDiscrepantReads( const int nConsPosWhereToSplit, Widget widTopLevelShell ) { int nConsPosMin = nConsPosWhereToSplit - ContigView::nWINDOW_SIZE / 2; int nConsPosMax = nConsPosWhereToSplit + ContigView::nWINDOW_SIZE / 2; // find the highest quality read int nBestQual = 0; LocatedFragment* pBestRead = NULL; // notice starting from 0 so including all reads, including those // that are possibly scrolled off the screen int nReadLine; for( nReadLine = 0; nReadLine < nGetNumberOfFragments(); ++nReadLine ) { LocatedFragment* pLocFrag = pLocatedFragmentGet( nReadLine ); // to avoid Rogue Wave subscript errors, check that the window // of positions lies within the read if ( !pLocFrag->bIsInRead( nConsPosMin ) ) continue; if ( !pLocFrag->bIsInRead( nConsPosMax ) ) continue; int nQualSum = 0; for( int nConsPos = nConsPosMin; nConsPos <= nConsPosMax; ++nConsPos ) { nQualSum += pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99(); } if ( nQualSum > nBestQual ) { nBestQual = nQualSum; pBestRead = pLocFrag; } } // now we've found the highest quality read in this region. Now find // the highest quality discrepant read char cBaseOfBestRead = pBestRead->ntGetFragFromConsPos( nConsPosWhereToSplit).cGetBase(); int nBestDiscrepantQual = 0; LocatedFragment* pBestDiscrepantRead = NULL; for( nReadLine = 0; nReadLine < nGetNumberOfFragments(); ++nReadLine ) { LocatedFragment* pLocFrag = pLocatedFragmentGet( nReadLine ); // to avoid Rogue Wave subscript errors, check that the window // of positions lies within the read if ( !pLocFrag->bIsInRead( nConsPosMin ) ) continue; if ( !pLocFrag->bIsInRead( nConsPosMax ) ) continue; char cDiscrepantBase = pLocFrag->ntGetFragFromConsPos( nConsPosWhereToSplit).cGetBase(); if ( cDiscrepantBase == cBaseOfBestRead ) continue; // have found a discepant read int nQualSum = 0; for( int nConsPos = nConsPosMin; nConsPos <= nConsPosMax; ++nConsPos ) { nQualSum += pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99(); } if ( nQualSum > nBestDiscrepantQual ) { nBestDiscrepantQual = nQualSum; pBestDiscrepantRead = pLocFrag; } } if ( ! pBestDiscrepantRead ) { popupErrorMessage2( widTopLevelShell, "Sorry--I can't help you at this location since I can't find a discrepant read here" ); return; } int nLeftConsPos = MAX( pBestRead->nGetAlignClipStart() - ContigView::nWINDOW_SIZE + ContigView::nWINDOW_SIZE / 2, pBestDiscrepantRead->nGetAlignClipStart() - ContigView::nWINDOW_SIZE + ContigView::nWINDOW_SIZE / 2 ); // make sure the area that we check for includes both reads // (hence more than left end of both reads) nLeftConsPos = MAX( nLeftConsPos, pBestRead->nGetAlignStart() ); nLeftConsPos = MAX( nLeftConsPos, pBestDiscrepantRead->nGetAlignStart() ); int nRightConsPos = MIN( pBestRead->nGetAlignClipEnd() + ContigView::nWINDOW_SIZE - ContigView::nWINDOW_SIZE / 2, pBestDiscrepantRead->nGetAlignClipEnd() + ContigView::nWINDOW_SIZE - ContigView::nWINDOW_SIZE / 2 ); // make sure the area that we check for includes both reads // (hence less than right end of both reads ) nRightConsPos = MIN( nRightConsPos, pBestRead->nGetAlignEnd() ); nRightConsPos = MIN( nRightConsPos, pBestDiscrepantRead->nGetAlignEnd() ); if ( nRightConsPos - nLeftConsPos + 1 < ContigView::nWINDOW_SIZE ) { popupErrorMessage2( widTopLevelShell, "I cannot help you. I assume you are trying to tell phrap not to overlap reads %s and %s. However, the aligned region of these reads does not overlap each other by enough bases. (Turn on dimming of unaligned region.) You can manually mark regions as high quality, but I'm not guaranteeing anything. It may help to decrease phrap stringency (-minmatch)", (char*) pBestRead->soGetName().data(), (char*) pBestDiscrepantRead->soGetName().data() ); return; } int nSuggestedConsPos; bool bFoundWindow = false; for( int nConsPos = nLeftConsPos; ( nConsPos < nRightConsPos ) && !bFoundWindow; ++nConsPos ) { Ntide ntBest = pBestRead->ntGetFragFromConsPos( nConsPos ); Ntide ntBestDiscrepant = pBestDiscrepantRead->ntGetFragFromConsPos( nConsPos ); if ( ntBest.cGetBase() != ntBestDiscrepant.cGetBase() ) { // found discrepant base // Can I find a 5 base window around it that is all // above the quality threshold? int nWindowLeft = nConsPos - ContigView::nWINDOW_SIZE/2; int nWindowRight = nConsPos + ContigView::nWINDOW_SIZE/2; // make sure taht the window is contained within both // reads. If it isn't, find another window if ( nWindowLeft < pBestRead->nGetAlignStart() ) continue; if ( nWindowLeft < pBestDiscrepantRead->nGetAlignStart() ) continue; if ( nWindowRight > pBestRead->nGetAlignEnd() ) continue; if ( nWindowRight > pBestDiscrepantRead->nGetAlignEnd() ) continue; bool bAllHighQuality = true; for( int nWindowConsPos = nWindowLeft; ( nWindowConsPos <= nWindowRight ) && bAllHighQuality; ++nWindowConsPos ) { if ( pBestRead->ntGetFragFromConsPos( nWindowConsPos ).qualGetQualityWithout98or99() < qualAtLeastThis ) bAllHighQuality = false; else if ( pBestDiscrepantRead->ntGetFragFromConsPos( nWindowConsPos ).qualGetQualityWithout98or99() < qualAtLeastThis ) bAllHighQuality = false; } if ( bAllHighQuality ) { bFoundWindow = true; nSuggestedConsPos = nConsPos; } } // if ( ntBest.cGetBase() != ... } // for( int nConsPos ... if ( bFoundWindow ) popupErrorMessage2( widTopLevelShell, "I assume you are trying to tell phrap to not overlap reads %s and %s. You cannot do this here, but you can do this by going to consensus position %d and using this same function. At that location there is a discrepancy between the reads and the reads are both >= quality 20 in a 5 base window sufficiently close or in the aligned region of both reads.", (char*) pBestRead->soGetName().data(), (char*) pBestDiscrepantRead->soGetName().data(), pContig_->nUnpaddedIndex( nSuggestedConsPos ) ); else popupErrorMessage2( widTopLevelShell, "I assume you are trying to tell phrap to not overlap reads %s and %s. You cannot do this here, and I can't help you because these reads do not have a 5 base window that meets the following criteria: is of quality 20 or greater in both reads; contains a discrepancy between the reads; is within or just adjacent to the aligned region of both reads (turn on dim unaligned)", (char*) pBestRead->soGetName().data(), (char*) pBestDiscrepantRead->soGetName().data() ); } int ContigView :: nWhatLineIsReadOn( LocatedFragment* pLocFragToFind ) { for( int nLine = 0; nLine < dapLocatedFragment_.length(); ++nLine ) { if ( dapLocatedFragment_[ nLine ] == pLocFragToFind ) { return( nLine ); } } return( RW_NPOS ); } static RWTValVector* paReadQualityValue = NULL; static int nCmpReadByQualityAtCursor( const int* pReadIndex1, const int* pReadIndex2 ) { int nQuality1 = (*paReadQualityValue)[ *pReadIndex1 ]; int nQuality2 = (*paReadQualityValue)[ *pReadIndex2 ]; if ( nQuality1 < nQuality2 ) return 1; else if ( nQuality1 > nQuality2 ) return -1; else return 0; } void ContigView :: maybeSortReadsAtCursor( const int nConsPosAtCursor ) { if ( pCP->soShowReadsAtCursorSortedHow_ == "none" ) return; // if ( !pCP->bShowReadsSortedByQualityValuesAtCursor_ ) return; if ( pCP->soShowReadsAtCursorSortedHow_ == "quality" ) sortReadsByQualityAtConsPos( nConsPosAtCursor ); else if ( pCP->soShowReadsAtCursorSortedHow_ == "base" ) sortReadsByBaseAtConsPos( nConsPosAtCursor ); } void ContigView :: sortReadsByQualityAtConsPos( const int nConsPos ) { RWTValVector aReadQualityValue( nGetNumberOfFragments() ); RWTValVector aReadIndex( nGetNumberOfFragments() ); aReadQualityValue.soName_ = "aReadQualityValue"; aReadIndex.soName_ = "aReadIndex"; paReadQualityValue = &aReadQualityValue; int nRead; for( nRead = 0; nRead < nGetNumberOfFragments(); ++nRead ) { aReadIndex[ nRead ] = nRead; LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead ); int nSumOfQualities; if ( pLocFrag->bIsInAlignedPartOfRead( nConsPos ) ) { const int nHalfWindowSize = 4; int nWindowLeft = nConsPos - nHalfWindowSize; int nWindowRight = nConsPos + nHalfWindowSize; assert( bIntersect( pLocFrag->nGetAlignClipStart(), pLocFrag->nGetAlignClipEnd(), nWindowLeft, nWindowRight, nWindowLeft, nWindowRight ) ); nSumOfQualities = 0; for( int nConsPos2 = nWindowLeft; nConsPos2 <= nWindowRight; ++nConsPos2 ) { nSumOfQualities += pLocFrag->ntGetFragFromConsPos( nConsPos2 ).qualGetQualityWithout98or99(); } } else if ( pLocFrag->bIsInRead( nConsPos ) ) { nSumOfQualities = -1; } else { // not even in read nSumOfQualities = -2; } aReadQualityValue[ nRead ] = nSumOfQualities; } qsort( aReadIndex.pArray_, aReadIndex.length(), sizeof( int ), ( int(*)( const void*, const void*) ) nCmpReadByQualityAtCursor ); // check that the array is now sorted bool bSorted = true; for( nRead = 1; nRead < nGetNumberOfFragments(); ++nRead ) { int nReadIndex1 = aReadIndex[ nRead - 1 ]; int nReadIndex2 = aReadIndex[ nRead ]; if ( aReadQualityValue[ nReadIndex1 ] < aReadQualityValue[ nReadIndex2 ] ) { bSorted = false; cerr << "reads out of order indices " << nReadIndex1 << " and " << nReadIndex2 << " with qualities " << aReadQualityValue[ nReadIndex1 ] << " and " << aReadQualityValue[ nReadIndex2 ] << endl; } } assert( bSorted ); // now resort according to the aReadIndex list RWTPtrOrderedVector aCopyOfContigView( dapLocatedFragment_ ); for( nRead = 0; nRead < nGetNumberOfFragments(); ++nRead ) { dapLocatedFragment_[ nRead ] = aCopyOfContigView[ aReadIndex[ nRead ] ]; } cHowAreReadsSorted_ = cQUALITY_AT_CURSOR; nConsPosOfSortWindow_ = nConsPos; } static RWTValVector* paReadBase = NULL; static ContigView* pContigViewStatic = NULL; static int nConsPosStatic = 0; static int nCmpReadByBaseAtCursor( const int* pReadIndex1, const int* pReadIndex2 ) { char cBase1 = (*paReadBase)[ *pReadIndex1 ]; char cBase2 = (*paReadBase)[ *pReadIndex2 ]; LocatedFragment* pRead1 = pContigViewStatic->pLocatedFragmentGet( *pReadIndex1 ); LocatedFragment* pRead2 = pContigViewStatic->pLocatedFragmentGet( *pReadIndex2 ); // sanity check if ( pRead1->bIsInRead( nConsPosStatic ) && pRead2->bIsInRead( nConsPosStatic ) ) { assert( tolower( pRead1->ntGetFragFromConsPos( nConsPosStatic ).cGetBase() ) == cBase1 ); assert( tolower( pRead2->ntGetFragFromConsPos( nConsPosStatic ).cGetBase() ) == cBase2 ); } if ( cBase1 < cBase2 ) return -1; else if ( cBase1 > cBase2 ) return 1; else { // same base. Sort by strand and then left end position // don't bother sorting reads that have no base at this position if ( cBase1 == 'z' ) return 0; LocatedFragment* pRead1 = pContigViewStatic->pLocatedFragmentGet( *pReadIndex1 ); LocatedFragment* pRead2 = pContigViewStatic->pLocatedFragmentGet( *pReadIndex2 ); if ( !pRead1->bComp() && pRead2->bComp() ) return -1; else if ( pRead1->bComp() && !pRead2->bComp() ) return 1; else { // same base and same strand if ( pRead1->nGetAlignStart() < pRead2->nGetAlignStart() ) return -1; else if ( pRead2->nGetAlignStart() < pRead1->nGetAlignStart() ) return 1; else return 0; } } } void ContigView :: sortReadsByBaseAtConsPos( const int nConsPos ) { RWTValVector aReadBase( nGetNumberOfFragments() ); RWTValVector aReadIndex( nGetNumberOfFragments() ); aReadBase.soName_ = "aReadBase"; aReadIndex.soName_ = "aReadIndex"; paReadBase = &aReadBase; pContigViewStatic = this; nConsPosStatic = nConsPos; int nRead; for( nRead = 0; nRead < nGetNumberOfFragments(); ++nRead ) { aReadIndex[ nRead ] = nRead; LocatedFragment* pLocFrag = pLocatedFragmentGet( nRead ); if ( pLocFrag->bIsInRead( nConsPos ) ) { aReadBase[ nRead ] = tolower( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() ); } else { // put at bottom of sort order aReadBase[ nRead ] = 'z'; } } qsort( aReadIndex.pArray_, aReadIndex.length(), sizeof( int ), ( int(*)( const void*, const void*) ) nCmpReadByBaseAtCursor ); // check if sorted for( nRead = 1; nRead < nGetNumberOfFragments(); ++nRead ) { int nRead1 = aReadIndex[ nRead - 1 ]; int nRead2 = aReadIndex[ nRead ]; LocatedFragment* pLocFrag1 = pLocatedFragmentGet( nRead1 ); LocatedFragment* pLocFrag2 = pLocatedFragmentGet( nRead2 ); if ( !pLocFrag1->bIsInRead( nConsPos ) && pLocFrag2->bIsInRead( nConsPos) ) { cerr << "out of order not in read " << pLocFrag1->soGetName() << " and " << pLocFrag2->soGetName() << endl; } if ( pLocFrag1->bIsInRead( nConsPos ) && pLocFrag2->bIsInRead( nConsPos ) ) { if ( tolower( pLocFrag1->ntGetFragFromConsPos( nConsPos ).cGetBase() ) > tolower( pLocFrag2->ntGetFragFromConsPos( nConsPos ).cGetBase() ) ) { cerr << "out of order: " << pLocFrag1->soGetName() << " and " << pLocFrag2->soGetName() << endl; } if ( tolower( pLocFrag1->ntGetFragFromConsPos( nConsPos ).cGetBase() ) == tolower( pLocFrag2->ntGetFragFromConsPos( nConsPos ).cGetBase() ) ) { if ( pLocFrag1->bComp() && !pLocFrag2->bComp() ) { cerr << "strand out of order: " << pLocFrag1->soGetName() << " and " << pLocFrag2->soGetName() << endl; } if ( pLocFrag1->bComp() == pLocFrag2->bComp() ) { if ( pLocFrag1->nGetAlignStart() > pLocFrag2->nGetAlignStart() ) { cerr << "start pos out of order: " << pLocFrag1->soGetName() << " and " << pLocFrag2->soGetName() << endl; } } } } // if ( pLocFrag1->bIsInRead( nConsPos ) && pLocFrag2->bIsInRead() ) { } // now resort according to the aReadIndex list RWTPtrOrderedVector aCopyOfContigView( dapLocatedFragment_ ); for( nRead = 0; nRead < nGetNumberOfFragments(); ++nRead ) { dapLocatedFragment_[ nRead ] = aCopyOfContigView[ aReadIndex[ nRead ] ]; } cHowAreReadsSorted_ = cBASE_AT_CURSOR; nConsPosOfSortWindow_ = nConsPos; } RWCString ContigView :: soGetHowReadsAreSorted() { if ( cHowAreReadsSorted_ == cQUALITY_AT_CURSOR ) { RWCString soMessage( (size_t) 200 ); soMessage.nCurrentLength_ = sprintf( soMessage.data(), "reads sorted by quality in 9bp window about %d", pContig_->nUnpaddedIndex( nConsPosOfSortWindow_ ) ); return( soMessage ); } else if ( cHowAreReadsSorted_ == cBASE_AT_CURSOR ) { RWCString soMessage( (size_t) 200 ); soMessage.nCurrentLength_ = sprintf( soMessage.data(), "reads sorted by base at position %d", pContig_->nUnpaddedIndex( nConsPosOfSortWindow_ ) ); return( soMessage ); } else if ( cHowAreReadsSorted_ == cSTRAND_AND_POSITION ) return( "reads sorted by strand and then position" ); else if ( cHowAreReadsSorted_ == cALPHABETICALLY ) return( "reads sorted alphabetically" ); else if ( cHowAreReadsSorted_ == cFILE ) { return( RWCString( "reads are sorted by file " ) + pCP->filShowReadsInAlignedReadsWindowOrderedByThisFile_ ); } else { return( "reads are not sorted" ); } }