/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "assembly.h" #include "clScaffold.h" #include "contigEndPair.h" #include "restrictionFragment.h" #include "please_wait.h" #include "textbox.h" #include "rwctokenizer.h" #include "consed.h" #include "gotoList.h" #include "highQualityDiscrepancyGotoList.h" #include "fwdRevPair.h" #include "popupInfoMessage.h" #include "hp_exception_kludge.h" #include "bIsNumeric.h" #include "popupErrorMessage2.h" #include "bIsNumericMaybeWithWhitespace.h" #include "oligoTag.h" #include "popupErrorMessage.h" #include "fileDefines.h" #include "soAddCommas.h" #include static void cbTellPhrapNotToOverlapReadsDiscrepantAtThisLocation( const gotoItem* pGotoItem, void* pClientData ) { TRY_CATCH_WRAPPER( ConsEd::pGetAssembly()->userPushedTellPhrapToNotOverlapReadsDiscrepantAtThisLocation( pGotoItem, pClientData ) ); } RWCString Assembly :: soGetContigMap() { figureOutContigOrderAndOrientation(); soContigMap_ = ""; for( int nChain = 0; nChain < aHeadsOfScaffoldsOfContigs_.length(); ++nChain ) { if ( nChain != 0 ) soContigMap_ += ","; Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nChain ]; // first contig in chain: // which end of the contig is null? If it is the left end, // then the contig is not complemented in the chain. if ( pContig->bThisContigIsComplementedInTheScaffold_ ) { if ( pContig->bIsThisEndTheEndOfTheClone( nRightGap ) ) { soContigMap_ += "E-"; } } else { if ( pContig->bIsThisEndTheEndOfTheClone( nLeftGap ) ) { soContigMap_ += "E-"; } } // the head of a contig always has one null end. Contig* pPreviousContig = NULL; bool bFirstContigInChain = true; while( pContig ) { if ( bFirstContigInChain ) bFirstContigInChain = false; else soContigMap_ += "-"; soContigMap_ += pContig->soGetAbbreviatedName(); if ( pContig->bThisContigIsComplementedInTheScaffold_ ) soContigMap_ += "c"; pPreviousContig = pContig; pContig = pContig->pNextContig_; } // exited here due to pContig being null. pContig = pPreviousContig; if ( pContig->bThisContigIsComplementedInTheScaffold_ ) { // so the left end is the end of the chain. Could it be the // end of the // clone? if ( pContig->bIsThisEndTheEndOfTheClone( nLeftGap ) ) soContigMap_ += "-E"; } else { // so the right end is the end of the chain. Could it be the // end of the clone? if ( pContig->bIsThisEndTheEndOfTheClone( nRightGap ) ) soContigMap_ += "-E"; } // case of this being a circular scaffold: if ( aScaffoldIsCircularNotLinear_[ nChain ] ) { pContig = aHeadsOfScaffoldsOfContigs_[ nChain ]; soContigMap_ += "-"; soContigMap_ += pContig->soGetAbbreviatedName(); if ( pContig->bThisContigIsComplementedInTheScaffold_ ) soContigMap_ += "c"; soContigMap_ += "(circle)"; } } return( soContigMap_ ); } void Assembly :: setContigClonePositions( clScaffold* pScaffold ) { int nScaffoldLengthSoFar = 0; for( int nContig = 0; nContig < pScaffold->aContigs_.length(); ++nContig ) { Contig* pContig = pScaffold->aContigs_[ nContig ]; pContig->nClonePosLeft_ = nScaffoldLengthSoFar; int nUnpaddedLengthOfAddedBases = pContig->nGetHighQualitySegmentLengthUnpadded(); nScaffoldLengthSoFar += nUnpaddedLengthOfAddedBases; pContig->nClonePosRight_ = nScaffoldLengthSoFar - 1; } } void Assembly :: getScaffoldBases( clScaffold* pScaffold, RWCString& soUnpaddedScaffold, bool& bLeftEndIsCloneEnd, bool& bRightEndIsCloneEnd ) { soUnpaddedScaffold = ""; bLeftEndIsCloneEnd = pScaffold->bFirstContigIsCloneEnd_; bRightEndIsCloneEnd = pScaffold->bLastContigIsCloneEnd_; for( int nContig = 0; nContig < pScaffold->aContigs_.length(); ++nContig ) { Contig* pContig = pScaffold->aContigs_[ nContig ]; // the problem with storing this in the Contig objects // is that another digest at the same time will overwrite these. // However, I believe these are only used when creating // the restrictionFragments and thus it doesn't matter if they // are soon overwritten. pContig->nClonePosLeft_ = soUnpaddedScaffold.length(); if ( nContig == 0 && pScaffold->bUseFirstContigUnpaddedStart_ && nContig == pScaffold->aContigs_.nGetMaxIndex() && pScaffold->bUseLastContigUnpaddedEnd_ ) { // special case in which there is a single contig the user // wants a section of which digested pContig->appendPartOfUnpaddedConsensus( soUnpaddedScaffold, pScaffold->aIfContigsAreComplemented_[ nContig ], pScaffold->nFirstContigUnpaddedStart_, pScaffold->nLastContigUnpaddedEnd_ ); } else if ( nContig == 0 && pScaffold->bUseFirstContigUnpaddedStart_ ) { pContig->appendPartOfUnpaddedHighQualitySegment( soUnpaddedScaffold, pScaffold->aIfContigsAreComplemented_[ nContig ], pScaffold->nFirstContigUnpaddedStart_, true ); // bPreviousArgumentIsFirstNotLast } else if ( nContig == pScaffold->aContigs_.nGetMaxIndex() && pScaffold->bUseLastContigUnpaddedEnd_ ) { pContig->appendPartOfUnpaddedHighQualitySegment( soUnpaddedScaffold, pScaffold->aIfContigsAreComplemented_[ nContig ], pScaffold->nLastContigUnpaddedEnd_, false ); // bPreviousArgumentIsFirstNotLast } else { pContig->appendUnpaddedHighQualitySegment( soUnpaddedScaffold, pScaffold->aIfContigsAreComplemented_[ nContig ] ); } pContig->nClonePosRight_ = soUnpaddedScaffold.length() - 1; // if there is another contig in the scaffold, add gap n's if ( nContig < (int) pScaffold->aContigs_.length() - 1 ) { contigEndPair* pCEP = pScaffold->aContigEndPairs_[ nContig ]; int nGapBases; // there might be no linking information between the contigs, // if the contig was inserted based on links to other contigs // further away (DG, 6/04) if ( pCEP ) { nGapBases = pCEP->nGapSize_; } else { nGapBases = 0; } // need to figure out how many low quality bases were clipped // off the ends of the 2 contigs int nWhichEndPreviousContig = pScaffold->aIfContigsAreComplemented_[ nContig ] ? nLeftGap : nRightGap; int nWhichEndNextContig = pScaffold->aIfContigsAreComplemented_[ nContig + 1 ] ? nRightGap : nLeftGap; int nClippedLowQualityBasesPreviousContig = pContig->nGetUnpaddedClippedLowQualityBases( nWhichEndPreviousContig ); Contig* pNextContig = pScaffold->aContigs_[ nContig + 1 ]; int nClippedLowQualityBasesNextContig = pNextContig->nGetUnpaddedClippedLowQualityBases( nWhichEndNextContig ); int nNs = nGapBases + nClippedLowQualityBasesPreviousContig + nClippedLowQualityBasesNextContig; if ( nNs > 0 ) { soUnpaddedScaffold.appendLotsOfCopies( 'n', nNs ); } } // if ( pNextContig ) { } } static int cmpScaffoldsBySize( const clScaffold** ppScaffold1, const clScaffold** ppScaffold2 ) { // notice that these are sorted in *descending* order if ( (*ppScaffold1)->nScaffoldLength_ < (*ppScaffold2)->nScaffoldLength_ ) return( 1 ); else if ( (*ppScaffold1)->nScaffoldLength_ > (*ppScaffold2)->nScaffoldLength_ ) return( -1 ); else return( 0 ); } void Assembly :: getScaffolds( RWTPtrOrderedVector& aScaffolds ) { figureOutContigOrderAndOrientation(); // this will call setContigTemplateArrays aScaffolds.clear(); int nScaffold; for( nScaffold = 0; nScaffold < aHeadsOfScaffoldsOfContigs_.length(); ++nScaffold ) { clScaffold* pScaffold = pCreateScaffold( nScaffold ); setContigClonePositions( pScaffold ); pScaffold->setScaffoldLength(); aScaffolds.insert( pScaffold ); } // sort the scaffolds by size void* pArray = (void*) aScaffolds.data(); size_t nNumberOfElements = aScaffolds.entries(); size_t nSizeOfAnElement = sizeof( clScaffold* ); qsort( pArray, nNumberOfElements, nSizeOfAnElement, ( (int(*) ( const void*, const void*) ) cmpScaffoldsBySize ) ); // check that they are in order for( nScaffold = 1; nScaffold < aScaffolds.entries(); ++nScaffold ) { if ( aScaffolds[ nScaffold - 1 ]->nScaffoldLength_ < aScaffolds[ nScaffold ]->nScaffoldLength_ ) { THROW_ERROR( "qsort failed to sort scaffolds by length in Assembly::getScaffolds" ); } } } void Assembly :: flagAllSubclonesIfBadTemplateOrBadLibrary() { for( int nTemplate = 0; nTemplate < subcloneTTemplateArray_.entries(); ++nTemplate ) { subcloneTTemplate* pSub = subcloneTTemplateArray_[ nTemplate ]; pSub->flagSubcloneIfBadTemplateOrBadLibrary(); } } void Assembly :: getRestrictionFragmentsForOneScaffold( const RWTPtrOrderedVector& aIndexedByEnzymeBases, RWTPtrOrderedVector& aRestrictionFragmentsNotEndingAtVectorInsertJunction, RWTPtrOrderedVector& aRestrictionFragmentsEndingAtVectorInsertJunction, clScaffold* pScaffold, int& nPositionOrderOfFragment ) { // aRestrictionFragments will be appended to--it may already contain // fragments from other scaffolds RWCString soUnpaddedScaffold; bool bLeftEndIsCloneEnd; bool bRightEndIsCloneEnd; getScaffoldBases( pScaffold, soUnpaddedScaffold, bLeftEndIsCloneEnd, bRightEndIsCloneEnd ); RWTValOrderedVector aCutSites; for( int nScaffoldBase = 0; nScaffoldBase < ( (int) soUnpaddedScaffold.length() - (int) aIndexedByEnzymeBases.length() + 1 ); ++nScaffoldBase ) { bool bMatch = true; for( int nEnzymeBase = 0; nEnzymeBase < aIndexedByEnzymeBases.length(); ++nEnzymeBase ) { char cScaffoldBase = soUnpaddedScaffold[ nScaffoldBase + nEnzymeBase]; if ( ! (aIndexedByEnzymeBases[ nEnzymeBase ] )[cScaffoldBase] ) { bMatch = false; break; } } if ( bMatch ) { // found a perfect match aCutSites.insert( nScaffoldBase ); } } // for( int nScaffoldBase = 0; nScaffoldBase < ... restrictionFragment* pPreviousRes = NULL; for( int nCutSite = 0; nCutSite < aCutSites.length(); ++nCutSite ) { restrictionFragment* pRes = new restrictionFragment( restrictionFragment::PREDICTED_FRAGMENT ); pRes->nPositionOrder_ = nPositionOrderOfFragment; ++nPositionOrderOfFragment; // check for cut site at the very beginning of the insert. In // this case the cut site does not delineate a real fragment. I // wish there would be a way to tell the vector fragment that // ends at the vector/insert junction that it is a complete // fragment, as well. if ( nCutSite == 0 && aCutSites[ nCutSite ] == 0 ) { // just setting these values so the next fragment can // use them for its left end pRes->fragRight_ = restrictionFragment::IN_CONTIG; getContigAndPositionFromScaffoldPos( pScaffold, 0, pRes->pRightContig_, pRes->nUnpaddedRight_ ); pPreviousRes = pRes; continue; // do not put this in any of the lists } // case of the first cut site that does not occur at the beginning // of the scaffold if ( nCutSite == 0 ) { if ( bLeftEndIsCloneEnd ) { pRes->fragLeft_ = restrictionFragment::AT_VECTOR_INSERT_JUNCTION; // this is set so that we know which contig and which end // of the contig is present at the vector-insert junction getContigAndPositionFromScaffoldPos( pScaffold, 0, pRes->pLeftContig_, pRes->nUnpaddedLeft_ ); } else { pRes->fragLeft_ = restrictionFragment::UNKNOWN; // even though the actual cut site is not in this contig, // the user will want to know which contig it is *off* getContigAndPositionFromScaffoldPos( pScaffold, 0, pRes->pLeftContig_, pRes->nUnpaddedLeft_ ); } pRes->nSize_ = aCutSites[ nCutSite ]; } else { pRes->fragLeft_ = pPreviousRes->fragRight_; pRes->pLeftContig_ = pPreviousRes->pRightContig_; pRes->nUnpaddedLeft_ = pPreviousRes->nUnpaddedRight_; pRes->nSize_ = aCutSites[ nCutSite ] - aCutSites[ nCutSite - 1 ]; } Contig* pRightContig; int nUnpaddedRight; getContigAndPositionFromScaffoldPos( pScaffold, aCutSites[ nCutSite ], pRightContig, nUnpaddedRight ); pRes->fragRight_ = restrictionFragment::IN_CONTIG; pRes->pRightContig_ = pRightContig; pRes->nUnpaddedRight_ = nUnpaddedRight; if ( pRes->fragLeft_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION || pRes->fragRight_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) aRestrictionFragmentsEndingAtVectorInsertJunction.insert( pRes ); else aRestrictionFragmentsNotEndingAtVectorInsertJunction.insert( pRes ); pPreviousRes = pRes; } // for( int nCutSite = 0; nCutSite < aCutSites.length(); ++nCutSite ) { // deal with last partial fragment, keeping in mind that there may // be no partial fragment if ( aCutSites.length() > 0 ) { int nScaffoldPositionOfCutSite = aCutSites[ aCutSites.length() - 1 ]; if ( nScaffoldPositionOfCutSite == ( soUnpaddedScaffold.length() - aIndexedByEnzymeBases.length() ) ) { // There is no fragment after the final cut site--the last // fragment simply ends at the vector/insert junction but it // should not be combined with any vector fragment. return; } } restrictionFragment* pRes = new restrictionFragment( restrictionFragment::PREDICTED_FRAGMENT ); pRes->nPositionOrder_ = nPositionOrderOfFragment; ++nPositionOrderOfFragment; if ( aCutSites.length() == 0 ) { // there is only a single fragment if ( bLeftEndIsCloneEnd ) { pRes->fragLeft_ = restrictionFragment::AT_VECTOR_INSERT_JUNCTION; // this is set so that we know which contig and which end // of the contig is present at the vector-insert junction getContigAndPositionFromScaffoldPos( pScaffold, 0, pRes->pLeftContig_, pRes->nUnpaddedLeft_ ); } else { pRes->fragLeft_ = restrictionFragment::UNKNOWN; // even though the cut site is not in this contig, the // user will want to know which contig it is off of. getContigAndPositionFromScaffoldPos( pScaffold, 0, pRes->pLeftContig_, pRes->nUnpaddedLeft_ ); } pRes->nSize_ = soUnpaddedScaffold.length(); } else { // normal case in which there pPreviousRes refers to the // previous restrictionFragment and we have a partial one // that left off from there pRes->fragLeft_ = pPreviousRes->fragRight_; pRes->pLeftContig_ = pPreviousRes->pRightContig_; pRes->nUnpaddedLeft_ = pPreviousRes->nUnpaddedRight_; pRes->nSize_ = soUnpaddedScaffold.length() - aCutSites[ aCutSites.length() - 1 ]; } if ( bRightEndIsCloneEnd ) { pRes->fragRight_ = restrictionFragment::AT_VECTOR_INSERT_JUNCTION; // this is set so that we know which contig and which end // of the contig is present at the vector-insert junction getContigAndPositionFromScaffoldPos( pScaffold, soUnpaddedScaffold.length() - 1, pRes->pRightContig_, pRes->nUnpaddedRight_ ); } else { pRes->fragRight_ = restrictionFragment::UNKNOWN; // even though the cut site is not in this contig, the user // will want to know which contig the cut site is off of. getContigAndPositionFromScaffoldPos( pScaffold, soUnpaddedScaffold.length() - 1, pRes->pRightContig_, pRes->nUnpaddedRight_ ); } if ( pRes->fragLeft_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION || pRes->fragRight_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) aRestrictionFragmentsEndingAtVectorInsertJunction.insert( pRes ); else aRestrictionFragmentsNotEndingAtVectorInsertJunction.insert( pRes ); } void Assembly :: getContigAndPositionFromScaffoldPos( clScaffold* pScaffold, const int nScaffoldPos, Contig*& pContig, int& nUnpaddedConsPos ) { bool bFirstContig = false; bool bLastContig = false; bool bComplemented; pContig = NULL; for( int nContig = 0; nContig < pScaffold->aContigs_.length(); ++nContig ) { Contig* pTempContig = pScaffold->aContigs_[ nContig ]; if ( nScaffoldPos <= pTempContig->nClonePosRight_ ) { pContig = pTempContig; if ( nContig == 0 ) bFirstContig = true; if ( nContig == pScaffold->aContigs_.nGetMaxIndex() ) bLastContig = true; bComplemented = pScaffold->aIfContigsAreComplemented_[ nContig ]; break; } } if ( !pContig ) { RWCString soError = "could not find contig/pos in scaffold for scaffold position " + RWCString( (long) nScaffoldPos ); THROW_ERROR2( soError ); } if ( ! ( pContig->nClonePosLeft_ <= nScaffoldPos && nScaffoldPos <= pContig->nClonePosRight_ ) ) { RWCString soError = "found contig " + pContig->soGetName() + " but could not find contig pos for scaffold position " + RWCString( (long) nScaffoldPos ); THROW_ERROR2( soError ); } if ( !bComplemented ) { // llllllllllllHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHlllllllllllll // ^ ^ // nClonePosLeft_ nClonePosRight_ // (scaffold positions, has no pads) // nUnpaddedHighQualitySegmentStart_ // nUnpaddedHighQualitySegmentEnd_ // (unpadded contig positions) // or ^ // clScaffold::nFirstContigUnpaddedStart_ // -------> (length is nScaffoldPos - pContig->nClonePosLeft_) // ^ // clScaffold::nFirstContigUnpaddedStart_ + // nScaffoldPos - pContig->nClonePosLeft_ // // Notice that if this is the last contig, and the last contig is chopped // off due to it being the end of a subclone, this does not affect // the calculation of the nUnpaddedConsPos int nUnpaddedOffset; if ( pScaffold->bUseFirstContigUnpaddedStart_ && bFirstContig ) nUnpaddedOffset = pScaffold->nFirstContigUnpaddedStart_; else nUnpaddedOffset = pContig->nGetHighQualitySegmentStart(); nUnpaddedConsPos = nUnpaddedOffset + nScaffoldPos - pContig->nClonePosLeft_; } else { // uncomplemented contig: // llllllllllllHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHlllllllllllll // ^ ^ // nClonePosRight_ nClonePosLeft_ // (scaffold positions, has no pads) // nUnpaddedHighQualitySegmentStart_ // nUnpaddedHighQualitySegmentEnd_ // (unpadded contig positions) // <-------- // distance is nScaffoldPos - nClonePosLeft_ // but position is: nUnpaddedHighQualitySegmentEnd_ - nScaffoldPos + nClonePosLeft_ // // but if this is the last contig in the scaffold and is cut // off by an end of a subclone, then // // llllllllllllHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHlllllllllllll // ^ ^ // nClonePosRight_ nClonePosLeft_ // (scaffold positions, has no pads) // nUnpaddedHighQualitySegmentStart_ // clScaffold::nLastContigUnpaddedEnd_ // (unpadded contig positions) // <-------- // distance is nScaffoldPos - nClonePosLeft_ // but contig pos is clScaffold::nLastContigUnpaddedEnd_ - nScaffoldPos + nClonePosLeft_ int nRightmostUnpaddedInContig; if ( pScaffold->bUseLastContigUnpaddedEnd_ && bLastContig ) nRightmostUnpaddedInContig = pScaffold->nLastContigUnpaddedEnd_; else nRightmostUnpaddedInContig = pContig->nGetHighQualitySegmentEnd(); nUnpaddedConsPos = nRightmostUnpaddedInContig - nScaffoldPos + pContig->nClonePosLeft_; } } void Assembly :: showScaffoldsOfContigs() { // always get contig map, even if you just got it. That's because // it has been a problem with it not updating. June 2007 // if ( soContigMap_.isNull() ) { PleaseWait pl(); soGetContigMap(); // } int nNumberOfRowsVisible = soContigMap_.nGetNumberOfLinesOfText(); const int nMaxNumberOfRows = 30; if ( nNumberOfRowsVisible > nMaxNumberOfRows ) nNumberOfRowsVisible = nMaxNumberOfRows; TextBox* pTextBox = new TextBox( "Scaffolds of Contigs", nNumberOfRowsVisible ); pTextBox->append( soContigMap_ ); pTextBox->makeVisible(); } void Assembly :: findRestrictionFragmentInContigMap( restrictionFragment* pRes, int& nStartPos, int& nEndPos ) { assert( pRes->cActualOrPredictedFragment_ == restrictionFragment::PREDICTED_FRAGMENT ); nStartPos = -1; nEndPos = -1; figureOutContigOrderAndOrientation(); soContigMap_ = ""; bool bAlreadyStartedRegion = false; for( int nChain = 0; nChain < aHeadsOfScaffoldsOfContigs_.length(); ++nChain ) { if ( nChain != 0 ) soContigMap_ += ","; Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nChain ]; // first contig in chain: // which end of the contig is null? If it is the left end, // then the contig is not complemented in the chain. if ( pContig->bThisContigIsComplementedInTheScaffold_ ) { if ( pContig->bIsThisEndTheEndOfTheClone( nRightGap ) ) { soContigMap_ += "E-"; } } else { if ( pContig->bIsThisEndTheEndOfTheClone( nLeftGap ) ) { soContigMap_ += "E-"; } } // the head of a contig always has one null end. Contig* pPreviousContig = NULL; bool bFirstContigInChain = true; while( pContig ) { if ( bFirstContigInChain ) bFirstContigInChain = false; else soContigMap_ += "-"; // is this contig part of the restriction digest? if ( pRes->bIsFragmentPartOfThisContig( pContig ) ) { if ( !bAlreadyStartedRegion ) { bAlreadyStartedRegion = true; // want the 0-based position of the next character. // That is the current length. nStartPos = soContigMap_.length(); } } soContigMap_ += pContig->soGetAbbreviatedName(); if ( pContig->bThisContigIsComplementedInTheScaffold_ ) soContigMap_ += "c"; if ( pRes->bIsFragmentPartOfThisContig( pContig ) ) { // 0-based position nEndPos = soContigMap_.length() - 1; } pPreviousContig = pContig; pContig = pContig->pNextContig_; } // exited here due to pContig being null. pContig = pPreviousContig; if ( pContig->bThisContigIsComplementedInTheScaffold_ ) { // so the left end is the end of the chain. Could it be the // end of the // clone? if ( pContig->bIsThisEndTheEndOfTheClone( nLeftGap ) ) soContigMap_ += "-E"; } else { // so the right end is the end of the chain. Could it be the // end of the clone? if ( pContig->bIsThisEndTheEndOfTheClone( nRightGap ) ) soContigMap_ += "-E"; } // case of this being a circular scaffold: if ( aScaffoldIsCircularNotLinear_[ nChain ] ) { pContig = aHeadsOfScaffoldsOfContigs_[ nChain ]; soContigMap_ += "-"; soContigMap_ += pContig->soGetAbbreviatedName(); if ( pContig->bThisContigIsComplementedInTheScaffold_ ) soContigMap_ += "c"; soContigMap_ += "(circle)"; } } } Contig* Assembly :: pGetContigByAbbreviatedName( const RWCString& soAbbreviatedContigName ) { for ( int nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); if ( pContig->soGetAbbreviatedName() == soAbbreviatedContigName ) { return( pContig ); } } return( NULL ); } Contig* Assembly :: pGetContigByVariousNames( const RWCString& soContigName ) { RWCString soContigName2; if ( soContigName.bStartsWithCaseInsensitive( "contig" ) ) { soContigName2 = "Contig" + soContigName.soGetRestOfString( 6 ); } else { // maybe soContigName has the "12" of "Contig12" soContigName2 = "Contig" + soContigName; } // changed June 2009 for contigs names of other assemblers such as MIRA Contig* pContig = pGetContigByName( soContigName2 ); if ( pContig ) return pContig; // see if the contig name really doesn't begin with "Contig" return( pGetContigByName( soContigName ) ); } Contig* Assembly :: pGetContigByVariousNamesCaseInsensitive( const RWCString& soContigName ) { RWCString soContigName2; if ( soContigName.bStartsWithCaseInsensitive( "contig" ) ) { soContigName2 = "Contig" + soContigName.soGetRestOfString( 6 ); } else { // maybe soContigName has the "12" of "Contig12" soContigName2 = "Contig" + soContigName; } // changed June 2009 for contigs names of other assemblers such as MIRA Contig* pContig = pGetContigByNameCaseInsensitive( soContigName2 ); if ( pContig ) return pContig; // if reached here, haven't found it so try soContigName itself return( pGetContigByNameCaseInsensitive( soContigName ) ); } void Assembly :: createScaffoldsFromUserContigMap( const RWCString& soUserEnteredContigMap, const int nStartUnpaddedConsPos, const int nEndUnpaddedConsPos, RWTPtrOrderedVector* pScaffoldArray, RWCString& soErrorMessage ) { soErrorMessage = ""; clScaffold* pScaffold = new clScaffold( -1 ); pScaffoldArray->insert( pScaffold ); RWCTokenizer tokContig( soUserEnteredContigMap ); Contig* pPreviousContig = NULL; Contig* pNextContig = NULL; bool bPreviousContigComplemented; bool bNextContigComplemented; RWCString soContigAbb; while( !( soContigAbb = tokContig( '-' ) ).isNull() ) { if ( soContigAbb.cGetLastChar() == 'c' ) { bNextContigComplemented = true; soContigAbb.removeLastChar(); } else bNextContigComplemented = false; pNextContig = ConsEd::pGetAssembly()->pGetContigByAbbreviatedName( soContigAbb ); if ( !pNextContig ) { soErrorMessage = "could not find contig "; soErrorMessage += soContigAbb; return; } if ( pPreviousContig ) { // check if there is a contigEndPair that connects these two // contigs int nWhichGapForPreviousContig = ( bPreviousContigComplemented ? nLeftGap : nRightGap ); int nWhichGapForNextContig = ( bNextContigComplemented ? nRightGap : nLeftGap ); contigEndPair* pCEP = pGetContigEndPairForGap( pPreviousContig, nWhichGapForPreviousContig, pNextContig, nWhichGapForNextContig ); // this might be null, indicating not enough supporting // fwd/rev pairs if ( pCEP ) { pScaffold->aContigs_.insert( pNextContig ); pScaffold->aIfContigsAreComplemented_.insert( bNextContigComplemented ); pScaffold->aContigEndPairs_.insert( pCEP ); } else { soErrorMessage += "No forward/reverse pair information connecting "; soErrorMessage += szWhichGap( nWhichGapForPreviousContig ); soErrorMessage += " of "; soErrorMessage += pPreviousContig->soGetName(); soErrorMessage += " to "; soErrorMessage += szWhichGap( nWhichGapForNextContig ); soErrorMessage += " of "; soErrorMessage += pNextContig->soGetName(); soErrorMessage += " "; pScaffold = new clScaffold( -1 ); pScaffoldArray->insert( pScaffold ); pScaffold->aContigs_.insert( pNextContig ); pScaffold->aIfContigsAreComplemented_.insert( bNextContigComplemented ); } } else { // this is the first contig--there is no gap yet pScaffold->aContigs_.insert( pNextContig ); pScaffold->aIfContigsAreComplemented_.insert( bNextContigComplemented ); pScaffold->bFirstContigIsCloneEnd_ = true; pScaffold->bUseFirstContigUnpaddedStart_ = true; pScaffold->nFirstContigUnpaddedStart_ = nStartUnpaddedConsPos; } pPreviousContig = pNextContig; bPreviousContigComplemented = bNextContigComplemented; } pScaffold->bLastContigIsCloneEnd_ = true; pScaffold->bUseLastContigUnpaddedEnd_ = true; pScaffold->nLastContigUnpaddedEnd_ = nEndUnpaddedConsPos; } clScaffold* Assembly :: pCreateScaffold( const int nScaffold ) { clScaffold* pScaffold = new clScaffold( nScaffold ); Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nScaffold ]; int nWhichEnd = ( pContig->bThisContigIsComplementedInTheScaffold_ ? nRightGap : nLeftGap ); if ( pContig->bIsThisEndTheEndOfTheClone( nWhichEnd ) ) pScaffold->bFirstContigIsCloneEnd_ = true; pScaffold->aContigs_.insert( pContig ); pScaffold->aIfContigsAreComplemented_.insert( pContig->bThisContigIsComplementedInTheScaffold_ ); while( pContig->pNextContig_ ) { pContig = pContig->pNextContig_; pScaffold->aContigs_.insert( pContig ); pScaffold->aIfContigsAreComplemented_.insert( pContig->bThisContigIsComplementedInTheScaffold_ ); // consider the gap that connects this contig to the previous one nWhichEnd = ( pContig->bThisContigIsComplementedInTheScaffold_ ? nRightGap : nLeftGap ); pScaffold->aContigEndPairs_.insert( pContig->pCEP_[ nWhichEnd ] ); } // reached end nWhichEnd = ( pContig->bThisContigIsComplementedInTheScaffold_ ? nLeftGap : nRightGap ); if ( pContig->bIsThisEndTheEndOfTheClone( nWhichEnd ) ) pScaffold->bLastContigIsCloneEnd_ = true; return( pScaffold ); } contigEndPair* Assembly :: pGetContigEndPairForGap( Contig* pContig1, const int nWhichEnd1, Contig* pContig2, const int nWhichEnd2 ) { Contig* pContigFirst; Contig* pContigLast; int nWhichEndFirst; int nWhichEndLast; if ( pContig1->soGetName() < pContig2->soGetName() ) { pContigFirst = pContig1; pContigLast = pContig2; nWhichEndFirst = nWhichEnd1; nWhichEndLast = nWhichEnd2; } else { pContigFirst = pContig2; pContigLast = pContig1; nWhichEndFirst = nWhichEnd2; nWhichEndLast = nWhichEnd1; } contigEndPair* pFoundContigEndPair = NULL; for( int nPair = 0; nPair < pContigEndPairArray_->length(); ++nPair ) { contigEndPair* pPair = (*pContigEndPairArray_)[ nPair ]; if ( pPair->pContig_[0] == pContigFirst && pPair->pContig_[1] == pContigLast && pPair->nWhichEnd_[0] == nWhichEndFirst && pPair->nWhichEnd_[1] == nWhichEndLast ) { return( pPair ); } } return( NULL ); } void Assembly :: navigateByDifferenceBetweenRecalculatedAndOriginalConsensus() { PleaseWait* pPleaseWait = new PleaseWait(); gotoList* pGotoList = new gotoList(); for ( int nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); pContig->appendListOfDifferencesBetweenRecalculatedAndOriginalConsensus( pGotoList ); } ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( "Recalculated Quality Differences", "", "", 80, "", NULL, NULL, NULL, pGotoList, NULL ) ); delete pPleaseWait; } void Assembly :: navigateByHighQualityDiscrepanciesInAllReads(const bool bExcludeSpuriousPads ) { PleaseWait* pPleaseWait = new PleaseWait(); gotoList* pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); gotoList* pHighQualityDiscrepancyGotoListForOneContig = new highQualityDiscrepancyGotoList( pContig, true, // exclude compression bExcludeSpuriousPads ); // exclude spurious pads pGotoList->appendAnotherList( pHighQualityDiscrepancyGotoListForOneContig ); } RWCString soTitle; if ( bExcludeSpuriousPads ) soTitle = "High Quality Discrepancies (omitting in unaligned regions, chimera tags, G dropouts, and most pads)"; else soTitle = "High Quality Discrepancies (omitting in unaligned regions, chimera tags, and G dropouts)"; ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( soTitle, "", "", 80, "", NULL, NULL, NULL, pGotoList, NULL ) ); delete pPleaseWait; } void Assembly :: figureOutConsistentForwardReversePairDepthOfAllContigs( const bool bDoNotShowTemplatesInDoNotShowTemplatesFile ) { int nContig; for( nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); pContig->nApproxSizeOfSubcloneTemplatesForFwdRevPairDepth_ = 0; } int nSub; for( nSub = 0; nSub < subcloneTTemplateArray_.length(); ++nSub ) { subcloneTTemplate* pSub = subcloneTTemplateArray_[ nSub ]; if ( !pSub->bHasForwardAndReversePair_ ) continue; if ( bDoNotShowTemplatesInDoNotShowTemplatesFile && pSub->bDoNotShowInAssemblyView_ ) continue; // get the forward and reverse reads. If there is more than one // of either, which should we use? The best of each. LocatedFragment* pForwardLocFrag = NULL; LocatedFragment* pReverseLocFrag = NULL; char cProblem = ' '; bool bHasAFwdRevPair; bool bIsConsistent = pSub->bHasAConsistentFwdRevPair( pForwardLocFrag, pReverseLocFrag, bHasAFwdRevPair, cProblem); if ( !bHasAFwdRevPair ) continue; // redundant, I think if ( pForwardLocFrag->bIsWholeReadUnaligned() ) continue; if ( pReverseLocFrag->bIsWholeReadUnaligned() ) continue; if ( bIsConsistent && ( pForwardLocFrag->pGetContig() == pReverseLocFrag->pGetContig() ) ) { ++pForwardLocFrag->pContig_->nApproxSizeOfSubcloneTemplatesForFwdRevPairDepth_; } } for( nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); if ( pContig->pSubcloneTemplatesForFwdRevPairDepth_ ) { pContig->pSubcloneTemplatesForFwdRevPairDepth_->clear(); delete pContig->pSubcloneTemplatesForFwdRevPairDepth_; } pContig->pSubcloneTemplatesForFwdRevPairDepth_ = new mbtPtrOrderedVector( pContig->nApproxSizeOfSubcloneTemplatesForFwdRevPairDepth_, "Contig::pSubcloneTemplatesForFwdRevPairDepth_" ); } for( nSub = 0; nSub < subcloneTTemplateArray_.length(); ++nSub ) { subcloneTTemplate* pSub = subcloneTTemplateArray_[ nSub ]; if ( !pSub->bHasForwardAndReversePair_ ) continue; if ( bDoNotShowTemplatesInDoNotShowTemplatesFile && pSub->bDoNotShowInAssemblyView_ ) continue; // get the forward and reverse reads. If there is more than one // of either, which should we use? The best of each. LocatedFragment* pForwardLocFrag = NULL; LocatedFragment* pReverseLocFrag = NULL; char cProblem = ' '; bool bHasAFwdRevPair; bool bIsConsistent = pSub->bHasAConsistentFwdRevPair( pForwardLocFrag, pReverseLocFrag, bHasAFwdRevPair, cProblem); if ( !bHasAFwdRevPair ) continue; // redundant, I think if ( pForwardLocFrag->bIsWholeReadUnaligned() ) continue; if ( pReverseLocFrag->bIsWholeReadUnaligned() ) continue; if ( bIsConsistent && ( pForwardLocFrag->pGetContig() == pReverseLocFrag->pGetContig() ) ) { pForwardLocFrag->pGetContig()->pSubcloneTemplatesForFwdRevPairDepth_->insert( pSub ); } } // for( nSub = 0; nSub < pAssembly->subcloneTTemplateArray_.length(); for( nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); pContig->figureOutConsistentForwardReversePairDepth(); // futz around to free up the memory pContig->pSubcloneTemplatesForFwdRevPairDepth_->clear(); delete pContig->pSubcloneTemplatesForFwdRevPairDepth_; pContig->pSubcloneTemplatesForFwdRevPairDepth_ = NULL; } } // void Assembly :: figureOutConsistentForwardReversePairDepthOfAllContigs( void Assembly :: filterInconsistentFwdRevPairs() { arrayOfFwdRevPairs aUnfilteredArrayOfFwdRevPairs( subcloneTTemplateArray_.length() ); for( int nSub = 0; nSub < subcloneTTemplateArray_.length(); ++nSub ) { subcloneTTemplate* pSub = subcloneTTemplateArray_[ nSub ]; if ( !pSub->bHasForwardAndReversePair_ ) continue; LocatedFragment* pFwdRead; LocatedFragment* pRevRead; bool bHasFwdRevPair = false; char cProblem = ' '; if ( pSub->bHasAConsistentFwdRevPair( pFwdRead, pRevRead, bHasFwdRevPair, cProblem ) ) continue; if ( !bHasFwdRevPair ) continue;// probably redundant, but safe fwdRevPair* pFwdRevPair = new fwdRevPair( pFwdRead, pRevRead, cProblem ); assert( cProblem != ' ' ); aUnfilteredArrayOfFwdRevPairs.insert( pFwdRevPair ); } aUnfilteredArrayOfFwdRevPairs.resort(); int nPair; for( nPair = 1; nPair < aUnfilteredArrayOfFwdRevPairs.length(); ++nPair ) { fwdRevPair* pFwdRevPair = aUnfilteredArrayOfFwdRevPairs[ nPair ]; // now look backwards in the list for any pairs that are within 2kb // of the first read and in the same contig bool bStillCloseEnough = true; for( int nPair2 = nPair - 1; nPair2 >= 0 && bStillCloseEnough; --nPair2 ) { fwdRevPair* pFwdRevPair2 = aUnfilteredArrayOfFwdRevPairs[ nPair2 ]; if ( pFwdRevPair->pContigOfRead1_ != pFwdRevPair2->pContigOfRead1_ ) bStillCloseEnough = false; else { // They are in the same contig. See if they are close enough. if ( ABS( pFwdRevPair->nUnpaddedStartOfRead1_ - pFwdRevPair2->nUnpaddedStartOfRead1_ ) > pCP->nAssemblyViewFilterInconsistentFwdRevPairsIfThisClose_ ) // too far away bStillCloseEnough = false; else { // see if the reason for the pairs being inconsistent // is the same in both cases if ( pFwdRevPair->cProblem_ != pFwdRevPair2->cProblem_ ) continue; // See if the other reads of the 2 pairs are also in the // same contig and close enough if ( pFwdRevPair->pContigOfRead2_ == pFwdRevPair2->pContigOfRead2_ && ABS( pFwdRevPair->nUnpaddedStartOfRead2_ - pFwdRevPair2->nUnpaddedStartOfRead2_ ) < pCP->nAssemblyViewFilterInconsistentFwdRevPairsIfThisClose_ ) { // found a confirming pair of pairs. // We should definitely flag the latest one. // The other one might already have been flagged. pFwdRevPair->bInsertIntoFilteredArray_ = true; pFwdRevPair2->bInsertIntoFilteredArray_ = true; } } // if ( ABS( pFwdRevPair->nUnpaddedStartOfRead1_ - ... } // if ( pFwdRevPair->pContigOfRead1_ != ... } // for( int nPair2 = nPair1 - 1; nPair2 >= 0 && bStillCloseEnough; } // for( int nPair = 1; // count how many confirmed inconsistent fwd rev pairs there are int nConfirmedInconsistentFwdRevPairs = 0; for( nPair = 0; nPair < aUnfilteredArrayOfFwdRevPairs.length(); ++nPair ) { fwdRevPair* pFwdRevPair = aUnfilteredArrayOfFwdRevPairs[ nPair ]; if ( pFwdRevPair->bInsertIntoFilteredArray_) ++nConfirmedInconsistentFwdRevPairs; } if ( pArrayOfConfirmedInconsistentFwdRevPairs_ ) delete pArrayOfConfirmedInconsistentFwdRevPairs_; pArrayOfConfirmedInconsistentFwdRevPairs_ = new arrayOfFwdRevPairs( nConfirmedInconsistentFwdRevPairs ); for( nPair = 0; nPair < aUnfilteredArrayOfFwdRevPairs.length(); ++nPair ) { fwdRevPair* pFwdRevPair = aUnfilteredArrayOfFwdRevPairs[ nPair ]; if ( pFwdRevPair->bInsertIntoFilteredArray_) pArrayOfConfirmedInconsistentFwdRevPairs_->insert( pFwdRevPair ); } } void Assembly :: findAllTagsOfAType( const RWCString& soTagType, RWTPtrOrderedVector* pArrayOfTags ) { for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); // look through the read tags for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); for( int nTag = 0; nTag < pLocFrag->nGetNumberOfTags(); ++nTag ) { tag* pTag = pLocFrag->pGetTag( nTag ); if ( pTag->soGetTagType() == soTagType ) pArrayOfTags->insert( pTag ); } } // look through the consensus tags for( int nTag = 0; nTag < pContig->nGetNumberOfTags(); ++nTag ) { tag* pTag = pContig->pGetTag( nTag ); if ( pTag->soGetTagType() == soTagType ) pArrayOfTags->insert( pTag ); } } // for( int nContig = 0; nContig < pAssembly->nNumContigs()... } void Assembly :: navigateToUnalignedReads() { gotoList* pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); if ( pLocFrag->bIsWholeReadUnaligned() ) { gotoItem* pGotoItem = new gotoItem( pContig, pLocFrag, pLocFrag->nGetAlignStart(), pLocFrag->nGetAlignEnd(), pLocFrag->nGetAlignStartUnpadded(), pLocFrag->nGetAlignEndUnpadded(), "" ); pGotoList->addToList( pGotoItem ); } } } ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( "Unaligned Reads", "", "", 80, "", NULL, NULL, NULL, pGotoList, NULL ) ); } void Assembly :: navigateToUnalignedBaseSegments() { PleaseWait* pPleaseWait = new PleaseWait(); gotoList* pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for (int nSeg = 0; nSeg < pContig->baseSegArray_.dapBs_.length(); nSeg++) { BaseSegment* pBaseSeg = pContig->baseSegArray_.dapBs_[ nSeg ]; LocatedFragment* pLocFrag = pBaseSeg->pLocFrag_; if ( pLocFrag->bIsWholeReadUnaligned() ) { RWCString soDescription = "whole read is unaligned"; gotoItem* pGotoItem = new gotoItem( pContig, pLocFrag, pBaseSeg->nStartInConsensus_, pBaseSeg->nEndInConsensus_, pContig->nUnpaddedIndex( pBaseSeg->nStartInConsensus_ ), pContig->nUnpaddedIndex( pBaseSeg->nEndInConsensus_ ), soDescription ); pGotoList->addToList( pGotoItem ); } else { if ( pBaseSeg->nStartInConsensus_ < pLocFrag->nAlignClipStart_ || pLocFrag->nAlignClipEnd_ < pBaseSeg->nEndInConsensus_ ) { RWCString soDescription = "aligned region is from "; soDescription += RWCString( (long) pContig->nUnpaddedIndex( pLocFrag->nAlignClipStart_) ); soDescription += " to "; soDescription += RWCString( (long) pContig->nUnpaddedIndex( pLocFrag->nAlignClipEnd_) ); gotoItem* pGotoItem = new gotoItem( pContig, pLocFrag, pBaseSeg->nStartInConsensus_, pBaseSeg->nEndInConsensus_, pContig->nUnpaddedIndex( pBaseSeg->nStartInConsensus_ ), pContig->nUnpaddedIndex( pBaseSeg->nEndInConsensus_ ), soDescription ); pGotoList->addToList( pGotoItem ); } } } // for (int nSeg = 0; } // for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { delete pPleaseWait; ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( "Unaligned Base Segments", "", "", 80, "", NULL, NULL, NULL, pGotoList, NULL ) ); } void Assembly :: navigateByMultipleHighQualityDiscrepancies() { PleaseWait* pPleaseWait = new PleaseWait(); setContigTemplateArrays(); gotoList* pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); pContig->findMultipleHighQualityDiscrepancies( pGotoList ); } pGotoList->sortByPosition(); delete pPleaseWait; guiMultiContigNavigator* pGCN = new guiMultiContigNavigator( "Multiple High Quality Discrepancies (excludes pads in consensus)", "", "", 80, "Tell Phrap No Overlap", cbTellPhrapNotToOverlapReadsDiscrepantAtThisLocation, NULL, NULL, pGotoList, NULL ); ConsEd::pGetConsEd()->addGuiMultiContigNavigator( pGCN ); pGCN->pClientDataForSpecialPurposeButton_ = pGCN; popupInfoMessage( pGCN->widPopupShell_, //GuiApp::pGetGuiApp()->widGetTopLevel(), "Multiple High Quality is obsolete and will be removed from Consed shortly. Instead use Search for Highly Discrepant Positions" ); } void Assembly :: navigateByCertainOligoTags( const RWCString& soPartOfTagName ) { gotoList* pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); // first look through read tags. Are there ever // oligo tags on reads? No--see saveTagTypes.cpp // now look through contig tags for( int nConsensusTag = 0; nConsensusTag < pContig->nGetNumberOfTags(); ++nConsensusTag ) { oligoTag* pOligoTag = (oligoTag*) pContig->pGetTag( nConsensusTag ); if ( pOligoTag->soGetTagType() != "oligo" ) continue; if ( !pOligoTag->soName_.bContains( soPartOfTagName ) ) continue; // want this one gotoItem* pGotoItem = pOligoTag->pGetGotoItemForThisTag(); pGotoList->addToList( pGotoItem ); } } pGotoList->sortByPosition(); // cases: list has 0, 1, or more items if ( pGotoList->nGetNumGotoItems() == 0 ) { RWCString soErrorMessage = "no tags containing " + soPartOfTagName + " found"; popupErrorMessage( soErrorMessage ); } else if ( pGotoList->nGetNumGotoItems() == 1 ) { // just go there gotoItem* pGotoItem = pGotoList->pGetGotoItem( 0 ); Contig* pContig = pGotoItem->pContig_; int nConsPos = pGotoItem->nGotoItemStart_; // get rid of goto items pGotoList->apGotoItem_.clearAndDestroy(); // get rid of list itself delete pGotoList; ContigWin* pContigWin = ConsEd::pGetConsEd()->pScrollExistingContigWinOrMakeNewContigWin( pContig, nConsPos ); pContigWin->moveCursorToConsPos( nConsPos ); pContigWin->raiseWindow(); if ( pCP->bNavigateAutomaticAllTracesPopup_ ) { pContigWin->showAllTracesByConsensusPosition( nConsPos ); } } else { // there are several. So bring up the navigator window for the // user to choose. RWCString soWindowName = "oligo tags containing " + soPartOfTagName; ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( soWindowName, "", // text for 1st line "", // text for 2nd line 75, // nWidthInChars "", // special purpose button NULL, // special purpose button NULL, // special purpose button NULL, //top level shell to be connected pGotoList, NULL ) // ContigWin ); } } void Assembly :: navigateByTagsFindStringInComment( const RWCString& soStringToSearchFor ) { gotoList* pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); // first look through read tags for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); for( int nReadTag = 0; nReadTag < pLocFrag->nGetNumberOfTags(); ++nReadTag ) { tag* pTag = pLocFrag->pGetTag( nReadTag ); if ( pTag->soComment_.bContains( soStringToSearchFor ) ) { // want this tag gotoItem* pGotoItem = pTag->pGetGotoItemForThisTag(); pGotoList->addToList( pGotoItem ); } } } // now through consensus tags for( int nConsensusTag = 0; nConsensusTag < pContig->nGetNumberOfTags(); ++nConsensusTag ) { tag* pTag = pContig->pGetTag( nConsensusTag ); if ( pTag->soComment_.bContains( soStringToSearchFor ) ) { // want this tag gotoItem* pGotoItem = pTag->pGetGotoItemForThisTag(); pGotoList->addToList( pGotoItem ); } } } pGotoList->sortByPosition(); if ( pGotoList->nGetNumGotoItems() == 0 ) { RWCString soErrorMessage = "no tags with comment containing " + soStringToSearchFor; popupErrorMessage( soErrorMessage ); } // else if ( pGotoList->nGotoList->nGetNumGotoItems() == 1 ) { // // just go there // gotoItem* pGotoItem = pGotoList->pGetGotoItem( 0 ); // Contig* pContig = pGotoItem->pContig_; // int nConsPos = pGotoItem->nGotoItemStart_; // LocatedFragment* pLocFrag = pGotoItem->pLocFrag_; // // get rid of goto items // pGotoList->apGotoItem_.clearAndDestroy(); // // get rid of list itself // delete pGotoList; // ContigWin* pContigWin = // ConsEd::pGetConsEd()->pScrollExistingContigWinOrMakeNewContigWin( else { RWCString soWindowName = "tags with comments containing " + soStringToSearchFor; ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( soWindowName, "", // text for 1st line "", // text for 2nd line 75, // nWidthChars "", // special purpose button NULL, NULL, NULL, pGotoList, NULL ) // ContigWin ); } } int Assembly :: nGetScaffoldForContig( Contig* pContigToFind ) { for( int nScaffold = 0; nScaffold < aHeadsOfScaffoldsOfContigs_.length(); ++nScaffold ) { Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nScaffold ]; while( pContig ) { if ( pContig == pContigToFind ) { return( nScaffold ); } pContig = pContig->pNextContig_; } } return( NULL ); } RWCString Assembly :: soGetNextContigEndPairID() { if ( !bHighestContigEndPairIDSet_ ) { setHighestContigEndPairID(); } ++nHighestContigEndPairID_; return( RWCString( (long) nHighestContigEndPairID_ ) ); } void Assembly :: setHighestContigEndPairID() { nHighestContigEndPairID_ = 1; for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nConsensusTag = 0; nConsensusTag < pContig->nGetNumberOfTags(); ++nConsensusTag ) { tag* pTag = pContig->pGetTag( nConsensusTag ); if ( pTag->soType_ == "contigEndPair" ) { RWCTokenizer tok( pTag->soMiscData_ ); RWCString soID = tok( '\n' ); // convert to a number assert( bIsNumeric( soID ) ); int nNumber = atoi( soID.data() ); if ( nNumber > nHighestContigEndPairID_ ) nHighestContigEndPairID_ = nNumber; } } } bHighestContigEndPairIDSet_ = true; } class tagAndID { public: tag* pTag_; int nID_; public: tagAndID( tag* pTag, const int nID ) : pTag_( pTag ), nID_( nID ) {} // for RWTValOrderedVector: tagAndID() : pTag_( NULL ), nID_( -666 ) {} bool operator==( const tagAndID& tagAndIDD ) const { return( this == &tagAndIDD ); } }; static int nCompareTagAndIDs( const tagAndID* pTagAndID1, const tagAndID* pTagAndID2 ) { if ( pTagAndID1->nID_ < pTagAndID2->nID_ ) return( -1 ); else if ( pTagAndID1->nID_ == pTagAndID2->nID_ ) { if ( pTagAndID1->pTag_->pContig_ == pTagAndID2->pTag_->pContig_ ) return( 0 ); else if ( pTagAndID1->pTag_->pContig_->soGetName() < pTagAndID2->pTag_->pContig_->soGetName() ) return( -1 ); else return( 1 ); } else return( 1 ); } #define REPORT_CONTIG_END_PAIR_TAG_ERROR( pTag, soErrorMessage ) { \ RWCString soError = "contigEndPair tag is corrupted at "; \ soError += pTag->pContig_->soGetName(); \ soError += " "; \ soError += " unpadded: "; \ soError += RWCString( (long) \ pTag->pContig_->nUnpaddedIndex( pTag->nPaddedConsPosStart_ ) \ ); \ soError += " soMiscData_ = "; \ soError += pTag->soMiscData_; \ soError += "\n"; \ soError += soErrorMessage; \ popupErrorMessage( soError ); \ } void Assembly :: getUserDefinedContigEndPairs( RWTPtrOrderedVector& aUserDefinedContigEndPairs ) { aUserDefinedContigEndPairs.clear(); RWTValOrderedVector aTagAndIDArray; for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nConsensusTag = 0; nConsensusTag < pContig->nGetNumberOfTags(); ++nConsensusTag ) { tag* pTag = pContig->pGetTag( nConsensusTag ); if ( pTag->soType_ == "contigEndPair" ) { RWCTokenizer tok( pTag->soMiscData_ ); RWCString soID = tok( '\n' ); // convert to a number assert( bIsNumeric( soID ) ); int nID = atoi( soID.data() ); aTagAndIDArray.insert( tagAndID( pTag, nID ) ); } } } qsort( aTagAndIDArray.data(), aTagAndIDArray.length(), sizeof( tagAndID ), ( int(*) ( const void *, const void * ) ) nCompareTagAndIDs ); // check that it is sorted bool bSorted = true; int nTagAndID; for( nTagAndID = 1; nTagAndID < aTagAndIDArray.length(); ++nTagAndID ) { tagAndID& tagID1 = aTagAndIDArray[ nTagAndID - 1 ]; tagAndID& tagID2 = aTagAndIDArray[ nTagAndID ]; if ( tagID1.nID_ > tagID2.nID_ ) { cerr << tagID1.nID_ << " at index " << nTagAndID - 1 << " and " << tagID2.nID_ << " at index " << nTagAndID << " are out of order" << endl; bSorted = false; } } assert( bSorted ); // now go through the sorted list finding all pairs of tags for( nTagAndID = 0; nTagAndID < ( aTagAndIDArray.length() - 1 ); ) { tagAndID& tagID1 = aTagAndIDArray[ nTagAndID ]; tagAndID& tagID2 = aTagAndIDArray[ nTagAndID + 1 ]; if ( tagID1.nID_ == tagID2.nID_ ) { // we have a pair of contigEndPair tags with the same id. // Thus we can form a contigEndPair object from them. tag* pTag1 = tagID1.pTag_; tag* pTag2 = tagID2.pTag_; if ( pTag1->pContig_ == pTag2->pContig_ ) { // not interested in this case--this could be due to the gap // having been closed, or it might be due to tags being duplicated // In any case, doesn't make sense to have a contigEndPair // for it. ++nTagAndID; } else { // different contigs--this is what we are interested in // Must figure out which end each tag is pointing at. int nGapSize = -666; bool bGapSizeFound = false; int nWhichEnd[2]; bool bError = false; for( int nTag = 0; nTag <= 1; ++nTag ) { tag*& pTag = ( nTag == 0 ? pTag1 : pTag2 ); RWCTokenizer tok( pTag->soMiscData_ ); RWCString soID = tok('\n'); RWCString soGap = tok('\n'); RWCString soBases = tok('\n'); RWCString soMaybeGapSize = tok('\n'); if ( soGap == "gap->" ) { nWhichEnd[nTag] = nRightGap; } else if ( soGap == "<-gap" ) { nWhichEnd[nTag] = nLeftGap; } else { REPORT_CONTIG_END_PAIR_TAG_ERROR( pTag, "neither gap-> nor <-gap"); bError = true; break; } // see if gap size is specified if ( soMaybeGapSize.bStartsWithAndRemove("gap_size:" ) ) { if ( bIsNumericMaybeWithWhitespace( soMaybeGapSize, nGapSize ) ) { bGapSizeFound = true; } else { REPORT_CONTIG_END_PAIR_TAG_ERROR( pTag, "gap_size: is not followed by numeric" ); bError = true; break; } } // now check that bases match RWCString soBasesFromContig; int nUnpaddedStart = pTag->pContig_->nUnpaddedIndex( pTag->nPaddedConsPosStart_ ); if ( nWhichEnd[nTag] == nRightGap ) { pTag->pContig_->getPartOfUnpaddedConsensus( nUnpaddedStart, 10, soBasesFromContig, false, // bComplemented true ); // bTowardsIncreasingUnpaddedPositions if ( soBasesFromContig != soBases ) { RWCString soErrorMessage = "bases from contig: "; soErrorMessage += soBasesFromContig; soErrorMessage += " do not match tag bases: "; soErrorMessage += soBases; soErrorMessage += " This may be due to getting additional more accurate data, in which case nothing is wrong."; REPORT_CONTIG_END_PAIR_TAG_ERROR( pTag, soErrorMessage ); bError = true; break; } } else { // left gap pTag->pContig_->getPartOfUnpaddedConsensus( nUnpaddedStart, 10, soBasesFromContig, true, // bComplemented false ); // bTowardsIncreasingUnpaddedPositions if ( soBasesFromContig != soBases ) { RWCString soErrorMessage = "bases from contig: "; soErrorMessage += soBasesFromContig; soErrorMessage += " do not match tag bases: "; soErrorMessage += soBases; soErrorMessage += " This may be due to getting additional more accurate data, in which case nothing is wrong."; REPORT_CONTIG_END_PAIR_TAG_ERROR( pTag, soErrorMessage ); bError = true; break; } } // if made it here, the bases must have agreed with nWhichEnd // so we can trust them } // may have terminated loop due to an error if ( bError ) ++nTagAndID; else { contigEndPair* pCEP = new contigEndPair( pTag1->pContig_, pTag2->pContig_, nWhichEnd[0], nWhichEnd[1] ); pCEP->recordTags( pTag1, pTag2 ); pCEP->bUserDefined_ = true; if ( bGapSizeFound ) { pCEP->nGapSize_ = nGapSize; pCEP->bGapSizeSet_ = true; } aUserDefinedContigEndPairs.insert( pCEP ); nTagAndID += 2; } } // if ( pTag1->pContig_ == pTag2->pContig_ ) { else } else ++nTagAndID; } } void Assembly :: reverseScaffold( const int nScaffold ) { Contig* pLastContig = NULL; Contig* pContig = aHeadsOfScaffoldsOfContigs_[ nScaffold ]; while( 1 ) { Contig* pOldNextContig = pContig->pNextContig_; pContig->pNextContig_ = pContig->pPreviousContig_; pContig->pPreviousContig_ = pOldNextContig; pContig->bThisContigIsComplementedInTheScaffold_ = !pContig->bThisContigIsComplementedInTheScaffold_; if ( !pOldNextContig ) { pLastContig = pContig; break; } pContig = pOldNextContig; } aHeadsOfScaffoldsOfContigs_[ nScaffold ] = pLastContig; } void Assembly :: findLargestTagIDSoFar( const long lID ) { if ( lID != nUndefinedTagID ) { if ( lID > lLastUsedTagID_ ) lLastUsedTagID_ = lID; } } void Assembly :: lookForCloneEndTags3( RWTPtrOrderedVector* pCloneEndTags ) { for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); pContig->lookForCloneEndTags2( pCloneEndTags ); } } tag* Assembly :: pFindTagWithThisID( const long ID ) { for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nConsensusTag = 0; nConsensusTag < pContig->nGetNumberOfTags(); ++nConsensusTag ) { tag* pTag = pContig->pGetTag( nConsensusTag ); if ( pTag->lID_ == ID ) return( pTag ); } for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); for( int nTag = 0; nTag < pLocFrag->nGetNumberOfTags(); ++nTag ) { tag* pTag = pLocFrag->pGetTag( nTag ); if ( pTag->lID_ == ID ) return ( pTag ); } } } return NULL; } void Assembly :: userPushedTellPhrapToNotOverlapReadsDiscrepantAtThisLocation( const gotoItem* pGotoItem, void* pClientData ) { guiMultiContigNavigator* pGMCN = (guiMultiContigNavigator*) pClientData; if ( !pGotoItem ) { popupErrorMessage2( pGMCN->widPopupShell_, "you must click a position first" ); return; } pGotoItem->pContig_->tellPhrapNotToOverlapReadsDiscrepantAtThisLocation( pGotoItem->nGotoItemStart_ ); pGMCN->gotoNextOrPreviousItem( true ); // bNextNotPrevious } class regionWithReadDepth { public: Contig* pContig_; int nConsPosStart_; int nConsPosEnd_; int nMinDepthOfCoverage_; int nMaxDepthOfCoverage_; public: regionWithReadDepth( Contig* pContig, int nConsPosStart, int nConsPosEnd, int nMinDepthOfCoverage, int nMaxDepthOfCoverage ) : pContig_( pContig ), nConsPosStart_( nConsPosStart ), nConsPosEnd_( nConsPosEnd ), nMinDepthOfCoverage_( nMinDepthOfCoverage ), nMaxDepthOfCoverage_( nMaxDepthOfCoverage ) {} bool operator==( const regionWithReadDepth& region ) const { return( this == ®ion ); } }; void Assembly :: getHighOrLowDepthOfCoverageRegions( const bool bHighNotLowDepthOfCoverage, const int nQualityThreshold, const int nMinDepth, gotoList*& pGotoList ) { PleaseWait* pPleaseWait = new PleaseWait(); pGotoList = new gotoList(); for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); RWTValVector aReadDepth( pContig->nGetPaddedConsLength() + 2, 0, "aReadDepth" + pGetContig( nContig )->soGetName() ); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); if ( pLocFrag->bIsWholeReadUnaligned() ) continue; int nAlignedStart = pLocFrag->nGetAlignClipStart(); int nAlignedEnd = pLocFrag->nGetAlignClipEnd(); if ( !bIntersect( nAlignedStart, nAlignedEnd, pContig->nGetStartConsensusIndex(), pContig->nGetEndConsensusIndex(), nAlignedStart, nAlignedEnd ) ) continue; bool bInHighQualityRegion = false; for( int nConsPos = nAlignedStart; nConsPos <= nAlignedEnd; ++nConsPos ) { bool bHighQuality = ( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99() >= nQualityThreshold ) ? true : false; if ( bHighQuality && !bInHighQualityRegion ) { ++aReadDepth[ nConsPos ]; bInHighQualityRegion = true; } else if ( !bHighQuality && bInHighQualityRegion ) { --aReadDepth[ nConsPos ]; bInHighQualityRegion = false; } } if ( bInHighQualityRegion ) { // the read was high quality up to the end so after the end, // terminate this section of high quality --aReadDepth[ nAlignedEnd + 1 ]; } } // for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); // count number of regions int nNumberOfRegions = 0; { int nConsPos; int nDepthOfCoverage = 0; bool bInHighLowDepthOfCoverageRegion = false; for( nConsPos = pContig->nGetStartConsensusIndex(); nConsPos <= pContig->nGetEndConsensusIndex(); ++nConsPos ) { nDepthOfCoverage += aReadDepth[ nConsPos ]; bool bHighLowDepthOfCoverageBase; if ( bHighNotLowDepthOfCoverage ) { bHighLowDepthOfCoverageBase = ( nDepthOfCoverage >= nMinDepth ? true : false ); } else { bHighLowDepthOfCoverageBase = ( nDepthOfCoverage <= nMinDepth ? true : false ); } if ( !bInHighLowDepthOfCoverageRegion && bHighLowDepthOfCoverageBase ) { ++nNumberOfRegions; } bInHighLowDepthOfCoverageRegion = bHighLowDepthOfCoverageBase; } // for( nConsPos = pContig->nGetStartConsensusIndex(); } RWTPtrOrderedVector aListOfRegions( nNumberOfRegions, "Assembly::aListOfRegions" ); int nDepthOfCoverageAtStartOfRegion; bool bInHighLowDepthOfCoverageRegion = false; int nDepthOfCoverage = 0; int nStartConsPosOfHighLowDepthRegion; for( int nConsPos = pContig->nGetStartConsensusIndex(); nConsPos <= pContig->nGetEndConsensusIndex(); ++nConsPos ) { nDepthOfCoverage += aReadDepth[ nConsPos ]; bool bHighLowDepthOfCoverageBase; if ( bHighNotLowDepthOfCoverage ) { bHighLowDepthOfCoverageBase = ( nDepthOfCoverage >= nMinDepth ? true : false ); } else { bHighLowDepthOfCoverageBase = ( nDepthOfCoverage <= nMinDepth ? true : false ); } if ( !bInHighLowDepthOfCoverageRegion && bHighLowDepthOfCoverageBase ) { bInHighLowDepthOfCoverageRegion = true; nStartConsPosOfHighLowDepthRegion = nConsPos; nDepthOfCoverageAtStartOfRegion = nDepthOfCoverage; } else if ( bInHighLowDepthOfCoverageRegion && ( !bHighLowDepthOfCoverageBase || ( nConsPos == pContig->nGetEndConsensusIndex() ) ) ) { bInHighLowDepthOfCoverageRegion = false; // let's calculate the range of depth of coverage int nMinDepthOfCoverage = 100000000; int nMaxDepthOfCoverage = -666; int nReadDepth2 = nDepthOfCoverageAtStartOfRegion; for( int nConsPos2 = nStartConsPosOfHighLowDepthRegion; nConsPos2 <= nConsPos - 1; ++nConsPos2 ) { // we have to do this check because at // nConsPos2 == nStartConsPosOfHighLowDepthRegion, // aReadDepth[ nConsPos2 ] has already been added to // nReadDepth2 by definition of nReadDepthAtStartOfRegion if ( nConsPos2 > nStartConsPosOfHighLowDepthRegion ) nReadDepth2 += aReadDepth[ nConsPos2 ]; if ( nReadDepth2 < nMinDepthOfCoverage ) nMinDepthOfCoverage = nReadDepth2; if ( nReadDepth2 > nMaxDepthOfCoverage ) nMaxDepthOfCoverage = nReadDepth2; } regionWithReadDepth* pRegion = new regionWithReadDepth( pContig, nStartConsPosOfHighLowDepthRegion, nConsPos - 1, nMinDepthOfCoverage, nMaxDepthOfCoverage ); aListOfRegions.insert( pRegion ); } } // for( int nConsPos = pContig->nGetStartConsensusIndex(); // fortunately, we don't need to do another check at the // end whether we are still in a high/low region since // we have the check above: ( nConsPos == pContig->nGetEndConsensusIndex() ) // now coalesce regions that are close to each other if ( aListOfRegions.length() >= 2 ) { for( int nRegion = aListOfRegions.length() - 1; nRegion >= 1; --nRegion ) { regionWithReadDepth* pRegionLeft = aListOfRegions[ nRegion - 1 ]; regionWithReadDepth* pRegionRight = aListOfRegions[ nRegion ]; if ( ( pRegionLeft->nConsPosEnd_ + pCP->nNavigateByHighOrLowDepthCoalesceRegionsIfThisClose_ ) >= pRegionRight->nConsPosStart_ ) { // --- ---- ---- // a nRegion - 1 // nRegion // coalesce the regions into: // --- --------- // a nRegion - 1 // thus the next iteration of the loop will consider a and // nRegion - 1 pRegionLeft->nConsPosEnd_ = pRegionRight->nConsPosEnd_; pRegionLeft->nMinDepthOfCoverage_ = MIN( pRegionLeft->nMinDepthOfCoverage_, pRegionRight->nMinDepthOfCoverage_ ); pRegionLeft->nMaxDepthOfCoverage_ = MAX( pRegionLeft->nMaxDepthOfCoverage_, pRegionRight->nMaxDepthOfCoverage_ ); aListOfRegions.removeAt( nRegion ); delete pRegionRight; } } // for( int nRegion = aListOfRegions.length() - 1; nRegion >= 1; } // if ( aListOfRegions.length() >= 2 ) { // now make gotoItems out of the coalesced regions pGotoList->resizeGotoList( pGotoList->nGetNumGotoItems() + aListOfRegions.length() ); for( int nRegion = 0; nRegion < aListOfRegions.length(); ++nRegion ) { regionWithReadDepth* pRegion = aListOfRegions[ nRegion ]; Contig* pContig = pRegion->pContig_; RWCString soNote = "read depth " + RWCString( (long) pRegion->nMinDepthOfCoverage_ ) + "-" + RWCString( (long) pRegion->nMaxDepthOfCoverage_ ); gotoItem* pGotoItem = new gotoItem( pRegion->pContig_, NULL, // LocatedFragment pRegion->nConsPosStart_, pRegion->nConsPosEnd_, pContig->nUnpaddedIndex( pRegion->nConsPosStart_ ), pContig->nUnpaddedIndex( pRegion->nConsPosEnd_ ), soNote ); pGotoList->addToList( pGotoItem ); } aListOfRegions.clearAndDestroy(); } // for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { delete pPleaseWait; } void Assembly :: zeroAddNewReadsContigCounts() { for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); pContig->nReadsToAdd_ = 0; } } int Assembly :: nGetNumberOfReadsInAssemblyWithTemplates() { int nNotShort = 0; for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); if ( pLocFrag->soChemistry_ != "solexa" && pLocFrag->soChemistry_ != "454" && pLocFrag->bIsAFakeRead() ) { ++nNotShort; } } } return nNotShort; } void Assembly :: printReadNamesInRegion() { Contig* pDesiredContig = NULL; for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); if ( pContig->soGetName() == pCP->soAutoReportPrintReadNamesInRegionContig_ ) { pDesiredContig = pContig; break; } } if ( !pDesiredContig ) { fprintf( pAO, "couldn't find contig %s\n", pCP->soAutoReportPrintReadNamesInRegionContig_ ); } assert( pCP->nAutoReportPrintReadNamesInRegionLeftPos_ <= pCP->nAutoReportPrintReadNamesInRegionRightPos_ ); fprintf( pAO, "printReadNamesInRegion contig %s %d - %d {\n", pCP->soAutoReportPrintReadNamesInRegionContig_.data(), pCP->nAutoReportPrintReadNamesInRegionLeftPos_, pCP->nAutoReportPrintReadNamesInRegionRightPos_ ); for( int nRead = 0; nRead < pDesiredContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pDesiredContig->pLocatedFragmentGet( nRead ); if ( pLocFrag->bIsWholeReadUnaligned() ) continue; if ( bIntervalsIntersect( pLocFrag->nGetAlignClipStartUnpadded(), pLocFrag->nGetAlignClipEndUnpadded(), pCP->nAutoReportPrintReadNamesInRegionLeftPos_, pCP->nAutoReportPrintReadNamesInRegionRightPos_ ) ) { fprintf( pAO, "%s\n", pLocFrag->soGetName().data() ); } } fprintf( pAO, "} printReadNamesInRegion\n" ); } void Assembly :: navigateByQuestionableConsensusBases() { PleaseWait* pPleaseWait = new PleaseWait(); gotoList* pGotoList = new gotoList(); for ( int nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); pContig->appendListOfQuestionableConsensusBases( pGotoList ); } ConsEd::pGetConsEd()->addGuiMultiContigNavigator( new guiMultiContigNavigator( "Questionable Consensus Bases", "", "", 80, "", NULL, NULL, NULL, pGotoList, NULL ) ); delete pPleaseWait; } void Assembly :: printAssemblySummary() { RWTValOrderedVector aHistogramOfContigSize; RWTValOrderedVector aHistogramOfContigNumberOfReads; RWTValOrderedVector aHistogramOfContigMedianReadDepth; RWTValOrderedVector aHistogramOfContigMeanReadDepth; const int nContigSizeIntervalSize = 100; const int nContigNumberOfReadsIntervalSize = 100; const int nContigMedianReadDepthIntervalSize = 10; const int nContigMeanReadDepthIntervalSize = 10; // print contigs by size and by # of reads fprintf( pAO, "name # reads length depth: min max median mean\n" ); int nContig; for( nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); int nMinReadDepth; int nMaxReadDepth; int nMedianReadDepth; int nMeanReadDepth; pContig->getDepthOfCoverage( nMinReadDepth, nMaxReadDepth, nMedianReadDepth, nMeanReadDepth ); fprintf( pAO, "%-10s %10s %10s %5d %5d %5d %5d\n", pContig->soGetName().data(), soAddCommas( pContig->nGetNumberOfReads() ).data(), soAddCommas( pContig->nGetUnpaddedSeqLength() ).data(), nMinReadDepth, nMaxReadDepth, nMedianReadDepth, nMeanReadDepth ); int nBin; nBin = floor( pContig->nGetUnpaddedSeqLength() / (float) nContigSizeIntervalSize ); while( aHistogramOfContigSize.length() <= nBin ) { aHistogramOfContigSize.append( 0 ); } ++aHistogramOfContigSize[ nBin ]; nBin = floor( pContig->nGetNumberOfReads() / (float) nContigNumberOfReadsIntervalSize ); while( aHistogramOfContigNumberOfReads.length() <= nBin ) { aHistogramOfContigNumberOfReads.append( 0 ); } ++aHistogramOfContigNumberOfReads[ nBin ]; nBin = floor( nMedianReadDepth / (float) nContigMedianReadDepthIntervalSize ); while( aHistogramOfContigMedianReadDepth.length() <= nBin ) { aHistogramOfContigMedianReadDepth.append( 0 ); } ++aHistogramOfContigMedianReadDepth[ nBin ]; nBin = floor( nMeanReadDepth / (float) nContigMeanReadDepthIntervalSize ); while( aHistogramOfContigMeanReadDepth.length() <= nBin ) { aHistogramOfContigMeanReadDepth.append( 0 ); } ++aHistogramOfContigMeanReadDepth[ nBin ]; } int nBin; fprintf( pAO, "histogram of contig sizes\n" ); for( nBin = 0; nBin < aHistogramOfContigSize.length(); ++nBin ) { fprintf( pAO, "%d <= x < %d : %d\n", nBin*nContigSizeIntervalSize, ( nBin + 1 ) * nContigSizeIntervalSize, aHistogramOfContigSize[ nBin ] ); } fprintf( pAO, "histogram of contig # of reads\n" ); for( nBin = 0; nBin < aHistogramOfContigNumberOfReads.length(); ++nBin ) { fprintf( pAO, "%d <= x < %d : %d\n", nBin* nContigNumberOfReadsIntervalSize, ( nBin + 1 ) * nContigNumberOfReadsIntervalSize, aHistogramOfContigNumberOfReads[ nBin ] ); } fprintf( pAO, "histogram of median depth of coverage\n" ); for( nBin = 0; nBin < aHistogramOfContigMedianReadDepth.length(); ++nBin ) { fprintf( pAO, "%d <= x < %d : %d\n", nBin * nContigMedianReadDepthIntervalSize, ( nBin + 1 ) * nContigMedianReadDepthIntervalSize, aHistogramOfContigMedianReadDepth[ nBin ] ); } fprintf( pAO, "histogram of mean depth of coverage\n" ); for( nBin = 0; nBin < aHistogramOfContigMeanReadDepth.length(); ++nBin ) { fprintf( pAO, "%d <= x < %d : %d\n", nBin * nContigMeanReadDepthIntervalSize, ( nBin + 1 ) * nContigMeanReadDepthIntervalSize, aHistogramOfContigMeanReadDepth[ nBin ] ); } // now figure out summary information about template names if ( pReadsSortedByTemplateName_ ) { pReadsSortedByTemplateName_->clear(); pReadsSortedByTemplateName_->resize( nGetNumberOfReadsInAssembly() ); } else { pReadsSortedByTemplateName_ = new readsSortedByTemplateName( nGetNumberOfReadsInAssembly() ); } int nSmallFractionOfReads = nGetNumberOfReadsInAssembly() / 100; int nNumberOfWholeCloneReads = 0; int nNumberOfFakeReads = 0; int nNumberOfReads = 0; int nNumberOfSubcloneReads = 0; for( nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); ++nNumberOfReads; pLocFrag->setDefaultTemplateNameIfNecessary(); pReadsSortedByTemplateName_->append( pLocFrag ); } } pReadsSortedByTemplateName_->resort2(); // int nPairsInWhichJustUnmappedIsInAssembly = 0; // int nPairsInWhichJustMappedIsInAssembly = 0; // int nPairsBothAreInAssembly = 0; // // add a sentinel here // LocatedFragment* pLocFragStart = new LocatedFragment(); // pLocFragStart->soName_ = "sentinel at beginning"; // pLocFragStart->soTemplate_ = "sentinel at beginning"; // pReadsSortedByTemplateName_.insertAt( 0, pLocFragStart ); // LocatedFragment* pLocFragEnd = new LocatedFragment(); // pLocFragEnd->soName_ = "sentinel at end"; // pLocFragEnd->soTemplate_ = "sentinel at end"; // pReadsSortedByTemplateName_.append( pLocFragEnd ); // for( int nRead = 1; nRead < ( pReadsSortedByTemplateName_.length() - 1); // ++nRead ) { // LocatedFragment* pLocFragBefore = (*pReadsSortedByTemplateName_)[ nRead - 1 ]; // LocatedFragment* pLocFragHere = (*pReadsSortedByTemplateName_)[ nRead ]; // LocatedFragment* pLocFragAfter = (*pReadsSortedByTemplateName_)[ nRead + 1 ]; // if ( pLocFragBefore->soTemplate_ == pLocFragHere->soTemplate_ ) { // // both pairs are in the assembly // if ( pLocFragBefore->pGetContig() == pLocFragHere->pGetContig() ) { // LocatedFragment* pLocFragLeft; // LocatedFragment* pLocFragRight; // if ( pLocFragBefore->nGetAlignStart() < // pLocFragHere->nGetAlignStart() ) { // pLocFragLeft = pLocFragBefore; // pLocFragRight = pLocFragHere; // } // else { // pLocFragLeft = pLocFragHere; // pLocFragRight = pLocFragBefore; // } // if (! ( !pLocFragLeft->bComp() && // pLocFragRight->bComp() ) ) { // ++nIncorrectOrientation; // } // else { // // see if distance ok // } } bool Assembly :: bReadLengthAndTracesLengthAreEqualAllReads() { bool bOK = true; for( int nContig = 0; nContig < nNumContigs(); ++nContig ) { Contig* pContig = pGetContig( nContig ); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); if ( !pLocFrag->bReadLengthAndTracesLengthAreEqual() ) { cerr << "read length not same as trace length " << pLocFrag->soGetName() << endl; bOK = false; } } } return bOK; }