/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "guiTearContig.h" #include #include #include #include #include #include "contig.h" #include "popupErrorMessage.h" #include "handleWindowManagerDelete2.h" #include "contigview.h" #include #include #include "guiapp.h" #include "assert.h" #include "bIntervalsIntersect.h" #include "bIntersect.h" #include "popupErrorMessage.h" #include "consed.h" #include "ucNormalQualityFrom9899Quality.h" #include "tagTypes.h" #include "min.h" #include "max.h" #include "popupInfoMessage.h" #include "guicontigwin.h" #include "hp_exception_kludge.h" #include "soGetDateTime.h" #include "soAddCommas.h" #include "createTearTags.h" #include "breakIntoShorterLines.h" #include "bGuiGetAnswerYesNo.h" static void cbUserPushedCancel( Widget wid, XtPointer pClientData, XtPointer pCallData ) { guiTearContig* pGuiTearContig = ( guiTearContig* ) pClientData; TRY_CATCH_WRAPPER( delete pGuiTearContig ); } static void cbUserPushedDoTear( Widget wid, XtPointer pClientData, XtPointer pCallData ) { guiTearContig* pGuiTearContig = ( guiTearContig* ) pClientData; TRY_CATCH_WRAPPER( pGuiTearContig->doTear(); // pop down the box with the list of reads near the cursor in it delete pGuiTearContig; ); } guiTearContig :: guiTearContig( ContigWin* pContigWin, Contig* pContig, ContigView* pContigView, const int nConsPosOfCursor ) : pContigWin_( pContigWin ), pContig_( pContig ), nConsPosOfCursor_( nConsPosOfCursor ) { setHighlightingOfReadNames( pContigView ); // figure out where to place the box on the screen. // Place it right below the aligned reads window Dimension nHeight; Dimension nWidth; Position nY; Position nX; Widget widParentShell = pContigWin_->pGcw_->widGetGuiContigWinTopLevel(); XtVaGetValues( widParentShell, XmNheight, &nHeight, XmNwidth, &nWidth, XmNy, &nY, XmNx, &nX, NULL ); widPopupShell_ = XtVaCreatePopupShell( "guiTearContig", topLevelShellWidgetClass, widParentShell, XmNtitle, (char*) "Tear Contig", XmNtransient, False, XmNdeleteResponse, XmDO_NOTHING, XmNy, nY + nHeight, XmNx, nX, NULL ); handleWindowManagerDelete2( widPopupShell_, cbUserPushedCancel, this ); Widget widForm = XtVaCreateManagedWidget( "form", xmFormWidgetClass, widPopupShell_, XmNancestorSensitive, True, NULL ); widDoTearButton_ = XtVaCreateManagedWidget( "Do Tear", xmPushButtonWidgetClass, widForm, XmNbottomAttachment, XmATTACH_FORM, XmNbottomOffset, 10, XmNleftAttachment, XmATTACH_POSITION, XmNleftPosition, 35, XmNrightAttachment, XmATTACH_POSITION, XmNrightPosition, 45, NULL ); XtAddCallback( widDoTearButton_, XmNactivateCallback, cbUserPushedDoTear, this ); widCancelButton_ = XtVaCreateManagedWidget( "Cancel", xmPushButtonWidgetClass, widForm, XmNtopAttachment, XmATTACH_OPPOSITE_WIDGET, XmNtopWidget, widDoTearButton_, XmNbottomAttachment, XmATTACH_OPPOSITE_WIDGET, XmNbottomWidget, widDoTearButton_, XmNleftAttachment, XmATTACH_POSITION, XmNleftPosition, 55, XmNrightAttachment, XmATTACH_POSITION, XmNrightPosition, 65, NULL ); XtAddCallback( widCancelButton_, XmNactivateCallback, cbUserPushedCancel, this ); RWCString soMessage = "Where are all of the read names?\nUse the read names in the Aligned Reads Window. Click to highlight any that you want to go into the new left contig. Any unhighlighted reads will go into the new right contig."; const int nDefTextBoxCols = 80; breakIntoShorterLines( soMessage, nDefTextBoxCols ); Arg aArg[40]; int nArgs = 0; const int nNumberOfRowsVisible = 4; // create the scrolled text widget XtSetArg(aArg[nArgs], XmNbottomAttachment, XmATTACH_WIDGET); nArgs++; XtSetArg(aArg[nArgs], XmNbottomWidget, widDoTearButton_); nArgs++; XtSetArg(aArg[nArgs], XmNleftAttachment, XmATTACH_FORM); nArgs++; XtSetArg(aArg[nArgs], XmNrightAttachment, XmATTACH_FORM); nArgs++; XtSetArg( aArg[nArgs], XmNtopAttachment, XmATTACH_FORM ); nArgs++; XtSetArg(aArg[nArgs], XmNtopOffset,4); nArgs++; XtSetArg(aArg[nArgs], XmNleftOffset,4); nArgs++; XtSetArg(aArg[nArgs], XmNrightOffset,4); nArgs++; XtSetArg(aArg[nArgs], XmNrows, nNumberOfRowsVisible ); nArgs++; XtSetArg(aArg[nArgs], XmNcolumns, nDefTextBoxCols); nArgs++; XtSetArg(aArg[nArgs], XmNeditMode, XmMULTI_LINE_EDIT); nArgs++; XtSetArg(aArg[nArgs], XmNeditable, False); nArgs++; XtSetArg(aArg[nArgs], XmNtraversalOn, False); nArgs++; XtSetArg(aArg[nArgs], XmNcursorPositionVisible, False); nArgs++; if ( !soMessage.isNull() ) { XtSetArg( aArg[nArgs], XmNvalue, (const char*) soMessage ); ++nArgs; } widScrolledText_ = XmCreateScrolledText(widForm, "textBox", aArg, nArgs); XtManageChild(widScrolledText_); XtPopup( widPopupShell_, XtGrabNone ); ConsEd::pGetAssembly()->setReadOnly( "You cannot edit the assembly when you are in the middle of tearing a contig. Either click cancel on the tear contig window or else do not make edits" ); } guiTearContig :: ~guiTearContig() { XtPopdown( widPopupShell_ ); XtDestroyWidget( widPopupShell_ ); ConsEd::pGetAssembly()->clearReadOnly(); } void guiTearContig :: doTear() { // check if the user has moved the cursor int nCurrentCursor = pContigWin_->pGetEditCursor()->nConsPosGet(); if ( nCurrentCursor != nConsPosOfCursor_ ) { RWCString soQuestion = "You have moved the cursor from " + soAddCommas( pContig_->nUnpaddedIndex( nConsPosOfCursor_ ) ) + " to " + soAddCommas( pContig_->nUnpaddedIndex( nCurrentCursor ) ) + ".\nThe tear will be done with respect to the first position.\nIs that ok?"; if (!bGuiGetAnswerYesNo( soQuestion ) ) { return; } } aReadsForLeftContig_.clear(); aReadsForRightContig_.clear(); // find which reads go which way aReadsForLeftContig_.resize( (size_t) pContig_->nGetNumberOfFragsInContig() ); aReadsForRightContig_.resize( (size_t) pContig_->nGetNumberOfFragsInContig() ); int nRead; for( nRead = 0; nRead < pContig_->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig_->pLocatedFragmentGet( nRead ); // check whether this read is in the left or the right contig // If it doesn't intersect the cursor location, but is on its left, // it is in the left contig(s) // If it doesn't intersect the cursor location, but is on its right, // it is in the right contigs(s). // If it *does* intersect the cursor location, then go by the // user's selection. if ( pLocFrag->nGetAlignEnd() < nConsPosOfCursor_ ) aReadsForLeftContig_.append( pLocFrag ); else if ( nConsPosOfCursor_ < pLocFrag->nGetAlignStart() ) aReadsForRightContig_.append( pLocFrag ); else { // so the read intersects the cursor. Thus // we should be able to find it in the list. if ( pLocFrag->bReadCurrentlyHighlighted_ ) aReadsForLeftContig_.append( pLocFrag ); else aReadsForRightContig_.append( pLocFrag ); } } // for( nRead = 0; nRead < pContig_->nGetNumberOfFragsInContig(); ++nRead ) { assert( ( aReadsForLeftContig_.length() + aReadsForRightContig_.length() ) == pContig_->nGetNumberOfFragsInContig() ); if ( aReadsForLeftContig_.length() == 0 ) { popupErrorMessage( "You are putting all of the reads into the right contig so this is not a tear" ); return; } if ( aReadsForRightContig_.length() == 0 ) { popupErrorMessage( "You are putting all of the reads into the left contig so this is not a tear" ); return; } // at this point, all reads in the contig have been put into 2 lists: // a list of reads that the user wants to go into the 'left' contig, // and a list of reads that the user wants to go into the 'right contig. // (Note that these contigs themselves may fall apart into more contigs.) // Find the torn region--To the left and right of this region, any column // will be precisely the same as it was before the tear: the same reads // be in that column. Within the tear, the reads will be different, // and thus we will have to recompute the consensus, Golden path, and // base segment. LocatedFragment* pRightMostLeftRead; LocatedFragment* pLeftMostLeftRead; int nLeftMostLeftRead; int nRightMostLeftRead; for( nRead = 0; nRead < aReadsForLeftContig_.length(); ++nRead ) { LocatedFragment* pLocFrag = aReadsForLeftContig_[ nRead ]; if ( nRead == 0 ) { nRightMostLeftRead = pLocFrag->nGetAlignEnd(); pRightMostLeftRead = pLocFrag; nLeftMostLeftRead = pLocFrag->nGetAlignStart(); pLeftMostLeftRead = pLocFrag; } else { if ( nRightMostLeftRead < pLocFrag->nGetAlignEnd() ) { nRightMostLeftRead = pLocFrag->nGetAlignEnd(); pRightMostLeftRead = pLocFrag; } if ( pLocFrag->nGetAlignStart() < nLeftMostLeftRead ) { nLeftMostLeftRead = pLocFrag->nGetAlignStart(); pLeftMostLeftRead = pLocFrag; } } } LocatedFragment* pLeftMostRightRead; LocatedFragment* pRightMostRightRead; int nRightMostRightRead; int nLeftMostRightRead; for( nRead = 0; nRead < aReadsForRightContig_.length(); ++nRead ) { LocatedFragment* pLocFrag = aReadsForRightContig_[ nRead ]; if ( nRead == 0 ) { nLeftMostRightRead = pLocFrag->nGetAlignStart(); pLeftMostRightRead = pLocFrag; nRightMostRightRead = pLocFrag->nGetAlignEnd(); pRightMostRightRead = pLocFrag; } else { if ( pLocFrag->nGetAlignStart() < nLeftMostRightRead ) { nLeftMostRightRead = pLocFrag->nGetAlignStart(); pLeftMostRightRead = pLocFrag; } if ( nRightMostRightRead < pLocFrag->nGetAlignEnd() ) { nRightMostRightRead = pLocFrag->nGetAlignEnd(); pRightMostRightRead = pLocFrag; } } } // now find the overlap region // the assertion is due to the fact that, since the assembly was // original contiguous, there is no way that it can be divided into // two groups of reads that don't overlap // the nLeftEndOfTear_, nRightEndOfTear_ region marks the torn region--the // region in which each column has a different number of reads than // it did in the original contig. Note that this is this region: // ------ // --------------------------- // --------------------------- // +++++++++++++++++++++++++++++++++++ // +++++++++++++++++++++++++++++++++++ // +++++++++++++++++++++++++++++++++++ // ^ ^ // | | // nLeftEndOfTear_ nRightEndOfTear_ // It is NOT: // ^ ^ // here to here // That is, a read that is at the cursor where the user is tearing, // may not be completely contained within the torn region. assert( bIntersect( nLeftMostRightRead, nRightMostRightRead, nLeftMostLeftRead, nRightMostLeftRead, nLeftEndOfTear_, nRightEndOfTear_ ) ); cout << "tearing contigs from " << pContig_->nUnpaddedIndex( nLeftEndOfTear_ ) << " to " << pContig_->nUnpaddedIndex( nRightEndOfTear_ ) << endl; // check that this is not happending: // ------------------------------------- // ++++++++++++++++ // where --- goes into the left contig and +++ goes into the right // This would be a problem when moving base segments, so we just // disallow it and make the user use PutReadIntoItsOwnContig if ( nLeftMostLeftRead > nLeftMostRightRead ) { popupErrorMessage( "There is a problem because the new right contig has a read %s which has left position %d which is more left than any read in the new left contig. (The leftmost position of the left contig is %d due to read %s.) Thus this is not a tear. Instead you could use PutReadIntoItsOwnContig", (char*) pLeftMostRightRead->soGetName().data(), pContig_->nUnpaddedIndex( nLeftMostRightRead ), pContig_->nUnpaddedIndex( nLeftMostLeftRead ), (char*) pLeftMostLeftRead->soGetName().data() ); return; } if ( nRightMostRightRead < nRightMostLeftRead ) { popupErrorMessage( "There is a problem because the new left contig has a read %s which protrudes to the right to %d, further than any read in the new right contig. (The rightmost read in the right contig is %s which extends to %d.) Thus this is not a tear. Instead you could use PutReadIntoItsOwnContig", (char*) pRightMostLeftRead->soGetName().data(), pContig_->nUnpaddedIndex( nRightMostLeftRead ), (char*) pRightMostRightRead->soGetName().data(), pContig_->nUnpaddedIndex( nRightMostRightRead ) ); return; } // now lets find if things fall into more than 2 contigs in the torn region // To do this, let's make two little lists: The left reads in the // torn region, and the right reads in the torn region. for( nRead = 0; nRead < aReadsForLeftContig_.length(); ++nRead ) { LocatedFragment* pLocFrag = aReadsForLeftContig_[ nRead ]; if ( bIntervalsIntersect( nLeftEndOfTear_, nRightEndOfTear_, pLocFrag->nGetAlignStart(), pLocFrag->nGetAlignEnd() ) ) aLeftReadsInTornRegion_.append( pLocFrag ); } for( nRead = 0; nRead < aReadsForRightContig_.length(); ++nRead ) { LocatedFragment* pLocFrag = aReadsForRightContig_[ nRead ]; if ( bIntervalsIntersect( nLeftEndOfTear_, nRightEndOfTear_, pLocFrag->nGetAlignStart(), pLocFrag->nGetAlignEnd() ) ) aRightReadsInTornRegion_.append( pLocFrag ); } // Now let's check (for the left contig and the right contig) // that there is a base for every position // (unaligned or aligned) for this entire region if ( !bCheckContiguous( LEFT ) ) return; if ( !bCheckContiguous( RIGHT ) ) return; // point of no return // At this point, there is no longer any error checking. // Errors indicate a program bug. Thus clear the read-only // flag in case there is a bug and the entire assembly is left read-only. // save into .wrk file what we intend to do so if there is a bug, // we can reproduce it: ConsEd::pGetAssembly()->writeEditToEditHistoryFile( "!tearing contig %s with cursor at unpadded pos %d padded pos %d at %s", pContig_->soGetName().data(), pContig_->nUnpaddedIndex( nConsPosOfCursor_ ), nConsPosOfCursor_, soGetDateTime( nNoSlashes ).data() ); ConsEd::pGetAssembly()->writeEditToEditHistoryFile( "!reads at cursor:" ); ConsEd::pGetConsEd()->whatToDoBeforeModifyAssembly(); for( int nReadWithButton = 0; nReadWithButton < aReadsAtCursor_.length(); ++nReadWithButton ) { LocatedFragment* pLocFragWithButton = aReadsAtCursor_[ nReadWithButton ]; Widget widLeftButton = (Widget) aLeftButtons_[ nReadWithButton ]; ConsEd::pGetAssembly()->writeEditToEditHistoryFile( "! %s goes into %s contig", pLocFragWithButton->soGetName().data(), ( XmToggleButtonGetState( widLeftButton ) ? "left" : "right" ) ); } // record in .wrk file just to help user recreate any problems for( int nContigWin = ConsEd::pGetConsEd()->dapContigWin_.length() - 1; nContigWin >= 0; --nContigWin ) { ContigWin* pContigWin = ConsEd::pGetConsEd()->dapContigWin_[ nContigWin ]; if ( pContigWin->pContig_ != pContig_ ) continue; EditCursor* pCursor = pContigWin->pGetEditCursor(); if ( pCursor->bCursorValid() ) { RWCString soMessage = "! cursor on one contigwin was on contig " + pCursor->pGetContigEvenIfCursorIsOnRead()->soGetName(); if ( pCursor->bCursorOnCons() ) { soMessage = soMessage + " on consensus at unpadded " + pContig_->nUnpaddedIndex( pCursor->nConsPosGet() ) + " and padded " + pCursor->nConsPosGet(); } else if ( pCursor->bCursorOnFrag() ) { soMessage = soMessage + " on read " + pCursor->pLocFragGet()->soGetName() + " at unpadded cons pos " + pContig_->nUnpaddedIndex( pCursor->nConsPosGet() ) + " and padded " + pCursor->nConsPosGet(); } ConsEd::pGetAssembly()->writeEditToEditHistoryFile( soMessage ); } } // end of recording in .wrk file ConsEd::pGetAssembly()->clearReadOnly(); ConsEd::pGetConsEd()->deleteAllContigWinsForContig( pContig_ ); // this will be used later after creating the 2 new contigs int nOldConsPosForTags = nConsPosOfCursor_; if ( ! ( ( nLeftEndOfTear_ <= nOldConsPosForTags ) && ( nOldConsPosForTags <= nRightEndOfTear_ ) ) ) { nOldConsPosForTags = ( nLeftEndOfTear_ + nRightEndOfTear_ ) / 2; } RWCString soPartOfTagComments = "old contig: " + pContig_->soGetName() + " reads: " + soAddCommas( pContig_->nGetNumberOfReads() ) + " length: " + soAddCommas( pContig_->nGetUnpaddedSeqLength() ) + " torn at: " + soAddCommas( pContig_->nUnpaddedIndex( nConsPosOfCursor_ ) ) + "\n" + " ace file: " + ConsEd::pGetAssembly()->filGetAceFileFullPathname() + "\n"; // Transfer reads to new contigs. // Fix alignment positions and tag positions of the right contig RWCString soLeftContigName = ConsEd::pGetConsEd()->pGetAssembly()->soGetNewContigName(); int nNewNumberOfReads = aReadsForLeftContig_.length(); int nNewNumberOfBaseSegments; // fix for Velvet assemblies with no base segments March 2009 (DG) if ( pContig_->baseSegArray_.nGetNumSegments() > 0 ) { // estimate for number of base segments nNewNumberOfBaseSegments = pContig_->baseSegArray_.nGetBaseSegmentIndexByConsPos( nLeftEndOfTear_ ) + nRightEndOfTear_ - nLeftEndOfTear_ + 1; } else { nNewNumberOfBaseSegments = 0; } bool bComplementedFromWayPhrapCreatedContig = pContig_->bIsComplementedFromWayPhrapCreated(); pNewLeftContig_ = new Contig( (char*) soLeftContigName.data(), nNewNumberOfReads, nNewNumberOfBaseSegments, bComplementedFromWayPhrapCreatedContig, ConsEd::pGetAssembly() ); pNewLeftContig_->resize( nRightEndOfTear_ + 1 ); ConsEd::pGetConsEd()->pGetAssembly()->addContig( pNewLeftContig_ ); // adding reads to new contig for( nRead = 0; nRead < aReadsForLeftContig_.length(); ++nRead ) { LocatedFragment* pLocFrag = aReadsForLeftContig_[ nRead ]; pLocFrag->pContig_ = pNewLeftContig_; // no need to change tag positions--just the contig of the // tag pLocFrag->transferTagsToNewContig( pNewLeftContig_, 0 ); pNewLeftContig_->addLocatedFragment( pLocFrag ); } // in the torn region, figure out the consensus base and quality // The fast method of doing this is to go through each read once, // and mark two arrays--the highest quality at a cons pos and the // read that attained that highest quality. I will ignore the // "aligned" clipping at this point, since a read could be high // quality unaligned precisely because there is a mis-join, which // is the reason the user is tearing the contig. Thus the correct // consensus could be this unaligned high quality region. mbtValVector aBestQualityRead( nRightEndOfTear_ - nLeftEndOfTear_ + 1, nLeftEndOfTear_, (LocatedFragment*) NULL ); mbtValVector aBestQuality( nRightEndOfTear_ - nLeftEndOfTear_ + 1, nLeftEndOfTear_, 0 ); for( nRead = 0; nRead < aLeftReadsInTornRegion_.length(); ++nRead ) { LocatedFragment* pLocFrag = aLeftReadsInTornRegion_[ nRead ]; // while we are going through these reads anyway, reset the // alignment clipping region since we want to use the entire // length of these reads for int nLeftIntersect; int nRightIntersect; assert( bIntersect( nLeftEndOfTear_, nRightEndOfTear_, pLocFrag->nGetAlignStart(), pLocFrag->nGetAlignEnd(), nLeftIntersect, nRightIntersect ) ); for( int nConsPos = nLeftIntersect; nConsPos <= nRightIntersect; ++nConsPos ) { if ( !aBestQualityRead[ nConsPos ] ) { aBestQualityRead[ nConsPos ] = pLocFrag; if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) aBestQuality[ nConsPos ] = 0; else aBestQuality[ nConsPos ] = ucNormalQualityFrom9899Quality( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ); } else { Quality q; // fixed bug in which X's were often chosen for the consensus if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) q = 0; else q = ucNormalQualityFrom9899Quality( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ); if ( q > aBestQuality[ nConsPos ] ) { aBestQualityRead[ nConsPos ] = pLocFrag; aBestQuality[ nConsPos ] = q; } } } } // adding the bases one at a time int nConsPos; for( nConsPos = pContig_->nGetStartIndex(); nConsPos < nLeftEndOfTear_; ++nConsPos ) { pNewLeftContig_->append( pContig_->ntGetCons( nConsPos ) ); } // don't create consensus bases where there weren't any before int nConsPosStart = MAX( nLeftEndOfTear_, pContig_->nGetStartIndex() ); int nConsPosEnd = MIN( nRightEndOfTear_, pContig_->nGetEndIndex() ); assert( nConsPosStart <= nConsPosEnd ); for( nConsPos = nConsPosStart; nConsPos <= nConsPosEnd; ++nConsPos ) { LocatedFragment* pLocFrag = aBestQualityRead[ nConsPos ]; pNewLeftContig_->append( pLocFrag->ntGetFragFromConsPos( nConsPos ) ); // while we are going through each read in the torn region that // contributes to the new consensus there, let's also fix the // alignment clipping positions. pLocFrag->extendAlignedRegion( nConsPos ); } // updateLastDisplayableContigPos must come before // setPaddedPositionsArray since the latter uses // ntGetCons which does a check that the nConsPos // is within the displayable contig positions pNewLeftContig_->setUnpaddedPositionsArray(); pNewLeftContig_->setPaddedPositionsArray(); // transferring base segments // which is the last base segment that can be transferred? //There could be a situation in which we are tearing a contig like this: // cccccccccccccccccccccccccc // --------------------------- // +++++++++++++++++++++++++++++++++++ // // where the --- go into the new left contig and the // +++ go into the new right contig. // In this case, the torn region starts before the consensus // and thus there is no base segments before the torn region. // Thus no transferring of base segments before the torn region. // fix for case of no base segments March 2009 (DG) if ( pContig_->baseSegArray_.nGetNumSegments() > 0 ) { assert( pContig_->baseSegArray_.bGetDataStructureOk() ); if ( pContig_->nGetStartIndex() < nLeftEndOfTear_ ) { int nAfterLastBaseSegmentToBeTransferred = pContig_->baseSegArray_.nGetBaseSegmentIndexByConsPos( nLeftEndOfTear_ ); if ( nAfterLastBaseSegmentToBeTransferred == RW_NPOS ) { cerr << "nLastBaseSegmentToBeTransferred == RW_NPOS, nLeftEndOfTear_ = " << nLeftEndOfTear_ << endl; assert( false ); } int nLastBaseSegmentToBeTransferred = nAfterLastBaseSegmentToBeTransferred - 1; // note that if the base segment that covers nLeftEndOfTear_ is the // first base segment, then nLastBaseSegmentToBeTransferred will // be -1, so no base segments will be transferred in the loop // below. for( int nBaseSeg = 0; nBaseSeg <= nLastBaseSegmentToBeTransferred; ++nBaseSeg ) { BaseSegment* pBaseSeg = pContig_->baseSegArray_[ nBaseSeg ]; BaseSegment* pNewBaseSeg = new BaseSegment( pBaseSeg->pLocFrag_, pBaseSeg->nStartInConsensus_, pBaseSeg->nEndInConsensus_ ); pNewLeftContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBaseSeg ); } // now look at the base segment that perhaps must be cut short // We now guarantee that this base segment (or the one we just added), // will end precisely before the tear region BaseSegment* pBoundaryBaseSeg = pContig_->baseSegArray_[ nLastBaseSegmentToBeTransferred + 1 ]; if ( pBoundaryBaseSeg->nStartInConsensus_ < nLeftEndOfTear_ ) { assert( pBoundaryBaseSeg->nEndInConsensus_ >= nLeftEndOfTear_ ); BaseSegment* pNewBoundaryBaseSeg = new BaseSegment( pBoundaryBaseSeg->pLocFrag_, pBoundaryBaseSeg->nStartInConsensus_, nLeftEndOfTear_ - 1 ); pNewLeftContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBoundaryBaseSeg ); } } // if ( pContig_->nGetStartIndex() < nLeftEndOfTear_ ) { // now add base segments for the tear region // We will add a new base segment for each base and not worry about // trying to consolidate adjacent base segments that share the same // read. // Fixed Bug: DG, October 4, 2000. It is possible that this torn region // can extend beyond the end of the consensus. Clearly it doesn't // make sense to have base segments there, in either new contig. So // only create base segments in the torn region that overlaps the consensus. int nTornRegionOnConsensusLeft; int nTornRegionOnConsensusRight; if ( bIntersect( nLeftEndOfTear_, nRightEndOfTear_, pContig_->nGetStartIndex(), pContig_->nGetEndIndex(), nTornRegionOnConsensusLeft, nTornRegionOnConsensusRight ) ) { for( nConsPos = nTornRegionOnConsensusLeft; nConsPos <= nTornRegionOnConsensusRight; ++nConsPos ) { BaseSegment* pNewBaseSegment = new BaseSegment( aBestQualityRead[ nConsPos ], nConsPos, nConsPos ); pNewLeftContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBaseSegment ); } } } // if ( pContig_->baseSegArray_.nGetNumSegments() > 0 ) { // now recalculate the consensus quality values for the new left contig // (The reason for the intersect is that there may be reads that // stick out on the left to negative consensus positions and some // of these may be in the right contig and thus the nLeftEndOfTear_ // may be to the left of where the new left contig starts.) int nConsPosLeftToRecalculate; int nConsPosRightToRecalculate; if ( bIntersect( pNewLeftContig_->nGetStartIndex(), pNewLeftContig_->nGetEndIndex(), nLeftEndOfTear_, nRightEndOfTear_, nConsPosLeftToRecalculate, nConsPosRightToRecalculate ) ) { pNewLeftContig_->recalculateConsensusQualitiesAndChange( nConsPosLeftToRecalculate, nConsPosRightToRecalculate ); } // That completes the base segments for the new left contig // save info for adding tear tag RWCString soCommentForNewLeftContigTag = RWCString( "left contig " ) + pNewLeftContig_->soGetName() + " reads: " + soAddCommas( pNewLeftContig_->nGetNumberOfReads() ) + " length: " + soAddCommas( pNewLeftContig_->nGetUnpaddedSeqLength() ) + "\n" + soPartOfTagComments; // Now work on the new right contig. RWCString soNewRightContigName = ConsEd::pGetConsEd()->pGetAssembly()->soGetNewContigName(); nNewNumberOfReads = aReadsForRightContig_.length(); // estimate the number of base segments // fix for case of Velvet assemblies with no base segments March 2009 (DG) if ( pContig_->baseSegArray_.nGetNumSegments() > 0 ) { int nTemp = pContig_->baseSegArray_.nGetBaseSegmentIndexByConsPos( nRightEndOfTear_ ); nNewNumberOfBaseSegments = pContig_->baseSegArray_.nGetNumSegments() - nTemp + nRightEndOfTear_ - nLeftEndOfTear_ + 1; } else { nNewNumberOfBaseSegments = 0; } pNewRightContig_ = new Contig( (char*) soNewRightContigName.data(), nNewNumberOfReads, nNewNumberOfBaseSegments, bComplementedFromWayPhrapCreatedContig, ConsEd::pGetAssembly() ); pNewRightContig_->resize( pContig_->nGetPaddedSeqLength() - nLeftEndOfTear_ + 1 ); ConsEd::pGetConsEd()->pGetAssembly()->addContig( pNewRightContig_ ); // reset the consensus base and quality arrays to refigure them // based on the reads from the right contig. Note that it is // possible (likely!) that the consensus bases and quality in the // torn region will be different for the two ends of the new contigs for( nConsPos = nLeftEndOfTear_; nConsPos <= nRightEndOfTear_; ++nConsPos ) { aBestQualityRead[ nConsPos ] = NULL; aBestQuality[ nConsPos ] = 0; } for( nRead = 0; nRead < aRightReadsInTornRegion_.length(); ++nRead ) { LocatedFragment* pLocFrag = aRightReadsInTornRegion_[ nRead ]; int nLeftIntersect; int nRightIntersect; assert( bIntersect( nLeftEndOfTear_, nRightEndOfTear_, pLocFrag->nGetAlignStart(), pLocFrag->nGetAlignEnd(), nLeftIntersect, nRightIntersect ) ); for( nConsPos = nLeftIntersect; nConsPos <= nRightIntersect; ++nConsPos ) { if ( !aBestQualityRead[ nConsPos ] ) { aBestQualityRead[ nConsPos ] = pLocFrag; if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) aBestQuality[ nConsPos ] = 0; else aBestQuality[ nConsPos ] = ucNormalQualityFrom9899Quality( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ); } else { Quality q; if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) q = 0; else q = ucNormalQualityFrom9899Quality( pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() ); if ( q > aBestQuality[ nConsPos ] ) { aBestQualityRead[ nConsPos ] = pLocFrag; aBestQuality[ nConsPos ] = q; } } } } // now we have found out the bases and qualities for the torn region // of the right contig // add bases one at a time from what we have determined is the best // base in the torn region // do not add bases where there was no consensus before nConsPosStart = MAX( nLeftEndOfTear_, pContig_->nGetStartIndex() ); nConsPosEnd = MIN( nRightEndOfTear_, pContig_->nGetEndIndex() ); assert( nConsPosStart <= nConsPosEnd ); for( nConsPos = nConsPosStart; nConsPos <= nConsPosEnd; ++nConsPos ) { LocatedFragment* pLocFrag = aBestQualityRead[ nConsPos ]; pNewRightContig_->append( pLocFrag->ntGetFragFromConsPos( nConsPos ) ); // note that this will fix the aligned clipping positions in // OLD CONSENSUS POSITIONS. They will be changed to new consensus // positions below in moveReadAndReadTagsToNewContig pLocFrag->extendAlignedRegion( nConsPos ); } // add the rest of the bases from the right of the torn region for( nConsPos = nRightEndOfTear_ + 1; nConsPos <= pContig_->nGetEndIndex(); ++nConsPos ) { pNewRightContig_->append( pContig_->ntGetCons( nConsPos ) ); } // add reads to new contig // This is done after several uses above with ntGetFragFromConsPos // which assume the old contig positions. // moveReadAndReadTagsToNewContig will completely change the locations of things int nRightContigConsPosFromConsPos = -nLeftEndOfTear_ + 1; // handle case like this: // cccccccccccccccccccccccccc // --------------------------- // +++++++++++++++++++++++++++++++++++ // // where the --- go into the new left contig and the // +++ go into the new right contig. // In this case, the beginning of the ccc... is 1 in // both the new left contig and the new right contig // Thus there is no translation. if ( nLeftEndOfTear_ < pContig_->nGetStartIndex() ) nRightContigConsPosFromConsPos = 0; for( nRead = 0; nRead < aReadsForRightContig_.length(); ++nRead ) { LocatedFragment* pLocFrag = aReadsForRightContig_[ nRead ]; pLocFrag->moveReadAndReadTagsToNewContig( pNewRightContig_, nRightContigConsPosFromConsPos ); } pNewRightContig_->updateFirstAndLastDisplayableContigPos(); // updateLastDisplayableContigPos must come before // setPaddedPositionsArray since the latter uses // ntGetCons which does a check that the nConsPos // is within the displayable contig positions pNewRightContig_->setUnpaddedPositionsArray(); pNewRightContig_->setPaddedPositionsArray(); // now create base segments for the torn region // Fixed Bug: DG, October 4, 2000. It is possible that this torn region // can extend beyond the end of the consensus. Clearly it doesn't // make sense to have base segments there, in either new contig. So // only create base segments in the torn region that overlaps the consensus. // I saw this in a case in which the torn region went all the way to // the left end of the contig, before the consensus: // cccccccccccccccc (consensus) // lllllllllllllllll (goes into left contig) // rrrrrrrrrrrrrrrrrr (goes into right contig) // This ended up with all kinds of base segments with negative // positions (since they were before the consensus.) // fix for case of no base segments March 2009 (DG) if ( pContig_->baseSegArray_.nGetNumSegments() > 0 ) { int nTornRegionOnConsensusLeft; int nTornRegionOnConsensusRight; if ( bIntersect( nLeftEndOfTear_, nRightEndOfTear_, pContig_->nGetStartIndex(), pContig_->nGetEndIndex(), nTornRegionOnConsensusLeft, nTornRegionOnConsensusRight ) ) { // nConsPos is w.r.t. the old contig positions for( nConsPos = nTornRegionOnConsensusLeft; nConsPos <= nTornRegionOnConsensusRight; ++nConsPos ) { BaseSegment* pNewBaseSegment = new BaseSegment( aBestQualityRead[ nConsPos ], nConsPos + nRightContigConsPosFromConsPos, nConsPos + nRightContigConsPosFromConsPos ); pNewRightContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBaseSegment ); } } //There could be a situation in which we are tearing a contig like this: // -------------------------------------------- // ------------------------------ // ----------------------- // ++++++++++++++++++++++++++ // ++++++++++++++++++++++++ // +++++++++++++++++++++ // where the --- go into the new left contig and the // +++ go into the new right contig. // In this case, there is *no* region to the right of the torn // region. The new right contig is formed entirely from the torn // region. That is the reason for the if( ) below if ( nRightEndOfTear_ < pContig_->nGetEndIndex() ) { int nBoundaryBaseSegment = pContig_->baseSegArray_.nGetBaseSegmentIndexByConsPos( nRightEndOfTear_ + 1 ); BaseSegment* pBoundaryBaseSegment = pContig_->baseSegArray_[ nBoundaryBaseSegment ]; // create a new base segment which starts precisely at // nRightEndOfTear_ + 1. This may be the same as this // base segment, or it may be to the right of it. BaseSegment* pNewBoundaryBaseSegment = new BaseSegment( pBoundaryBaseSegment->pLocFrag_, nRightEndOfTear_ + 1 + nRightContigConsPosFromConsPos, pBoundaryBaseSegment->nEndInConsensus_ + nRightContigConsPosFromConsPos); pNewRightContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBoundaryBaseSegment ); for( int nBaseSeg = nBoundaryBaseSegment + 1; nBaseSeg < pContig_->baseSegArray_.nGetNumSegments(); ++nBaseSeg ) { BaseSegment* pOldBaseSeg = pContig_->baseSegArray_[ nBaseSeg ]; BaseSegment* pNewBaseSeg = new BaseSegment( pOldBaseSeg->pLocFrag_, pOldBaseSeg->nStartInConsensus_ + nRightContigConsPosFromConsPos, pOldBaseSeg->nEndInConsensus_ + nRightContigConsPosFromConsPos ); pNewRightContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBaseSeg ); } } // if ( nRightEndOfTear_ < pContig_->nGetEndIndex() ) { // do full checking once assert( pNewRightContig_->baseSegArray_.bGetDataStructureOk( true ) ); assert( pNewLeftContig_->baseSegArray_.bGetDataStructureOk( true ) ); } // if ( pContig_->baseSegArray_.nGetNumSegments() > 0 ) { // modify consensus values in torn region in right contig int nLeftEndOfTearRightContigConsPos = 1; int nRightEndOfTearRightContigConsPos = nRightEndOfTear_ + nRightContigConsPosFromConsPos; int nTearOnConsensusLeft; int nTearOnConsensusRight; if ( bIntersect( nLeftEndOfTearRightContigConsPos, nRightEndOfTearRightContigConsPos, pNewRightContig_->nGetStartIndex(), pNewRightContig_->nGetEndIndex(), nTearOnConsensusLeft, nTearOnConsensusRight ) ) { pNewRightContig_->recalculateConsensusQualitiesAndChange( nTearOnConsensusLeft, nTearOnConsensusRight ); } // add a tag to the new right contig RWCString soCommentForNewRightContigTag = RWCString( "right contig " ) + pNewRightContig_->soGetName() + " reads: " + soAddCommas( pNewRightContig_->nGetNumberOfReads() ) + " length: " + soAddCommas( pNewRightContig_->nGetUnpaddedSeqLength() ) + "\n" + soPartOfTagComments; int nConsPosOfRightTag = nOldConsPosForTags + nRightContigConsPosFromConsPos; int nConsPosOfLeftTag = nOldConsPosForTags; createTearTags( pNewLeftContig_, nConsPosOfLeftTag, soCommentForNewLeftContigTag, pNewRightContig_, nConsPosOfRightTag, soCommentForNewRightContigTag ); // transfer consensus tags for( int nTag = 0; nTag < pContig_->nGetNumberOfTags(); ++nTag ) { tag* pTag = pContig_->pGetTag( nTag ); if ( pTag->nPaddedConsPosEnd_ <= nRightEndOfTear_ && pTag->nPaddedConsPosStart_ < nLeftEndOfTear_ ) { // like this: // -------------------- // | | // or this: // --------- // | | pTag->pContig_ = pNewLeftContig_; pNewLeftContig_->addConsensusTag( pTag ); } else if ( nLeftEndOfTear_ <= pTag->nPaddedConsPosStart_ && nRightEndOfTear_ < pTag->nPaddedConsPosEnd_ ) { // like this: // ---------------------------- // | | // or this: // -------------------- // | | pTag->pContig_ = pNewRightContig_; pTag->nPaddedConsPosStart_ += nRightContigConsPosFromConsPos; pTag->nPaddedConsPosEnd_ += nRightContigConsPosFromConsPos; pNewRightContig_->addConsensusTag( pTag ); } else { // What is left is this: // --------------------------------- // | | // or this: // --------- // | | // split the tag tag* pRightTag = tagTypes::pGetTagTypes()->pDuplicateTag( pTag ); tag* pLeftTag = pTag; int nOldConsPosEnd = pTag->nPaddedConsPosEnd_; pLeftTag->nPaddedConsPosEnd_ = MIN( nRightEndOfTear_, pTag->nPaddedConsPosEnd_ ); pLeftTag->pContig_ = pNewLeftContig_; pNewLeftContig_->addConsensusTag( pLeftTag ); if ( pRightTag ) { pRightTag->pContig_ = pNewRightContig_; pRightTag->nPaddedConsPosStart_ = MAX( pRightTag->nPaddedConsPosStart_, nLeftEndOfTear_ ); pRightTag->nPaddedConsPosStart_ += nRightContigConsPosFromConsPos; pRightTag->nPaddedConsPosEnd_ = nOldConsPosEnd + + nRightContigConsPosFromConsPos; pNewRightContig_->addConsensusTag( pRightTag ); popupErrorMessage( "had to split tag of type %s into two: one from %d to %d in contig %s and the other from %d to %d in contig %s", (char*) pTag->soType_.data(), pNewLeftContig_->nUnpaddedIndex( pLeftTag->nPaddedConsPosStart_ ), pNewLeftContig_->nUnpaddedIndex( pLeftTag->nPaddedConsPosEnd_ ), (char*) pNewLeftContig_->soGetName().data(), pNewRightContig_->nUnpaddedIndex( pRightTag->nPaddedConsPosStart_ ), pNewRightContig_->nUnpaddedIndex( pRightTag->nPaddedConsPosEnd_ ), (char*) pNewRightContig_->soGetName().data() ); } } } // this must come before updateContigListOnMainConsedWindow so that the contig is removed // from the assembly's array of contigs and thus the contig is not // again put in the list of contigs delete pContig_; // update the top level window's list of contigs and reads ConsEd::pGetConsEd()->updateContigListOnMainConsedWindow(); // make 2 new contigwins and put the new contigs in them ConsEd::pGetConsEd()->pScrollExistingContigWinOrMakeNewContigWin( pNewLeftContig_, pNewLeftContig_->nGetEndIndex() ); ContigWin* pSecondNewContigWin = ConsEd::pGetConsEd()->pScrollExistingContigWinOrMakeNewContigWin( pNewRightContig_, pNewRightContig_->nGetStartIndex() ); // so that when user tries to exit out of consed, he is warned that there // are unsaved changes ConsEd::pGetAssembly()->setChanged(); ConsEd::pGetConsEd()->disallowUndos(); popupInfoMessage( pSecondNewContigWin->pGcw_->widGetGuiContigWinTopLevel(), "tear complete. You must save the assembly before doing any other edits or else the .wrk recovery file will not be usable and consed may crash if you try to undo any edits" ); } typedef RWTPtrOrderedVector arrayOfPointersToLocatedFragments; bool guiTearContig :: bCheckContiguous( const leftOrRight nLeftOrRight ) { mbtValVector aCoveredByReadInTornRegion( nRightEndOfTear_ - nLeftEndOfTear_ + 1, nLeftEndOfTear_, 0 ); arrayOfPointersToLocatedFragments* pArrayOfPointersToLocFrags; if ( nLeftOrRight == LEFT ) pArrayOfPointersToLocFrags = &aLeftReadsInTornRegion_; else pArrayOfPointersToLocFrags = &aRightReadsInTornRegion_; int nPos; for( nPos = nLeftEndOfTear_; nPos <= nRightEndOfTear_; ++nPos ) { aCoveredByReadInTornRegion[ nPos ] = 0; } for( int nRead = 0; nRead < pArrayOfPointersToLocFrags->length(); ++nRead ) { LocatedFragment* pLocFrag = (*pArrayOfPointersToLocFrags)[ nRead ]; // I tried to decide whether to allow a tear when there might // only be unaligned sequence holding one of the resulting // contigs together. I decided 'yes', because there would be // unaligned at the ends of each read, hence at the end of each // resulting contig. That is normal, and should be allowed. int nReadInTornRegionLeft; int nReadInTornRegionRight; assert( bIntersect( nLeftEndOfTear_, nRightEndOfTear_, pLocFrag->nGetAlignStart(), pLocFrag->nGetAlignEnd(), nReadInTornRegionLeft, nReadInTornRegionRight ) ); for( nPos = nReadInTornRegionLeft; nPos <= nReadInTornRegionRight; ++nPos ) { aCoveredByReadInTornRegion[ nPos ] = (unsigned char) 1; } } for( nPos = nLeftEndOfTear_; nPos <= nRightEndOfTear_; ++nPos ) { if ( !aCoveredByReadInTornRegion[ nPos ] ) { // hole in the contig popupErrorMessage( "This tear cannot be done because, after tearing, there would be a region in the resulting %s contig starting at old consensus position %d (position counting *'s: %d) that would not be covered by any read. Perhaps you should go to that area first and try tearing the contig, although this is not guaranteed to work either.\n", ( ( nLeftOrRight == LEFT ) ? "left" : "right" ), pContig_->nUnpaddedIndex( nPos ), nPos ); return( false ); } } // if reached here, there is no hole in the part of the new contig in // the torn region return( true ); } bool guiTearContig :: bDoesThisReadGoLeftNotRightByDefault( LocatedFragment* pLocFrag ) { int nConsPosOfCenterOfRead; if ( pLocFrag->bWholeReadIsLowQuality_ ) { nConsPosOfCenterOfRead = ( pLocFrag->nGetAlignStart() + pLocFrag->nGetAlignEnd() ) / 2; } else { nConsPosOfCenterOfRead = ( pLocFrag->nGetHighQualityStart() + pLocFrag->nGetHighQualityEnd() ) / 2; } if ( nConsPosOfCenterOfRead < nConsPosOfCursor_ ) return( true ); else return( false ); } void guiTearContig :: setHighlightingOfReadNames( ContigView* pContigView ) { for( int nRead = 0; nRead < pContigView->nGetNumberOfFragments(); ++nRead ) { LocatedFragment* pLocFrag = pContigView->pLocatedFragmentGet( nRead ); bool bThisReadGoesLeftNotRight; if ( pLocFrag->nGetAlignEnd() < nConsPosOfCursor_ ) { bThisReadGoesLeftNotRight = true; } else if ( nConsPosOfCursor_ < pLocFrag->nGetAlignStart() ) { bThisReadGoesLeftNotRight = false; } else { bThisReadGoesLeftNotRight = bDoesThisReadGoLeftNotRightByDefault( pLocFrag ); } pLocFrag->bReadCurrentlyHighlighted_ = bThisReadGoesLeftNotRight; } // when done, need to refresh the contigwin pContigWin_->drawBasesAndConsensus( true ); // erase first }