/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "compareContigs.h" #include "extendAlignment.h" #include "contig.h" #include using namespace std; #include #include "guiCompareContigs.h" #include "numutil.h" #include "scaleTickSizes.h" #include "consed.h" #include "makeUpper.h" #include "paddedPieceOfRead.h" #include "bIsAPad.h" #include "compareContigsTypes.h" #include "translateRWCString.h" #include "contigwin.h" #include "getAlignment.h" #include "please_wait.h" #include "consedParameters.h" #include "sequence.h" #include "filename.h" #include "szGetTime.h" #include "popupErrorMessage.h" #include "popupInfoMessage.h" #include "guicontigwin.h" #include "soGetDateTime.h" #include "soAddCommas.h" // Overview: when making a join, we must decide what the consensus // should be. This is *not* done by looking at the highest quality // base in each column. Rather it is done by taking one of the other // of the old consensus over a stretch of bases, some of which may be // of lower quality than the base in the other consensus. These // stretches of bases are defined as follows: we look for stretches of // the alignment in which (I think) 5 bases or more in a row match // precisely. These are "match segments". The regions in between are // "discrepant segments". It is a little more complicated because // discrepant segments include some of the matching bases on each end. // Then each such segment is scored by adding the quality values. The // high scoring segment is used for the consensus. const int nNumberOfMatchingBasesInARowInAMatchSegment = 5; const int nBackUpInMatchSegment = 3; // in closing a discrepant contig segment, after found 5 bases in a row, // back up 3 to put the end on the 2nd matching base, ready to put the // start of the next contig segment on the 3rd matching base const int nBackUpFromFirstDiscrepancy = 4; // in closing a match contig segment, when find a discrepancy, backup // 4 bases hence putting the discrepant contig segment starting with // 3 matching bases void compareContigs :: joinContigs() { if (!bAlignmentExists_ ) { popupErrorMessage( "You must first click 'Align' to create an alignment before joining the contigs" ); return; } if ( pContig1_ == pContig2_ ) { popupErrorMessage( "Give me a break! You can't join a contig to itself. You can only join a contig to a different contig." ); return; } if ( bIsContigComplemented( eContig1 ) || bIsContigComplemented( eContig2 ) ) { popupErrorMessage( "You have complemented one of the contigs relative to the Aligned Reads Window, so you cannot make a join with this alignment. Go to the Aligned Reads Window, complement the contig there, and then make this alignment without clicking on \"complement just in this window\"" ); return; } assert( pContig1_->baseSegArray_.bGetDataStructureOk( true ) ); assert( pContig2_->baseSegArray_.bGetDataStructureOk( true ) ); // point of no return--well, not quite. There is a point below // where it bails out if the alignment is not sufficiently good to make // a join. pGuiCompareContigs_->makeAlignButtonInsensitive(); // this fixes a bug in which the user would try to make a join, // get an error about the alignment being insufficient, but by then // the Aligned Reads Windows had been popped down. Then the // user might push "join" again, and this time there would // be a segmentation fault since pContigWin1_ would be null. // This prevents the user from trying to make the join again. pGuiCompareContigs_->makeJoinButtonInsensitive(); // writing to edit history file for debugging use ConsEd::pGetAssembly()->writeEditToEditHistoryFile( "! joining contigs %s and %s ", pContig1_->soGetName().data(), pContig2_->soGetName().data() ); compareContigsCursor& curTopContig1 = curGetCompareContigsTopContigCursor( eContig1 ); compareContigsCursor& curTopContig2 = curGetCompareContigsTopContigCursor( eContig2 ); ConsEd::pGetAssembly()->writeEditToEditHistoryFile( "! aligned with pinned positions at %d (unpadded %d )", curTopContig1.nPosition_, pContig1_->nUnpaddedIndex( curTopContig1.nPosition_ ) ); ConsEd::pGetAssembly()->writeEditToEditHistoryFile( "! and %d (unpadded %d ) respectively", curTopContig2.nPosition_, pContig2_->nUnpaddedIndex( curTopContig2.nPosition_ ) ); ConsEd::pGetConsEd()->whatToDoBeforeModifyAssembly(); for( int nContigWin = ConsEd::pGetConsEd()->dapContigWin_.length() - 1; nContigWin >= 0; --nContigWin ) { ContigWin* pContigWin = ConsEd::pGetConsEd()->dapContigWin_[ nContigWin ]; if ( pContigWin->pContig_ != pContig1_ && pContigWin->pContig_ != pContig2_ ) continue; EditCursor* pCursor = pContigWin->pGetEditCursor(); if ( pCursor->bCursorValid() ) { RWCString soMessage = "! cursor on one contigwin was on contig " + pCursor->pGetContigEvenIfCursorIsOnRead()->soGetName(); if ( pCursor->bCursorOnCons() ) { soMessage = soMessage + " on consensus at unpadded " + pCursor->pGetContig()->nUnpaddedIndex( pCursor->nConsPosGet() ) + " and padded " + pCursor->nConsPosGet(); } else if ( pCursor->bCursorOnFrag() ) { soMessage = soMessage + " on read " + pCursor->pLocFragGet()->soGetName() + " at unpadded cons pos " + pCursor->pGetContigEvenIfCursorIsOnRead()->nUnpaddedIndex( pCursor->nConsPosGet() ) + " and padded " + pCursor->nConsPosGet(); } ConsEd::pGetAssembly()->writeEditToEditHistoryFile( soMessage ); } } // end writing to edit history file PleaseWait* pPleaseWait = new PleaseWait( pGuiCompareContigs_->widPopupShell_ ); // other compare contigs windows must be deleted so that the // aligned reads windows (that contain the contigs being joined) can // be deleted ConsEd::pGetConsEd()->deleteOtherCompareContigsWindowsWithSameContigs( this, pContig1_, pContig2_ ); // now delete the 2 aligned reads windows that contain the two contigs // that we are joining // pContigWin1_ and pContigWin2_ must be null in order to // deleteIfOK to work ContigWin* pContigWin1Temp = pContigWin1_; ContigWin* pContigWin2Temp = pContigWin2_; pContigWin1_ = NULL; pContigWin2_ = NULL; // if compareContigs is called from assemblyView, pContigWin1_ // and pContigWin2 will be null. Avoid seg fault: if ( pContigWin1Temp ) pContigWin1Temp->deleteIfOK(); if ( pContigWin2Temp ) pContigWin2Temp->deleteIfOK(); pContig1_->setUnpaddedPositionsArray(); pContig2_->setUnpaddedPositionsArray(); pContig1_->setPaddedPositionsArray(); pContig2_->setPaddedPositionsArray(); cout << "adding columns of pads to align contigs" << endl; cout.flush(); // If you try to insert a pad into a read that didn't have a // phd file, you will get an exception since it will try // to modify the peak positions array. Thus report to the // user that this has happened: try { addColumnsOfPadsToAlignContigs(); } catch( InputDataError ide ) { RWCString soErrorMessage = ide.szGetDesc(); soErrorMessage += " This could be caused by not having a phd file for a read in one of the contigs being joined. You must have all phd files. Do not save the ace file since now these contigs are probably corrupted. Rather, restore phd files and then restart Consed using the last saved ace file "; popupErrorMessage( soErrorMessage ); delete pPleaseWait; return; } pContig1_->setUnpaddedPositionsArray(); pContig2_->setUnpaddedPositionsArray(); pContig1_->setPaddedPositionsArray(); pContig2_->setPaddedPositionsArray(); cout << "checking that contigs are now alignable" << endl; cout.flush(); assert( bCheckAddColumnsOfPadsToAlignContigs() ); pContig1_->setUnpaddedPositionsArray(); pContig2_->setUnpaddedPositionsArray(); pContig1_->setPaddedPositionsArray(); pContig2_->setPaddedPositionsArray(); // save some information for when we create a tag (far below) RWCString soCommentForTag = "old contigs:\n" + pContig1_->soGetName() + " pinned pos: " + soAddCommas( pContig1_->nUnpaddedIndex( curTopContig1.nPosition_ ) ) + " length: " + soAddCommas( pContig1_->nGetUnpaddedSeqLength() ) + " reads: " + soAddCommas( pContig1_->nGetNumberOfReads() ) + "\n" + pContig2_->soGetName() + " pinned pos: " + soAddCommas( pContig2_->nUnpaddedIndex( curTopContig2.nPosition_ ) ) + " length: " + soAddCommas( pContig2_->nGetUnpaddedSeqLength() ) + " reads: " + soAddCommas( pContig2_->nGetNumberOfReads() ) + "\n" + "ace file: " + ConsEd::pGetAssembly()->filGetAceFileFullPathname(); cout << "finding contig segments" << endl; cout.flush(); // find contigSegments for each contig // terminate when reach end of alignment int nPaddedStart = pPaddedPieceOfReadTop_->nPaddedAlignmentStart_; int nUnpaddedStart1 = 1; int nUnpaddedStart2 = 1; bool bFirstTime = true; bool bEndOfAlignment = false; while( !bEndOfAlignment ) { int nPaddedEnd; int nUnpaddedEnd1; int nUnpaddedEnd2; findEndOfDiscrepantSegment( nPaddedStart, nPaddedEnd, nUnpaddedEnd1, nUnpaddedEnd2, bEndOfAlignment ); if ( bEndOfAlignment && bFirstTime ) { popupErrorMessage("The alignment is not sufficiently good to make a join" ); delete pPleaseWait; return; } bFirstTime = false; addContigSegment( nUnpaddedStart1, nUnpaddedEnd1, nUnpaddedStart2, nUnpaddedEnd2 ); if ( !bEndOfAlignment ) { nUnpaddedStart1 = nUnpaddedEnd1 + 1; nUnpaddedStart2 = nUnpaddedEnd2 + 1; nPaddedStart = nPaddedEnd + 1; bool bMatchSegmentExists; findEndOfMatchSegment( nPaddedStart, nPaddedEnd, nUnpaddedEnd1, nUnpaddedEnd2, bMatchSegmentExists ); if ( bMatchSegmentExists ) { addContigSegment( nUnpaddedStart1, nUnpaddedEnd1, nUnpaddedStart2, nUnpaddedEnd2 ); } nUnpaddedStart1 = nUnpaddedEnd1 + 1; nUnpaddedStart2 = nUnpaddedEnd2 + 1; nPaddedStart = nPaddedEnd + 1; } } // while( !bEndOfAlignment ) cout << "calculating total qualities" << endl; cout.flush(); // calculate total qualities for each contig segment int nContigSeg; for( nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { calculateTotalQualitiesOfContigSegment( contigSegments_[ nContigSeg ] ); } // which contig is the left contig? (This is necessary in order to // determine how to modify the read positions in the new contig.) // This is *not* determined by which contig sticks out furthest to // the left (although usually that will turn out to be the case. // Instead, it is determined by which contig has the highest sum of // quality values up to the first run of matches within the // alignment. I believe that "run" is defined as 5 matches in a // row, and count up to the 2nd match. // Similary, the rightmost contig is defined as the contig that has the // highest sum of quality values in the last contig segment. That // means the sum of quality values from the last run of 5 matches // within the alignment to the end of the contig. compareContigsTypes eLeftContig; compareContigsTypes eNotLeftContig; Contig* pLeftContig; Contig* pNotLeftContig; Contig* pBestContig; int nBestPaddedStart; int nBestPaddedEnd; getBestContigSegment( 0, pBestContig, nBestPaddedStart, nBestPaddedEnd ); if ( pBestContig == pContig1_ ) { eLeftContig = eContig1; eNotLeftContig = eContig2; pLeftContig = pContig1_; pNotLeftContig = pContig2_; } else { eLeftContig = eContig2; eNotLeftContig = eContig1; pLeftContig = pContig2_; pNotLeftContig = pContig1_; } if ( pGuiCompareContigs_->bHighlightRightReads() ) { pLeftContig->unhighlightAllReads(); pNotLeftContig->highlightAllReads(); } // Which old contig is used at the end of the new contig? This is // important for trimming back the aligned region of reads--some will get // trimmed back more than others. compareContigsTypes eRightContig; compareContigsTypes eNotRightContig; Contig* pRightContig; Contig* pNotRightContig; getBestContigSegment( contigSegments_.length() - 1, pBestContig, nBestPaddedStart, nBestPaddedEnd ); if ( pBestContig == pContig1_ ) { eRightContig = eContig1; eNotRightContig = eContig2; pRightContig = pContig1_; pNotRightContig = pContig2_; } else { eRightContig = eContig2; eNotRightContig = eContig1; pRightContig = pContig2_; pNotRightContig = pContig1_; } // This is the padded offset of the not left contig from the // left contig. // Presumably, the left-most aligned bases are both not pads and // they are aligned. Thus the difference of their padded positions // in their respective coordinate systems gives the offset between // the coordinate systems (padded cons pos) int nNewConsPosFromNotLeftConsPos = pGetPaddedPieceOfRead( eLeftContig )->pContig_->nPaddedIndexFast( pGetPaddedPieceOfRead( eLeftContig )->nUnpaddedAlignmentStart_ ) - pGetPaddedPieceOfRead( eNotLeftContig)->pContig_->nPaddedIndexFast( pGetPaddedPieceOfRead( eNotLeftContig)->nUnpaddedAlignmentStart_ ); makeContigSegmentsContiguous( pLeftContig, nNewConsPosFromNotLeftConsPos ); assert( bContigSegmentsOK( ) ); nInNewJoinedContigAlignedRegionLeftConsPos_ = pGetPaddedPieceOfRead( eLeftContig )->pContig_->nPaddedIndexFast( pGetPaddedPieceOfRead( eLeftContig )->nUnpaddedAlignmentStart_ ); nInNewJoinedContigAlignedRegionRightConsPos_ = pGetPaddedPieceOfRead( eLeftContig )->pContig_->nPaddedIndexFast( pGetPaddedPieceOfRead( eLeftContig )->nUnpaddedAlignmentEnd_ ); // make a new contig. How long should it be? int nPaddedBasesInNewContig = 0; for( nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { int nNewPaddedStart; int nNewPaddedEnd; getBestContigSegment( nContigSeg, pBestContig, nNewPaddedStart, nNewPaddedEnd ); int nPaddedBasesInContigSegment = nNewPaddedEnd - nNewPaddedStart + 1; nPaddedBasesInNewContig += nPaddedBasesInContigSegment; } RWCString soNewContigName = ConsEd::pGetConsEd()->pGetAssembly()->soGetNewContigName(); int nNewNumberOfReads = pContig1_->nGetNumberOfFragsInContig() + pContig2_->nGetNumberOfFragsInContig(); // now try to figure out how many base segments the contig will have // As a guess, let's use the sum of the base segments in the 2 contigs int nNewNumberOfBaseSegments = pContig1_->baseSegArray_.nGetNumSegments() + pContig2_->baseSegArray_.nGetNumSegments(); // try to not screw up most of the tags that depend on direction // fix me!!! bool bComplementedFromWayPhrapCreated; if (pContig1_->bIsComplementedFromWayPhrapCreated() ) { if ( pContig2_->bIsComplementedFromWayPhrapCreated() ) bComplementedFromWayPhrapCreated = true; else { // ambiguous case--one contig is complemented and // the other is now. if ( pContig1_->nGetNumberOfFragsInContig() > pContig2_->nGetNumberOfFragsInContig() ) bComplementedFromWayPhrapCreated = true; else bComplementedFromWayPhrapCreated = false; } } else { if ( pContig2_->bIsComplementedFromWayPhrapCreated() ) { // ambiguous case again if ( pContig1_->nGetNumberOfFragsInContig() > pContig2_->nGetNumberOfFragsInContig() ) bComplementedFromWayPhrapCreated = false; else bComplementedFromWayPhrapCreated = true; } else bComplementedFromWayPhrapCreated = false; } cout << "ctor for new contig" << endl; cout.flush(); // now create space for new contig pNewContig_ = new Contig( (char*) soNewContigName.data(), nNewNumberOfReads, nNewNumberOfBaseSegments, bComplementedFromWayPhrapCreated, ConsEd::pGetAssembly() ); pNewContig_->resize( nPaddedBasesInNewContig ); ConsEd::pGetConsEd()->pGetAssembly()->addContig( pNewContig_ ); cout << " adding new bases " << endl; cout.flush(); // add bases to new contig for( nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { int nNewPaddedStart; int nNewPaddedEnd; getBestContigSegment( nContigSeg, pBestContig, nNewPaddedStart, nNewPaddedEnd ); // convert back to the padded positions of the old contigs int nPaddedStart; int nPaddedEnd; if ( pBestContig == pLeftContig ) { nPaddedStart = nNewPaddedStart; nPaddedEnd = nNewPaddedEnd; } else { nPaddedStart = nNewPaddedStart - nNewConsPosFromNotLeftConsPos; nPaddedEnd = nNewPaddedEnd - nNewConsPosFromNotLeftConsPos; } // where consensus bases are made for( int nConsPos = nPaddedStart; nConsPos <= nPaddedEnd; ++nConsPos ) { pNewContig_->append( pBestContig->ntGetCons( nConsPos ) ); } } cout << "transferring reads to new contig" << endl; cout.flush(); // transfer reads to new contig // At the same time, trim back the aligned region of each read, if // necessary. int nLeftContigAlignedRegionLeft = 1; int nLeftContigAlignedRegionRight; if ( pLeftContig == pRightContig ) nLeftContigAlignedRegionRight = pNewContig_->nGetEndConsensusIndex(); else nLeftContigAlignedRegionRight = nInNewJoinedContigAlignedRegionRightConsPos_; int nRead; for( nRead = 0; nRead < pLeftContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pLeftContig->pLocatedFragmentGet( nRead ); pLocFrag->pContig_ = pNewContig_; pLocFrag->transferTagsToNewContig( pNewContig_, 0 ); pNewContig_->addLocatedFragment( pLocFrag ); // trim back the aligned clipped region, if necessary pLocFrag->trimBackAlignedRegion( nLeftContigAlignedRegionLeft, nLeftContigAlignedRegionRight ); } // move the reads in the "not left" contig. // At the same time, trim back the aligned region of each read. int nNotLeftContigAlignedRegionLeft = nInNewJoinedContigAlignedRegionLeftConsPos_; int nNotLeftContigAlignedRegionRight; if ( pNotLeftContig == pRightContig ) nNotLeftContigAlignedRegionRight = pNewContig_->nGetEndConsensusIndex(); else nNotLeftContigAlignedRegionRight = nInNewJoinedContigAlignedRegionRightConsPos_; for( nRead = 0; nRead < pNotLeftContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pNotLeftContig->pLocatedFragmentGet( nRead ); pLocFrag->moveReadAndReadTagsToNewContig( pNewContig_, nNewConsPosFromNotLeftConsPos ); // moveReadAndReadTagsToNewContig will change the nAlignClipStart_ // and nAlignClipEnd_ to the new coordinates. Now I need to // trim them back. pLocFrag->trimBackAlignedRegion( nNotLeftContigAlignedRegionLeft, nNotLeftContigAlignedRegionRight ); } pNewContig_->updateLastDisplayableContigPos(); // updateLastDisplayableContigPos must come before // setPaddedPositionsArray since the latter uses // ntGetCons which does a check that the nConsPos // is within the displayable contig positions pNewContig_->setUnpaddedPositionsArray(); pNewContig_->setPaddedPositionsArray(); cout << "moving consensus tags..." << endl; // transfer consensus tags // the left contig positions are the same as the new contig positions // Let's see if we need to change the // bComplementedWithRespectToWayPhrapCreatedContig_ flag in the // oligo and autoFinishExpTag objects: bool bComplementFlag = ( pLeftContig->bComplementedFromWayPhrapCreated_ == pNewContig_->bComplementedFromWayPhrapCreated_ ? false : true ); int nTag; for( nTag = 0; nTag < pLeftContig->nGetNumberOfTags(); ++nTag ) { tag* pTag = pLeftContig->pGetTag( nTag ); pTag->pContig_ = pNewContig_; if ( bComplementFlag ) pTag->flipOrientation(); pNewContig_->addConsensusTag( pTag ); } // Let's see if we need to change the // bComplementedWithRespectToWayPhrapCreatedContig_ flag in the // oligo and autoFinishExpTag objects: bComplementFlag = ( pNotLeftContig->bComplementedFromWayPhrapCreated_ == pNewContig_->bComplementedFromWayPhrapCreated_ ? false : true ); // the not left contig positions must be adjusted for( nTag = 0; nTag < pNotLeftContig->nGetNumberOfTags(); ++nTag ) { tag* pTag = pNotLeftContig->pGetTag( nTag ); pTag->pContig_ = pNewContig_; pTag->nPaddedConsPosStart_ += nNewConsPosFromNotLeftConsPos; pTag->nPaddedConsPosEnd_ += nNewConsPosFromNotLeftConsPos; if ( bComplementFlag ) pTag->flipOrientation(); pNewContig_->addConsensusTag( pTag ); } // In some cases, the consensus tag may not be on the consensus // any longer. In such a case trim it back or delete the tag // altogether. Report this to the user. RWCString soConsensusTagErrors; for( nTag = pNewContig_->nGetNumberOfTags() - 1; nTag >= 0; --nTag ) { tag* pTag = pNewContig_->pGetTag( nTag ); int nIntersectLeftConsPos; int nIntersectRightConsPos; if (bIntersect( pNewContig_->nGetStartIndex(), pNewContig_->nGetEndIndex(), pTag->nPaddedConsPosStart_, pTag->nPaddedConsPosEnd_, nIntersectLeftConsPos, nIntersectRightConsPos ) ) { if ( nIntersectLeftConsPos != pTag->nPaddedConsPosStart_ || nIntersectRightConsPos != pTag->nPaddedConsPosEnd_ ) { soConsensusTagErrors += " consensus tag of type "; soConsensusTagErrors += pTag->soType_; soConsensusTagErrors += " had to be trimmed back from padded pos"; soConsensusTagErrors += RWCString( (long) pTag->nPaddedConsPosStart_ ); soConsensusTagErrors += "-"; soConsensusTagErrors += RWCString( (long) pTag->nPaddedConsPosEnd_ ); soConsensusTagErrors += " to "; soConsensusTagErrors += RWCString( (long) nIntersectLeftConsPos ); soConsensusTagErrors += "-"; soConsensusTagErrors += RWCString( (long) nIntersectRightConsPos ); soConsensusTagErrors += " in order to fit on the consensus\n\n"; } pTag->nPaddedConsPosStart_ = nIntersectLeftConsPos; pTag->nPaddedConsPosEnd_ = nIntersectRightConsPos; } else { // tag doesn't fit on consensus at all soConsensusTagErrors += "tag of type "; soConsensusTagErrors += pTag->soType_; soConsensusTagErrors += " data: "; soConsensusTagErrors += pTag->soMiscData_; soConsensusTagErrors += " mapped to padded cons pos "; soConsensusTagErrors += RWCString( (long) pTag->nPaddedConsPosStart_ ); soConsensusTagErrors += " to "; soConsensusTagErrors += RWCString( (long) pTag->nPaddedConsPosEnd_ ); soConsensusTagErrors += " which is completely off the new consensus so deleting this consensus tag.\n\n"; pNewContig_->aConsensusTags_.removeAt( nTag ); delete pTag; } } // change for Curtis Jamison, March 2, 2009, to handle case of no // base segments in the ace file. This does *not* handle the case // in which there are base segments in one contig but not the // other. It just assumes that base segments either exist in all // contigs or no contigs. if ( nNewNumberOfBaseSegments != 0 ) { // creating new base segments for( nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { int nConsPosContigSegmentStart; int nConsPosContigSegmentEnd; getBestContigSegment( nContigSeg, pBestContig, nConsPosContigSegmentStart, nConsPosContigSegmentEnd ); int nOldConsPosContigSegmentStart; if ( pBestContig == pLeftContig ) nOldConsPosContigSegmentStart = nConsPosContigSegmentStart; else nOldConsPosContigSegmentStart = nConsPosContigSegmentStart - nNewConsPosFromNotLeftConsPos; int nBaseSeg = pBestContig->baseSegArray_.nGetBaseSegmentIndexByConsPos( nOldConsPosContigSegmentStart ); bool bContinue = true; while( bContinue ) { BaseSegment* pBaseSegment = pBestContig->baseSegArray_.dapBs_[ nBaseSeg ]; int nStartInConsensus = pBaseSegment->nStartInConsensus_; int nEndInConsensus = pBaseSegment->nEndInConsensus_; if ( pBestContig == pNotLeftContig ) { nStartInConsensus += nNewConsPosFromNotLeftConsPos; nEndInConsensus += nNewConsPosFromNotLeftConsPos; } if ( nStartInConsensus > nConsPosContigSegmentEnd ) { bContinue = false; } else { BaseSegment* pNewBaseSegment = new BaseSegment( pBaseSegment->pLocFrag_, nStartInConsensus, nEndInConsensus ); // check that the base segment isn't completely before this // contig segment assert( bIntervalsIntersect( pNewBaseSegment->nStartInConsensus_, pNewBaseSegment->nEndInConsensus_, nConsPosContigSegmentStart, nConsPosContigSegmentEnd ) ); if (pNewBaseSegment->nStartInConsensus_ < nConsPosContigSegmentStart ) pNewBaseSegment->nStartInConsensus_ = nConsPosContigSegmentStart; if (pNewBaseSegment->nEndInConsensus_ > nConsPosContigSegmentEnd ) pNewBaseSegment->nEndInConsensus_ = nConsPosContigSegmentEnd; assert( pNewBaseSegment->nStartInConsensus_ <= pNewBaseSegment->nEndInConsensus_ ); pNewContig_->baseSegArray_.addPtrToNewBaseSeg( pNewBaseSegment ); ++nBaseSeg; if ( nBaseSeg >= pBestContig->baseSegArray_.nGetNumSegments() ) bContinue = false; } } // while( bContinue ) } // for( nContigSeg ... assert( pNewContig_->baseSegArray_.bGetDataStructureOk( true ) ); } // Now calculate new quality values for the consensus. I will only // recalculate it over the region that is aligned (as indicated in // the CompareContigs window). For the regions to the left and // right of these, it is fine to just use the existing quality // values from the consensus of the leftmost and rightmost contigs // (the ones that had the highest total qualities over those // regions). After all, the reads of the opposite contigs are not // even aligned against the consensus in these regions, so why care // about them? (Are the alignment clipping of reads changed?) int nRegionToRecalculateConsPosLeft; int nRegionToRecalculateConsPosRight; int nTemp; Contig* pTempContig; getBestContigSegment( 0, pTempContig, nTemp, nRegionToRecalculateConsPosLeft ); getBestContigSegment( contigSegments_.length() - 1, pTempContig, nRegionToRecalculateConsPosRight, nTemp ); assert( nRegionToRecalculateConsPosLeft <= nRegionToRecalculateConsPosRight ); pNewContig_->recalculateConsensusQualitiesAndChange( nRegionToRecalculateConsPosLeft, nRegionToRecalculateConsPosRight ); // figure out some location in the middle of the alignment // to show user when done paddedPieceOfRead* pPaddedPieceOfReadLeft = pGetPaddedPieceOfRead( eLeftContig ); int nUnpaddedMiddleOfAlignment = ( pPaddedPieceOfReadLeft->nUnpaddedAlignmentStart_ + pPaddedPieceOfReadLeft->nUnpaddedAlignmentEnd_ ) / 2; int nMiddleOfAlignmentConsPos = pPaddedPieceOfReadLeft->pContig_->nPaddedIndexFast( nUnpaddedMiddleOfAlignment ); // add a join tag at this location soCommentForTag = soCommentForTag + "\nnew contig " + pNewContig_->soGetName() + " " + " length: " + soAddCommas( pNewContig_->nGetUnpaddedSeqLength() ) + " reads: " + soAddCommas( pNewContig_->nGetNumberOfReads() ); tag* pTag = new tag( NULL, pNewContig_, "join", nMiddleOfAlignmentConsPos, nMiddleOfAlignmentConsPos, false, // bWriteToPhdFileNotAceFile soCommentForTag, // soComment, soConsed, // consed soEmptyString, // date will be current date false ); // bNoTrans pNewContig_->addConsensusTag( pTag ); // I do not think that we want to either make adding this tag // undoable or write it to the edit history file since it is // an indivisible part of a join. Thus we add it to the contig // above. // EditAction* pEditAction = new editAddConsensusTag( pTag ); //ConsEd::pGetConsEd()->doEditAction( pEditAction, // true ); // bWriteToEditHistoryFile cout << "deleting old contigs..." << endl; Contig* pContig1 = pContig1_; Contig* pContig2 = pContig2_; // prevent blinking cursor on gone contig from causing seg fault pContig1_ = NULL; pContig2_ = NULL; // now all of this is done in the contig destructor // // we don't want reads in more than one place at a time // pContig1->apLocatedFragment_.clear(); // pContig2->apLocatedFragment_.clear(); // // clean up base segments // pContig1->baseSegArray_.dapBs_.clearAndDestroy(); // pContig2->baseSegArray_.dapBs_.clearAndDestroy(); // delete pContig1->pUnpaddedContig_; // delete pContig2->pUnpaddedContig_; // delete pContig1->pUnpaddedContigComplemented_; // delete pContig2->pUnpaddedContigComplemented_; // delete pContig1->unpaddedFromPadded_.pUnpaddedPositions_; // delete pContig2->unpaddedFromPadded_.pUnpaddedPositions_; // delete pContig1->pPaddedFromUnpadded_; // delete pContig2->pPaddedFromUnpadded_; // ConsEd::pGetAssembly()->removeContig( pContig1 ); // ConsEd::pGetAssembly()->removeContig( pContig2 ); delete pContig1; delete pContig2; ConsEd::pGetConsEd()->updateContigListOnMainConsedWindow(); // now create a new contigwin and put the left contig in it, scrolled // to somewhere in the alignment--perhaps the right edge of it pNewContigWin_ = ConsEd::pGetConsEd()->pScrollExistingContigWinOrMakeNewContigWin( pNewContig_, nMiddleOfAlignmentConsPos ); ConsEd::pGetAssembly()->setChanged(); delete pPleaseWait; ConsEd::pGetConsEd()->disallowUndos(); RWCString soMessage = "Join complete. You must save the assembly before doing any other edits or else the .wrk recovery file will not be usable. All reads from the old right contig are highlighted. To unhighlight, go to the Misc menu and choose \"Unhighlight All Reads\"\n\n" + soConsensusTagErrors; popupInfoMessage( pNewContigWin_->pGcw_->widGetGuiContigWinTopLevel(), soMessage ); } void compareContigs :: findEndOfMatchSegment( const int nStartPadded, int& nEndPadded, int& nEndUnpadded1, int& nEndUnpadded2, bool& bMatchSegmentExists ) { bool bEndOfAlignment = false; int nPadded = nStartPadded; // two ways to terminate: 1) end of alignment // 2) find discrepancy // xxmmmmmxmmxmmmmmxmmmmmmxmm (end ) // ---||-------||----|||----- // In either case, back up 4 bases. This may be before nStartPadded, // in which case there is no matching segment. If this is at nStartPadded // or after, this finishes a matching segment (which may be just one // length). bool bContinue = true; while( bContinue ) { if ( nPadded > pPaddedPieceOfReadTop_->nPaddedAlignmentEnd_ ) { bContinue = false; bEndOfAlignment = true; } else { if ( pPaddedPieceOfReadTop_->soBasesAndPads_[ nPadded ] != pPaddedPieceOfReadBot_->soBasesAndPads_[ nPadded ] ) { // found discrepancy bContinue = false; } else ++nPadded; } } // only ways to leave loop are that are now past the end of the // alignment or have found a discrepancy. Being off the end // of the alignment is considered a discrepancy, so they are // handled identically. if ( nStartPadded <= (nPadded - nBackUpFromFirstDiscrepancy ) ) { nEndPadded = nPadded - nBackUpFromFirstDiscrepancy; bMatchSegmentExists = true; } else { // set up for next findEndOfDiscrepantSegment nEndPadded = nStartPadded - 1; bMatchSegmentExists = false; } nEndUnpadded1 = pPaddedPieceOfReadTop_->nUnpaddedContigPosFromZeroBasedPaddedPos( nEndPadded ); nEndUnpadded2 = pPaddedPieceOfReadBot_->nUnpaddedContigPosFromZeroBasedPaddedPos( nEndPadded ); } void compareContigs :: findEndOfDiscrepantSegment( const int nPaddedStart, // zero-based paddedPieceOfRead coordinates int& nPaddedEnd, // zero-based paddedPieceOfRead coordinates int& nUnpaddedEnd1, // in unpadded pContig1_ coordinates int& nUnpaddedEnd2, // in unpadded pContig2_ coordinates bool& bEndOfAlignment ) { int nPadded = nPaddedStart; // ways to terminate: // 1) end of alignment // 2) matching segment 5 chars long int nMatchingBasesInARow = 0; bEndOfAlignment = false; bool bContinue = true; while( bContinue ) { if ( nPadded > pPaddedPieceOfReadTop_->nPaddedAlignmentEnd_ ) { bContinue = false; bEndOfAlignment = true; } else { if ( pPaddedPieceOfReadTop_->soBasesAndPads_[ nPadded ] == pPaddedPieceOfReadBot_->soBasesAndPads_[ nPadded ] ) { ++nMatchingBasesInARow; if ( nMatchingBasesInARow >= nNumberOfMatchingBasesInARowInAMatchSegment ) { bContinue = false; } } else { nMatchingBasesInARow = 0; } } if (bContinue ) ++nPadded; } if (bEndOfAlignment ) { nPaddedEnd = 0; // not used nUnpaddedEnd1 = pContig1_->nGetUnpaddedEndIndex(); nUnpaddedEnd2 = pContig2_->nGetUnpaddedEndIndex(); } else { nPaddedEnd = nPadded - nBackUpInMatchSegment; nUnpaddedEnd1 = pGetPaddedPieceOfRead( eContig1 )->nUnpaddedContigPosFromZeroBasedPaddedPos( nPaddedEnd ); nUnpaddedEnd2 = pGetPaddedPieceOfRead( eContig2 )->nUnpaddedContigPosFromZeroBasedPaddedPos( nPaddedEnd ); } } void compareContigs :: addContigSegment( const int nUnpaddedStart1, const int nUnpaddedEnd1, const int nUnpaddedStart2, const int nUnpaddedEnd2 ) { contigSegment cs; cs.nUnpaddedStart1_ = nUnpaddedStart1; cs.nUnpaddedEnd1_ = nUnpaddedEnd1; cs.nUnpaddedStart2_ = nUnpaddedStart2; cs.nUnpaddedEnd2_ = nUnpaddedEnd2; contigSegments_.append( cs ); } void compareContigs :: getBestContigSegment( const int nContigSeg, Contig*& pBestContig, int& nBestPaddedStart, int& nBestPaddedEnd ) { contigSegment cs = contigSegments_[ nContigSeg ]; if ( cs.bBestContigIsContig1NotContig2_ ) { pBestContig = pContig1_; } else { pBestContig = pContig2_; } nBestPaddedStart = cs.nNewPaddedStart_; nBestPaddedEnd = cs.nNewPaddedEnd_; } void compareContigs :: calculateTotalQualitiesOfContigSegment( contigSegment& cs ) { int nTotalQuality1 = 0; int nUnpadded; for( nUnpadded = cs.nUnpaddedStart1_; nUnpadded <= cs.nUnpaddedEnd1_; ++nUnpadded ) { int nConsPos = pContig1_->nPaddedIndexFast( nUnpadded ); if ( pContig1_->ntGetCons( nConsPos ).cGetBase() != 'x' ) { int nQuality = pContig1_->ntGetCons( nConsPos ).qualGetQualityWithout98or99(); if ( nQuality == 0 ) nQuality = 1; nTotalQuality1 += nQuality; } } int nTotalQuality2 = 0; for( nUnpadded = cs.nUnpaddedStart2_; nUnpadded <= cs.nUnpaddedEnd2_; ++nUnpadded ) { int nConsPos = pContig2_->nPaddedIndexFast( nUnpadded ); if ( pContig2_->ntGetCons( nConsPos ).cGetBase() != 'x' ) { int nQuality = pContig2_->ntGetCons( nConsPos ).qualGetQualityWithout98or99(); if ( nQuality == 0 ) nQuality = 1; nTotalQuality2 += nQuality; } } if ( nTotalQuality1 > nTotalQuality2 ) cs.bBestContigIsContig1NotContig2_ = true; else cs.bBestContigIsContig1NotContig2_ = false; } bool compareContigs :: bContigSegmentsOK( ) { // This is the padded offset of the not left contig from the // left contig. // Presumably, the left-most aligned bases are both not pads and // they are aligned. Thus the difference of their padded positions // in their respective coordinate systems gives the offset between // the coordinate systems (padded cons pos) int nConsPos1FromConsPos2 = pGetPaddedPieceOfRead( eContig1 )->pContig_->nPaddedIndexFast( pGetPaddedPieceOfRead( eContig1 )->nUnpaddedAlignmentStart_ ) - pGetPaddedPieceOfRead( eContig2 )->pContig_->nPaddedIndexFast( pGetPaddedPieceOfRead( eContig2 )->nUnpaddedAlignmentStart_ ); // check that starting locations of each contig correspond // check that ending locations of each contig correspond int nOldUnpaddedEnd1; int nOldUnpaddedEnd2; contigSegment csOld; bool bOK = true; for( int nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { contigSegment cs = contigSegments_[ nContigSeg ]; int nConsPosStart1 = pContig1_->nPaddedIndexFast( cs.nUnpaddedStart1_ ); int nConsPosStart2 = pContig2_->nPaddedIndexFast( cs.nUnpaddedStart2_ ); if ( ( nConsPosStart1 - nConsPosStart2 ) != nConsPos1FromConsPos2 ) { if ( nContigSeg != 0 ) { cerr << "boink 1 nContigSeg = " << nContigSeg << " cs.nUnpaddedStart1_ = " << cs.nUnpaddedStart1_ << " cs.nUnpaddedStart2_ = " << cs.nUnpaddedStart2_ << " nConsPosStart1 = " << nConsPosStart1 << " nConsPosStart2 = " << nConsPosStart2 << " nConsPos1FromConsPos2 = " << nConsPos1FromConsPos2 << endl; bOK = false; } } int nConsPosEnd1 = pContig1_->nPaddedIndexFast( cs.nUnpaddedEnd1_ ); int nConsPosEnd2 = pContig2_->nPaddedIndexFast( cs.nUnpaddedEnd2_ ); if ( ( nConsPosEnd1 - nConsPosEnd2 ) != nConsPos1FromConsPos2 ) { if ( nContigSeg != ( contigSegments_.length() - 1 ) ) { cerr << "boink 2 nContigSeg = " << nContigSeg << " cs.nUnpaddedEnd1_ = " << cs.nUnpaddedEnd1_ << " cs.nUnpaddedEnd2_ = " << cs.nUnpaddedEnd2_ << " nConsPosEnd1 = " << nConsPosEnd1 << " nConsPosEnd2 = " << nConsPosEnd2 << " nConsPos1FromConsPos2 = " << nConsPos1FromConsPos2 << endl; bOK = false; } } if ( nContigSeg != 0 ) { if ( ! ( ( ( nOldUnpaddedEnd1 + 1 ) == cs.nUnpaddedStart1_ ) && ( ( nOldUnpaddedEnd2 + 1 ) == cs.nUnpaddedStart2_ ) && ( ( csOld.nNewPaddedEnd_ + 1 ) == cs.nNewPaddedStart_ ) ) ) { cerr << "boink not contiguous nContigSeg = " << nContigSeg << " cs.nUnpaddedEnd1_ = " << cs.nUnpaddedEnd1_ << " cs.nUnpaddedEnd2_ = " << cs.nUnpaddedEnd2_ << " nConsPosEnd1 = " << nConsPosEnd1 << " nConsPosEnd2 = " << nConsPosEnd2 << " nConsPos1FromConsPos2 = " << nConsPos1FromConsPos2 << " nOldUnpaddedEnd1 = " << nOldUnpaddedEnd1 << " nOldUnpaddedEnd2 = " << nOldUnpaddedEnd2 << " cs.nUnpaddedStart1_ = " << cs.nUnpaddedStart1_ << " cs.nUnpaddedStart2_ = " << cs.nUnpaddedStart2_ << " csOld.nUnpaddedStart1_ = " << csOld.nUnpaddedStart1_ << " csOld.nUnpaddedStart2_ = " << csOld.nUnpaddedStart2_ << " csOld.nUnpaddedEnd1_ = " << csOld.nUnpaddedEnd1_ << " csOld.nUnpaddedEnd2_ = " << csOld.nUnpaddedEnd2_ << endl; bOK = false; } } // if ( nContigSeg != 0 ) ... csOld = cs; nOldUnpaddedEnd1 = cs.nUnpaddedEnd1_; nOldUnpaddedEnd2 = cs.nUnpaddedEnd2_; } return( bOK ); } // consensus // A*B contig source, CD contig dest // alignment // AB contig source // CD contig dest // nSourceUnpadded is the position of A // nDestUnpadded is the position of C // nDestPadded is the position of C // Hence must insert just before nDestPadded + 1 // final alignment should be: // A*B // C*D // If consensus is A*BC and DE // and alignment is ABC // D*E // then final alignment should be: // A*BC // D**E // The first column of pads cannot be removed because there is // some read in the A*BC contig that has a base in that column and // the pad is necessary in order to make room for that base. The 2 // pads in D**E are necessary so that A aligns with D and C aligns with // E // If the consensus is A*B*C and DE // and the alignment is ABC // D*E // then the final alignment should be: // A*B*C // D***E // If the consensus is A*B and CDE // and the alignment is A*B // CDE // then the final alignment is (?) // A*B // CDE // or alternatively: // A**B // C*DE // where the first column contains the base from the read in the AB // contig that has an extra base and the 3rd column has all pads in // the AB contig and the high quality bases in the CDE contig. void compareContigs :: addColumnsOfPadsToAlignContigs() { int nAlignmentIndexOfRightNonPadAlignment; bool bFirstTime = true; for( int nAlignmentIndex = pPaddedPieceOfReadTop_->nPaddedAlignmentEnd_; nAlignmentIndex >= pPaddedPieceOfReadTop_->nPaddedAlignmentStart_; --nAlignmentIndex ) { if ( ( pPaddedPieceOfReadTop_->soBasesAndPads_[ nAlignmentIndex ] != '*' ) && ( pPaddedPieceOfReadBot_->soBasesAndPads_[ nAlignmentIndex ] != '*' ) ) { // found an alignment of two non-pads if (bFirstTime ) { bFirstTime = false; nAlignmentIndexOfRightNonPadAlignment = nAlignmentIndex; } else { int nAlignmentIndexOfLeftNonPadAlignment = nAlignmentIndex; int nUnpaddedLeft1 = pPaddedPieceOfReadTop_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfLeftNonPadAlignment ); int nPaddedLeft1 = pContig1_->nPaddedIndexFast( nUnpaddedLeft1 ); int nUnpaddedRight1 = pPaddedPieceOfReadTop_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfRightNonPadAlignment ); int nPaddedRight1 = pContig1_->nPaddedIndexFast( nUnpaddedRight1 ); int nUnpaddedLeft2 = pPaddedPieceOfReadBot_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfLeftNonPadAlignment ); int nPaddedLeft2 = pContig2_->nPaddedIndexFast( nUnpaddedLeft2 ); int nUnpaddedRight2 = pPaddedPieceOfReadBot_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfRightNonPadAlignment ); int nPaddedRight2 = pContig2_->nPaddedIndexFast( nUnpaddedRight2 ); // check if the 2 contigs already have the same number // of bases and pads between the two aligned non-pads int nContig1MoreBasesThanContig2 = ( nPaddedRight1 - nPaddedLeft1 ) - ( nPaddedRight2 - nPaddedLeft2 ); if ( nContig1MoreBasesThanContig2 > 0 ) { // better add some pads to contig2 to even them up for( int n = 1; n <= nContig1MoreBasesThanContig2; ++n ) { pContig2_->insertColOfPads( nPaddedLeft2 + 1 ); cout << "."; cout.flush(); } } else if ( nContig1MoreBasesThanContig2 < 0 ) { // better add some pads to contig1 to even them up for( int n = 1; n <= -nContig1MoreBasesThanContig2; ++n ) { pContig1_->insertColOfPads( nPaddedLeft1 + 1 ); cout << "."; cout.flush(); } } else { // they are even--great. Nothing to do. } // set up for next time nAlignmentIndexOfRightNonPadAlignment = nAlignmentIndexOfLeftNonPadAlignment; } } // if ( ( pPaddedPieceOfReadTop_->soBases... } // for( int nAlignmentIndex ... cout << "done" << endl; } bool compareContigs :: bCheckAddColumnsOfPadsToAlignContigs() { bool bOK = true; int nAlignmentIndexOfRightNonPadAlignment; bool bFirstTime = true; for( int nAlignmentIndex = pPaddedPieceOfReadTop_->nPaddedAlignmentEnd_; nAlignmentIndex >= pPaddedPieceOfReadTop_->nPaddedAlignmentStart_; --nAlignmentIndex ) { if ( ( pPaddedPieceOfReadTop_->soBasesAndPads_[ nAlignmentIndex ] != '*' ) && ( pPaddedPieceOfReadBot_->soBasesAndPads_[ nAlignmentIndex ] != '*' ) ) { // found an alignment of two non-pads if (bFirstTime ) { bFirstTime = false; nAlignmentIndexOfRightNonPadAlignment = nAlignmentIndex; } else { int nAlignmentIndexOfLeftNonPadAlignment = nAlignmentIndex; int nUnpaddedLeft1 = pPaddedPieceOfReadTop_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfLeftNonPadAlignment ); int nPaddedLeft1 = pContig1_->nPaddedIndexFast( nUnpaddedLeft1 ); int nUnpaddedRight1 = pPaddedPieceOfReadTop_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfRightNonPadAlignment ); int nPaddedRight1 = pContig1_->nPaddedIndexFast( nUnpaddedRight1 ); int nUnpaddedLeft2 = pPaddedPieceOfReadBot_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfLeftNonPadAlignment ); int nPaddedLeft2 = pContig2_->nPaddedIndexFast( nUnpaddedLeft2 ); int nUnpaddedRight2 = pPaddedPieceOfReadBot_->nUnpaddedContigPosFromZeroBasedPaddedPos( nAlignmentIndexOfRightNonPadAlignment ); int nPaddedRight2 = pContig2_->nPaddedIndexFast( nUnpaddedRight2 ); // check if the 2 contigs already have the same number // of bases and pads between the two aligned non-pads int nContig1MoreBasesThanContig2 = ( nPaddedRight1 - nPaddedLeft1 ) - ( nPaddedRight2 - nPaddedLeft2 ); if ( nContig1MoreBasesThanContig2 != 0 ) { bOK = false; cout << "boink 3 " << " unpadded range " << pContig1_->soGetName() << " " << nUnpaddedLeft1 << " " << nUnpaddedRight1 << " " << pContig2_->soGetName() << " " << nUnpaddedLeft2 << " " << nUnpaddedRight2; cout << "padded range: " << nPaddedLeft1 << " " << nPaddedRight1 << " " << nPaddedLeft2 << " " << nPaddedRight2 << endl; } // set up for next time nAlignmentIndexOfRightNonPadAlignment = nAlignmentIndexOfLeftNonPadAlignment; } } // if ( ( pPaddedPieceOfReadTop_->soBases... } // for( int nAlignmentIndex ... return( bOK ); } void compareContigs :: dumpContigSegments() { for( int nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { printf( "----------------------------------\n" ); printf( "contig seg %5d %10d %10d %d\n", nContigSeg, contigSegments_[ nContigSeg ].nUnpaddedStart1_, contigSegments_[ nContigSeg ].nUnpaddedEnd1_, contigSegments_[ nContigSeg ].bBestContigIsContig1NotContig2_ ); printf( " %10d %10d %d\n", contigSegments_[ nContigSeg ].nUnpaddedStart2_, contigSegments_[ nContigSeg ].nUnpaddedEnd2_, contigSegments_[ nContigSeg ].bBestContigIsContig1NotContig2_ ); printf( "new padded: %d to %d\n", contigSegments_[ nContigSeg].nNewPaddedStart_, contigSegments_[ nContigSeg].nNewPaddedEnd_ ); } } void compareContigs :: makeContigSegmentsContiguous( Contig* pLeftContig, const int nNewConsPosFromNotLeftConsPos ) { // first calculate the padded positions of the contig segments // The starting position of the first contig segment is a special case // since the starting bases are not usually aligned. Thus we need // to know which contig to use for the first one. We've already // determined that--it is pLeftContig. Thus we use its starting // location for calculating the starting padded position of the // first contig segment. No, the first and last are no longer // special cases. Now we just use the starting and ending of // whichever is the best contig for a particular contig segment. int nContigSeg; for( nContigSeg = 0; nContigSeg < contigSegments_.length(); ++nContigSeg ) { int nPaddedStart; int nPaddedEnd; bool bBestContigLeftContig; if ( contigSegments_[ nContigSeg ].bBestContigIsContig1NotContig2_ ) { // this is a pContig1_ contig segment, so calculate // the padded start and padded end relative to pContig1_ nPaddedStart = pContig1_->nPaddedIndexFast( contigSegments_[ nContigSeg ].nUnpaddedStart1_ ); nPaddedEnd = pContig1_->nPaddedIndexFast( contigSegments_[ nContigSeg ].nUnpaddedEnd1_ ); bBestContigLeftContig = ( pLeftContig == pContig1_ ); } else { // this is a pContig2_ contig segment, so calculate // the padded start and padded end relative to pContig2_ nPaddedStart = pContig2_->nPaddedIndexFast( contigSegments_[ nContigSeg ].nUnpaddedStart2_ ); nPaddedEnd = pContig2_->nPaddedIndexFast( contigSegments_[ nContigSeg ].nUnpaddedEnd2_ ); bBestContigLeftContig = ( pLeftContig == pContig2_ ); } if ( nContigSeg == 0 ) assert( bBestContigLeftContig ); if ( !bBestContigLeftContig ) { nPaddedStart += nNewConsPosFromNotLeftConsPos; nPaddedEnd += nNewConsPosFromNotLeftConsPos; } contigSegments_[ nContigSeg ].nNewPaddedStart_ = nPaddedStart; contigSegments_[ nContigSeg ].nNewPaddedEnd_ = nPaddedEnd; } // now make contig segments contiguous for( nContigSeg = 1; nContigSeg < contigSegments_.length(); ++nContigSeg ) { if ( ( contigSegments_[ nContigSeg - 1 ].nNewPaddedEnd_ + 1 ) != contigSegments_[ nContigSeg ].nNewPaddedStart_ ) { assert( contigSegments_[ nContigSeg - 1 ].nNewPaddedEnd_ < contigSegments_[ nContigSeg ].nNewPaddedStart_ ); contigSegments_[ nContigSeg - 1 ].nNewPaddedEnd_ = contigSegments_[ nContigSeg ].nNewPaddedStart_ - 1; } } // if the best 1st consensus starts with a pad, then the first contigSegment // will not start at position 1, which will cause a bug since the first // base segment will not start at position 1 so an exception will // be thrown. (July 2002). contigSegment& csFirstCS = contigSegments_[0]; if ( csFirstCS.nNewPaddedStart_ != 1 ) { Contig* pContig = ( csFirstCS.bBestContigIsContig1NotContig2_ ? pContig1_ : pContig2_ ); // I believe this is the only way this could happen assert( pContig->ntGetCons( 1 ).bIsPad() ); csFirstCS.nNewPaddedStart_ = 1; } // I am not doing the same with the last contigSegment since // the new consensus doesn't include terminal pads } void compareContigs :: createFakeRead() { if (! pNewContigWin_ ) { popupErrorMessage( "Hey! Joe! What are you doing trying to add a linking read when you haven't even joined the contigs together? (Do that first by clicking on 'Join Contigs')" ); return; } int nConsPosMin = nInNewJoinedContigAlignedRegionLeftConsPos_ - consedParameters::pGetConsedParameters()->nWhenMakingFakeReadToJoinContigsAddThisManyBasesOnEitherSideOfAlignedRegion_; int nConsPosMax = nInNewJoinedContigAlignedRegionRightConsPos_ + consedParameters::pGetConsedParameters()->nWhenMakingFakeReadToJoinContigsAddThisManyBasesOnEitherSideOfAlignedRegion_; if ( nConsPosMin < pNewContig_->nGetStartIndex() ) nConsPosMin = pNewContig_->nGetStartIndex(); if ( pNewContig_->nGetEndIndex() < nConsPosMax ) nConsPosMax = pNewContig_->nGetEndIndex(); assert( nConsPosMin < nConsPosMax ); // find a name for the linking read that isn't already in the phd // directory FileName filPHDFilename; int n = 1; char szReadName[1000]; bool bUnique = false; while( !bUnique ) { if ( pCP->bNameOfFakeJoiningReadsIncludesAceFileName_ ) sprintf( szReadName, "JoiningRead_%s_%d.a1", ConsEd::pGetAssembly()->soGetFirstPartOfAceFileName().data(), n ); else sprintf( szReadName, "JoiningRead_%d.a1", n ); filPHDFilename = ConsEd::pGetConsEd()->filGetPHDDir() + "/" + szReadName + ".phd.1"; if ( filPHDFilename.bFileByThisNameExists() ) ++n; else bUnique = true; } pFakeLocFrag_ = new LocatedFragment( szReadName, nConsPosMin, false, // not complemented pNewContig_ ); pNewContig_->addLocatedFragment( pFakeLocFrag_ ); int nNumberOfBases = nConsPosMax - nConsPosMin + 1; char* szBases = pNewContig_->szGetPartOfConsensus( nConsPosMin, nNumberOfBases ); // strlen must be used because szGetPartOfConsensus doesn't necessarily // return nNumberOfBases if it goes over the end of the consensus pFakeLocFrag_->setSequence2( szBases, strlen( szBases ) ); pFakeLocFrag_->createPointPosArray( true, // know max trace index 0, // what is the max index pFakeLocFrag_->length(), // nInitialSizeOfArray true ); // bFillUpArray // now set quality to zero and set peak positions to 0 also for( int nReadPos = pFakeLocFrag_->nGetStartIndex(); nReadPos <= pFakeLocFrag_->nGetEndIndex(); ++nReadPos ) { pFakeLocFrag_->setQualityAtSeqPos( nReadPos, 0 ); pFakeLocFrag_->setTracePointPosAtSeqPos( nReadPos, 0 ); } // make the whole read low quality // and make the whole read aligned pFakeLocFrag_->setQualAndAlignClipping( -1, -1, pFakeLocFrag_->nGetAlignStart(), pFakeLocFrag_->nGetAlignEnd() ); // this is required--the -1, -1 above will be translated otherwise pFakeLocFrag_->setWholeReadIsLowQuality(); // do not include directory with phd filename when writing ace file pFakeLocFrag_->setPHDFilename( filPHDFilename.soGetBasename() ); pFakeLocFrag_->setChromatFilename( "no_chromat" ); char* szTimestamp = szGetTime(); pFakeLocFrag_->setPHDTimestamp( szTimestamp ); wholeReadItem* pWR = new wholeReadItem( pFakeLocFrag_, "fake", // type "consed", // source soGetDateTime( nNoSlashes ), "" ); pFakeLocFrag_->aWholeReadItems_.append( pWR ); pFakeLocFrag_->writePHD( filPHDFilename ); // this will make the new fake read appear on the screen pNewContigWin_->horizontalScrollBarMoved( nInNewJoinedContigAlignedRegionLeftConsPos_ ); popupInfoMessage( pGuiCompareContigs_->widPopupShell_, "artificial joining read %s added.\nIf you want to continue editing, please save the assembly before further editing.\nIf you want to remove this artificial read, you must delete the phd file and reassemble.", (char*) pFakeLocFrag_->soGetName().data() ); ConsEd::pGetAssembly()->setNumberOfReadsInAssemblyAccurate(); ConsEd::pGetAssembly()->setChanged(); }