/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "digestForOneEnzyme.h" #include "correspondingFragments.h" #include "restrictionFragment.h" #include "contigEnd.h" #include "contig.h" #include "guiDisplayDigest.h" #include using namespace std; #include "consed.h" #include "arrayOfRestrictionFragments.h" #include "consedParameters.h" #include "popupErrorMessage.h" digestForOneEnzyme :: ~digestForOneEnzyme() { aPredictedRestrictionFragmentsNotEndingAtVectorInsertJunction_.clearAndDestroy(); aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.clearAndDestroy(); aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.clearAndDestroy(); // do not clearAndDestroy the aPredictedResFragsBySize_ or the // aPredictedResFragsByPosition_ aActualRestrictionFragments_.clearAndDestroy(); } void digestForOneEnzyme :: processingAfterFindingAllPartialFragments( const bool bFlipVectorFromDefault ) { // transfer all fragments that are not going to be put together to // the aPredictedResFragsBySize_ list // it is usually clear already (I think), but if we are flipping // the end fragments, we need to clear it to prepare for recreating it. aPredictedResFragsBySize_.clear(); for( int nFrag = 0; nFrag < aPredictedRestrictionFragmentsNotEndingAtVectorInsertJunction_.length(); ++nFrag ) { aPredictedResFragsBySize_.insert( aPredictedRestrictionFragmentsNotEndingAtVectorInsertJunction_[ nFrag ] ); } dealWithPartialFragments( bFlipVectorFromDefault ); // sort the predicted fragments based on size aPredictedResFragsBySize_.sortFragmentsBySize(); // calculate their gel positions and the maximum gel position int nMax = 0; for( int nPredicted = 0; nPredicted < aPredictedResFragsBySize_.length(); ++nPredicted ) { restrictionFragment* pFrag = aPredictedResFragsBySize_[ nPredicted ]; pFrag->fPositionOnGel_ = ConsEd::fGetPositionOnGelFromRestrictionFragmentSize( pFrag->nSize_ ); if ( pFrag->fPositionOnGel_ > nMax ) nMax = pFrag->fPositionOnGel_; pFrag->pDigestForOneEnzyme_ = this; } for( int nActualFrag = 0; nActualFrag < aActualRestrictionFragments_.length(); ++nActualFrag ) { restrictionFragment* pActualFrag = aActualRestrictionFragments_[ nActualFrag ]; if ( pActualFrag->fPositionOnGel_ > nMax ) nMax = pActualFrag->fPositionOnGel_; } nMaxGelPosition_ = nMax; relatePredictedAndActualFragments(); aPredictedResFragsByConsPos_ = aPredictedResFragsBySize_; aPredictedResFragsByConsPos_.sortFragmentsByPosition(); } void digestForOneEnzyme :: combineFragments( restrictionFragment* pInsertFrag, const int nWhichEndOfInsert, const int nWhichEndOfVector, restrictionFragment*& pNewFrag ) { fprintf( stderr, "combining fragments from %s of %s with the %s of vector\n", szWhichGap( nWhichEndOfInsert ), ( nWhichEndOfInsert == nLeftGap ? pInsertFrag->pLeftContig_->soGetName().data() : pInsertFrag->pRightContig_->soGetName().data() ), szWhichGap( nWhichEndOfVector ) ); bool bFound = false; restrictionFragment* pVectorFrag = NULL; for( int nFrag = 0; nFrag < aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.length(); ++nFrag ) { pVectorFrag = aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_[ nFrag ]; if ( nWhichEndOfVector == nLeftGap ) { if ( pVectorFrag->fragLeft_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) { bFound = true; break; } } else { if ( pVectorFrag->fragRight_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) { bFound = true; break; } } } if ( bFound ) { combineFragments2( pInsertFrag, pVectorFrag, pNewFrag ); } // if ( bFound ) else { // couldn't find the vector fragment to the needed end. // This could happen, for instance, if the vector file isn't // correctly set up. pNewFrag = new restrictionFragment( restrictionFragment::PREDICTED_FRAGMENT ); *pNewFrag = *pInsertFrag; // I still set this flag since I don't want to try to put it // together again. pInsertFrag->bPartialFragmentHasBeenPutTogether_ = true; } } void digestForOneEnzyme :: relatePredictedAndActualFragments() { // clear the correpondences. This is because we might be flipping // the vector, and do not want to use the old correspondences. int nFrag; for( nFrag = 0; nFrag < aPredictedResFragsBySize_.length(); ++nFrag ) { restrictionFragment* pFrag = aPredictedResFragsBySize_[ nFrag ]; pFrag->pCorrespondingFrag_ = NULL; } for( nFrag = 0; nFrag < aActualRestrictionFragments_.length(); ++nFrag ) { restrictionFragment* pFrag = aActualRestrictionFragments_[ nFrag ]; pFrag->pCorrespondingFrag_ = NULL; } // now relate them int nPredictedFrag = 0; int nActualFrag = 0; while( nPredictedFrag < aPredictedResFragsBySize_.length() && nActualFrag < aActualRestrictionFragments_.length() ) { restrictionFragment* pPredictedFrag = aPredictedResFragsBySize_[ nPredictedFrag ]; restrictionFragment* pActualFrag = aActualRestrictionFragments_[ nActualFrag ]; if ( ABS( pPredictedFrag->fPositionOnGel_ - pActualFrag->fPositionOnGel_ ) < pCP->nRestrictionDigestToleranceInPositionUnits_ ) { // we've got a pair of bands within tolerance! pPredictedFrag->pCorrespondingFrag_ = pActualFrag; pActualFrag->pCorrespondingFrag_ = pPredictedFrag; ++nPredictedFrag; ++nActualFrag; } else { // If it looks like this: // __________ (actual) // ___________ (predicted) // then advance the actual. // If it looks like this: // ___________ (predicted) // ----------(actual) // then advance the predicted. if ( pActualFrag->fPositionOnGel_ < pPredictedFrag->fPositionOnGel_ ) ++nActualFrag; else ++nPredictedFrag; } } } void digestForOneEnzyme :: combineRemainingVectorAndInsertPartialFragments( const bool bFlipVectorFromDefault ) { int nPartialFragInit = ( bFlipVectorFromDefault ? aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.length() - 1 : 0 ); int nIncrement = ( bFlipVectorFromDefault ? -1 : 1 ); for( int nPartialFrag = nPartialFragInit; ( bFlipVectorFromDefault ? ( nPartialFrag >= 0 ) : ( nPartialFrag < aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.length() ) ); nPartialFrag += nIncrement ) { restrictionFragment* pPartialRes = aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_[ nPartialFrag ]; if ( pPartialRes->bPartialFragmentHasBeenPutTogether_ ) continue; // if reached here, we have a partial insert fragment that could // not be put together. If there is any remaining vector fragment, // combine it with this one. bool bFound = false; restrictionFragment* pVectorFragNotYetPutTogether = NULL; for( int nVectorFrag = 0; nVectorFrag < aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.length() && !bFound; ++nVectorFrag ) { restrictionFragment* pVectorFrag = aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_[ nVectorFrag ]; if ( !pVectorFrag->bPartialFragmentHasBeenPutTogether_ ) { bFound = true; pVectorFragNotYetPutTogether = pVectorFrag; } } if ( !bFound ) { popupErrorMessage( "There are more partial insert fragments than partial vector fragments so we cannot put them all together. There were %d insert fragments ending at a vector-insert junction and %d vector fragments ending at a vector insert junction", aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.length(), aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.length() ); return; } restrictionFragment* pNewFrag = NULL; combineFragments2( pPartialRes, pVectorFragNotYetPutTogether, pNewFrag ); aPredictedResFragsBySize_.insert( pNewFrag ); } // now see if any partial vector fragments are left over. This // could happen if one of the insert fragments ended at a cut site // at the vector-insert junction. Since St Louis does not put the // same cut site on both the vector sequence and the insert // sequence (they just put it on the insert sequence), the // guiDisplayDigest::getVectorFragments will not detect that the // vector fragment ends at the vector-insert junction. // there could also be partial vector fragments left over if the // user is digesting an assembly that isn't complete so it doesn't // yet contain the insert up to the vector-insert junction. In // this case, the partial vector fragments would really be // partial--they would not match the actual fragments. // work to help with Aye-Mon's project--Oct 2002 for( int nVectorFrag = 0; nVectorFrag < aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.length(); ++nVectorFrag ) { restrictionFragment* pVectorFrag = aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_[ nVectorFrag ]; if ( ! pVectorFrag->bPartialFragmentHasBeenPutTogether_ ) { // make a complete fragment out of this RWCString soExplanation; pVectorFrag->getDescriptionLine( soExplanation ); cerr << "left over partial vector fragment is being made into a complete fragment: " << soExplanation << endl; pVectorFrag->bPartialFragmentHasBeenPutTogether_ = true; aPredictedResFragsBySize_.insert( pVectorFrag ); } } } void digestForOneEnzyme :: combineFragments2( restrictionFragment* pPartialInsertFragment, restrictionFragment* pPartialVectorFragment, restrictionFragment*& pNewFrag ) { pNewFrag = new restrictionFragment( restrictionFragment::PREDICTED_FRAGMENT ); // combine fragments. Apparently there are 4 different ways // to combine them: ----------------- ----------- // A B C D // contig vector // A goes with C // A goes with D // B goes with C // B goes with D pNewFrag->nSize_ = pPartialInsertFragment->nSize_ + pPartialVectorFragment->nSize_; pNewFrag->nExtendsOffWhichEndOfContig_ = ( pPartialInsertFragment->fragRight_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ? nRightGap : nLeftGap ); pNewFrag->nPositionOrder_ = MIN( pPartialVectorFragment->nPositionOrder_, pPartialInsertFragment->nPositionOrder_ ); pNewFrag->bCombinedVectorAndInsertPartialFragments_ = true; // I'm going to use the insert coordinates for the new // fragment. Thus the user will see the partial insert fragment // location, but will see the size of the sum of the partial // insert fragment and partial vector fragment pNewFrag->fragLeft_ = pPartialInsertFragment->fragLeft_; pNewFrag->pLeftContig_ = pPartialInsertFragment->pLeftContig_; pNewFrag->nUnpaddedLeft_ = pPartialInsertFragment->nUnpaddedLeft_; pNewFrag->fragRight_ = pPartialInsertFragment->fragRight_; pNewFrag->pRightContig_ = pPartialInsertFragment->pRightContig_; pNewFrag->nUnpaddedRight_ = pPartialInsertFragment->nUnpaddedRight_; pPartialInsertFragment->bPartialFragmentHasBeenPutTogether_ = true; pPartialVectorFragment->bPartialFragmentHasBeenPutTogether_ = true; } void digestForOneEnzyme :: dealWithPartialFragments( const bool bFlipVectorFromDefault ) { // now let's see if we can put together the remaining partial fragments // which are // in // aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_ // and // aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_ // In order to do this, we need to figure out which end of the vector // is connected to which. // first clear the flag since none of these has yet been put together int nPartialFrag; for( nPartialFrag = 0; nPartialFrag < aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.length(); ++nPartialFrag ) { restrictionFragment* pPartialRes = aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_[ nPartialFrag ]; pPartialRes->bPartialFragmentHasBeenPutTogether_ = false; } for( nPartialFrag = 0; nPartialFrag < aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.length(); ++nPartialFrag ) { restrictionFragment* pPartialRes = aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_[ nPartialFrag ]; pPartialRes->bPartialFragmentHasBeenPutTogether_ = false; } // special case: no cut sites in vector and two partial insert fragments // so all 3 fragments need to be put together if ( bNoCutSitesInVector_ && aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.length() == 2 ) { assert( aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_.length() == 1 ); // put all 3 together and that's all we need to do. restrictionFragment* pVectorFrag = aPredictedVectorRestrictionFragmentsEndingAtVectorInsertJunction_[ 0 ]; restrictionFragment* pPartialInsert1 = aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_[0]; restrictionFragment* pPartialInsert2 = aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_[1]; restrictionFragment* pNewFrag = new restrictionFragment( restrictionFragment::PREDICTED_FRAGMENT ); pNewFrag->nSize_ = pPartialInsert1->nSize_ + pPartialInsert2->nSize_ + pVectorFrag->nSize_; // this really isn't true--the fragment extends off // both ends of the contig (if the assembly is all in one piece). // But just to not have any programming problems, I'll fill // in one side or the other. pNewFrag->nExtendsOffWhichEndOfContig_ = ( pPartialInsert1->fragRight_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ? nRightGap : nLeftGap ); pNewFrag->nPositionOrder_ = MIN( pPartialInsert1->nPositionOrder_, MIN( pPartialInsert2->nPositionOrder_, pVectorFrag->nPositionOrder_ ) ); pNewFrag->bCombinedVectorAndInsertPartialFragments_ = true; pNewFrag->bIncludesAllOfVector_ = true; // now I am filling in the location of the fragment. // The terms "left" and "right" for this fragment don't have // any meaning, as this example shows: // --------------------- contig A // Kfffff (f=partial fragment) // --------------------- contig B // Jfffff (f=partial fragment) // when we put these together, we'll just randomly assign position // K to left and position J to right (or visa versa). We just // need some way of remembering the location of the fragment // within the assembly. if ( pPartialInsert1->fragLeft_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) { pNewFrag->fragLeft_ = pPartialInsert1->fragRight_; pNewFrag->pLeftContig_ = pPartialInsert1->pRightContig_; pNewFrag->nUnpaddedLeft_ = pPartialInsert1->nUnpaddedRight_; } else { pNewFrag->fragLeft_ = pPartialInsert1->fragLeft_; pNewFrag->pLeftContig_ = pPartialInsert1->pLeftContig_; pNewFrag->nUnpaddedLeft_ = pPartialInsert1->nUnpaddedLeft_; } if ( pPartialInsert2->fragLeft_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) { pNewFrag->fragRight_ = pPartialInsert2->fragRight_; pNewFrag->pRightContig_ = pPartialInsert2->pRightContig_; pNewFrag->nUnpaddedRight_= pPartialInsert2->nUnpaddedRight_; } else { pNewFrag->fragRight_ = pPartialInsert2->fragLeft_; pNewFrag->pRightContig_ = pPartialInsert2->pLeftContig_; pNewFrag->nUnpaddedRight_= pPartialInsert2->nUnpaddedLeft_; } pPartialInsert1->bPartialFragmentHasBeenPutTogether_ = true; pPartialInsert2->bPartialFragmentHasBeenPutTogether_ = true; pVectorFrag->bPartialFragmentHasBeenPutTogether_ = true; aPredictedResFragsBySize_.insert( pNewFrag ); return; // this takes care of all partial fragments } // now put fragments together for( nPartialFrag = 0; nPartialFrag < aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_.length(); ++nPartialFrag ) { restrictionFragment* pPartialRes = aPredictedInsertRestrictionFragmentsEndingAtVectorInsertJunction_[ nPartialFrag ]; // In the case in which I can't find corresponding partial insert // fragment, I should include the partial vector fragment. Fix me!!! if ( pPartialRes->fragLeft_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) { int nWhichEndOfVector; if ( pPartialRes->pLeftContig_->bFindWhichEndOfVectorIsConnectedToContigEnd( nLeftGap, nWhichEndOfVector, pGuiDisplayDigest_->soVectorSequence_, pGuiDisplayDigest_->soComplementedVectorSequence_ ) ) { // flip vector, if that is what the user wants if ( bFlipVectorFromDefault ) { nWhichEndOfVector = nOppositeGap( nWhichEndOfVector ); } // look for a vector fragment that ends at a vector/insert // junction restrictionFragment* pNewFrag = NULL; // will be changed combineFragments( pPartialRes, nLeftGap, nWhichEndOfVector, pNewFrag ); aPredictedResFragsBySize_.insert( pNewFrag ); } // else { // // making a random decision // nWhichEndOfVector = nLeftGap; // // flip vector, if that is what the user wants // if ( bFlipVectorFromTheDefault ) { // nWhichEndOfVector = nOppositeGap( nWhichEndOfVector ); // } // restrictionFragment* pNewFrag = NULL; // will be changed // combineFragments( pPartialRes, nLeftGap, nWhichEndOfVector, // pNewFrag ); // aPredictedResFragsBySize_.insert( pNewFrag ); // } } // the use of else-if rather than just if makes this not handle // the case of an insert that has no cut sites (an insert that // is a single fragment and thus connects to both ends of the vector) else if ( pPartialRes->fragRight_ == restrictionFragment::AT_VECTOR_INSERT_JUNCTION ) { int nWhichEndOfVector; if ( pPartialRes->pRightContig_->bFindWhichEndOfVectorIsConnectedToContigEnd( nRightGap, nWhichEndOfVector, pGuiDisplayDigest_->soVectorSequence_, pGuiDisplayDigest_->soComplementedVectorSequence_ ) ) { // flip vector, if that is what the user wants if ( bFlipVectorFromDefault ) { nWhichEndOfVector = nOppositeGap( nWhichEndOfVector ); } restrictionFragment* pNewFrag = NULL; // will be changed combineFragments( pPartialRes, nRightGap, nWhichEndOfVector, pNewFrag ); aPredictedResFragsBySize_.insert( pNewFrag ); } else { // // making a random decision // nWhichEndOfVector = nRightGap; // // flip vector, if that is what the user wants // if ( bFlipVectorFromTheDefault ) { // nWhichEndOfVector = nOppositeGap( nWhichEndOfVector ); // } // restrictionFragment* pNewFrag = NULL; // will be changed // combineFragments( pPartialRes, nRightGap, nWhichEndOfVector, // pNewFrag ); // aPredictedResFragsBySize_.insert( pNewFrag ); } } } combineRemainingVectorAndInsertPartialFragments( bFlipVectorFromDefault ); }