/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "assembly.h" #include "contigEndPair.h" #include "contigEnd.h" #include "contig.h" #include "consedParameters.h" #include "fileDefines.h" void Assembly :: tryToMergeContigEndPairWithExistingScaffold( contigEndPair* pInconsistentContigEndPair ) { // at least one of the contigs in pInconsistentContigEndPair // will already be in the existing scaffolds. Find the one that is. // if there are two matches, then I will ignore this CEP. Such a // case usually indicates a misassembly. There is one case in which // it is useful, but I am ignoring that case: // A B C D // --------------- --- (not connected to) --- ------------------ // // <-------------------> // link bool bZeroContigEndInUse = false; bool bFirstContigEndInUse = false; if ( pInconsistentContigEndPair->pContig_[0]->pContig_[ pInconsistentContigEndPair->nWhichEnd_[0] ] ) bZeroContigEndInUse = true; if ( pInconsistentContigEndPair->pContig_[1]->pContig_[ pInconsistentContigEndPair->nWhichEnd_[1] ] ) bFirstContigEndInUse = true; if ( bZeroContigEndInUse && bFirstContigEndInUse ) { if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) { fprintf( pAO, "both ends in use\n" ); } tryToMergeContigEndPairwithExistingScaffoldWhereBothEndsInUse( pInconsistentContigEndPair ); return; } // next check if both contigs in the CEP are already linked to // other things. We won't handle that case. This could be, for example, // like this: // ----------------------- --------------------------- // A B // -- -- // C D // and then there is a link from A-C since the real orientation is // ----------------------- -- -- -------------------------- // A C D B // but we aren't going to handle this case. if ( ( pInconsistentContigEndPair->pContig_[0]->pContig_[nLeftGap] || pInconsistentContigEndPair->pContig_[0]->pContig_[nRightGap] ) && ( pInconsistentContigEndPair->pContig_[1]->pContig_[nLeftGap] || pInconsistentContigEndPair->pContig_[1]->pContig_[nRightGap] ) ) { if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) { fprintf( pAO, "both contigs are already linked to other contigs so not trying to use this inconsistent CEP\n" ); } return; } // A B C // ------------------- --- ----------------------- // A = pContigAlreadyPlaced // B = pContigToBePlaced // C = pOtherContigAlreadyPlaced contigEndPair* pCEPFromAToC = NULL; Contig* pContigA = NULL; int nEndOfContigA = 0; Contig* pContigB = NULL; int nEndOfContigB = 0; if ( bZeroContigEndInUse ) { pContigA = pInconsistentContigEndPair->pContig_[0]; nEndOfContigA = pInconsistentContigEndPair->nWhichEnd_[0]; pCEPFromAToC = pContigA->pCEP_[ nEndOfContigA ]; pContigB = pInconsistentContigEndPair->pContig_[1]; nEndOfContigB = pInconsistentContigEndPair->nWhichEnd_[1]; } else { // since we have disallowed the case of both bZeroContigEndInUse // && bFirstContigEndInUse, the alternative is just // bFirstContigEndInUse pContigA = pInconsistentContigEndPair->pContig_[1]; nEndOfContigA = pInconsistentContigEndPair->nWhichEnd_[1]; pCEPFromAToC = pContigA->pCEP_[ nEndOfContigA ]; pContigB = pInconsistentContigEndPair->pContig_[0]; nEndOfContigB = pInconsistentContigEndPair->nWhichEnd_[0]; } // go ahead and stick this contig into the gap. // save C before overwriting these... Contig* pContigC = pContigA->pContig_[ nEndOfContigA ]; int nEndOfContigC = pContigA->nWhichEnd_[ nEndOfContigA ]; // now overwrite: pContigA->pContig_[ nEndOfContigA ] = pContigB; pContigA->nWhichEnd_[ nEndOfContigA ] = nEndOfContigB; pContigA->pCEP_[ nEndOfContigA ] = pInconsistentContigEndPair; // fix up the ends of the contig to be placed pContigB->pContig_[ nEndOfContigB ] = pContigA; pContigB->nWhichEnd_[ nEndOfContigB ] = nEndOfContigA; pContigB->pCEP_[ nEndOfContigB ] = pInconsistentContigEndPair; // fix up the other end of the contig to be placed int nOtherEndOfContigB = nOppositeGap( nEndOfContigB ); pContigB->pContig_[ nOtherEndOfContigB ] = pContigC; pContigB->nWhichEnd_[ nOtherEndOfContigB ] = nEndOfContigC; pContigB->pCEP_[ nOtherEndOfContigB ] = NULL; // now fix up the other end of the contig to be placed. I'll just // assume that it will connect to the contig that used pContigC->pContig_[ nEndOfContigC ] = pContigB; pContigC->nWhichEnd_[ nEndOfContigC ] = nOtherEndOfContigB; pContigC->pCEP_[ nEndOfContigC ] = NULL; // there isn't any linking information about this-- // this might cause problems if ( !pContigA->pArrayOfIndirectContigEndPairs_[ nEndOfContigA ] ) { pContigA->pArrayOfIndirectContigEndPairs_[ nEndOfContigA ] = new RWTPtrOrderedVector( (size_t) 3 ); } if ( !pContigC->pArrayOfIndirectContigEndPairs_[ nEndOfContigC ] ) { pContigC->pArrayOfIndirectContigEndPairs_[ nEndOfContigC ] = new RWTPtrOrderedVector( (size_t) 3 ); } pContigA->pArrayOfIndirectContigEndPairs_[ nEndOfContigA ]->insert( pCEPFromAToC ); pContigC->pArrayOfIndirectContigEndPairs_[ nEndOfContigC ]->insert( pCEPFromAToC ); pInconsistentContigEndPair->makeAllTemplatesConsistent(); fprintf( pAO, "squeezing little contig %s between %s of %s and %s of %s\n", pContigB->soGetName().data(), szWhichGap( nEndOfContigA ), pContigA->soGetName().data(), szWhichGap( nEndOfContigC ), pContigC->soGetName().data() ); fflush( pAO ); // (We haven't considered the case in which there is already a // a contig squeezed into the gap.) } void Assembly :: tryToMergeContigEndPairwithExistingScaffoldWhereBothEndsInUse( contigEndPair* pInconsistentContigEndPair ) { // This is probably a misassembly. // In this case, the only possibility of redeeming it would be // if this contigEndPair matches the existing assembly, which // would happen in the following case: // A----C // but then added B in between due to a link from A to B: // A--B---C // and now we are considering the link from B to C. Contig* pContigA = pInconsistentContigEndPair->pContig_[0]; Contig* pContigB = pInconsistentContigEndPair->pContig_[1]; int nEndOfContigA = pInconsistentContigEndPair->nWhichEnd_[0]; int nEndOfContigB = pInconsistentContigEndPair->nWhichEnd_[1]; if ( pContigA->pContig_[ nEndOfContigA ] == pContigB && pContigA->nWhichEnd_[ nEndOfContigA ] == nEndOfContigB ) { // the "inconsistent" fwd/rev pair confirms the one already there. // I'll bet the CEP is null: if ( !pContigA->pCEP_[ nEndOfContigA ] ) pContigA->pCEP_[ nEndOfContigA ] = pInconsistentContigEndPair; if ( !pContigB->pCEP_[ nEndOfContigB ] ) pContigB->pCEP_[ nEndOfContigB ] = pInconsistentContigEndPair; pInconsistentContigEndPair->makeAllTemplatesConsistent(); } else { if ( pCP->bPrintInFigureOutContigOrderAndOrientation_ ) { fprintf( pAO, "can't use: %s\n", pInconsistentContigEndPair->soGetDescription().data() ); } } }