/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "seqMatch.h" #include "consedParameters.h" #include "abs.h" #include "contig.h" #include "nCompareContigsByName.h" #include "bIntervalsIntersect.h" #include "min.h" bool seqMatch :: bOKToShow() { // Note: this is run by calculateSeqMatchPositions *before* finding // the location of the arcs. Thus we cannot include here whether // the arcs are in view or not using bInView_ if ( !pCP->bAssemblyViewShowSequenceMatches_ ) return( false ); if ( pCP->bAssemblyViewFilterSequenceMatchesBySize_ ) { int nSizeOfCopyOfRepeat = ABS( nUnpaddedStartConsPos_[0] - nUnpaddedEndConsPos_[0] ); if ( nSizeOfCopyOfRepeat < pCP->nAssemblyViewSequenceMatchesMinSize_ || pCP->nAssemblyViewSequenceMatchesMaxSize_ < nSizeOfCopyOfRepeat ) { return( false ); } } if ( !pCP->bAssemblyViewOKToShowSequenceMatchesBetweenContigs_ && pContig_[0] != pContig_[1] ) return( false ); if ( !pCP->bAssemblyViewOKToShowSequenceMatchesWithinContigs_ && pContig_[0] == pContig_[1] ) return( false ); if ( pCP->bAssemblyViewOnlyShowSequenceMatchesToAParticularRegion_ ) { bool bOneCopyIsInRegion = false; for( int nCopy = 0; nCopy <= 1; ++nCopy ) { if ( pCP->soAssemblyViewOnlyShowSequenceMatchesToThisContig_ == pContig_[nCopy]->soGetName() && bIntervalsIntersect( pCP->nAssemblyViewOnlyShowSequenceMatchesToThisRegionLeft_, pCP->nAssemblyViewOnlyShowSequenceMatchesToThisRegionRight_, nUnpaddedStartConsPos_[ nCopy ], nUnpaddedEndConsPos_[ nCopy ] ) ) { bOneCopyIsInRegion = true; break; } } if ( !bOneCopyIsInRegion ) { return( false ); } } if ( !pCP->bAssemblyViewOKToShowDirectSequenceMatches_ && bDirectNotInverted_ ) return( false ); if ( !pCP->bAssemblyViewOKToShowInvertedSequenceMatches_ && !bDirectNotInverted_ ) return( false ); if ( fPercentSimilar_ < pCP->nAssemblyViewSequenceMatchesMinimumSimilarity_ ) return( false ); if ( pCP->bAssemblyViewOnlyShowSequenceMatchesToEndsOfContigs_ ) { // check whether the sequence match overlapped one of the ends // of each contig. for( int nEnd = 0; nEnd < 2; ++nEnd ) { Contig* pContig = pContig_[ nEnd ]; // left end goes from unpadded conpos 1 to // nAssemblyViewOnlyShowSequenceMatchesToEndsOfContigsThisFar_ int nLeftEndOfContigLeft = 1; int nLeftEndOfContigRight = pCP->nAssemblyViewOnlyShowSequenceMatchesToEndsOfContigsThisFar_; int nRightEndOfContigLeft = pContig->nGetUnpaddedEndIndex() - pCP->nAssemblyViewOnlyShowSequenceMatchesToEndsOfContigsThisFar_ + 1; int nRightEndOfContigRight = pContig->nGetUnpaddedEndIndex(); if ( !bIntervalsIntersect( nUnpaddedStartConsPos_[ nEnd ], nUnpaddedEndConsPos_[ nEnd ], nLeftEndOfContigLeft, nLeftEndOfContigRight ) && !bIntervalsIntersect( nUnpaddedStartConsPos_[ nEnd ], nUnpaddedEndConsPos_[ nEnd ], nRightEndOfContigLeft, nRightEndOfContigRight ) ) { return( false ); } } // for( int nEnd = 0; nEnd < 2; ++nEnd ) { } return( true ); } seqMatch :: seqMatch( Contig* pContig0, Contig* pContig1, const int nStart0, const int nStart1, const int nEnd0, const int nEnd1, const bool bComplemented, const float fPercentSimilar ) : bComplemented_( bComplemented ), fPercentSimilar_( fPercentSimilar ) { // the reason for all this switching of the 2 repeats is // to be able to tell when 2 repeats are really the same one. // To do that, we sort the repeats, and want those that are the // same to be right next to each other. int nOrder = nCompareContigsByName( &pContig0, &pContig1 ); if ( nOrder == 1 ) { // out of order pContig_[0] = pContig1; pContig_[1] = pContig0; if ( bComplemented ) { nUnpaddedStartConsPos_[0] = nEnd1; nUnpaddedStartConsPos_[1] = nEnd0; nUnpaddedEndConsPos_[0] = nStart1; nUnpaddedEndConsPos_[1] = nStart0; } else { nUnpaddedStartConsPos_[0] = nStart1; nUnpaddedStartConsPos_[1] = nStart0; nUnpaddedEndConsPos_[0] = nEnd1; nUnpaddedEndConsPos_[1] = nEnd0; } } else if ( nOrder == 0 && MIN( nStart0, nEnd0 ) > MIN( nStart1, nEnd1 ) ) { // the repeat is within a contig, but the copies are not in // the correct order. Put the leftmost copy of the // repeat the 0th copy pContig_[0] = pContig1; pContig_[1] = pContig0; if ( bComplemented ) { // exchange 0 and 1, and flip each since // 0 must be in order and 1 out of order nUnpaddedStartConsPos_[0] = nEnd1; nUnpaddedStartConsPos_[1] = nEnd0; nUnpaddedEndConsPos_[0] = nStart1; nUnpaddedEndConsPos_[1] = nStart0; } else { // exchange 0 and 1 but keep order nUnpaddedStartConsPos_[0] = nStart1; nUnpaddedStartConsPos_[1] = nStart0; nUnpaddedEndConsPos_[0] = nEnd1; nUnpaddedEndConsPos_[1] = nEnd0; } } else { // in order pContig_[0] = pContig0; pContig_[1] = pContig1; nUnpaddedStartConsPos_[0] = nStart0; nUnpaddedStartConsPos_[1] = nStart1; nUnpaddedEndConsPos_[0] = nEnd0; nUnpaddedEndConsPos_[1] = nEnd1; } // crossmatch does not put the endpoints in order in the // case in which the match is complemented. Put the ends in order // so they can be compared with other matches // I don't believe this is necessary any longer because care is // taken above to keep them in order. if ( nUnpaddedStartConsPos_[ 0 ] > nUnpaddedEndConsPos_[ 0 ] ) { // switch both copies so preserving that Start goes to Start // and End goes to End for( int nCopy = 0; nCopy <= 1; ++nCopy ) { int nTemp = nUnpaddedStartConsPos_[ nCopy ]; nUnpaddedStartConsPos_[ nCopy ] = nUnpaddedEndConsPos_[ nCopy ]; nUnpaddedEndConsPos_[ nCopy ] = nTemp; } } assert( nUnpaddedStartConsPos_[0] <= nUnpaddedEndConsPos_[0] ); if ( bComplemented ) { assert( nUnpaddedEndConsPos_[1] <= nUnpaddedStartConsPos_[1] ); } else { assert( nUnpaddedStartConsPos_[1] <= nUnpaddedEndConsPos_[1] ); } bInView_[0] = true; bInView_[1] = true; }