/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #ifndef readsSortedByReadName_included #define readsSortedByReadName_included #include "locatedFragment.h" class readsSortedByReadName : public RWTPtrOrderedVector { public: bool bIsSorted_; public: readsSortedByReadName( ) : RWTPtrOrderedVector( ), bIsSorted_( false ) {} readsSortedByReadName( size_t nInitialSize ) : RWTPtrOrderedVector( nInitialSize, "readsSortedByReadName" ), bIsSorted_( false ) {} readsSortedByReadName( const RWTPtrOrderedVector& aReads ) : RWTPtrOrderedVector( aReads ), bIsSorted_( false ) {} static int cmp( const LocatedFragment** ppLocFrag1, const LocatedFragment** ppLocFrag2 ) { if ( (*ppLocFrag1)->soSequenceName_ < (*ppLocFrag2)->soSequenceName_ ) return( -1 ); else if ( (*ppLocFrag1)->soSequenceName_ > (*ppLocFrag2)->soSequenceName_ ) return( 1 ); else { // read names are the same. Huh? return( 0 ); } } void resort() { void* pArray = (void*) data(); size_t nNumberOfElements = entries(); size_t nSizeOfAnElement = sizeof( LocatedFragment* ); qsort( pArray, nNumberOfElements, nSizeOfAnElement, ( ( int(*) ( const void*, const void* ) ) cmp ) ); if ( !bIsSorted() ) { THROW_ERROR( "reads out of order" ); } bIsSorted_ = true; } bool bIsSorted() { bool bSorted = true; for( int nRead = 1; nRead < length(); ++nRead ) { if ( ( operator[]( nRead - 1 ) )->soGetName() > ( operator[]( nRead ) )->soGetName() ) { bSorted = false; LocatedFragment* pLocFrag1 = operator[]( nRead - 1 ); LocatedFragment* pLocFrag2 = operator[]( nRead ); cerr << "Elements " << nRead - 1 << " (" << pLocFrag1->soGetName() << ") and " << nRead << " (" << pLocFrag2->soGetName() << ") are out of order" << endl; } } return( bSorted ); } bool bContains( const RWCString& soReadName ) { return( ( pFindReadByName( soReadName ) != NULL ? true : false ) ); } inline void insert( LocatedFragment* pLocFrag ) { RWTPtrOrderedVector::insert( pLocFrag ); bIsSorted_ = false; } // borrowed from mbtValOrderedVectorOfInt LocatedFragment* pFindReadByName( const RWCString& soReadName ) { if ( !bIsSorted_ ) { resort(); } if ( isEmpty() ) return( NULL ); // region A // ----------- nMinIndex // region B // ----------- nTestIndex // region C // ----------- nTooLargeIndex // region D // an index becomes nTooLargeIndex if it is greater than soReadName // an index becomes nMinIndex if it is less than soReadName if ( soReadName < operator[]( 0 )->soGetName() ) return( NULL ); int nMinIndex = 0; int nTooBigIndex = length() - 1; if ( operator[]( nTooBigIndex )->soGetName() <= soReadName ) { if ( operator[]( nTooBigIndex )->soGetName() == soReadName ) return( operator[]( nTooBigIndex ) ); else return( NULL ); } // Consider some boundary cases: // Case 1: If there is only one element in the list. In that case, // nMinIndex == nTooBigIndex. soReadName is either less, equal, // or greater than this one element, and we check all those // case above. // Case 2: There are two elements in the list. If soReadName is // less than the bottom one, we check that above. If it is greater or // equal to the top, we check that above. The only case that // reaches here is when soReadName is < nTooBigIndex and // nMinIndex <= soReadName. This case will not execute the loop // even once, but it will be checked after the loop. // if reached here, soReadName < operator[]( nTooBigIndex ) // and operator[](0) <= nMatch // Thus nTooBigIndex != 0 and the correct index is somewhere // less than nTooBigIndex and correct index >= 0 so // correct index >= 0. // Thus bisect this range over and over, making it smaller // and smaller until it is 0. while( nTooBigIndex - nMinIndex > 1 ) { int nTestIndex = ( nTooBigIndex + nMinIndex ) / 2; if ( soReadName < ( operator[]( nTestIndex ))->soGetName() ) nTooBigIndex = nTestIndex; else nMinIndex = nTestIndex; } // at this point, nTooBigIndex = nMinIndex + 1 so there the // only possibility of equality is the following: if ( soReadName == operator[]( nMinIndex )->soGetName() ) return( operator[]( nMinIndex ) ); else return( NULL ); } }; #endif