/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #ifndef mbtValOrderedVectorOfRWCString_included #define mbtValOrderedVectorOfRWCString_included #include "rwtvalorderedvector.h" #include using namespace std; #include "assert.h" class mbtValOrderedVectorOfRWCString : public RWTValOrderedVector { public: mbtValOrderedVectorOfRWCString( const size_t nInitialSize = 64 ) : RWTValOrderedVector( nInitialSize ), bResortWasRun_( false ) {} static int nCompareStrings( const RWCString* pString1, const RWCString* pString2 ) { if ( *pString1 == *pString2 ) return( 0 ); else if ( *pString1 < *pString2 ) return( -1 ); else return( 1 ); } void resort() { qsort( pArray_, entries(), sizeof( RWCString ), ( (int(*)( const void*, const void*) ) nCompareStrings ) ); if ( !bIsSorted() ) { THROW_ERROR( "mbtValOrderedVectorOfRWCString::resort didn't work" ); } bResortWasRun_ = true; } bool bIsSorted() { int nNumberOfEntries = length(); bool bSorted = true; if ( nNumberOfEntries >= 2 ) { for( int nElement = 0; nElement <= nNumberOfEntries - 2; ++nElement ) { if ( operator[]( nElement + 1 ) < operator[]( nElement ) ) { bSorted = false; cerr << "Elements " << nElement << " (" << operator[]( nElement ) << ") and " << nElement + 1 << " (" << operator[]( nElement + 1 ) << ") are out of order" << endl; } } } return( bSorted ); } // copied from mbtValOrderedVectorOfInt int nFindIndexOfMatchOrPredecessor( const RWCString& soMatch ) { if ( !bResortWasRun_ ) { resort(); } if ( isEmpty() ) return( RW_NPOS ); // region A // ----------- nMinIndex // region B // ----------- nTestIndex // region C // ----------- nTooLargeIndex // region D // an index becomes nTooLargeIndex if it is greater than soMatch // an index becomes nMinIndex if it is less than soMatch if ( soMatch < operator[]( 0 ) ) return( RW_NPOS ); int nMinIndex = 0; int nTooBigIndex = length() - 1; if ( operator[]( nTooBigIndex ) <= soMatch ) return( nTooBigIndex ); // if reached here, soMatch < operator[]( nTooBigIndex ) // and operator[](0) <= soMatch // Thus nTooBigIndex != 0 and the correct index is somewhere // less than nTooBigIndex and correct index >= 0 so // correct index >= 0. // Thus bisect this range over and over, making it smaller // and smaller until it is 0. while( true ) { if ( nTooBigIndex - nMinIndex <= 1 ) return( nMinIndex ); else { int nTestIndex = ( nTooBigIndex + nMinIndex ) / 2; if ( soMatch < operator[]( nTestIndex ) ) nTooBigIndex = nTestIndex; else nMinIndex = nTestIndex; } } } bool bContains( const RWCString& soMatch ) { int nIndex = nFindIndexOfMatchOrPredecessor( soMatch ); if ( nIndex == RW_NPOS ) return( false ); if ( operator[]( nIndex ) == soMatch ) return( true ); else return( false ); } bool bIsContainedBy( const RWCString& soMatch ) const { for( int n = 0; n < length(); ++n ) { if ( soMatch.bContains( (*this)[n] ) ) return true; } return false; } void insert( const RWCString soToInsert ) { RWTValOrderedVector::insert( soToInsert ); bResortWasRun_ = false; } int nFindIndexOfFirstContainedBy( const RWCString& soBiggerString ) const { for( int n = 0; n < length(); ++n ) { if ( soBiggerString.bContains( (*this)[n] ) ) return n; } return RW_NPOS; } // this is useful for finding matches to within a huge (chromosome sized) // string // szString is assumed to be null-terminated. Note that // if the szString is shorter than the RWCString's in // this array, all comparisons will yield nonzero, so the // result will be RW_NPOS int nFindIndexOfFirstMatch( char* szString ) { assert( bResortWasRun_ ); if ( isEmpty() ) return( RW_NPOS ); int nZeroElement = strncmp( szString, operator[]( 0 ).data(), operator[]( 0 ).length() ); if ( nZeroElement == 0 ) // match return 0; else if ( nZeroElement < 0 ) // case in which szString is off the left end of the list return RW_NPOS; // if reached here, the 0th element is too small int nTooSmallIndex = 0; int nTopOfRange = length() - 1; while( true ) { if ( nTopOfRange - nTooSmallIndex <= 1 ) { if ( strncmp( operator[]( nTopOfRange ).data(), szString, operator[]( nTopOfRange ).length() ) == 0 ) return nTopOfRange; else return RW_NPOS; } else { int nTestIndex = ( nTopOfRange + nTooSmallIndex ) / 2; if ( strncmp( operator[]( nTestIndex ).data(), szString, operator[]( nTestIndex ).length() ) < 0 ) nTooSmallIndex = nTestIndex; else nTopOfRange = nTestIndex; } } } // nFindIndexOfFirstMatch void removeDuplicates() { if ( !bIsSorted() ) resort(); for( int n = 0; n < ( length() - 1 ); ++n ) { int n2 = n + 1; bool bSame = true; do { if ( operator[](n) == operator[](n2 ) ) { removeAt(n2); } else { // element at n is different than element at n2 bSame = false; } } while( bSame && n2 < length() ); } } public: bool bResortWasRun_; }; #endif