/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include using namespace std; #include "myAgrep.h" #include "nNumberFromBase2.h" #include "assert.h" #include "consedParameters.h" void myAgrep( char* szText, const int nTextLength, char* szQuery, const int nQueryLength, const int nMaxErrors, RWTValOrderedVector* pArrayOfMatches, RWTValOrderedVector* pArrayOfScoresOfMatches ) { assert( nQueryLength <= sizeof( long long ) * 8 ); long long llEncodedQuery[ nNumberFromBase2MaxValue + 1 ]; /* llEncodedQuery is the pattern written in 1's and 0's like this: aggcgtt looks like this: 1000000 'a' line 0001000 'c' line 0110100 'g' line 0000011 't' line However, actually each of these are flipped since we will use the << operator to move the 1's into position and << implies that the least significant bit is on the right. */ int n; for( n = 0; n <= nNumberFromBase2MaxValue; ++n ) llEncodedQuery[ n ] = 0; for( n = 0; n < nQueryLength; ++n ) { int nBase = nNumberFromBase2[ szQuery[n] ]; long long llMask = ( ( (long long) 1) << n ); llEncodedQuery[ nBase ] |= llMask; } long long llMatchMask = ( (long long) 1 ) << ( nQueryLength - 1 ); /* 0 errors, llPreviousTextPosHistory[0] = 0; 1 errors, llPreviousTextPosHistory[1] = 1; 2 errors, llPreviousTextPosHistory[2] = 11; 3 errors, llPreviousTextPosHistory[3] = 111; */ long long* llHistory = (long long*) malloc( sizeof( long long ) * ( nMaxErrors + 1 ) ); long long* llPreviousTextPosHistory = (long long*) malloc( sizeof( long long ) * ( nMaxErrors + 1 ) ); assert( llHistory ); assert( llPreviousTextPosHistory ); int nd; for( nd = 0; nd <= nMaxErrors; ++nd ) { llHistory[ nd ] = 0; if ( nd == 0 ) llPreviousTextPosHistory[ nd ] = 0; else { llPreviousTextPosHistory[ nd ] = ( llPreviousTextPosHistory[ nd - 1 ] << 1 ) | 1; } } int nTextPos; for( nTextPos = 0; nTextPos < nTextLength; ++nTextPos ) { /* shift left with 1 fill on the right */ llHistory[0] = ( llHistory[ 0 ] << 1 ); llHistory[0] |= 1; llHistory[0] &= llEncodedQuery[ nNumberFromBase2[ szText[ nTextPos ] ] ]; for( nd = 1; nd <= nMaxErrors; ++nd ) { llHistory[ nd ] = ( ( ( llPreviousTextPosHistory[ nd ] << 1 ) | 1 ) & llEncodedQuery[ nNumberFromBase2[ szText[ nTextPos ] ] ] ) | ( ( llPreviousTextPosHistory[ nd - 1] | llHistory[ nd - 1 ] ) << 1 ) | llPreviousTextPosHistory[ nd - 1 ] | 1; } if ( llHistory[ nMaxErrors ] & llMatchMask ) { int nEndOfMatch = nTextPos; // let's find the score of this match. We can do this // by checking the llHistory[n] arrays at values other // than n = nMaxErrors. For example, if the match bit // is set at n = 0, then we know it is an exact match. int nHowManyErrors; for( nHowManyErrors = 0; nHowManyErrors <= nMaxErrors; ++nHowManyErrors ) { if ( llHistory[ nHowManyErrors ] & llMatchMask ) break; } (*pArrayOfMatches).insert( nTextPos ); (*pArrayOfScoresOfMatches).insert( nHowManyErrors ); } for( nd = 0; nd <= nMaxErrors; ++nd ) { llPreviousTextPosHistory[ nd ] = llHistory[ nd ]; } } // for( nTextPos = ... free( llHistory ); free( llPreviousTextPosHistory ); }