/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "fullSmithWaterman.h" #include #include #include "assert.h" #include "consedParameters.h" #include "fileDefines.h" #define MAX4( A, B, C, D ) ( ( (A) >= (B) && (A) >= (C) && (A) >= (D) ) ? \ (A) : \ ( ( (B) >= (C) && (B) >= (D) ) ? \ (B) : \ ( (C) >= (D) ? (C) : (D) ) ) ) static void reverseString( char* pString ) { int nLength; int n; char cTemp; int nMaxIndex; nLength = strlen( pString ); /* see RWCString::reverse */ nMaxIndex = nLength / 2 - 1; for( n = 0; n <= nMaxIndex; ++n ) { cTemp = pString[n]; pString[n] = pString[ nLength - 1 - n]; pString[ nLength - 1 - n ] = cTemp; } } static void findMaximumPointAndScore( char* pSeqDown, char* pSeqAcross, int* pnBestDownEnd, int* pnBestAcrossEnd, int* pnBestScore, const int nGap, const int nMatch, const int nMismatch ) { int nBestScore; int nBestDown; int nBestAcross; int nScoreDiagonallyAbove; int nToBeScoreDiagonallyAbove; int nScoreByDiagonal; int nAcross; int nDown; int* nScore; int nSeqDownLength; int nSeqAcrossLength; nSeqDownLength = strlen( pSeqDown ); nSeqAcrossLength = strlen( pSeqAcross ); // the +1 is because the size of the NW matrix is 1 greater // than that of the sequence (in either dimension) nScore = (int*) calloc( nSeqAcrossLength + 1, sizeof( int ) ); nBestScore = 0; nBestDown = 0; nBestAcross = 0; // score for nDown = 0 for( nAcross = 0; nAcross <= nSeqAcrossLength; ++nAcross ) { nScore[ nAcross ] = 0; } // 0 0 0 0 // 0 // a b c // ^nScoreDiagonallyAbove // ^nToBeScoreDiagonallyAbove // ^then change this value for( nDown = 1; nDown <= nSeqDownLength; ++nDown ) { nToBeScoreDiagonallyAbove = nScore[0]; /* case of nAcross=0 */ nScore[0] = 0; /* now handle nAcross != 0 */ for( nAcross = 1; nAcross <= nSeqAcrossLength; ++nAcross ) { nScoreDiagonallyAbove = nToBeScoreDiagonallyAbove; nToBeScoreDiagonallyAbove = nScore[ nAcross ]; // the -1 in each case is because the NW matrix // has its 0,0 before the start of the sequences // When moving from 0,0 to 1,1 you pass the 0th car of // each sequence if ( pSeqAcross[ nAcross - 1 ] == pSeqDown[ nDown - 1 ] ) nScoreByDiagonal = nScoreDiagonallyAbove + nMatch; else nScoreByDiagonal = nScoreDiagonallyAbove - nMismatch; nScore[ nAcross ] = MAX4( nScore[nAcross] - nGap, nScore[nAcross - 1 ] - nGap, nScoreByDiagonal, 0 ); if ( nScore[nAcross] > nBestScore ) { nBestScore = nScore[nAcross]; nBestAcross = nAcross; nBestDown = nDown; } } } free( nScore ); *pnBestDownEnd = nBestDown; *pnBestAcrossEnd = nBestAcross; *pnBestScore = nBestScore; } void fullSmithWaterman( char* pSeqDown, char* pSeqAcross, int* pn0BestAlignStartDown, int* pn0BestAlignStartAcross, int* pn0BestAlignEndDown, int* pn0BestAlignEndAcross, int* pnBestScore, int nGapPenalty, int nMatchBoost, int nMismatchPenalty ) { findMaximumPointAndScore( pSeqDown, pSeqAcross, pn0BestAlignEndDown, pn0BestAlignEndAcross, pnBestScore, nGapPenalty, nMatchBoost, nMismatchPenalty ); pSeqDown[*pn0BestAlignEndDown ] = 0; pSeqAcross[*pn0BestAlignEndAcross ] = 0; reverseString( pSeqDown ); reverseString( pSeqAcross ); int n0BestReversedEndDown; int n0BestReversedEndAcross; int nBestReversedScore; findMaximumPointAndScore( pSeqDown, pSeqAcross, &n0BestReversedEndDown, &n0BestReversedEndAcross, &nBestReversedScore, nGapPenalty, nMatchBoost, nMismatchPenalty ); // debugging fprintf( pAO, "pnBestScore = %d nBestReversedScore = %d\n", *pnBestScore, nBestReversedScore ); // end debugging assert( *pnBestScore == nBestReversedScore ); pSeqDown[ n0BestReversedEndDown ] = 0; pSeqAcross[ n0BestReversedEndAcross ] = 0; /* should reverse again so the strings are in the same orientation as originally */ reverseString( pSeqDown ); reverseString( pSeqAcross ); /* sssssssssssssssssss string -------------> alignment ends here <------------> *pnBestAlignEndDown <---------- nBestReversedEndDown sss remainder "sss" is the start of the alignment */ *pn0BestAlignStartDown = *pn0BestAlignEndDown - n0BestReversedEndDown; *pn0BestAlignStartAcross = *pn0BestAlignEndAcross - n0BestReversedEndAcross; /* we want pn0BestAlignEnd... when returned by fullSmithWaterman, to be the 0-based coordinate of the end of the alignment. Right now it is one more than the end of the alignment so decrement it */ --(*pn0BestAlignEndDown); --(*pn0BestAlignEndAcross); }