/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ // this is to go with extendAlignmentBanded #include "getAlignmentBanded.h" #include "assert.h" #include "min.h" #include "max.h" #include "setMatchMatrix.h" #include #include static int nBandSizeStatic; static int nSizeOfWhereCameFromArray; static inline int nWhereCameFromAcrossDown( const int nAcross, const int nDown ) { assert( nDown - nBandSizeStatic <= nAcross ); if ( ! ( nAcross <= nDown + nBandSizeStatic ) ) { cerr << "error in nWhereCameFromAcrossDown nAcross = " << nAcross << " nDown = " << nDown << " and ! nAcross <= nDown + nBandSizeStatic" << endl; assert( false ); } int nToReturn = nDown * ( 2*nBandSizeStatic + 1) + nAcross - nDown + nBandSizeStatic; // since nAcross varies from nDown - nBandSize to nDown + nBandSize, // nAcross - nDown + nBandSize varies from 0 to 2*nBandSize if ( ! ( nToReturn < nSizeOfWhereCameFromArray ) ) { cerr << "nToReturn = " << nToReturn << " nSizeOfWhereCameFromArray = " << nSizeOfWhereCameFromArray << " nAcross = " << nAcross << " nDown = " << nDown << " nBandSizeStatic = " << nBandSizeStatic << endl; assert( false ); } return( nToReturn ); } const char cMovedRight = 'R'; const char cMovedDown = 'W'; const char cMovedDiagonally = 'G'; const char cAtBeginning = 'Z'; // pSeqDown is put into soAlignmentTop // pSeqAcross is put into soAlignmentBot void getAlignmentBanded( char* pSeqDown, char* pSeqAcross, const int nBestDownEnd, const int nBestAcrossEnd, const int nGapPenaltyPositive, const int nMatchBoost, const int nMismatchPenaltyPositive, const int nBandSize, RWCString& soAlignmentTop, RWCString& soAlignmentMid, RWCString& soAlignmentBot, int& nReturnedScore ) { // this is a little inefficient, since it may have just been called. // However, it is called different times with different parameters and // I tried making it more efficient and spent a whole day debugging // to find that it wasn't set. setMatchMatrix( nMatchBoost, nMismatchPenaltyPositive ); // for nWhereCameFromAcrossDown nBandSizeStatic = nBandSize; int* nScore = (int*) calloc( nBestAcrossEnd + 2, sizeof( int) ); // nBestAcrossEnd + 1 is the length of the sequence // and N/W matrix is one larger assert( nScore ); // the needed size could be smaller if nBestAcrossEnd < nBestDownEnd // This is for the case of // |--\--------| // |\ \ | // a->| \ \ | // | \ \<-c | // |\ \ \ | // | \ \ \ | // ---------p---- // which will require a larger array than: // |--\--------| // |\ \ | // a->| \ \ | // | \ \<-c | // |\ \ \ | // | \ \ \ | // | \ \ \ | // | \ \ \ | // | b->\ \ \| // | \ \ | // | \ \ |<-d // | \ \| // | \ | // | \ | // | \| // ------------- // The +1 is because (nBestDownEnd+1)*2 * nBandSize is the max subscript, // so the array must be one larger // In former case, point p will have: // nToReturn = nBestDownEnd * ( 2*nBandSizeStatic + 1) + // nBestDownEnd + nBandSizeStatic - nBestDownEnd + nBandSizeStatic; // = nBestDownEnd * ( 2*nBandSizeStatic + 1 ) + 2* nBandSizeStatic // and must add 1 since must convert largest subscript to array size: // nBestDownEnd * ( 2*nBandSizeStatic + 1 ) + 2*nBandSizeStatic + 1 // = ( nBestDownEnd + 1 ) * (2*nBandSizeStatic + 1 ) nSizeOfWhereCameFromArray = ( nBestDownEnd + 1 ) * ( 2*nBandSize + 1 ); char* pWhereCameFrom = (char*) calloc( nSizeOfWhereCameFromArray + 1, sizeof( char ) ); assert( pWhereCameFrom ); int nDown = 0; int nMinAcross = 0; // would be nDown - nBandSize, but bumped into left edge int nMaxAcross = MIN( nBandSize, nBestAcrossEnd ); // nDown + nBandSize // but nBandSize could be greater than nBestAcrossEnd in which // case the band could enclose the entire N/W matrix so we might // be doing a full N/W matrix (depends on if it also encloses // nBestDownEnd int nAcross; for( nAcross = 0; nAcross <= nMaxAcross; ++nAcross ) { nScore[ nAcross ] = -nGapPenaltyPositive * nAcross; pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] = cMovedRight; } // must go after all above pWhereCameFrom[ nWhereCameFromAcrossDown( 0, 0 ) ] = cAtBeginning; // now doing nDown >= 1 int nToBeScoreDiagonallyAbove; for( nDown = 1; nDown <= nBestDownEnd; ++nDown ) { int nMinAcross = MAX( nDown - nBandSize, 0 ); int nMaxAcross = MIN( nDown + nBandSize, nBestAcrossEnd ); if ( nMinAcross > nMaxAcross ) break; // how could this happen? Like this: // |--\--------| // |\ \ | // a->| \ \ | // | \ \<-c | // |\ \ \ | // | \ \ \ | // | \ \ \ | // | \ \ \ | // | b->\ \ \| // | \ \ | // | \ \ |<-d // | \ \| // | \ | // | \ | // | \| // | | // | |<- this area and below has nMaxAcross = nBestAcrossEnd // | | but nMinAcross = nDown - nBandSize which is pretty big // | | and gets bigger as you go down // case of nAcross = nMinAcross, 1st column nToBeScoreDiagonallyAbove = nScore[ nMinAcross ]; if ( nMinAcross == 0 ) { // bumped into left edge and top edge so no score diagonally above // such as position "a" in figure nScore[0] = nScore[0] - nGapPenaltyPositive; pWhereCameFrom[ nWhereCameFromAcrossDown( 0, nDown )] = cMovedDown; } else { // position "b" in figure int nDownScore = nScore[ nMinAcross ] - nGapPenaltyPositive; int nDiagonalScore = nScore[ nMinAcross - 1] + nMatchMatrix[ pSeqAcross[ nMinAcross-1 ]][ pSeqDown[ nDown - 1 ]]; if ( nDownScore < nDiagonalScore ) { // see movement along line next to "b" nScore[ nMinAcross ] = nDiagonalScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nMinAcross, nDown ) ] = cMovedDiagonally; } else { // movement straight down to "b" nScore[ nMinAcross ] = nDownScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nMinAcross, nDown ) ] = cMovedDown; } } bool bMaximizeUsingPointAbove = ( nDown > (nBestAcrossEnd - nBandSize ) ) ? true : false; int nAcrossOnLineC = nDown + nBandSize; // right end of band // now handle the case of the other columns after nMinAcross for( int nAcross = nMinAcross + 1; nAcross <= nMaxAcross; ++nAcross ) { int nScoreDiagonallyAbove = nToBeScoreDiagonallyAbove; // after nAcross increments, then we will need to use this value, // but by that time it will be overwritten, so save it nToBeScoreDiagonallyAbove = nScore[nAcross]; // see site c above--in this case we do not have a "move down" case // This is the case in which nAcross == nMaxAcross. However, // there are some cases in which nAcross == nMaxAcross and we // *do* have a "move down" case. That is case d. These are // distinguished by bMaximizeUsingPointAbove. if ( nAcross != nMaxAcross || bMaximizeUsingPointAbove ) { assert( nAcross != nAcrossOnLineC ); int nMoveDownScore = nScore[ nAcross ] - nGapPenaltyPositive; int nMoveRightScore = nScore[nAcross-1] - nGapPenaltyPositive; int nMoveDiagonallyScore = nScoreDiagonallyAbove + nMatchMatrix[ pSeqAcross[nAcross-1]][pSeqDown[nDown-1]]; if ( nMoveDiagonallyScore >= nMoveDownScore && nMoveDiagonallyScore >= nMoveRightScore ) { nScore[nAcross] = nMoveDiagonallyScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] = cMovedDiagonally; } else if ( nMoveDownScore >= nMoveDiagonallyScore && nMoveDownScore >= nMoveRightScore ) { nScore[nAcross] = nMoveDownScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] = cMovedDown; } else { // nMoveRightScore is greatest nScore[nAcross] = nMoveRightScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] = cMovedRight; } } else { // case of being on line c so no case of moving down assert( nAcross == nAcrossOnLineC ); int nMoveRightScore = nScore[nAcross-1] - nGapPenaltyPositive; int nMoveDiagonallyScore = nScoreDiagonallyAbove + nMatchMatrix[pSeqAcross[nAcross-1]][pSeqDown[nDown-1]]; if ( nMoveDiagonallyScore >= nMoveRightScore ) { nScore[nAcross] = nMoveDiagonallyScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] = cMovedDiagonally; } else { nScore[nAcross] = nMoveRightScore; pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] = cMovedRight; } } } // for( int nAcross = nMinAcross + 1; ... } // for( nDown = 1; nDown <= nBestDownEnd; ++nDown ) { // now trace backwards finding bases int nCheckScore = 0; nDown = nBestDownEnd; nAcross = nBestAcrossEnd; soAlignmentTop = ""; soAlignmentMid = ""; soAlignmentBot = ""; int nMaxRoomNeeded = MAX( nBestAcrossEnd, nBestDownEnd ) + nBandSize; soAlignmentTop.increaseButNotDecreaseMaxLength( nMaxRoomNeeded ); soAlignmentMid.increaseButNotDecreaseMaxLength( nMaxRoomNeeded ); soAlignmentBot.increaseButNotDecreaseMaxLength( nMaxRoomNeeded ); char cWhereCameFrom; while( ( cWhereCameFrom = pWhereCameFrom[ nWhereCameFromAcrossDown( nAcross, nDown ) ] ) != cAtBeginning ) { if ( cWhereCameFrom == cMovedDiagonally ) { soAlignmentTop += pSeqDown[ nDown - 1 ]; soAlignmentBot += pSeqAcross[ nAcross - 1]; int nBoost = nMatchMatrix[ pSeqAcross[nAcross-1]][ pSeqDown[ nDown - 1 ] ]; nCheckScore += nBoost; if ( nBoost < 0 ) soAlignmentMid += 'X'; else if ( nBoost == 0 ) soAlignmentMid += '?'; else soAlignmentMid += ' '; --nDown; --nAcross; } else if ( cWhereCameFrom == cMovedDown ) { soAlignmentTop += pSeqDown[nDown-1]; soAlignmentMid += '*'; soAlignmentBot += '*'; nCheckScore -= nGapPenaltyPositive; --nDown; } else { assert( cWhereCameFrom == cMovedRight ); soAlignmentTop += '*'; soAlignmentMid += '*'; soAlignmentBot += pSeqAcross[ nAcross-1 ]; nCheckScore -= nGapPenaltyPositive; --nAcross; } } // the alignments are reversed, so straighten them out soAlignmentTop.reverse(); soAlignmentMid.reverse(); soAlignmentBot.reverse(); nReturnedScore = nCheckScore; free( (char*) nScore ); free( (char*) pWhereCameFrom ); }