/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "paddedPieceOfRead.h" #include "bIsAPad.h" #include "contig.h" #include "assert.h" // if the base pointed to by nZeroBasedPaddedPos is a pad, the // unpadded position returned is that from the previous base int paddedPieceOfRead :: nUnpaddedContigPosFromZeroBasedPaddedPos( const int nZeroBasedPaddedPos ) { assert( 0 <= nZeroBasedPaddedPos ); assert( nZeroBasedPaddedPos < nGetPaddedLength() ); int nUnpadded = nUnpaddedOffset_; // count pads up to nZeroBasedPaddedPos int nPads = 0; for( int nZeroBased2 = 0; nZeroBased2 <= nZeroBasedPaddedPos; ++nZeroBased2 ) { if ( bIsAPad( soBasesAndPads_[ nZeroBased2 ] ) ) ++nPads; } // pad pad // v v // bbbbbbbPbbbbbbbbPbbbbbbbbbbbbPbbbbbbbb // ^ ^ // nUnpaddedOffset_ nZeroBasedPaddedPos // If the sequence is complemented, the unpadded pos counts down // to the right. if ( bComplemented_ ) { return( nUnpadded = nUnpaddedOffset_ - nZeroBasedPaddedPos + nPads ); } else { return( nUnpaddedOffset_ + nZeroBasedPaddedPos - nPads ); } } int paddedPieceOfRead :: nZeroBasedAlignPosFromUnpadded( const int nSourceUnpadded ) { int nNumberOfNonPads; // the number of nonpads we must // pass to get to the base at nSourceUnpadded if ( bComplemented_ ) nNumberOfNonPads = nUnpaddedOffset_ - nSourceUnpadded; else nNumberOfNonPads = nSourceUnpadded - nUnpaddedOffset_; assert( 0 <= nNumberOfNonPads ); // this is checked below, but checked better since // it is possible for the assertion to be true, but // nZeroBasedUnpadded to still be wrong. // assert( nNumberOfNonPads <= soBasesAndPads_.length() ); // checks: 1) not off end 2) if # of non-pads now equals nSourceUnpadded // Perform these checks before incrementing int nNonPads = 0; for( int nZeroBasedAlignPos = 0; nZeroBasedAlignPos < soBasesAndPads_.length(); ++nZeroBasedAlignPos ) { if ( !bIsAPad( soBasesAndPads_[ nZeroBasedAlignPos ] ) ) { ++nNonPads; if ( nNonPads == nNumberOfNonPads ) return( nZeroBasedAlignPos ); } } // should never reach here--reaching here means that this routine // is being passed an unpadded position that isn't in the alignment assert( false ); } paddedPieceOfRead :: paddedPieceOfRead( const RWCString& soName, const RWCString& soBasesAndPads, const int nZeroBasedPaddedPos, const int nPaddedConsPos, Contig* pContig, const int nUnpaddedAlignmentStart, const int nUnpaddedAlignmentEnd, const int nPaddedAlignmentStart, const int nPaddedAlignmentEnd, const bool bComplemented ) : soName_( soName ), soBasesAndPads_( soBasesAndPads ), pContig_( pContig ), nUnpaddedAlignmentStart_( nUnpaddedAlignmentStart ), nUnpaddedAlignmentEnd_( nUnpaddedAlignmentEnd ), nPaddedAlignmentStart_( nPaddedAlignmentStart ), nPaddedAlignmentEnd_( nPaddedAlignmentEnd ), aBaseAndPadQualities_( (size_t) soBasesAndPads.length() ), bComplemented_( bComplemented ) { assert( !bIsAPad( soBasesAndPads_[ nZeroBasedPaddedPos ] ) ); // if we go back to the Contig object, we should find the same // base as the one in the center of this alignment if ( bComplemented ) { assert( toupper( soBasesAndPads_[ nZeroBasedPaddedPos ] ) == toupper( cComplementBase( pContig_->ntideGet( nPaddedConsPos ).cGetBase() ) ) ); } else { assert( toupper( soBasesAndPads_[ nZeroBasedPaddedPos ] ) == toupper( pContig_->ntideGet( nPaddedConsPos ).cGetBase()) ); } // we need to calculate nUnpaddedOffset_ // do this by counting the pads up to and including the // base at nZeroBasedPaddedPos // the < nZeroBasedPaddedPos instead of <= is because I know // that the way this is used, this base isn't a pad int nTotalPads = 0; for( int nZeroBased = 0; nZeroBased < nZeroBasedPaddedPos; ++nZeroBased ) { if ( bIsAPad( soBasesAndPads[ nZeroBased ] ) ) ++nTotalPads; } int nUnpaddedOfKnownBase = pContig_->nUnpaddedIndex( nPaddedConsPos ); if ( bComplemented_ ) { nUnpaddedOffset_ = nUnpaddedOfKnownBase + nZeroBasedPaddedPos - nTotalPads; } else { nUnpaddedOffset_ = nUnpaddedOfKnownBase - nZeroBasedPaddedPos + nTotalPads; } calculateQualities(); } void paddedPieceOfRead :: calculateQualities() { int nUnpadded = -666; bool bFirstBase = true; bool bProblems = false; for( int nZeroBasedAlignPos = 0; nZeroBasedAlignPos < soBasesAndPads_.length(); ++nZeroBasedAlignPos ) { char cBaseOrPad = soBasesAndPads_[ nZeroBasedAlignPos ]; if ( bFirstBase ) { // this routine is very expensive, so only do it for the // first base in the loop nUnpadded = nUnpaddedContigPosFromZeroBasedPaddedPos( nZeroBasedAlignPos ); bFirstBase = false; } else { if ( !bIsAPad( cBaseOrPad ) ) { if ( bComplemented_ ) --nUnpadded; else ++nUnpadded; } } // what shall we use for the quality of the initial or terminal // blanks, the ones that are used context bases when we have // reached the end of the contig? Doesn't matter since it won't // be used when displaying the base. Quality qual = 0; if ( cBaseOrPad != ' ' ) { // convert unpadded to padded int nConsPos = pContig_->nPaddedIndexFast( nUnpadded ); // find the quality shade of the base in the consensus qual = pContig_->ntGetCons( nConsPos ).qualGetQualityWithout98or99(); if ( !bIsAPad( cBaseOrPad ) ) { if ( bComplemented_ ) { if ( tolower( cComplementBase( cBaseOrPad ) ) != pContig_->ntGetCons( nConsPos ).cGetBase() ) { bool bMatch = ( tolower( cComplementBase( cBaseOrPad ) ) == pContig_->ntGetCons( nConsPos ).cGetBase() ); cerr << ( bMatch ? " " : "mis" ) << " " << (char) tolower( cComplementBase( cBaseOrPad ) ) << " " << pContig_->ntGetCons( nConsPos ).cGetBase() << " " << pContig_->nUnpaddedIndex( nConsPos ) << endl; bProblems = true; } } else { if ( tolower( cBaseOrPad ) != pContig_->ntGetCons( nConsPos ).cGetBase() ) { bool bMatch = ( tolower( cBaseOrPad ) == pContig_->ntGetCons( nConsPos ).cGetBase() ? true : false ); cerr << ( bMatch ? " " : "mis" ) << " " << (char) tolower( cBaseOrPad ) << " " << pContig_->ntGetCons( nConsPos ).cGetBase() << " " << pContig_->nUnpaddedIndex( nConsPos ) << endl; bProblems = true; } } } // if ( !bIsAPad( cBaseOrPad ) ) { // note that we are inserting qualities that come from only // the unpadded consensus bases. So even if we have this situation: // bbbbbb*bbbbbbbb consensus // bbbbbb*bbbbbbbb this read // ^ // the quality of this base will be that of the unpadded 'b' // just before it--not the quality of the pad in the consensus // (which, of course, is the average of the qualities of the two // unpadded bases on either side of it. } aBaseAndPadQualities_.insert( qual ); } // for( int nZeroBasedAlignPos = 0; assert( aBaseAndPadQualities_.length() == soBasesAndPads_.length() ); if (bProblems ) { cerr << soName_ << endl; THROW_ERROR("program bug--see xterm for details" ); } } int paddedPieceOfRead :: nGetLeftMostUnpaddedConsPosOfAlignment() { int nUnpadded = nUnpaddedContigPosFromZeroBasedPaddedPos( 0 ); return( nUnpadded ); } int paddedPieceOfRead :: nGetRightMostUnpaddedConsPosOfAlignment() { // zero-based position of padded alignment (not related // to the contig consensus position--it is a funciton of the // the alignment ) int nZeroBasedOfLastBase = soBasesAndPads_.length() - 1; int nUnpadded = nUnpaddedContigPosFromZeroBasedPaddedPos( nZeroBasedOfLastBase ); return( nUnpadded ); }