/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "sysdepend.h" #include "highQualityDiscrepancyGotoList.h" #include "quality.h" #include "contig.h" #include "locatedFragment.h" #include "consedParameters.h" #include "assert.h" static char* szGotoItemDesc = "high quality base disagrees with consensus"; // ctor reads through consensus of passed contig, adds a // gotoItem to list each time it encounters a region where // quality is <= qualityLow (as defined by quality.h) highQualityDiscrepancyGotoList :: highQualityDiscrepancyGotoList( Contig* pContig, const bool bExcludeCompressionOrG_dropoutTags, const bool bExcludeSpuriousPads ) { Contig* pOldContig = NULL; LocatedFragment* pOldLocFrag = NULL; int nOldConsPos = 0; gotoItem* pOldGotoItem = NULL; int nQualityThreshold = consedParameters::pGetConsedParameters()->nQualityThresholdForFindingHighQualityDiscrepancies_; int nNumFrags = pContig->nGetNumberOfFragsInContig(); for (int nFrag = 0; nFrag < nNumFrags; nFrag++) { // get pointer to this located frag from contig LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet(nFrag); for( int nConsPos = pLocFrag->nGetAlignStart(); nConsPos <= pLocFrag->nGetAlignEnd(); ++nConsPos ) { if ( (pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() >= nQualityThreshold) && (pLocFrag->ntGetFragFromConsPos( nConsPos ).qualGetQuality() != ucQualityLowEdited) ) { if ( ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() != pContig->cGetConsensusBase( nConsPos ) ) // Phil said to allow pads as highQualityDiscrepancies // on Nov 26, 1997 // && // !pLocFrag->ntGetFragFromConsPos( nConsPos ).bIsPad() ) { // Phil decided Dec 27, 1997 to only show high quality // discrepancies if in the aligned part of the read if ( !pLocFrag->bIsInAlignedPartOfRead( nConsPos ) ) continue; if ( nConsPos < ( pLocFrag->nGetAlignClipStart() + consedParameters::pGetConsedParameters()->nIgnoreHighQualityDiscrepanciesThisManyBasesFromEndOfAlignedRegion_ ) ) continue; if ( ( pLocFrag->nGetAlignClipEnd() - consedParameters::pGetConsedParameters()->nIgnoreHighQualityDiscrepanciesThisManyBasesFromEndOfAlignedRegion_ ) < nConsPos ) continue; if ( bExcludeCompressionOrG_dropoutTags ) { if ( pLocFrag->bIsWithinACompressionOrG_dropoutTag( nConsPos ) ) continue; } if ( bExcludeSpuriousPads ) { if ( pLocFrag->ntGetFragFromConsPos( nConsPos ).cGetBase() == '*' ) { // the high quality discrepancy is a pad. Let's see // what the consensus has here and if it is a repeated base. // If it is a possible misassembly, then show it in the // hqd list. If it is a spurious hqd pad, do not show // it. if ( !pContig->bDoesPadIndicateAPossibleMisassemblyHere( pLocFrag, nConsPos ) ) { // these are the hqd's that would be eliminated from // the hqd list continue; } } } if ( (pOldContig == pContig) && (pOldLocFrag == pLocFrag) && ( ( nOldConsPos + 1) == nConsPos ) ) { pOldGotoItem->setNewEnd( nConsPos, pContig->nUnpaddedIndex( nConsPos ) ); nOldConsPos = nConsPos; } else { gotoItem* pGotoItem = new gotoItem( pContig, pLocFrag, nConsPos, // start padded nConsPos, // end padded pContig->nUnpaddedIndex( nConsPos ),// start unpadded pContig->nUnpaddedIndex( nConsPos ), // end unpadded szGotoItemDesc, true, // bPrefixContigToDescription NULL ); // pOtherData addToList(pGotoItem); pOldGotoItem = pGotoItem; pOldContig = pContig; pOldLocFrag = pLocFrag; nOldConsPos = nConsPos; } } } } } sortByPosition(); } // highQualityDiscrepancyGotoList