/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include #include #include #include #include #include // for chmod #include #include #include "bIsNumeric.h" #include "choose_contig.h" #include "consed.h" #include "consedParameters.h" #include "contig.h" #include "fileDefines.h" #include "fragment_and_contig.h" #include "guiapp.h" #include "locatedFragment.h" #include "mbt_errors.h" #include "mbt_exception.h" #include "nGetReadTypeFromReadName.h" #include "ntide.h" #include "numutil.h" #include "quality.h" #include "readPHD.h" #include "readTypes.h" #include "readsSortedByTemplateName.h" #include "rwcregexp.h" #include "rwcstring.h" #include "tag.h" #include "wholeReadItem.h" #include "library.h" #include "dErrorRateFromQuality.h" #include "extendAlignment.h" #include "getAlignment.h" #include "printAlignment.h" #include "complement_so.h" #include "rwcregexp.h" #include "soAddCommas.h" #include "mbtValVector.h" #include #include "rwctokenizer.h" #include "bIsNumericFloat.h" #include "filename.h" #include "nNumberFromBase2.h" Fragment :: ~Fragment() { delete pTraceFile_; aTags_.clearAndDestroy(); } Fragment& Fragment :: operator=( const Fragment& frag ) { // currently not implemented, and default won't work // because tag pointers would be copied and point to the // same tags assert( false ); return( *this ); } Fragment :: Fragment( const Fragment& frag ) { // currently not implemented, and default won't work // because tag pointers would be copied and point to the // same tags assert( false ); } void Fragment :: readTraces( const bool bAskForPathnameIfMissing ) { // removed July 2009 (DG) to make fake solexa traces // if ( soChemistry_ == "solexa" ) { // THROW_ERROR( "read " + soGetName() + " is a solexa read so there is no trace" ); // } if ( bReadOnly_ ) { ostrstream ost; ost << "Read " << soGetName() << " is marked as read-only (possibly because the phd file is missing) so you cannot bring up the trace for it" << ends; InputDataError ide( ost.str() ); throw ide; } // you only get to do this once. will throw exception if fails if (! pTraceFile_ ) { if ( bGettingTraceFile_ ) { // another thread is currently reading the tracefile and // probably waiting for user input regarding the filename // Thus terminate this thread--we don't want 2 tracefile // objects or 2 dialog boxes asking the user for filename. SysRequestFailed srf; srf.setUserNotified(); // I don't want a popup throw srf; // throw exception } pTraceFile_ = new TraceFile( this, bAskForPathnameIfMissing ); // It is also important that the PHD file not be re-read. This is // because if the user has already edited it, then there will be // mismatches between the PHD file and the edited bases in memory. try { // now set the point positions of the Ntides in the (inherited) Sequence fixFragmentPointPositions( pTraceFile_ ); } catch( InputDataError ide ) { // this is to prevent someone from trying twice to bring up // a bad trace (due to bad point positions) and having it brought // up the 2nd time delete pTraceFile_; pTraceFile_ = NULL; throw ide; } } } void Fragment :: writePHD( const FileName filPHD ) { ofstream ofsPHD; RWCString soLine( (RWSize_2T) 200 ); RWCString soCompletePathname = ConsEd::pGetConsEd()->filGetPHDDir() + "/" + filPHD; #ifdef OFSTREAM_OPEN_WITHOUT_PERMISSIONS ofsPHD.open( (char *) soCompletePathname.data(), ios:: out ); #else ofsPHD.open( (char *) soCompletePathname.data(), ios:: out, 0666 ); #endif if (!ofsPHD ) { ostrstream ost; ost << "could not open PHD file " << filPHD << " for output " << endl << ends; InputDataError ide( ost.str() ); throw ide; } ofsPHD << "BEGIN_SEQUENCE " << soGetFragmentName() << endl; ofsPHD << endl; ofsPHD << "BEGIN_COMMENT" << endl; ofsPHD << endl; if ( !filGetChromatFilename().bIsNull() && filGetChromatFilename() != "none" ) { ofsPHD << "CHROMAT_FILE: " << filGetChromatFilename() << endl; } if ( soChemistry_ != "prim" && soChemistry_ != "term" && soChemistry_ != "unknown" ) { // this is for solexa, 454, fasta and other reads if ( !soCallMethod_.bIsNull() ) { ofsPHD << "CALL_METHOD: " << soCallMethod_ << endl; } ofsPHD << "TIME: " << soGetTimestamp() << endl; } else { // Sanger reads ofsPHD << "ABI_THUMBPRINT: " << soABIThumbprint_ << endl; ofsPHD << "PHRED_VERSION: " << soPhredVersion_ << endl; ofsPHD << "CALL_METHOD: " << soCallMethod_ << endl; ofsPHD << "QUALITY_LEVELS: " << nQualityLevels_ << endl; ofsPHD << "TIME: " << soGetTimestamp() << endl; if ( bPhdFileHasTraceArrayMinAndMax_ ) { ofsPHD << "TRACE_ARRAY_MIN_INDEX: " << nTraceArrayMinIndex_ << endl; ofsPHD << "TRACE_ARRAY_MAX_INDEX: " << nTraceArrayMaxIndex_ << endl; } } if ( !soChemistry_.bIsNull() ) { ofsPHD << "CHEM: " << soChemistry_ << endl; } if ( !soDye_.bIsNull() ) { ofsPHD << "DYE: " << soDye_ << endl; } // stupid newline left over from past ofsPHD << endl; ofsPHD << "END_COMMENT" << endl; ofsPHD << endl; ofsPHD << "BEGIN_DNA" << endl; bool bNeedToComplementBack = false; LocatedFragment* pLocFrag; if ( bComp() ) { pLocFrag = (LocatedFragment*) this; pLocFrag->complementLocatedFragmentButNotTraces(); // but not traces! bNeedToComplementBack = true; } // handle case of solexa reads (and possibly others) that do not // have traces and those no peak positions if ( cWhichPeakPositionsAreUsed_ == cNone ) { // case of solexa reads for( int nPadded = nGetStartIndex(); nPadded <= nGetEndIndex(); ++nPadded ) { if ( !ntideGet( nPadded ).bIsPad() ) { ofsPHD << (char) tolower( ntideGet( nPadded ).cGetBase() ) << " " << (int) ntideGet( nPadded ).qualGetQuality() << " 0" << endl; } } } else { // normal case for( int nPadded = nGetStartIndex(); nPadded <= nGetEndIndex(); ++nPadded ) { if ( ! ntideGet( nPadded ).bIsPad() ) { ofsPHD << (char ) tolower( ntideGet( nPadded ).cGetBase() ) << " " << (int) ntideGet( nPadded ).qualGetQuality() << " " << nGetPointPos( nPadded ) << endl; } } } ofsPHD << "END_DNA" << endl; ofsPHD << endl; writeWholeReadItemsToPhdFile( ofsPHD ); // the tag positions have been complemented (if necessary) in locfrag writeTagsToPhdFile( ofsPHD ); ofsPHD << "END_SEQUENCE" << endl; ofsPHD.close(); #ifdef OFSTREAM_OPEN_WITHOUT_PERMISSIONS int nError = chmod( soCompletePathname.data(), 0666 ); #endif if (bNeedToComplementBack ) pLocFrag->complementLocatedFragmentButNotTraces(); } void Fragment :: writePHDAndMore() { FileName filOldPHD = filGetPHDFilename(); FileName filTempPHD = filOldPHD; FileName filNewPHD; int nNewVersion = nVersion_ + 1; if ( filTempPHD.bIsNull() ) { filTempPHD = soGetName() + ".phd." + RWCString( (long) nNewVersion ); FileName filTempPHDFullPath = ConsEd::pGetConsEd ()->filGetPHDDir() + "/" + filTempPHD; if ( !filTempPHDFullPath.bFileByThisNameExists() ) { filNewPHD = filTempPHD; nVersion_ = nNewVersion; } else { filNewPHD = filTempPHD.filFindOneHigherThanHighestVersion2( ConsEd::pGetConsEd ()->filGetPHDDir(), 2, nNewVersion ); nVersion_ = nNewVersion; } } else { filNewPHD = filTempPHD.filFindOneHigherThanHighestVersion( ConsEd::pGetConsEd ()->filGetPHDDir(), 2 ); // version if none already exists (such // as when all phd files are in phd.ball and then deleted from phd_dir ) } writePHD( filNewPHD ); // I've put this here rather in the writePHD code to make it // clear that the PHD filename is changed before writing out // the ace file. This is important since the ace file contains // the name of the PHD. setPHDFilename( filNewPHD ); } void Fragment :: writeTagsToPhdFile( ofstream& ofsPHD ) { if ( nGetNumberOfTags() == 0 ) return; for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( pTag->bWriteToPhdFileNotAceFile_ ) pTag->writeTagToPHDFile( ofsPHD ); } } void Fragment :: writeWholeReadItemsToPhdFile( ofstream& ofsPHD ) { if ( aWholeReadItems_.length() == 0 ) return; for( int nWR = 0; nWR < aWholeReadItems_.length(); ++nWR ) { wholeReadItem* pWR = aWholeReadItems_[ nWR ]; pWR->writeWholeReadItemToPHDFile( ofsPHD ); } } void Fragment :: writeReadTagsToAceFile( ofstream& ofsAce ) { if ( nGetNumberOfReadTagsToWriteToAceFile() == 0 ) return; for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( ! pTag->bWriteToPhdFileNotAceFile_ ) { pTag->writeTagToAceFile( ofsAce ); } } } void Fragment :: fixFragmentPointPositions( TraceFile* pTraceFile ) { // This actually should have already been checked in Tracefile ctor // But do this additional check. if ( bPhdFileHasTraceArrayMinAndMax_ ) { if ( ( ( pTraceFile->nGetNumberOfPoints() - 1 ) != nTraceArrayMaxIndex_ ) || ( 0 != nTraceArrayMinIndex_ ) ) { ostrstream ost; ost << "There is an inconsistency between the phd file and the chromatogram file for read " << soGetName() << " (phd file " << filGetPHDFilename() << " ). The phd says the maximum point position is " << nTraceArrayMaxIndex_ << " while the tracefile says it is " << ( pTraceFile->nGetNumberOfPoints() - 1 ) << ". The phd file says the minimum point position is " << nTraceArrayMinIndex_ << " while the tracefile says it is 0. I suspect what has happened is that while you weren't looking, someone overwrote the original chromatogram file with a new one. Check the file dates on the chromatogram file and the phd file. Re-phredding and re-assembling will fix this problem this time. To prevent this from happening again, find out who/why the chromatogram was switched." << endl << ends; InputDataError ide( ost.str() ); throw ide; } } // tell the inherited Sequence in the LocatedFragment // what the maximum point poisiton is so the point positions // can be complemented as well as the bases // The reason for -1 is that the point positions start at 0. // Hence the number of points is 1 more than the maximum if ( ! bPhdFileHasTraceArrayMinAndMax_ ) { nTraceArrayMaxIndex_ = pTraceFile->nGetNumberOfPoints() - 1; nTraceArrayMinIndex_ = 0; // pSeqPhredCalledBases_ would be null if there is only // a .1 phd file. It would be non-null if this assembly // refers to a higher version phd file and thus the // base calls were obtained from the .1 version if ( pSeqPhredCalledBases_ ) { pSeqPhredCalledBases_->nTraceArrayMaxIndex_ = pTraceFile->nGetNumberOfPoints() - 1; pSeqPhredCalledBases_->nTraceArrayMinIndex_ = 0; } } // fix up the trace peak positions in the case in which // the phd file didn't contain TRACE_ARRAY_MAX_INDEX so // we couldn't determine the complemented peak positions. // Thus determine them now that we have the max index value. if ( ( !bPhdFileHasTraceArrayMinAndMax_ ) && bComp() ) { int nPadded; for( nPadded = nGetStartIndex(); nPadded <= nGetEndIndex(); ++nPadded ) { int nPeakPosition = nGetPointPos( nPadded ); nPeakPosition += ( nTraceArrayMinIndex_ + nTraceArrayMaxIndex_ ); if ( nPeakPosition < 0 || nTraceArrayMaxIndex_ < nPeakPosition ) { ostrstream ost; ost << "bad peak position = " << nPeakPosition << " for base at padded read position (left to right if complemented) " << nPadded << " in fragment " << soGetFragmentName() << endl << " nTraceArrayMinIndex_ = " << nTraceArrayMinIndex_ << " nTraceArrayMaxIndex_ = " << nTraceArrayMaxIndex_ << " peak position before complementing = " << nGetPointPos( nPadded ) << " pTraceFile_->nGetNumberOfPoints() = " << pTraceFile_->nGetNumberOfPoints() << endl << ends; InputDataError ide( ost.str() ); throw ide; } setTracePointPosAtSeqPos( nPadded, nPeakPosition ); } if (pSeqPhredCalledBases_ ) { for( nPadded = pSeqPhredCalledBases_->nGetStartIndex(); nPadded <= pSeqPhredCalledBases_->nGetEndIndex(); ++nPadded ) { int nPeakPosition = pSeqPhredCalledBases_->nGetPointPos( nPadded ); nPeakPosition += ( nTraceArrayMinIndex_ + nTraceArrayMaxIndex_ ); pSeqPhredCalledBases_->setTracePointPosAtSeqPos( nPadded, nPeakPosition ); } } } checkPointPositions(); } // PURPOSE: used for reading the phred base calls (no edits, // and no crossmatch) // HOW IT WORKS: // gets the PHD file with extension .1 FileName Fragment :: filGetPHDFilenameForPhredCalls() { FileName filTemp = filGetPHDFilename(); FileName filTemp2 = ConsEd::pGetConsEd()->filGetPHDDir() + "/" + filTemp.soGetBasenameWithoutVersion() + ".1"; return( filTemp2 ); } // passed a point position range, returns the range of // called base indices that fall within it. If pSeqPhredCalledBases_ // is not null, this returns the unpadded index of this array. If // pSeqPhredCalledBases_ is null, this returns the padded read position // from left to right. void Fragment :: getRangeOfPhredCalledPositions( const int nPointMin, const int nPointMax, int& nCalledBaseMin, int& nCalledBaseMax, bool& bError ) const { // Sequence provides this functionality if (pSeqPhredCalledBases_ ) pSeqPhredCalledBases_->getIndexRangeFromPointPositions(nPointMin, nPointMax, nCalledBaseMin, nCalledBaseMax, bError ); else Sequence::getIndexRangeFromPointPositions(nPointMin, nPointMax, nCalledBaseMin, nCalledBaseMax, bError ); } void Fragment :: checkPointPositions() { for( int nPadded = nGetStartIndex(); nPadded <= nGetEndIndex(); ++nPadded ) { int nPointPosition = nGetPointPos( nPadded ); if (nPointPosition < 0 || nPointPosition > nGetMaxPointPos() ) { ostrstream ost; ost << "Bad point position " << nPointPosition << " at padded position " << nPadded << " in fragment " << soGetFragmentName() << endl; ost << "nTraceArrayMaxIndex_ = " << nTraceArrayMaxIndex_ << " pTraceFile_->nGetNumberOfPoints() = " << pTraceFile_->nGetNumberOfPoints() << endl << ends; InputDataError ide( ost.str() ); throw ide; } } } void Fragment :: addTag( tag* pTag ) { aTags_.insert( pTag ); } void Fragment :: removeTag( tag* pTag ) { tag* pReturnedTag = aTags_.remove( pTag ); assert( pReturnedTag ); } void Fragment :: dumpTags() { cout << "number of tags = " << nGetNumberOfTags() << endl; bool bOutOfOrder = false; if ( nGetNumberOfTags() > 1 ) { for( int nTag = 0; nTag < nGetNumberOfTags() - 1; ++nTag ) { tag* pTagLeft = pGetTag( nTag ); tag* pTagRight = pGetTag( nTag + 1 ); if (! (*pTagLeft < *pTagRight) ) { cout << "tags out of order, tag indices: " << " " << nTag << " " << nTag + 1 << " start: " << pTagLeft->nPaddedConsPosStart_ << " " << pTagRight->nPaddedConsPosStart_ << " ends: " << pTagLeft->nPaddedConsPosEnd_ << " " << pTagRight->nPaddedConsPosEnd_ << endl; bOutOfOrder = true; } } } if (bOutOfOrder) { for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); cout << "tag " << nTag << " at " << (long) pTag << " start, end = " << pTag->nPaddedConsPosStart_ << " " << pTag->nPaddedConsPosEnd_ << " type: " << pTag->soType_ << endl; } } } int Fragment :: nUnorientedPaddedFromOrientedUnpadded( const int nUnpadded ) { int nUnorientedUnpadded; if (bIsComplemented_ ) nUnorientedUnpadded = nComplementUnpaddedIndex( nUnpadded ); else nUnorientedUnpadded = nUnpadded; int nUnorientedPadded = nPaddedIndex( nUnorientedUnpadded ); return( nUnorientedPadded ); } int Fragment :: nOrientedUnpaddedFromUnorientedPadded( const int nPadded ) { int nUnpaddedReadPos = nUnpaddedIndex( nPadded ); if ( bIsComplemented_ ) nUnpaddedReadPos = nComplementUnpaddedIndex( nUnpaddedReadPos ); return( nUnpaddedReadPos ); } void Fragment :: makeCopyOfPhredCallsIfNecessary() { if (!pSeqPhredCalledBases_ ) { // there is no need to have phred calls (separate from current // bases) unless we are going to display them in the trace // window. // Note also that bHasChromat might be true even though there // is no chromat--just because there is CHROMAT_FILE on the DS // line of the ace file if ( bHasChromat() ) pSeqPhredCalledBases_ = pSeqMakeCopyOfPhredCalls(); } } int Fragment :: compareTags( const tag** ppTag1, const tag** ppTag2 ) { if ( **ppTag1 < **ppTag2 ) return( -1 ); else if ( **ppTag2 < **ppTag1 ) return( 1 ); else return( 0 ); } void Fragment :: sortTags() { void* pArray = (void*) aTags_.data(); size_t nNumberOfElementsInArray = aTags_.entries(); size_t nSizeOfAnElement = sizeof( tag* ); qsort( pArray, nNumberOfElementsInArray, nSizeOfAnElement, ( ( int(*) ( const void*, const void*) ) compareTags ) ); if ( ! bTagsAreSorted() ) { PANIC_OST( ost) << "Fragment :: sortTags didn't work" << endl << ends; throw ProgramLogicError( ost.str() ); } } bool Fragment :: bTagsAreSorted() { int nNumberOfEntries = aTags_.entries(); bool bSorted = true; if (nNumberOfEntries >= 2 ) { for( int nIndex = 0; nIndex <= nNumberOfEntries - 2; ++nIndex ) { if ( *aTags_[ nIndex + 1 ] < *aTags_[ nIndex ] ) { bSorted = false; cout << "Elements " << nIndex << " and " << nIndex + 1 << " are out of order." << endl; } } } return( bSorted ); } int Fragment :: nGetNumberOfReadTagsToWriteToAceFile() { int nNumberOfReadTagsToWriteToAceFile = 0; for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( !pTag->bWriteToPhdFileNotAceFile_ ) ++nNumberOfReadTagsToWriteToAceFile; } return( nNumberOfReadTagsToWriteToAceFile ); } const void Fragment :: setDefaultTemplateNameIfNecessary() { if ( soTemplate_.isNull() ) { int nDot = soGetName().first( '.' ); if ( nDot == RW_NPOS ) soTemplate_ = soGetName(); else { if ( nDot > 0 ) { soTemplate_ = soGetName()(0, nDot ); } } } } const RWCString& Fragment :: soGetTemplateName() { setDefaultTemplateNameIfNecessary(); return( soTemplate_ ); } int Fragment :: nGetNumberOfPhredCalledBases() { if ( pSeqPhredCalledBases_ ) return( pSeqPhredCalledBases_->length() ); else { // in this case, the sequence contains the phred called bases return( Sequence::length() ); } } bool Fragment :: bIsDyePrimerNotDyeTerminator() { if ( !soChemistry_.bIsNull() ) { if ( soChemistry_ == "prim" ) return( true ); else if ( soChemistry_ == "term" ) return( false ); } // if reached here, there are 2 possibilities: // Either the CHEM field was not set // or, if it was, it was set to something other than // prim or term (such as unknown or something unrecognizeable ) // Thus use the filename extension to detect it. FileName filTemp = soGetName(); RWCString soExtension = filTemp.soGetExtension(); // if no extension, then assume it is a dye primer if ( soExtension.isNull() ) return( true ); if ( ( soExtension[0] == 's' ) || ( soExtension[0] == 'r' ) || ( soExtension[0] == 'f' ) ) return( true ); else return( false ); } bool Fragment :: bIsWholeCloneTemplateNotSubcloneTemplate() { if ( !soTemplateType_.isNull() ) { if ( soTemplateType_ == "bac" || soTemplateType_ == "cos" ) return( true ); else return( false ); } // if reached here, then the information was not in the phd file if ( soSequenceName_.index( "_BAC" ) != RW_NPOS ) { return( true ); } else if ( soSequenceName_.index( "_bac" ) != RW_NPOS ) { return( true ); } else { return( false ); } } bool Fragment :: bIsForwardNotReverseRead() { if ( nReadType_ == 0 ) setReadTypeFromReadName(); if ( nReadType_ == nUniversalForward ) return( true ); else return( false ); } void Fragment :: setReadTypeFromReadName() { nReadType_ = nGetReadTypeFromReadName( soSequenceName_ ); } bool Fragment :: bIsWalkingNotUniversalPrimerRead() { if ( nReadType_ == 0 ) setReadTypeFromReadName(); if ( nReadType_ == nWalk ) return( true ); else return( false ); } bool Fragment :: bIsUniversalPrimerRead() { if ( nReadType_ == 0 ) setReadTypeFromReadName(); if ( nReadType_ == nUniversalForward || nReadType_ == nUniversalReverse ) return( true ); else return( false ); } int Fragment :: nGetNumberOfVectorBasesInRead() { int nNumberOfVectorBases = 0; for( int nPadded = nGetStartIndex(); nPadded <= nGetEndIndex(); ++nPadded ) { if ( ntideGet( nPadded ).cGetBase() == 'x' ) ++nNumberOfVectorBases; } return( nNumberOfVectorBases ); } int LocatedFragment :: nConsensusPositionFromPoint( const int nPoint ) { int nConsPosOfPoint; int nConsPos = nGetAlignStart(); bool bFound = false; while( !bFound && nConsPos <= nGetAlignEnd() ) { if (nGetPointPosByConsPos( nConsPos ) < nPoint ) ++nConsPos; else bFound = true; } // handle case in which the loop ended because it ran off end of the array if (!bFound) nConsPosOfPoint = nGetAlignEnd(); else { if (nConsPos == nGetAlignStart()) // point lies on or to left of the left edge of the fragment nConsPosOfPoint = nGetAlignStart(); else { // // nPoint lies between nConsPos - 1 and nConsPos // return whichever base nPoint is closest to if (nPoint < nGetLeftPointBoundaryForBase( nConsPos ) ) nConsPosOfPoint = nConsPos - 1; else nConsPosOfPoint = nConsPos; } } return( nConsPosOfPoint ); } // Given a base, this returns the point position between it and the base // to its left int LocatedFragment :: nGetLeftPointBoundaryForBase( const int nConsPos ) { assert( nConsPos >= nGetAlignStart() ); assert( nConsPos <= nGetAlignEnd() ); if ( nConsPos == nGetAlignStart() ) return( 0 ); else { // so nConsPos must have a base to its left int nPointBetween = ( nGetPointPosByConsPos( nConsPos - 1 ) + nGetPointPosByConsPos( nConsPos ) ) / 2; return( nPointBetween ); } } // passed a point position range, returns the range of // consensus based indices that fall within it void LocatedFragment :: getRangeOfConsensusPositions(const int nPointMin, const int nPointMax, int& nConsBaseMin, int& nConsBaseMax, bool& bError ) const { // inherited Sequence provides these in fragment relative terms int nFragBaseMin, nFragBaseMax; Sequence::getIndexRangeFromPointPositions(nPointMin, nPointMax, nFragBaseMin, nFragBaseMax, bError); if (bError) return; // now convert these to consensus relative indices nConsBaseMin = nGetConsPosFromFragIndex(nFragBaseMin); nConsBaseMax = nGetConsPosFromFragIndex(nFragBaseMax); } void LocatedFragment :: getPointRangeForBase(const int nConsPos, int& nLeftPoint, int& nRightPoint ) { nLeftPoint = nGetLeftPointBoundaryForBase( nConsPos ); if ( nConsPos + 1 <= nGetAlignEnd() ) nRightPoint = nGetLeftPointBoundaryForBase( nConsPos + 1 ); else // Special case of the last base. // Its rightmost boundary is the last point--I believe the last // point is right at the base. nRightPoint = nGetPointPosByConsPos( nConsPos ); } void LocatedFragment :: complementLocatedFragmentButNotTraces() { complementLocatedFragmentAndMaybeTraces( false ); } void LocatedFragment :: complementLocatedFragment() { complementLocatedFragmentAndMaybeTraces( true ); } void LocatedFragment :: complementLocatedFragmentAndMaybeTraces( bool bCompTraces ) { // fixed bug: do not modify high quality region if the entire // read is low quality // do not modify aligned region if the entire read is unaligned // It is possible (has occurred at Fred Hutch) to have one // or the other of nQualClipStart_ or nQualClipEnd_ be -1 // and the read not be all low quality. If both are -1, then // the read is totally low quality. It cannot be the case that // the high quality region is just the base at -1 since Phil // said (990702) that high quality segments must be more than // 1 base. He said this is also true with aligned segments. if ( !bWholeReadIsLowQuality_ ) pContig_->complementEndPoints( nQualClipStart_, nQualClipEnd_ ); if ( !bWholeReadIsUnaligned_ ) pContig_->complementEndPoints( nAlignClipStart_, nAlignClipEnd_ ); // use the inheritied sequence member function to invert and // complement the array of nucleotides Sequence :: complementSequence(); if ( pSeqPhredCalledBases_ ) pSeqPhredCalledBases_->complementSequence(); // flip the boolean that indicates complementation from original // (i.e as loaded from .ace) orientation bIsComplemented_ = ! bIsComplemented_; // the fragment has been complemented, now it's alignment must // be complemented as well by flipping the private data members // indicating the start and end of alignment against contig. // MEMBER DATA passed by reference and CHANGED // That's right, Chris, and for that reason, you darn well better // not pass a function! int nEnd = nGetAlignEnd(); pContig_->complementEndPoints( nAlignStartPos_, nEnd ); // if the TraceFile object exists, then the traces and originally // called bases must be complemented as well so the TedWin // display matches the ContigWin if ( bCompTraces ) { if (pTraceFile_) { pTraceFile_->complementTracesAndABICalledBases(); } } for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); pTag->complementSelf( ); } if ( bFoundVectorInsertJunction_ ) { nVectorInsertJunctionConsPos_ = pContig_->nGetEndConsensusIndex() + pContig_->nGetStartConsensusIndex() - nVectorInsertJunctionConsPos_; } } void LocatedFragment :: makeHighQuality( const int nMinConsensusPosition, const int nMaxConsensusPosition ) { for( int n = nMinConsensusPosition; n <= nMaxConsensusPosition; ++n ) { setQualityAtConsPos( n, ucQualityHighEdited ); } } void LocatedFragment :: makeLowQuality( const int nMinConsensusPosition, const int nMaxConsensusPosition ) { for( int n = nMinConsensusPosition; n <= nMaxConsensusPosition; ++n ) { setQualityAtConsPos( n, ucQualityLowEdited ); } } void LocatedFragment :: makeLowQualityToRight( const int nConsensusPosition ) { for( int n = nConsensusPosition; n <= nGetAlignEnd(); ++n ) { setQualityAtConsPos( n, ucQualityLowEdited ); } } void LocatedFragment :: makeLowQualityToLeft( const int nConsensusPosition ) { for( int n = nGetAlignStart(); n <= nConsensusPosition; ++n ) { setQualityAtConsPos( n, ucQualityLowEdited ); } } void LocatedFragment :: changeToXs( const int nMinConsensusPosition, const int nMaxConsensusPosition ) { for( int nConsPos = nMinConsensusPosition; nConsPos <= nMaxConsensusPosition; ++nConsPos ) { overstrikeNtideButLeaveOldPointPosition( nConsPos, 'x', ucQualityLowEdited, false ); // add edit tag } addWideEditTag( nMinConsensusPosition, nMaxConsensusPosition ); } void LocatedFragment :: changeToNs( const int nMinConsensusPosition, const int nMaxConsensusPosition ) { for( int nConsPos = nMinConsensusPosition; nConsPos <= nMaxConsensusPosition; ++nConsPos ) { overstrikeNtideButLeaveOldPointPosition( nConsPos, 'n', ucQualityLowEdited, false ); // add edit tag } addWideEditTag( nMinConsensusPosition, nMaxConsensusPosition ); } void LocatedFragment :: changeToNsToRight( const int nConsensusPositionMin ) { for( int nConsPos = nConsensusPositionMin; nConsPos <= nGetAlignEnd(); ++nConsPos ) { overstrikeNtideButLeaveOldPointPosition( nConsPos, 'n', ucQualityLowEdited, false ); // add edit tag } addWideEditTag( nConsensusPositionMin, nGetAlignEnd() ); } void LocatedFragment :: changeToNsToLeft( const int nConsensusPositionMax ) { for( int nConsPos = nGetAlignStart(); nConsPos <= nConsensusPositionMax; ++nConsPos ) { overstrikeNtideButLeaveOldPointPosition( nConsPos, 'n', ucQualityLowEdited, false ); // add edit tag } addWideEditTag( nGetAlignStart(), nConsensusPositionMax ); } void LocatedFragment :: setChanged() { bChanged_ = true; pContig_->setChanged(); } void LocatedFragment :: moveTagsToRight( const int nMoveToRightAfterThisConsPos ) { adjustAllTags( nMoveToRightAfterThisConsPos, 1 ); } void LocatedFragment :: moveTagsToLeft( const int nMoveToLeftAfterThisConsPos ) { adjustAllTags( nMoveToLeftAfterThisConsPos, -1 ); } void LocatedFragment :: adjustAllTags( const int nAdjustToRightOfThisConsPos, const int nAdjustByThisManyBases ) { for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); pTag->adjustTagPosition( nAdjustToRightOfThisConsPos, nAdjustByThisManyBases ); } } void LocatedFragment :: overstrikeNtide( const int nConsPos, const Ntide nt, const bool bAddEditTag ) { makeCopyOfPhredCallsIfNecessary(); bool bOldNtideWasPad = ntGetFragFromConsPos( nConsPos ).bIsPad(); bool bNewNtideIsPad = nt.bIsPad(); Sequence::setNtideAtPos(nGetFragIndexFromConsPos(nConsPos), nt); pContig_->fixConsensusWhenOverstrikeFragNtide( this, nConsPos, nt ); if (bAddEditTag ) addEditTag( bOldNtideWasPad, bNewNtideIsPad, nConsPos ); } void LocatedFragment :: overstrikeNtideButLeaveOldPointPosition( const int nConsPos, const char cBase, const unsigned char ucQuality, const bool bAddEditTag ) { makeCopyOfPhredCallsIfNecessary(); bool bOldNtideWasPad = ntGetFragFromConsPos( nConsPos ).bIsPad(); bool bNewNtideIsPad = ( cBase == '*' ) ? true : false; Sequence::setBaseAtPos(nGetFragIndexFromConsPos(nConsPos), cBase); setQualityAtConsPos( nConsPos, ucQuality ); Ntide nt = ntGetFragFromConsPos( nConsPos ); pContig_->fixConsensusWhenOverstrikeFragNtide( this, nConsPos, nt ); if (bAddEditTag ) addEditTag( bOldNtideWasPad, bNewNtideIsPad, nConsPos ); } void LocatedFragment :: addWideEditTag( const int nConsPosStart, const int nConsPosEnd ) { bool bFoundNonPad = false; for( int nConsPos = nConsPosStart; nConsPos <= nConsPosEnd && !bFoundNonPad; ++nConsPos ) { if ( !ntGetFragFromConsPos( nConsPos ).bIsPad() ) { bFoundNonPad = true; } } int nConsPosStartOfTag; int nConsPosEndOfTag; if ( ! bFoundNonPad ) { nConsPosStartOfTag = nGetConsPosOfPriorBaseNotAPad( nConsPosStart ); nConsPosEndOfTag = nGetConsPosOfNextBaseNotAPad( nConsPosEnd ); if ( nConsPosStartOfTag == -1 && nConsPosEndOfTag == -1 ) return; if ( nConsPosStartOfTag == -1 ) nConsPosStartOfTag = nConsPosEndOfTag; if ( nConsPosEndOfTag == -1 ) nConsPosEndOfTag = nConsPosStartOfTag; } else { nConsPosStartOfTag = nConsPosStart; nConsPosEndOfTag = nConsPosEnd; } tag* pTag = new tag( this, NULL, // contig "edit", nConsPosStartOfTag, nConsPosEndOfTag, true, // bWriteToPhdFileNotAceFile--edit tags go into the phd file "", // soComment "consed", // soSource "", // current date false ); // bNoTrans aTags_.insert( pTag ); } void LocatedFragment :: addEditTag( const bool bOldNtideWasPad, const bool bNewNtideIsPad, const int nConsPos ) { if (bNewNtideIsPad && bOldNtideWasPad ) { // no change } else if ( !bNewNtideIsPad && !bOldNtideWasPad || bOldNtideWasPad && !bNewNtideIsPad ) { // this is a substitution or an insertion // if there is an insertion tag, don't do anything tag *pTag; if (bThereIsAnEditTagCoveringThisBase( pTag, nConsPos ) ) return; // So we are making a substitution on a base that was // neither a substitution or an insertion itself. pTag = new tag( this, NULL, // contig "edit", nConsPos, nConsPos, true, // bWriteToPhdFileNotAceFile--edit tags go into the phd file "", // soComment "consed", // soSource "", // current date false ); // bNoTrans aTags_.insert( pTag ); } else if ( !bOldNtideWasPad && bNewNtideIsPad ) { // this is a deletion (horrors!) // So we must consolidate it with any other tags that are already // there. removeAnyTagsJustOnThisBase( nConsPos ); fixEndPointsOfTagsNotOnAPad( nConsPos ); if ( bThereIsAnEditTagCoveringThisBaseButNotEndingHere( nConsPos ) ) // no need to add anything return; int nPriorBaseNotAPad = nGetConsPosOfPriorBaseNotAPad( nConsPos ); int nNextBaseNotAPad = nGetConsPosOfNextBaseNotAPad( nConsPos ); tag* pTag = new tag( this, NULL, //contig "edit", nPriorBaseNotAPad, nNextBaseNotAPad, true, // bWriteToPhdFileNotAceFile--edit tags go into the phd file "", // soComment "consed", // soSource "", // current date false ); // bNoTrans aTags_.insert( pTag ); // This newly added tag may overlap some other edit tag. In // that case, consolidate them into a single tag. consolidateEditTags(); } // else if ( !bOldNtideWasPad && bNewNtideIsPad ) } void LocatedFragment :: consolidateEditTags() { // the tags must be in sorted order for the algorithm below to work sortTags(); bool bTryAgainToConsolidate = true; while( bTryAgainToConsolidate ) { bTryAgainToConsolidate = false; // if we find any pair that can be consolidated, then all pairs again for( int nPriorTag = 0; nPriorTag < nGetNumberOfTags() - 1; ++nPriorTag ) { tag* pPriorTag = pGetTag( nPriorTag ); if (pPriorTag->soGetTagType() != "edit" ) continue; // find next edit tag tag* pAfterTag; int nAfterTag = nPriorTag + 1; bool bFound = false; while( !bFound && nAfterTag < nGetNumberOfTags() ) { pAfterTag = pGetTag( nAfterTag ); if ( pAfterTag->soGetTagType() == "edit" ) bFound = true; else ++nAfterTag; } // now we've found a pair of edit tags. See if they overlap. if (bFound ) { // we already know that pPriorTag->nPaddedReadStart_ <= // pAfterTag->nPaddedReadStart_ since the list is sorted if ( pAfterTag->nPaddedConsPosStart_ <= pPriorTag->nPaddedConsPosEnd_ ) { // aha. Consolidate these int nNewStartPadded = MIN( pPriorTag->nPaddedConsPosStart_, pAfterTag->nPaddedConsPosStart_ ); int nNewEndPadded = MAX( pPriorTag->nPaddedConsPosEnd_, pAfterTag->nPaddedConsPosEnd_ ); tag* pConsolidatedTag = new tag( this, NULL, // contig "edit", nNewStartPadded, nNewEndPadded, true, // bWriteToPhdFileNotAceFile--edit tags go into the phd file "", // soComment "consed", // soSource "", // current date false ); // bNoTrans assert( aTags_.remove( pPriorTag ) ); assert( aTags_.remove( pAfterTag ) ); // I don't think I can delete these tags since they are // still referenced by the stack of undo-able operations // So just remove them and leave the objects--just uses // a little extra memory. aTags_.insert( pConsolidatedTag ); sortTags(); bTryAgainToConsolidate = true; } } } // for( int nPriorTag = ... } // while( bTryAgainToConsolidate... } void LocatedFragment :: fixEndPointsOfTagsNotOnAPad( const int nConsPosOfDeletedBase ) { // If there were any tags that started or ended on the base we just // deleted, we must attach those tags to some other non-pad base so we // can write it to the phd file. for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); int nConsPosStart = pTag->nPaddedConsPosStart_; int nConsPosEnd = pTag->nPaddedConsPosEnd_; if ( ( nConsPosStart == nConsPosOfDeletedBase ) || ( nConsPosEnd == nConsPosOfDeletedBase ) ) { // we already deleted any tags that were just on the base // base being deleted, so this tag should cover more than // one base. if ( (nConsPosStart == nConsPosOfDeletedBase ) && (nConsPosEnd == nConsPosOfDeletedBase ) ) continue; int nNewConsPosStart; int nNewConsPosEnd; if ( nConsPosStart == nConsPosOfDeletedBase ) { nNewConsPosStart = nGetConsPosOfNextBaseNotAPad( nConsPosStart ); // leave end of tag alone nNewConsPosEnd = nConsPosEnd; } else { nNewConsPosStart = nConsPosStart; nNewConsPosEnd = nGetConsPosOfPriorBaseNotAPad( nConsPosEnd ); } // we now have the new endpoints for the old tag // we create a new tag and remove the old one to make it easy // to undo this edit action. (If we didn't have to undo the // edit, then we could just modify the old tag to have // different end points.) // fixed bug 980710 in which a comment tag would loose its // comment in this manner tag* pNewTag = new tag( this, NULL, // contig pTag->soType_, nNewConsPosStart, nNewConsPosEnd, pTag->bWriteToPhdFileNotAceFile_, pTag->soComment_, pTag->soSource_, pTag->soDate_, false ); // read tags are never NoTrans aTags_.remove( pTag ); aTags_.insert( pNewTag ); } // if ( ntGetFragFromConsPos... } // for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { } void LocatedFragment :: removeAnyTagsJustOnThisBase( const int nConsPos ) { for( int nTag = 0; nTag < nGetNumberOfTags(); ) { tag* pTag = pGetTag( nTag ); int nConsPosStart = pTag->nPaddedConsPosStart_; int nConsPosEnd = pTag->nPaddedConsPosEnd_; if ( nConsPosStart == nConsPos && nConsPos == nConsPosEnd ) { aTags_.remove( pTag ); } else // don't increment nTag if you just deleted the nTag'th object // since now the nTag'th object will be the next object ++nTag; } } int LocatedFragment :: nGetConsPosOfPriorBaseNotAPad( const int nConsPosOnPad, bool& bError ) { int nConsPosNotPad; bool bFound = false; for( int nConsPos = nConsPosOnPad - 1; nConsPos >= nGetAlignStart() && !bFound; --nConsPos ) { if ( ! ntGetFragFromConsPos( nConsPos ).bIsPad() ) { nConsPosNotPad = nConsPos; bFound = true; } } if (bFound) { bError = false; return( nConsPosNotPad ); } else { bError = true; return ( -1 ); } } int LocatedFragment :: nGetConsPosOfNextBaseNotAPad( const int nConsPosOnPad, bool& bError ) { int nConsPosNotPad; bool bFound = false; for( int nConsPos = nConsPosOnPad + 1; nConsPos <= nGetAlignEnd() && !bFound; ++nConsPos ) { if ( !ntGetFragFromConsPos( nConsPos ).bIsPad() ) { nConsPosNotPad = nConsPos; bFound = true; } } if (bFound) { bError = false; return( nConsPosNotPad ); } else { bError = true; return( -1 ); } } int LocatedFragment :: nGetConsPosOfNextBaseNotAPad( const int nConsPosOnPad ) { bool bError; int nConsPosNotPad = nGetConsPosOfNextBaseNotAPad( nConsPosOnPad, bError ); if (!bError ) return( nConsPosNotPad ); else { nConsPosNotPad = nGetConsPosOfPriorBaseNotAPad( nConsPosOnPad, bError ); // the entire read consists of pads--what can we do? assert( !bError ); return( nConsPosNotPad ); } } int LocatedFragment :: nGetConsPosOfPriorBaseNotAPad( const int nConsPosOnPad ) { bool bError; int nConsPosNotPad = nGetConsPosOfPriorBaseNotAPad( nConsPosOnPad, bError ); if (!bError ) return( nConsPosNotPad ); else { nConsPosNotPad = nGetConsPosOfNextBaseNotAPad( nConsPosOnPad, bError ); // the entire read consists of pads--what can we do? assert( !bError ); return( nConsPosNotPad ); } } bool LocatedFragment :: bThereIsAnEditTagCoveringThisBase( tag*& pReturnedTag, const int nConsPos ) { bool bFound = false; for( int nTag = 0; nTag < nGetNumberOfTags() && !bFound; ++nTag ) { tag* pTag = pGetTag( nTag ); if (pTag->soGetTagType() != "edit" ) continue; int nConsPosStart = pTag->nPaddedConsPosStart_; int nConsPosEnd = pTag->nPaddedConsPosEnd_; if ( nConsPosStart <= nConsPos && nConsPos <= nConsPosEnd ) { bFound = true; pReturnedTag = pTag; } } return( bFound ); } bool LocatedFragment :: bThereIsAnEditTagCoveringThisBaseButNotEndingHere( const int nConsPos ) { bool bThereIs = false; tag* pTag; if ( bThereIsAnEditTagCoveringThisBase( pTag, nConsPos ) ) { int nConsPosStart = pTag->nPaddedConsPosStart_; int nConsPosEnd = pTag->nPaddedConsPosEnd_; if ( (nConsPosStart != nConsPos) && (nConsPosEnd != nConsPos) ) bThereIs = true; } return( bThereIs ); } RWCString LocatedFragment :: soGetPartOfReadUpperOrLower( const int nConsPosStart, const int nConsPosEnd ) { RWCString soPartOfRead; for( int nConsPos = nConsPosStart; nConsPos <= nConsPosEnd; ++nConsPos ) { Ntide nt = ntGetFragFromConsPos( nConsPos ); char cBase = nt.cGetBaseUpperOrLower(); RWCString soBase( cBase ); soPartOfRead += soBase; } return( soPartOfRead ); } RWCString LocatedFragment :: soGetPartOfReadLower( const int nConsPosStart, const int nConsPosEnd ) { RWCString soPartOfRead( (size_t) ( nConsPosEnd - nConsPosStart + 1 ) ); for( int nConsPos = nConsPosStart; nConsPos <= nConsPosEnd; ++nConsPos ) { soPartOfRead += ntGetFragFromConsPos( nConsPos ).cGetBase(); } return( soPartOfRead ); } int LocatedFragment :: nGetAlignStartUnpadded() { return pContig_->nUnpaddedIndex( nGetAlignStart() ); } int LocatedFragment :: nGetAlignEndUnpadded() { return pContig_->nUnpaddedIndex( nGetAlignEnd() ); } int LocatedFragment :: nGetAlignClipStartUnpadded() { return( pContig_->nUnpaddedIndex( nAlignClipStart_ ) ); } int LocatedFragment :: nGetAlignClipEndUnpadded() { return( pContig_->nUnpaddedIndex( nAlignClipEnd_ ) ); } int LocatedFragment :: nGetHighQualityStartUnpadded() { return( pContig_->nUnpaddedIndex( nQualClipStart_ ) ); } int LocatedFragment :: nGetHighQualityEndUnpadded() { return( pContig_->nUnpaddedIndex( nQualClipEnd_ ) ); } int LocatedFragment :: nGetAlignedLengthUnpadded() { if ( bIsWholeReadUnaligned() ) return( 0 ); int nUnpaddedLength = nGetAlignClipEndUnpadded() - nGetAlignClipStartUnpadded() + 1; return( nUnpaddedLength ); } int LocatedFragment :: nGetCenterOfAlignedRegionUnpadded() { int nUnpadded = ( nGetAlignClipStartUnpadded() + nGetAlignClipEndUnpadded() ) / 2; return( nUnpadded ); } unsigned char LocatedFragment :: ucGetReadType() { // first determine if the read is a primer or terminator read // for now just do this by the name bool bPrimerNotTerminator = bIsDyePrimerNotDyeTerminator(); bool bBottomStrand = bComp(); if ( bPrimerNotTerminator ) { if ( bBottomStrand ) return( BOTTOM_STRAND_PRIMER_READS ); else return( TOP_STRAND_PRIMER_READS ); } else { if ( bBottomStrand ) return( BOTTOM_STRAND_TERMINATOR_READS ); else return( TOP_STRAND_TERMINATOR_READS ); } } int LocatedFragment :: nGetStrandAndChemistry() { bool bPrimerNotTerminator = bIsDyePrimerNotDyeTerminator(); bool bBottomStrand = bComp(); if ( bPrimerNotTerminator ) { if ( bBottomStrand ) return( nBottomStrandPrimer ); else return( nTopStrandPrimer ); } else { if ( bBottomStrand ) return( nBottomStrandTerminator ); else return( nTopStrandTerminator ); } } int LocatedFragment :: nGetStartUnclippedFragPos() { if (!bIsWholeReadLowQuality() ) { int nConsPos = nGetHighQualityStart(); int nReadPos = nGetFragIndexFromConsPos( nConsPos ); return( nReadPos ); } else { // The old ace format cannot handle a read with no // high quality. So make a base somewhere in the middle of // the read high quality int nConsPosLength = nGetPaddedSeqLength(); int nMiddle = nConsPosLength / 2; if (nMiddle < 1 ) nMiddle = 1; return( nMiddle ); } } int LocatedFragment :: nGetEndUnclippedFragPos() { if (!bIsWholeReadLowQuality() ) { int nConsPos = nGetHighQualityEnd(); int nReadPos = nGetFragIndexFromConsPos( nConsPos ); return( nReadPos ); } else { // The old ace format cannot handle a read with no // high quality. So make a base somewhere in the middle of // the read high quality int nConsPosLength = nGetPaddedSeqLength(); int nMiddle = nConsPosLength / 2; if (nMiddle < 1 ) nMiddle = 1; return( nMiddle ); } } void LocatedFragment :: moveReadAndReadTagsToNewContig( Contig* pNewContig, const int nNewConsPosFromOldConsPos ) { nAlignStartPos_ += nNewConsPosFromOldConsPos; nQualClipStart_ += nNewConsPosFromOldConsPos; nQualClipEnd_ += nNewConsPosFromOldConsPos; nAlignClipStart_ += nNewConsPosFromOldConsPos; nAlignClipEnd_ += nNewConsPosFromOldConsPos; pContig_ = pNewContig; // move tags transferTagsToNewContig( pNewContig, nNewConsPosFromOldConsPos ); pNewContig->addLocatedFragment( this ); } void LocatedFragment :: transferTagsToNewContig( Contig* pNewContig, const int nNewConsPosFromOldConsPos ) { // move tags, but moveReadAndReadTagsToNewContig is also needed to // adjust the tag positions nPaddedConsPosStart_ and nPaddedConsPosEnd_ for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); pTag->pContig_ = pNewContig; pTag->nPaddedConsPosStart_ += nNewConsPosFromOldConsPos; pTag->nPaddedConsPosEnd_ += nNewConsPosFromOldConsPos; } } void LocatedFragment :: adjustQualAndAlignClippingWhenInsertColumnOfPads( const int nConsInsertPos ){ if ( nConsInsertPos <= nQualClipStart_ ) ++nQualClipStart_; if ( nConsInsertPos <= nQualClipEnd_ ) ++nQualClipEnd_; if ( nConsInsertPos <= nAlignClipStart_ ) ++nAlignClipStart_; if ( nConsInsertPos <= nAlignClipEnd_ ) ++nAlignClipEnd_; } void LocatedFragment :: adjustQualAndAlignClippingWhenRemoveColumnOfPads( const int nConsInsertPos ) { if ( nConsInsertPos <= nQualClipStart_ ) --nQualClipStart_; if ( nConsInsertPos <= nQualClipEnd_ ) --nQualClipEnd_; if ( nConsInsertPos <= nAlignClipStart_ ) --nAlignClipStart_; if( nConsInsertPos <= nAlignClipEnd_ ) --nAlignClipEnd_; } bool LocatedFragment :: bIsWithinACompressionOrG_dropoutTag( const int nConsPos ) { for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( pTag->soType_ == "compression" ) { if ( ( pTag->nPaddedConsPosStart_ <= nConsPos ) && ( nConsPos <= pTag->nPaddedConsPosEnd_ ) ) return( true ); } else if ( pTag->soType_ == "G_dropout" ) { if ( ( pTag->nPaddedConsPosStart_ <= nConsPos ) && ( nConsPos <= pTag->nPaddedConsPosEnd_ ) ) return( true ); } } return( false ); } void LocatedFragment :: setBadPHDFile() { bReadOnly_ = true; for( int nReadPos = nGetStartIndex(); nReadPos <= nGetEndIndex(); ++nReadPos ) { setQualityAtSeqPos( nReadPos, 0 ); } } static RWCRegexp fakeRead1( "\\.a[0-9]*$" ); static RWCRegexp fakeRead2( "\\.c[0-9]*$" ); bool LocatedFragment :: bIsAFakeRead() { // changed Dec 2008 to make any read with a WR item with // referenceSequence to be a fake read, regardless of extension if ( bIsAFakeRead_ ) return true; if ( pCP->bFakeReadsSpecifiedByFilenameExtension_ ) { // check to see if this is a fake read as defined by St Louis' // naming convention if ( !( soGetName()( fakeRead1 ) ).isNull() ) return( true ); if ( !( soGetName()( fakeRead2 ) ).isNull() ) return( true ); return( false ); } else { return( bIsAFakeRead_ ); } } void LocatedFragment :: addUnalignedHighQualityRegionsToList( gotoList* pGotoList, const bool bIncludeTotallyUnalignedReads) { // If the whole read is low quality, there is no high quality unaligned // region if ( bIsWholeReadLowQuality() ) return; // if the whole read is unaligned and has a high quality region, // there is one high quality unaligned region--the high quality region if ( bIsWholeReadUnaligned() ) { if ( bIncludeTotallyUnalignedReads ) { int nHighQualityUnalignedStart = nGetHighQualityStart(); int nHighQualityUnalignedEnd = nGetHighQualityEnd(); maybeAddUnalignedHighQualityRegion( nHighQualityUnalignedStart, nHighQualityUnalignedEnd, pGotoList ); } // Regardless whether we included this read's high quality segment // in the list, there is no point in looking further since // the aligned region is just -1's return; } // below here there is an aligned region if ( bIntervalsIntersect( nGetAlignClipStart(), nGetAlignClipEnd(), nGetHighQualityStart(), nGetHighQualityEnd() ) ) { // this check insures that any unaligned region is low quality. // Hence there is no high quality unaligned region. // llllllllHHHHHHHHHHHHHHHHHHlllllllllllllll // uuuuuaaaaaaaaaaaaaaaaaaaaaaaaaauuuuuuuuu // where l is low quality, H is high quality, u is unaligned, // a is aligned if ( ( nGetAlignClipStart() <= nGetHighQualityStart() ) && ( nGetHighQualityEnd() <= nGetAlignClipEnd() ) ) return; // so there is so unaligned high quality region. there may be 2. if ( nGetHighQualityStart() < nGetAlignClipStart() ) { int nHighQualityUnalignedStart = nGetHighQualityStart(); int nHighQualityUnalignedEnd = nGetAlignClipStart() - 1; maybeAddUnalignedHighQualityRegion( nHighQualityUnalignedStart, nHighQualityUnalignedEnd, pGotoList ); } // if ( pLocFrag->nGetHighQualityStart() if ( nGetAlignClipEnd() < nGetHighQualityEnd() ) { int nHighQualityUnalignedStart = nGetAlignClipEnd() + 1; int nHighQualityUnalignedEnd = nGetHighQualityEnd(); maybeAddUnalignedHighQualityRegion( nHighQualityUnalignedStart, nHighQualityUnalignedEnd, pGotoList ); } // if ( pLocFrag->nGetAlignClipEnd() ... } // if ( bIntersectRegions( pLocFrag->nGetAlignClipStart() else { // one final pathological case (but I saw it!) // If the read looks like this: // aaaaaaaaaaaaaa HHHHHHHHHHHHHHHHHHHH // where the aaa is aligned low quality and the // HHHHHHH is unaligned high quality // In this case, the unaligned high quality region // is just the high quality region. int nHighQualityUnalignedStart = nGetHighQualityStart(); int nHighQualityUnalignedEnd = nGetHighQualityEnd(); maybeAddUnalignedHighQualityRegion( nHighQualityUnalignedStart, nHighQualityUnalignedEnd, pGotoList ); } } void LocatedFragment :: maybeAddUnalignedHighQualityRegion( const int nHighQualityUnalignedStartConsPos, const int nHighQualityUnalignedEndConsPos, gotoList* pGotoList ) { // per Phil's instructions, the nIgnoreUnalignedHighQualitySegmentsShorterThanThis_ // doesn't count pads. Then changed mind since difficult to not // count pads in reads--only in consensus. // Phil decided (7/20/98) to just count the number of bases not // covered by a chimera tag. // There must not be a chimera tag overlapping the // unaligned high quality region. if ( nUnpaddedBasesNotInChimeraTag( nHighQualityUnalignedStartConsPos, nHighQualityUnalignedEndConsPos ) < pCP->nIgnoreUnalignedHighQualitySegmentsShorterThanThis_ ) return; int nHighQualityUnalignedStartUnpadded = pContig_->nUnpaddedIndex( nHighQualityUnalignedStartConsPos ); int nHighQualityUnalignedEndUnpadded = pContig_->nUnpaddedIndex( nHighQualityUnalignedEndConsPos ); int nLength = nHighQualityUnalignedEndUnpadded - nHighQualityUnalignedStartUnpadded + 1; char szTemp[200]; sprintf( szTemp, "%d unaligned high quality", nLength ); gotoItem* pGotoItem = new gotoItem( pContig_, this, // pLocFrag nHighQualityUnalignedStartConsPos, nHighQualityUnalignedEndConsPos, nHighQualityUnalignedStartUnpadded, nHighQualityUnalignedEndUnpadded, szTemp ); pGotoList->addToList( pGotoItem ); } bool LocatedFragment :: bUnalignedHighQualityRegionTooLong( const bool bIncludeTotallyUnalignedReads ) { gotoList myGotoList; addUnalignedHighQualityRegionsToList( &myGotoList, bIncludeTotallyUnalignedReads ); if ( myGotoList.nGetNumGotoItems() > 0 ) return( true ); else return( false ); } bool LocatedFragment :: bIsReadNearEndOfContigAndPointingOut( int& nWhichEndOfContig ) { bool bBottomStrand = bComp(); if ( bBottomStrand ) nWhichEndOfContig = nLeftGap; else nWhichEndOfContig = nRightGap; if ( !pContig_->bIsThereAHighQualitySegment() ) { // If we are calling this routine to check if a read is too far // from its mate, we just can't say so we might as well say // that it is not too far. // In this case there is no way to really know how close the read // is to the "end" of the contig. In fact, the contig may overlap // another contig so the read might overlap another contig. return( true ); } if ( nWhichEndOfContig == nLeftGap ) { int nStartOfRead = nGetAlignEndUnpadded(); if ( nStartOfRead < // in the future, this should use the library-specific // maximum and should also use the high quality segment (since // there can be overlap of the low quality tails of the // contigs) pSub_->pLibrary_->nMaxInsertSize_ + pContig_->nUnpaddedHighQualitySegmentStart_ ) return( true ); else return( false ); } else { // right end of contig // -------------------------------------- (high quality segment // of contig ) // ^ // pContig_->nUnpaddedHighQualitySegmentEnd_ - // pSub_->pLibrary_->nMaxInsertSize_ // -----------------> (no good read) // ----------------> (good read) int nStartOfRead = nGetAlignStartUnpadded(); // must not use pContig_->nUnpaddedHighQualitySegmentEnd_ // directly since it might not yet have been calculated. int nUnpaddedHighQualitySegmentEnd = pContig_->nGetHighQualitySegmentEnd(); if ( nUnpaddedHighQualitySegmentEnd - pSub_->pLibrary_->nMaxInsertSize_ < nStartOfRead ) { return( true ); } else { return( false ); } } } bool LocatedFragment :: bIsReadNearEndOfContig( int& nWhichEnd ) { // this routine just checks if the read is near *either* end // of the contig. It doesn't matter which end and it doesn't // matter which way the read is pointing. if ( !pContig_->bIsThereAHighQualitySegment() ) { // If we are calling this routine to check if a read is too far // from its mate, we just can't say so we might as well say // that it is not too far. // In this case there is no way to really know how close the read // is to the "end" of the contig. In fact, the contig may overlap // another contig so the read might overlap another contig. return( true ); } int nBeginningConsPos = nGetConsPosOfBeginningOfReadUnpadded(); if ( ABS( nBeginningConsPos - pContig_->nUnpaddedHighQualitySegmentStart_ ) < pSub_->pLibrary_->nMaxInsertSize_ ) { nWhichEnd = nLeftGap; return( true ); } if ( ABS( nBeginningConsPos - pContig_->nUnpaddedHighQualitySegmentEnd_ ) < pSub_->pLibrary_->nMaxInsertSize_ ) { nWhichEnd = nRightGap; return( true ); } return( false ); } int LocatedFragment :: nHowFarIsReadFromEndOfHighQualitySegment( bool& bIsThereAHighQualitySegment ) { bIsThereAHighQualitySegment = pContig_->bIsThereAHighQualitySegment(); if ( !bIsThereAHighQualitySegment ) return( 0 ); if ( bComp() ) { // bottom strand read // ------------------------- consensus high quality segment // <----- read // <--------- distance // If read is off high quality segment, then distance is negative. int nStartOfRead = nGetAlignEndUnpadded(); return( nStartOfRead - pContig_->nGetHighQualitySegmentStart() ); } else { // top strand read // ------------------------- consensus high quality segment // -----> // -------> distance int nStartOfRead = nGetAlignStartUnpadded(); return( pContig_->nGetHighQualitySegmentEnd() - nStartOfRead ); } } bool LocatedFragment :: bIsFwdRevPairInSameEndOfSameContig( LocatedFragment* pLocFrag, const int nWhichEnd ) { if ( pLocFrag->pGetContig() != pGetContig() ) return( false ); // this shows an inconsistency, so I'm going to say "no" if ( bComp() == pLocFrag->bComp() ) { fprintf( pAO, "inconsistent reads for template %s--both reads pointing same way in same contig\n", (const char*) soGetTemplateName() ); return( false ); } int nStartOfRead1 = ( bComp() ? nGetAlignEnd() : nGetAlignStart() ); int nStartOfRead2 = ( pLocFrag->bComp() ? pLocFrag->nGetAlignEnd() : pLocFrag->nGetAlignStart() ); if ( ( ABS( nStartOfRead1 - nStartOfRead2 ) ) > consedParameters::pGetConsedParameters()->nPrimersMaxInsertSizeOfASubclone_ ) { fprintf( pAO, "inconsistent read for template %s since reads %d apart\n\n", (const char*) soGetTemplateName(), ABS( nStartOfRead1 - nStartOfRead2 ) ); return( false ); } if ( bComp() ) { if ( nStartOfRead2 < nStartOfRead1 ) return( true ); else { fprintf( pAO, "inconsistent reads for template %s--reads pointing away from each other\n\n", (const char*) soGetTemplateName() ); return( false ); } } else { if ( nStartOfRead1 < nStartOfRead2 ) return( true ); else { fprintf( pAO, "inconsistent reads for template %s--reads pointing away from each other\n\n", (const char*) soGetTemplateName() ); return( false ); } } } bool LocatedFragment :: bFoundVectorInsertJunctionForUniversalPrimerRead( int& nVectorInsertJunctionConsPos ) { if ( !bTriedToFindVectorInsertJunction_ ) { int nFirstVectorInsertJunctionConsPos; int nSecondVectorInsertJunctionConsPos; bool bFoundSecondVectorInsertJunction; findVectorInsertJunctionsInUniversalPrimerRead( nFirstVectorInsertJunctionConsPos, nSecondVectorInsertJunctionConsPos, bFoundSecondVectorInsertJunction ); } nVectorInsertJunctionConsPos = nVectorInsertJunctionConsPos_; return( bFoundVectorInsertJunction_ ); } void LocatedFragment :: findVectorInsertJunctionsInUniversalPrimerRead( int& nFirstVectorInsertJunctionConsPos, // "first" means the one closest to the // end where sequencing started--the right // end for bottom strand reads, the left // end for top strand reads // This is on the not X base at the // junction int& nSecondVectorInsertJunctionConsPos, bool& bFoundSecondVectorInsertJunction ) { bTriedToFindVectorInsertJunction_ = true; bFoundSecondVectorInsertJunction = false; const int nUnknownState = 3; const int nLastWasX = 4; const int nLastWasNotX = 5; int nXToNotX; bool bFoundAPossibleVectorInsertJunction = false; bool bContinueLookingForFirstVectorInsertJunction = true; int nState = nUnknownState; // since the range of padded positions from left to right is nGetStartIndex // to nGetEndIndex, then the range of padded positions in the direction // of sequencing is also nGetStartIndex to nGetEndIndex (although this // may refer to bases from right to left in the case of bottom strand reads). // nComplementPaddedIndex straightens out exactly which base is // referenced. int nUnpaddedIndexInDirectionOfSequencing = nGetUnpaddedStartIndex() - 1; for( int nPaddedIndexInDirectionOfSequencing = nGetStartIndex(); bContinueLookingForFirstVectorInsertJunction && nPaddedIndexInDirectionOfSequencing <= nGetEndIndex(); ++nPaddedIndexInDirectionOfSequencing ) { Ntide nt = ( bComp() ? ntideGet( nComplementPaddedIndex( nPaddedIndexInDirectionOfSequencing ) ) : ntideGet( nPaddedIndexInDirectionOfSequencing ) ); if (nt.bIsPad() ) continue; ++nUnpaddedIndexInDirectionOfSequencing; char cBase = nt.cGetBase(); if ( cBase == 'x' ) { nState = nLastWasX; } else { if ( nState == nLastWasX ) { // have a transition from X to not X nXToNotX = nPaddedIndexInDirectionOfSequencing; bFoundAPossibleVectorInsertJunction = true; } nState = nLastWasNotX; // if we have reached base 125 (or so) and currently it is // not X, then assume we are in the insert // If we reach base 125 and it is an X, then assume we haven't // yet reached the insert. That is the reason this condition // (below) only applies if this base is not an X. if ( nUnpaddedIndexInDirectionOfSequencing > consedParameters::pGetConsedParameters()->nPrimersLookThisFarForForwardVectorInsertJunction_ ) bContinueLookingForFirstVectorInsertJunction = false; } } if (bFoundAPossibleVectorInsertJunction ) { // I want to check to be sure this doesn't happen: // (AAAA is the aligned part of the read.) // --------AAAAAAAAAAAA---xxxxx-----------> // ^ // putative vector/insert junction // This *can* happen with very short insert templates // <--------xxxxxxxxx----AAAAAAAAAAAAA------ // ^ // putative vector/insert junction if ( bComp() ) { nFirstVectorInsertJunctionConsPos = nGetConsPosFromFragIndex( nComplementPaddedIndex( nXToNotX ) ); if ( !bIsWholeReadUnaligned() && nFirstVectorInsertJunctionConsPos < nGetAlignClipStart() ) nFirstVectorInsertJunctionConsPos = nGetAlignClipEnd(); } else { nFirstVectorInsertJunctionConsPos = nGetConsPosFromFragIndex( nXToNotX ); if ( !bIsWholeReadUnaligned() && nFirstVectorInsertJunctionConsPos > nGetAlignClipEnd() ) nFirstVectorInsertJunctionConsPos = nGetAlignClipStart(); } } else { // I'm going to pretend that the first base of the read // is the vector/insert junction. // No. This often fails. Phil and Pat suggested 7/99 to pick // the first aligned base of the read. if ( bIsWholeReadUnaligned() ) { // should actually not be using this read for anything // for the template if it is completely unaligned since // it probably doesn't below there if ( bComp() ) { nFirstVectorInsertJunctionConsPos = nGetAlignEnd(); } else { nFirstVectorInsertJunctionConsPos = nGetAlignStart(); } } else { if ( bComp() ) { nFirstVectorInsertJunctionConsPos = nGetAlignClipEnd(); } else { nFirstVectorInsertJunctionConsPos = nGetAlignClipStart(); } } // if ( bIsWholeReadUnaligned() ) { else } // if (bFoundAPossibleVectorInsertJunction ) { else // now to find the second junction, start where we left off (assuming we // are now in the insert) and continue until we see any X's if ( nState == nLastWasX ) { // this is a problem--that probably means that the entire read // is X's. However, it could also mean that the read has a *very* // short insert, less than // nPrimersLookThisFarForForwardVectorInsertJunction_ // It could also mean that the read is running into cloning vector. // In several of these cases, let's use the beginning of the aligned // region for the vector. fprintf( pAO, "Warning: short insert read: %s ", soGetName().data() ); if ( bIsWholeReadUnaligned() ) { // in this case we don't know where the vector/insert junction // is. (We probably shouldn't use such a read for // finishing--it is full of x's and isn't aligned.) fprintf( pAO, "whole read is unaligned\n" ); } else { fprintf( pAO, "aligned region is %d long from %d to %d\n", nGetAlignClipEndUnpadded() - nGetAlignClipStartUnpadded() + 1, nGetAlignClipStartUnpadded(), nGetAlignClipEndUnpadded() ); bFoundVectorInsertJunction_ = true; if ( bComp() ) nVectorInsertJunctionConsPos_ = nGetAlignClipEnd(); else nVectorInsertJunctionConsPos_ = nGetAlignClipStart(); } return; } // if reached here, we found the first vector/insert junction bFoundVectorInsertJunction_ = true; nVectorInsertJunctionConsPos_ = nFirstVectorInsertJunctionConsPos; // now look for 2nd vector/insert junction. // Look for it after the aligned region. if ( bIsWholeReadUnaligned() ) return; int nConsPosStart; int nIncrement; int nLastNonPad; if ( bComp() ) { nIncrement = -1; nConsPosStart = nAlignClipStart_ - 1; nLastNonPad = nAlignClipStart_; } else { nIncrement = 1; nConsPosStart = nAlignClipEnd_ + 1; nLastNonPad = nAlignClipEnd_; } for( int nConsPos = nConsPosStart; ( bComp() ? nConsPos >= nGetAlignStart() : nConsPos <= nGetAlignEnd()); nConsPos += nIncrement ) { if ( ntGetFragFromConsPos( nConsPos ).bIsPad() ) continue; if ( ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) { // aha! found an x after the aligned region nSecondVectorInsertJunctionConsPos = nLastNonPad; bFoundSecondVectorInsertJunction = true; break; } nLastNonPad = nConsPos; } } void LocatedFragment :: findVectorInsertJunctionInWalkingRead( bool& bFoundXsAfterAligned, int& nLastBaseBeforeVector ) { // the most reliable method of finding the insert is to look for // the aligned part of the read. if ( bWholeReadIsUnaligned_ ) { bFoundXsAfterAligned = false; return; } // we are looking for the 2nd vector/insert junction // for top strand reads: // uuuuuAAAAAAAAAAAxxxxxxx look this way -> // ^ // start here // for bottom strand reads: // xxxxxxAAAAAAAAuuuuu // ^ // start here, looking this way <- int nIncrement; int nFirstPlaceToLookForVectorInsertJunction; if ( bComp() ) { nFirstPlaceToLookForVectorInsertJunction = nAlignClipStart_ - 1; nIncrement = -1; } else { nFirstPlaceToLookForVectorInsertJunction = nAlignClipEnd_ + 1; nIncrement = 1; } // this is to fix a purify bug in which nLastNonPadConsPos never // get set because the loop below immediately finds an x int nLastNonPadConsPos = ( bComp() ? nAlignClipStart_ : nAlignClipEnd_ ); for( int nConsPos = nFirstPlaceToLookForVectorInsertJunction; ( bIsComplemented_ ? ( nConsPos >= nGetAlignStart() ) : ( nConsPos <= nGetAlignEnd() ) ); nConsPos += nIncrement ) { if ( ntGetFragFromConsPos( nConsPos ).bIsPad() ) continue; if ( ntGetFragFromConsPos( nConsPos ).cGetBase() == 'x' ) { bFoundXsAfterAligned = true; nLastBaseBeforeVector = nLastNonPadConsPos; return; } else nLastNonPadConsPos = nConsPos; } bFoundXsAfterAligned = false; // however, it is useful to at least indicate that we found aligned // read and how far it went if ( bIsComplemented_ ) nLastBaseBeforeVector = nAlignClipStart_; else nLastBaseBeforeVector = nAlignClipEnd_; } bool LocatedFragment :: bHasSeriousHighQualityDiscrepancies() { if ( bIsWholeReadUnaligned() ) return( false ); int nQualityThreshold = consedParameters::pGetConsedParameters()->nQualityThresholdForFindingHighQualityDiscrepancies_; // Phil decided Dec 27, 1997 to only show high quality // discrepancies if in the aligned part of the read for( int nConsPos = nGetAlignClipStart(); nConsPos <= nGetAlignClipEnd(); ++nConsPos ) { Quality ucQuality = ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99(); if ( ucQuality >= nQualityThreshold ) { if ( ntGetFragFromConsPos( nConsPos ).cGetBase()!= pContig_->cGetConsensusBase( nConsPos ) ) { // we've found a high quality discrepancy if ( bIsWithinACompressionOrG_dropoutTag( nConsPos ) ) continue; // we've found a SERIOUS high quality discrepancy return( true ); } } } return( false ); } void LocatedFragment :: readPHDFileAndSetQualitiesAndPeakPositions( ) { // for turbo-startup if ( !bAlreadyReadPhdFile_ ) readPHDFileToSetQualitiesAndPeakPositions2( this, false ); // ignore tags bool bAssignPeakPositions = bHasChromat(); assignQualityAndPeakPositionsToPads( bAssignPeakPositions ); // bAndPeakPositions } void LocatedFragment :: readPhredCalls() { // just throw all these away RWCString soTimestamp; RWCString soABIThumbprint; RWCString soPhredVersion; RWCString soCallMethod; RWCString soChemistry; RWCString soDye; int nQualityLevels; readPhredCalls( soTimestamp, soABIThumbprint, soPhredVersion, soCallMethod, soChemistry, soDye, nQualityLevels, true // bIgnoreTagsAndWRItems ); } void LocatedFragment :: readPhredCalls( RWCString& soTimestampFromPHDFile, RWCString& soABIThumbprint, RWCString& soPhredVersion, RWCString& soCallMethod, RWCString& soChemistry, RWCString& soDye, int& nQualityLevels, const bool bIgnoreTagsAndWRItems ) { pSeqPhredCalledBases_ = new Sequence(); int nArraySize; char* pBaseArray; int* pQualityArray; int* pPointPositionArray; // information to be read from PHD file and then thrown away int nNumberOfUnpaddedBases; FileName filPHD = filGetPHDFilenameForPhredCalls(); nArraySize = nReadPHDFileForSize( filPHD ); pBaseArray = (char *) calloc( nArraySize, sizeof( char ) ); pQualityArray = (int *) calloc( nArraySize, sizeof( int ) ); pPointPositionArray = (int *) calloc( nArraySize, sizeof( int ) ); bool bFoundTraceArrayMaxIndex; readPHDFileAndMaybeComplement( filPHD, bComp(), this, nArraySize, pBaseArray, pQualityArray, pPointPositionArray, soTimestampFromPHDFile, soABIThumbprint, soPhredVersion, soCallMethod, soChemistry, soDye, pSeqPhredCalledBases_->nTraceArrayMinIndex_, pSeqPhredCalledBases_->nTraceArrayMaxIndex_, bFoundTraceArrayMaxIndex, nQualityLevels, nNumberOfUnpaddedBases, bIgnoreTagsAndWRItems ); pSeqPhredCalledBases_->createPointPosArray( bFoundTraceArrayMaxIndex, pSeqPhredCalledBases_->nTraceArrayMaxIndex_, nArraySize, false ); // bFillUpArray // copy in the originally called bases and their point positions pSeqPhredCalledBases_->resize( nNumberOfUnpaddedBases ); for( int nZeroBasedIndex = 0; nZeroBasedIndex < nNumberOfUnpaddedBases; nZeroBasedIndex++ ) { // make - into N char c = tolower( *(pBaseArray + nZeroBasedIndex) ); int nPointPosition = *(pPointPositionArray + nZeroBasedIndex ); int nQuality = *(pQualityArray + nZeroBasedIndex ); Ntide nt(c, nQuality ); pSeqPhredCalledBases_->append(nt); pSeqPhredCalledBases_->appendPointPos( nPointPosition ); } // for( ... free( (char*) pBaseArray ); free( (char*) pQualityArray ); free( (char*) pPointPositionArray ); } void LocatedFragment :: readPHDFileForAddedRead() { // notice this ignores tags and WR items at this point // We'll have to read the phd file again to get these--we first // need the alignment before we can convert these to padded consensus // positions Done. Now tags and WR items are read. readPhredCalls( soTimestamp_, soABIThumbprint_, soPhredVersion_, soCallMethod_, soChemistry_, soDye_, nQualityLevels_, true // bIgnoreTagsAndWRItems ); nTraceArrayMinIndex_ = pSeqPhredCalledBases_->nTraceArrayMinIndex_; nTraceArrayMaxIndex_ = pSeqPhredCalledBases_->nTraceArrayMaxIndex_; if ( nTraceArrayMaxIndex_ == 0 ) bPhdFileHasTraceArrayMinAndMax_ = false; else bPhdFileHasTraceArrayMinAndMax_ = true; bAlreadyReadPhdFile_ = true; } bool LocatedFragment :: bIsChimeric() { for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( pTag->soType_ == "chimera" ) { return( true ); } } return( false ); } int LocatedFragment :: nGetHighQualitySegmentLengthSortOfUnpadded() { if ( bWholeReadIsLowQuality_ ) return( 0 ); else { return( pContig_->nUnpaddedIndex( nQualClipEnd_ ) - pContig_->nUnpaddedIndex( nQualClipStart_ ) + 1 ); } } void LocatedFragment :: zeroErrorProbabilitiesWhereChoseMinilibrary() { // assumes that this read is a universal primer read int nUnpaddedLeft1; int nUnpaddedRight1; getFromStartOfInsertToEndOfContig( nUnpaddedLeft1, nUnpaddedRight1 ); if ( bComp() ) { // left gap nUnpaddedLeft1 = pContig_->pUnpaddedErrorProbabilities_->nGetStartIndex(); } else { // right gap nUnpaddedRight1 = pContig_->pUnpaddedErrorProbabilities_->nGetEndIndex(); } for( int nPos = nUnpaddedLeft1; nPos <= nUnpaddedRight1; ++nPos ) { (*pContig_->pUnpaddedErrorProbabilities_)[ nPos ] = 0.0; } } void LocatedFragment :: getFromStartOfInsertToEndOfContig( int& nUnpaddedLeft, int& nUnpaddedRight ) { int nFirstVectorInsertJunctionConsPos; int nSecondVectorInsertJunctionConsPos; bool bFoundFirstVectorInsertJunction; bool bFoundSecondVectorInsertJunction; findVectorInsertJunctionsInUniversalPrimerRead( nFirstVectorInsertJunctionConsPos, nSecondVectorInsertJunctionConsPos, bFoundSecondVectorInsertJunction ); if ( bComp() ) { nUnpaddedLeft = pContig_->nGetUnpaddedStartIndex(); nUnpaddedRight = pContig_->nUnpaddedIndex( nFirstVectorInsertJunctionConsPos ); } else { nUnpaddedLeft = pContig_->nUnpaddedIndex( nFirstVectorInsertJunctionConsPos ); nUnpaddedRight = pContig_->nGetUnpaddedEndIndex(); } } void LocatedFragment :: convertVectorTagsToXs() { for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( pTag->soType_ == "vector" || pTag->soType_ == "sequencingVector" || pTag->soType_ == "cloningVector" ) { for( int nConsPos = pTag->nPaddedConsPosStart_; nConsPos <= pTag->nPaddedConsPosEnd_; ++nConsPos ) { if ( !ntGetFragFromConsPos( nConsPos ).bIsPad() ) { Sequence::setBaseAtPos(nGetFragIndexFromConsPos(nConsPos), 'x' ); } } } } } bool LocatedFragment :: bIsInAlignedPartOfReadAndNotTooCloseToUnalignedPart( const int nConsPos ) { if ( bIsWholeReadUnaligned() ) return( false ); if ( nConsPos < ( nGetAlignClipStart() + pCP->nIgnoreHighQualityDiscrepanciesThisManyBasesFromEndOfAlignedRegion_ ) ) return( false ); if ( ( nGetAlignClipEnd() - pCP->nIgnoreHighQualityDiscrepanciesThisManyBasesFromEndOfAlignedRegion_ ) < nConsPos ) return( false ); return( true ); } int LocatedFragment :: nUnpaddedBasesNotInChimeraTag( const int nConsPosStart, const int nConsPosEnd ) { RWTValVector aBasesInRegion( nConsPosEnd - nConsPosStart + 1, false ); for( int nTag = 0; nTag < nGetNumberOfTags(); ++nTag ) { tag* pTag = pGetTag( nTag ); if ( pTag->soType_ == "chimera" ) { int nIntersectLeft; int nIntersectRight; if ( bIntersect( nConsPosStart, nConsPosEnd, pTag->nPaddedConsPosStart_, pTag->nPaddedConsPosEnd_, nIntersectLeft, nIntersectRight ) ) { // set flag to true wherever there is a chimera tag for( int nConsPos = nIntersectLeft; nConsPos <= nIntersectRight; ++nConsPos ) { aBasesInRegion[ nConsPos - nConsPosStart ] = true; } } // if ( bIntersect ... } // if ( pTag->soType_ ... } // now count how many base pairs are not covered by chimera tags int nUncoveredByChimeraTag = 0; for( int nConsPos = nConsPosStart; nConsPos <= nConsPosEnd; ++nConsPos ) { if ( ! aBasesInRegion[ nConsPos - nConsPosStart ] ) { if ( !ntGetFragFromConsPos( nConsPos ).bIsPad() && ntGetFragFromConsPos( nConsPos ).cGetBase() != 'x' ) ++nUncoveredByChimeraTag; } } return( nUncoveredByChimeraTag ); } static double dAmountToSubtractFrom = 0.05; bool LocatedFragment :: bThereIsHighQualitySegmentOffConsensus( const int nWhichEndOfConsensus, int& nLengthOfHighQualityVectorSegment, int& nReadPosHighQualityVectorLeft, int& nReadPosHighQualityVectorRight ) { // Users often will convert to x's (quality 98) bases off the consensus. // This will mask the original qualities. So I must use the original // quality values from the .phd.1 // Note that the bases may have been converted to x's. Since I will // shortly need the original base calls anyway (in >bDoesEitherEndOfVectorMatchThisPartOfRead ), I will read them here. if ( !pSeqPhredCalledBases_ ) { readPhredCalls(); } // But what is the range of base positions that I should use for // this? I will convert to unpadded read positions (in the direction // of sequencing) for the *current* version of the read. This may // or may not be the same as the original read (since there may have // been insertions or deletions). Then I will use those coordinates. int nUnpaddedReadStart; int nUnpaddedReadEnd; // the oriented unpadded read position of // the left end of the read int nIncrement; if ( nWhichEndOfConsensus == nLeftGap ) { // cccccccccccccccccccc (consensus) // rrrrrrrrrr (read) // ^ ^ // end // start // (part of read off consensus) int nConsPosOffConsensusStart = pContig_->nGetStartIndex() - 1; // check that this lies within the read if ( ! ( nGetAlignStart() <= nConsPosOffConsensusStart && nConsPosOffConsensusStart <= nGetAlignEnd() ) ) { return( false ); } // convert to read coordinates nUnpaddedReadStart = nConsPosToUnpaddedFragPos( nConsPosOffConsensusStart ); // fix for bug 8/2005 in which edited read has more bases than // pSeqPhredCalledBases_ giving subscript error if ( nUnpaddedReadStart > pSeqPhredCalledBases_->nGetUnpaddedEndIndex() ) nUnpaddedReadStart = pSeqPhredCalledBases_->nGetUnpaddedEndIndex(); nUnpaddedReadEnd = pSeqPhredCalledBases_->nGetUnpaddedStartIndex(); nIncrement = -1; // this loop will decrement from nUnpaddedReadStart (which should be big) // to nUnpaddedReadEnd (which should be 1). Guard against going around // forever: if ( nUnpaddedReadStart < nUnpaddedReadEnd ) return( false ); } else if ( nWhichEndOfConsensus == nRightGap ) { // cccccccccccccccccc (consensus ) // rrrrrrrrrrrrrrrrrrrr (read) // ^ ^ // start end // (part of read off consensus ) int nConsPosOffConsensusStart = pContig_->nGetEndIndex() + 1; // check for this: // ccccccccccccccccc (consensus ) // rrrrrrrrr ( read does not stick out from consensus ) // check that this position is within the read if ( ! ( nGetAlignStart() <= nConsPosOffConsensusStart && nConsPosOffConsensusStart <= nGetAlignEnd() ) ) { return( false ); } // convert to read coordinates in the direction of sequencing // (since that will be the coordinate of pSeqPhredCalledBases_) // No it won't! pSeqPhredCalledBases_ is always in the left to // right orientation, regardless whether read is complemented or // not. That is, the leftmost base is always base 1. If you // complement the contig and hence the read, the base on the other // end of the read, which is now the leftmost, is base 1. nUnpaddedReadStart = nConsPosToUnpaddedFragPos( nConsPosOffConsensusStart ); // fix for bug 8/2005 in which edited read has more bases than // pSeqPhredCalledBases_ giving subscript error if ( nUnpaddedReadStart < pSeqPhredCalledBases_->nGetUnpaddedStartIndex() ) nUnpaddedReadStart = pSeqPhredCalledBases_->nGetUnpaddedStartIndex(); nUnpaddedReadEnd = pSeqPhredCalledBases_->nGetUnpaddedEndIndex(); nIncrement = 1; // this loop will increment from nUnpaddedReadStart to nUnpaddedReadEnd // Guard against going around forever. if ( nUnpaddedReadEnd < nUnpaddedReadStart ) { return( false ); } } else assert( false ); // now figure out the high quality segment (if there is one). This is // a high quality segment from nUnpaddedReadStart to nUnpaddedReadEnd // (these may be in reverse order--it depends on nIncrement) double dBestSum = 0.0; int nUnpaddedReadPosOfBestSum; double dSumFromBeginning = 0.0; for( int nUnpaddedReadPos = nUnpaddedReadStart; nUnpaddedReadPos != nUnpaddedReadEnd; nUnpaddedReadPos += nIncrement ) { Quality qual = pSeqPhredCalledBases_->ntideGet( nUnpaddedReadPos ).qualGetQualityWithout98or99(); double dAdjustedError = dAmountToSubtractFrom - dErrorRateFromQuality[ qual ]; dSumFromBeginning += dAdjustedError; if ( dSumFromBeginning > dBestSum ) { dBestSum = dSumFromBeginning; // the high quality segment starts the base *after* the base // that makes the least sum base nUnpaddedReadPosOfBestSum = nUnpaddedReadPos; } } // check if there was a maximal segment if ( dBestSum == 0.0 ) return( false ); // so maximal segment goes from nUnpaddedReadStart to // nUnpaddedReadPosOfBestSum // but which is the bigger and which is the smaller number? if ( nUnpaddedReadStart <= nUnpaddedReadPosOfBestSum ) { nReadPosHighQualityVectorLeft = nUnpaddedReadStart; nReadPosHighQualityVectorRight = nUnpaddedReadPosOfBestSum; } else { nReadPosHighQualityVectorLeft = nUnpaddedReadPosOfBestSum; nReadPosHighQualityVectorRight = nUnpaddedReadStart; } // while I'm at it, calculate the length of the high quality vector // segment nLengthOfHighQualityVectorSegment = nReadPosHighQualityVectorRight - nReadPosHighQualityVectorLeft + 1; return( true ); } bool LocatedFragment :: bDoesEitherEndOfVectorMatchThisPartOfRead( const int nThisReadProtrudesIntoWhichGap, const RWCString& soVectorSequence, const RWCString& soComplementedVectorSequence, const int nReadPosHighQualityVectorLeft, const int nReadPosHighQualityVectorRight, int& nMatchesWhichEndOfVector ) { // now take the high quality segment and do a pinned alignment against both // ends of the vector and see which matches best // We want the bases from nReadPosHighQualityVectorLeftOfBestRead // to nReadPosHighQualityVectorRightOfBestRead of pBestRead // Unfortunately, they are probably mostly // (or all) x's. Thus we need to the raw phred calls from // pSeqPhredCalledBases_ // First convert // to unpadded read positions, since that is what we will have when // we read the phd file. // will we ever need to complement these bases? Yes. In order to compare // it to the vector, let's have the convention of using the strand // that is going *into* the vector. Thus it is top strand on the // right end of the contig and bottom strand on the left end of the contig. // When we compare this with the vector, we will use the uncomplemented // beginning of the vector, and the reverse complemented end of the vector. // By doing this, we will be able to look for alignment without having // to try both orientations. RWCString soBasesToCompare = ( (size_t) ( nReadPosHighQualityVectorRight - nReadPosHighQualityVectorLeft + 1 ) ); // order the bases so they are in the direction away from // the contig // ccccccccccccccccccccc // bbbbbbbbbbb bbbbbbbbbbbb // bbbbbb bbbbbb // -> // complement if ( nThisReadProtrudesIntoWhichGap == nLeftGap ) { // must reverse complement the bases so the beginning of the sequence // will be at the vector/insert junction for( int nReadPos = nReadPosHighQualityVectorRight; nReadPos >= nReadPosHighQualityVectorLeft; --nReadPos ) { soBasesToCompare += cComplementBase( pSeqPhredCalledBases_->ntideGet( nReadPos ).cGetBase() ); } } else if ( nThisReadProtrudesIntoWhichGap == nRightGap ) { for( int nReadPos = nReadPosHighQualityVectorLeft; nReadPos <= nReadPosHighQualityVectorRight; ++nReadPos ) { soBasesToCompare += pSeqPhredCalledBases_->ntideGet( nReadPos ).cGetBase(); } } else assert( false ); // let's not use too many bases... if ( soBasesToCompare.length() > pCP->nRestrictionDigestMaximumBasesToCompareToVector_ ) { soBasesToCompare.truncate( pCP->nRestrictionDigestMaximumBasesToCompareToVector_ ); } // For the length of vector to compare it to, let's use about the same // length, maybe a little longer to allow for indel's // The direction of the sequence should be in the direction // into the vector. That means just the first n bases at the beginning // and the last n bases at the end but reverse/complemented. int nNumberOfVectorBases = MIN( soBasesToCompare.length() + soBasesToCompare.length()/10, soVectorSequence.length() ); RWCString soVectorSequenceLeftEnd = soVectorSequence( 0, nNumberOfVectorBases ); const int nGapPenalty = 4; const int nMatchBoost = 1; const int nMismatchPenalty = 2; int nBestIndexBasesToCompareIndex1; int nBestIndexVectorSequenceLeftEnd; int nLeftEndSWScore; extendAlignment( soBasesToCompare.data(), soVectorSequenceLeftEnd.data(), soBasesToCompare.length(), soVectorSequenceLeftEnd.length(), &nBestIndexBasesToCompareIndex1, &nBestIndexVectorSequenceLeftEnd, &nLeftEndSWScore, nGapPenalty, nMatchBoost, nMismatchPenalty ); // print alignment as a sanity check. Handle case in which // the alignment is not extended even one base. if ( nBestIndexBasesToCompareIndex1 > 0 ) { RWCString soBasesToCompareTruncated = soBasesToCompare(0, nBestIndexBasesToCompareIndex1 ); RWCString soVectorSequenceTruncated = soVectorSequenceLeftEnd( 0, nBestIndexVectorSequenceLeftEnd ); RWCString soAlignment1; RWCString soAlignment2; int nScore; getAlignment( soBasesToCompareTruncated.data(), soVectorSequenceTruncated.data(), nGapPenalty, nMatchBoost, nMismatchPenalty, soAlignment1, soAlignment2, nScore ); printAlignment( soAlignment1, soAlignment2 ); assert( nScore == nLeftEndSWScore ); } RWCString soVectorSequenceRightEnd = soComplementedVectorSequence( 0, nNumberOfVectorBases ); int nBestIndexBasesToCompareIndex2; int nBestIndexVectorSequenceRightEnd; int nRightEndSWScore; extendAlignment( soBasesToCompare.data(), soVectorSequenceRightEnd.data(), soBasesToCompare.length(), soVectorSequenceRightEnd.length(), &nBestIndexBasesToCompareIndex2, &nBestIndexVectorSequenceRightEnd, &nRightEndSWScore, nGapPenalty, nMatchBoost, nMismatchPenalty ); // print alignment as a sanity check. Handle case in which // the alignment is not extended even one base. if ( nBestIndexBasesToCompareIndex2 > 0 ) { RWCString soBasesToCompareTruncated = soBasesToCompare( 0, nBestIndexBasesToCompareIndex2 ); RWCString soVectorSequenceTruncated = soVectorSequenceRightEnd( 0, nBestIndexVectorSequenceRightEnd ); RWCString soAlignment1; RWCString soAlignment2; int nScore; getAlignment( soBasesToCompareTruncated.data(), soVectorSequenceTruncated.data(), nGapPenalty, nMatchBoost, nMismatchPenalty, soAlignment1, soAlignment2, nScore ); printAlignment( soAlignment1, soAlignment2 ); assert( nScore == nRightEndSWScore ); } // let's check that at least one of them is acceptably good. // How good is acceptably good? How about 20 perfectly matching bases? const int nNumberOfPerfectlyMatchingBases = 20; int nAcceptablyGood = nMatchBoost * nNumberOfPerfectlyMatchingBases; if ( nLeftEndSWScore < nAcceptablyGood && nRightEndSWScore < nAcceptablyGood ) { int nUnpaddedConsPosLeft = pContig_->nUnpaddedIndex( nGetConsPosFromUnpaddedFragPos( nReadPosHighQualityVectorLeft ) ); int nUnpaddedConsPosRight = pContig_->nUnpaddedIndex( nGetConsPosFromUnpaddedFragPos( nReadPosHighQualityVectorRight ) ); fprintf( stderr, "Read %s was not useful for determining which end of vector the %s end of contig %s is attached to. (Score were %d (left) and %d (right)) This is because neither end of the vector matched very well to this read. (The sequence from read this read from %d to %d is %s (%s))\n", soGetName().data(), szWhichGap( nThisReadProtrudesIntoWhichGap ), pContig_->soGetName().data(), nLeftEndSWScore, nRightEndSWScore, nUnpaddedConsPosLeft, nUnpaddedConsPosRight, soBasesToCompare.data(), ( nThisReadProtrudesIntoWhichGap == nLeftGap ? "reverse complemented so extends into vector" : "not complemented" ) ); return( false ); } else { fprintf( stderr, "read %s in %s end of %s has score %d to left end of vector and %d to right end of vector.\n", soGetName().data(), szWhichGap( nThisReadProtrudesIntoWhichGap ), pContig_->soGetName().data(), nLeftEndSWScore, nRightEndSWScore ); if ( nLeftEndSWScore < nRightEndSWScore ) nMatchesWhichEndOfVector = nRightGap; else nMatchesWhichEndOfVector = nLeftGap; return( true ); } } // so far, this is used to tell whether 2 reads come from the // same subclone template, and thus may have the same mutation bool LocatedFragment :: bIsThisReadFromTheSameTemplate( LocatedFragment* pLocFrag2 ) { if ( bIsWholeCloneTemplateNotSubcloneTemplate() || pLocFrag2->bIsWholeCloneTemplateNotSubcloneTemplate() ) return( false ); // if reached here, neither are whole clone templates if ( soTemplate_ == pLocFrag2->soTemplate_ ) return( true ); else return( false ); } Contig* LocatedFragment :: pMakeAContigOutOfThisRead() { RWCString soNewContigName = ConsEd::pGetConsEd()->pGetAssembly()->soGetNewContigName(); Contig* pNewContig = new Contig( soNewContigName.data(), 1, // new number of reads 1, // new number of base segments false, // complemented from the way // phrap created it ConsEd::pGetAssembly() ); pCP->aListOfContigNamesForMiniassembly_.insert( pNewContig->soGetName() ); pNewContig->resize( nGetPaddedSeqLength() ); for( int nReadPos = 1; nReadPos <= nGetPaddedSeqLength(); ++nReadPos ) { pNewContig->append( Sequence::ntideGet( nReadPos ) ); } // do not need to worry about pBigPeakPositions_ or pLittlePeakPositions_ // since these will stay with the LocatedFragment ConsEd::pGetConsEd()->pGetAssembly()->addContig( pNewContig ); int nOldAlignStart = nAlignStartPos_; nAlignStartPos_ = 1; pContig_ = pNewContig; int nNewConsPosFromOldConsPos = 1 - nOldAlignStart; // keep the quality clipping where they used to be nQualClipStart_ += nNewConsPosFromOldConsPos; nQualClipEnd_ += nNewConsPosFromOldConsPos; // (if the whole read is low quality, leave it that way) // but make the entire read aligned, regardless what it was before nAlignClipStart_ = 1; nAlignClipEnd_ = nGetPaddedSeqLength(); bWholeReadIsUnaligned_ = false; pNewContig->addLocatedFragment( this ); // this is a single base segment which is the entire length of the read BaseSegment* pNewBaseSegment = new BaseSegment( this, 1, nGetPaddedSeqLength() ); pNewContig->baseSegArray_.addPtrToNewBaseSeg( pNewBaseSegment ); assert( pNewContig->baseSegArray_.bGetDataStructureOk( true ) ); pNewContig->updateFirstAndLastDisplayableContigPos(); // updateLastDisplayableContigPos must come before // setPaddedPositionsArray since the latter uses // ntGetCons which does a check that the nConsPos // is within the displayble contig positions pNewContig->setUnpaddedPositionsArray(); pNewContig->setPaddedPositionsArray(); // fix positions of the read tags on the removed read transferTagsToNewContig( pNewContig, nNewConsPosFromOldConsPos ); return( pNewContig ); } void LocatedFragment :: trimBackAlignedRegion( const int nAlignedRegionLeftConsPos, const int nAlignedRegionRightConsPos ) { // can't trim back nothing if ( bWholeReadIsUnaligned_ ) return; int nOldStart = nAlignClipStart_; int nOldEnd = nAlignClipEnd_; bool bOldWhole = bWholeReadIsUnaligned_; if ( !bIntersect( nAlignClipStart_, nAlignClipEnd_, nAlignedRegionLeftConsPos, nAlignedRegionRightConsPos, nAlignClipStart_, nAlignClipEnd_ ) ) // there is no longer any aligned region bWholeReadIsUnaligned_ = true; // report the change if ( bOldWhole != bWholeReadIsUnaligned_ || nOldStart != nAlignClipStart_ || nOldEnd != nAlignClipEnd_ ) { // do *not* use nUnpaddedIndex since setUnpaddedPositionsArray() // has not yet been called. cerr << "read " << soGetName() << " alignment changed from " << nOldStart << "-" << nOldEnd << " " << szWholeRead( bOldWhole ) << " to " << nAlignClipStart_ << "-" << nAlignClipEnd_ << " " << szWholeRead( bWholeReadIsUnaligned_ ) << " (in padded consensus coordinates)" << endl; } } // used for subcloneTTemplate::markTemplateBadIfInsertSeemsTooBig int LocatedFragment :: nGetFirstBaseOfInsertUnpadded() { if ( bIsWalkingNotUniversalPrimerRead() ) { if ( bIsWholeReadUnaligned() ) { // we are somewhere in the middle of the insert // so it really doesn't matter which end we take return( nGetAlignStartUnpadded() ); } else { if ( bComp() ) { return( nGetAlignClipEndUnpadded() ); } else { return( nGetAlignClipStartUnpadded() ); } } } else { int nVectorInsertJunction; if ( bFoundVectorInsertJunctionForUniversalPrimerRead( nVectorInsertJunction ) ) { return( pContig_->nUnpaddedIndex( nVectorInsertJunction ) ); } else { if ( bIsWholeReadUnaligned() ) { if ( bComp() ) return( nGetAlignEndUnpadded() ); else return( nGetAlignStartUnpadded() ); } else { // I don't think this will ever happen because // bFoundVectorInsertJunction will have found one if // the read is aligned. // But just to be safe: if ( bComp() ) { return( nGetAlignClipEndUnpadded() ); } else { return( nGetAlignClipStartUnpadded() ); } } } } } void LocatedFragment :: calculateHighQualitySegmentOfRead() { // these are all in read positions, not consensus positions bool bHighQualitySegmentExists; int nUnpaddedHighQualitySegmentStart; int nUnpaddedHighQualitySegmentEnd; int nPaddedHighQualitySegmentStart; int nPaddedHighQualitySegmentEnd; findHighQualitySegment( bHighQualitySegmentExists, nUnpaddedHighQualitySegmentStart, nUnpaddedHighQualitySegmentEnd, nPaddedHighQualitySegmentStart, nPaddedHighQualitySegmentEnd ); if ( bHighQualitySegmentExists ) { nQualClipStart_ = nGetConsPosFromFragIndex( nPaddedHighQualitySegmentStart ); nQualClipEnd_ = nGetConsPosFromFragIndex( nPaddedHighQualitySegmentEnd ); bWholeReadIsLowQuality_ = false; } else { bWholeReadIsLowQuality_ = true; } // added Nov 2004 // Removed Nov 2004 because it causes new versions of each phd // file to be written when adding new reads, even though those // phd files have not changed. // setChanged(); } bool LocatedFragment :: bGetUserDefinedHighQualitySegment( const double dErrorProbability, int& nHighQualitySegmentLeftUnpaddedConsPos, int& nHighQualitySegmentRightUnpaddedConsPos ) { bool bHighQualitySegmentExists; int nHighQualitySegmentLeftUnpaddedReadPos; int nHighQualitySegmentRightUnpaddedReadPos; int nHighQualitySegmentLeftPaddedReadPos; int nHighQualitySegmentRightPaddedReadPos; findHighQualitySegmentWithUserDefinedQuality( dErrorProbability, bHighQualitySegmentExists, nHighQualitySegmentLeftUnpaddedReadPos, nHighQualitySegmentRightUnpaddedReadPos, nHighQualitySegmentLeftPaddedReadPos, nHighQualitySegmentRightPaddedReadPos ); int nLeftConsPos = nGetConsPosFromFragIndex( nHighQualitySegmentLeftPaddedReadPos ); int nRightConsPos = nGetConsPosFromFragIndex( nHighQualitySegmentRightPaddedReadPos ); nHighQualitySegmentLeftUnpaddedConsPos = pContig_->nUnpaddedIndex( nLeftConsPos ); nHighQualitySegmentRightUnpaddedConsPos = pContig_->nUnpaddedIndex( nRightConsPos ); return( bHighQualitySegmentExists ); } FileName Fragment :: filGetCompletePHDPathname() { return( ConsEd::pGetConsEd()->filGetPHDDir() + "/" + filPHDFile_ ); } // wholeReadItem* LocatedFragment :: pGetContigDirectionWholeReadItem() { // for( int nWR = 0; nWR < aWholeReadItems_.length(); ++nWR ) { // wholeReadItem* pWR = aWholeReadItems_[ nWR ]; // if ( pWR->soType_ == "contigDirection" ) { // return( pWR ); // } // } // return( NULL ); // } bool LocatedFragment :: bOKToShowThisTraceInShowAllTraces( const int nConsPos ) { // check that there is a base at this position if ( ! ( nGetAlignStart() <= nConsPos && nConsPos <= nGetAlignEnd() ) ) return( false ); // check that there is trace signal at this position, actually // at the region around this base, say +1 one base. int nConsPosBefore = nConsPos - 1; int nConsPosAfter = nConsPos + 1; // this *will* intersect--we checked that above bIntersect( nConsPosBefore, nConsPosAfter, nGetAlignStart(), nGetAlignEnd(), nConsPosBefore, nConsPosAfter ); int nPointPosBefore = nGetPointPosByConsPos( nConsPosBefore ); int nPointPosAfter = nGetPointPosByConsPos( nConsPosAfter ); // now I'd like to check the signal of each color for these point // positions assert( pTraceFile_ ); int nTotal = 0; int nNumberOfPoints = 0; for( int nPoint = nPointPosBefore; nPoint <= nPointPosAfter; nPoint += 5 ) { nTotal = nTotal + (*pTraceFile_->pdaTraceA_)[ nPoint ] + (*pTraceFile_->pdaTraceC_)[ nPoint ] + (*pTraceFile_->pdaTraceG_)[ nPoint ] + (*pTraceFile_->pdaTraceT_)[ nPoint ]; nNumberOfPoints += 4; } float fAverage = (float) nTotal / (float) nNumberOfPoints; if ( fAverage < 1.0 ) return( false ); RWCString soTagTypesToExclude = pCP->soShowAllTracesDoNotShowTraceIfTheseTagsPresent_ + " "; // now look for dataNeeded tags for( int nTag = 0; nTag < aTags_.length(); ++nTag ) { tag* pTag = aTags_[ nTag ]; if ( ! ( pTag->nPaddedConsPosStart_ <= nConsPos && nConsPos <= pTag->nPaddedConsPosEnd_ ) ) continue; if ( soTagTypesToExclude.index( pTag->soType_ + " " ) == RW_NPOS ) continue; // so one of the pCP->soShowAllTracesDoNotShowTraceIfTheseTagsPresent_ // tags is present at the position of interest. Hence don't show // this read return( false ); } // if passed all these checks, ok to show trace return( true ); } void Fragment :: findReadOrderInAlignedReadsWindowFromFile() { for( int nRegExpLine = 0; nRegExpLine < pCP->aShowReadsInAlignedReadsWindowOrderGivenByFile_.length(); ++nRegExpLine ) { RWCRegexp* pRegExp = pCP->aShowReadsInAlignedReadsWindowOrderGivenByFile_[ nRegExpLine ]; if ( soGetName().bContains( *pRegExp ) ) { nReadOrderInAlignedReadsWindow_ = nRegExpLine; return; } } // if reached here, the read name didn't match any pattern nReadOrderInAlignedReadsWindow_ = nReadOrderInAlignedReadsWindowAtBottom; } float LocatedFragment :: fGetAverageQualityInWindow( const int nConsPosInCenter, const int nWindowSize ) { // this truncates rather than rounds, so that // if nWindowSize = 11, nHalfWindow is 5. int nHalfWindow = nWindowSize / 2; int nTotalQuality = 0; int nNonPadsSoFar = 0; int nConsPos; // going to nHalfWindow + 1 because this includes the nConsPosInCenter // base. Notice "<" since want to stop before executing another // iteration. for( nConsPos = nConsPosInCenter; nConsPos <= nGetAlignEnd() && nNonPadsSoFar < ( nHalfWindow + 1 ); ++nConsPos ) { if ( ntGetFragFromConsPos( nConsPos ).bIsPad() ) continue; nTotalQuality += ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99(); ++nNonPadsSoFar; } for( nConsPos = nConsPosInCenter - 1; nConsPos >= nGetAlignStart() && nNonPadsSoFar < nWindowSize; --nConsPos ) { if ( ntGetFragFromConsPos( nConsPos ).bIsPad() ) continue; nTotalQuality += ntGetFragFromConsPos( nConsPos ).qualGetQualityWithout98or99(); ++nNonPadsSoFar; } if ( nNonPadsSoFar == 0 ) return( 0.0 ); float f = (float) nTotalQuality / (float) nNonPadsSoFar; return( f ); } int LocatedFragment :: nUnpaddedConsPosFromOrientedUnpaddedFragPos( const int nReadPos ) { return( pGetContig()->nUnpaddedIndex( nConsPosFromOrientedUnpaddedFragPos( nReadPos ) ) ); } #include "soLine.h" bool LocatedFragment :: bGetSingleSignalBasesConsensusLength( MBTValOrderedOffsetVector*& pSingleSignalConsensusLength ) { pSingleSignalConsensusLength = new MBTValOrderedOffsetVector( pGetContig()->nGetPaddedConsLength() ); pSingleSignalConsensusLength->soName_ = "pSingleSignalConsensusLength for " + soGetName(); if ( pCP->bAutoReportFilterSingleSignal_ ) { for( int nConsPos = pGetContig()->nGetStartConsensusIndex(); nConsPos <= pGetContig()->nGetEndConsensusIndex(); ++nConsPos ) { pSingleSignalConsensusLength->insert( cSingleSignalUndetermined ); } } else { for( int nConsPos = pGetContig()->nGetStartConsensusIndex(); nConsPos <= pGetContig()->nGetEndConsensusIndex(); ++nConsPos ) { pSingleSignalConsensusLength->insert( cSingleSignal ); } return true; } // must modify chromat name in case we used a fake read for // a 2nd alignment RWCString soReadName = soGetName(); if ( pCP->bJustForPrimateProject_ ) { if ( soReadName.bEndsWithAndRemove( "b.r" ) ) { soReadName += ".r"; } else if ( soReadName.bEndsWithAndRemove( "f.r" ) ) { soReadName += ".f"; } } FileName filPolyFile = "../poly/" + soReadName + ".poly"; if ( !filPolyFile.bFileByThisNameExists() ) { // run phred to create a .poly file // run it with the following command line: // phred -if (temp.fof) -dd /tmp // make a unique name: pid_t pid = getpid(); int nPid = pid; RWCString soTempFOF = "/tmp/temp" + RWCString( (long) nPid ) + ".fof"; FILE* pTempFOF = fopen( soTempFOF.data(), "w" ); if ( !pTempFOF ) { THROW_FILE_ERROR( soTempFOF ); } FileName filChromat = "../chromat_dir/" + soReadName; if ( !filChromat.bFileByThisNameExists() ) { cerr << filChromat << " does not exist" << endl; return false; } fprintf( pTempFOF, "../chromat_dir/%s\n", soReadName.data() ); fclose( pTempFOF ); RWCString soCommand = pCP->filFullPathnameOfPhred_; soCommand += " -if "; soCommand += soTempFOF; soCommand += " -dd /tmp"; int nRetStat = system( soCommand.data() ); if ( nRetStat != 0 ) { RWCString soError = "problem running "; soError += soCommand; soError += " with error "; soError += soGetErrno(); THROW_ERROR( soError ); } // delete the temporary file assert( !remove( soTempFOF.data() ) ); filPolyFile = "/tmp/" + soReadName + ".poly"; } // now read the .poly file FILE* pPoly = fopen( filPolyFile.data(), "r" ); if ( !pPoly ) { THROW_FILE_ERROR( filPolyFile ); } // throw away header line assert( fgets( soLine, nMaxLineSize, pPoly ) ); RWTValOrderedVector aRelativeAreaOfUncalledBases( (size_t) nGetPaddedFragLength() ); int n1UnpaddedOrientedReadPos = 0; while( fgets( soLine, nMaxLineSize, pPoly ) ) { soLine.nCurrentLength_ = strlen( soLine.data() ); ++n1UnpaddedOrientedReadPos; int nConsPos = nConsPosFromOrientedUnpaddedFragPos( n1UnpaddedOrientedReadPos ); RWCTokenizer tok( soLine ); // The information on each line consists of the called // base, tok(); // the position of the called base, tok(); //the area of the // called peak, tok(); // the relative area of the called peak, tok(); //the uncalled base, tok(); // the position of the uncalled base, tok(); // the area of the uncalled base, tok(); // the relative area of the uncalled base, RWCString soRelativeAreaOfUncalledBase = tok(); // and the amplitudes of the four traces at // the position of the called base. float fRelativeAreaOfUncalledBase; if ( !bIsNumericFloat( soRelativeAreaOfUncalledBase, fRelativeAreaOfUncalledBase ) ) { RWCString soError = soRelativeAreaOfUncalledBase; soError += " was not numeric float"; THROW_ERROR( soError ); } if ( pGetContig()->nGetStartConsensusIndex() <= nConsPos && nConsPos <= pGetContig()->nGetEndConsensusIndex() ) { if ( fRelativeAreaOfUncalledBase == -1 ) { (*pSingleSignalConsensusLength)[ nConsPos ] = cSingleSignal; } else { (*pSingleSignalConsensusLength)[ nConsPos ] = cMultipleSignal; } } } fclose( pPoly ); // assign single signal value to pads for( int nConsPos = nGetAlignStart(); nConsPos <= nGetAlignEnd(); ) { if ( ntGetFragFromConsPos( nConsPos ).bIsPad() ) { // in a region of pads // pathology: might have no previous non-pad base // might have no final non-pad base bool bFoundLeftNonPad; int nSingleSignalOfLeftNonPad; if ( nConsPos == nGetAlignStart() ) { bFoundLeftNonPad = false; } else { assert( !ntGetFragFromConsPos( nConsPos - 1 ).bIsPad() ); bFoundLeftNonPad = true; nSingleSignalOfLeftNonPad = (*pSingleSignalConsensusLength)[nConsPos - 1]; assert( nSingleSignalOfLeftNonPad != cSingleSignalUndetermined ); } int nConsPosOfRightNonPad; int nSingleSignalOfRightNonPad; bool bFoundRightNonPad = false; int nConsPos2; for( nConsPos2 = nConsPos + 1; nConsPos2 <= nGetAlignEnd(); ++nConsPos2 ) { if ( !ntGetFragFromConsPos( nConsPos2 ).bIsPad() ) { bFoundRightNonPad = true; nSingleSignalOfRightNonPad = (*pSingleSignalConsensusLength)[nConsPos2]; assert( nSingleSignalOfRightNonPad != cSingleSignalUndetermined ); nConsPosOfRightNonPad = nConsPos2; break; } } int nSingleSignalOfPads; if ( bFoundLeftNonPad && bFoundRightNonPad ) { if ( ( nSingleSignalOfLeftNonPad == cSingleSignal ) && ( nSingleSignalOfRightNonPad == cSingleSignal ) ) { nSingleSignalOfPads = cSingleSignal; } else { nSingleSignalOfPads = cMultipleSignal; } } else if ( bFoundLeftNonPad ) { nSingleSignalOfPads = nSingleSignalOfLeftNonPad; } else if ( bFoundRightNonPad ) { nSingleSignalOfPads = nSingleSignalOfRightNonPad; } assert( nSingleSignalOfPads != cSingleSignalUndetermined ); int nConsPosOfRightmostPad = ( bFoundRightNonPad ? nConsPosOfRightNonPad - 1 : nGetAlignEnd() ); for( nConsPos2 = nConsPos; nConsPos2 <= nConsPosOfRightmostPad; ++nConsPos2 ) { (*pSingleSignalConsensusLength)[nConsPos2] = nSingleSignalOfPads; } // nConsPosOfRightmostPad + 1 is a non-pad (or after the end of the sequence) // so there is no point seeing if it is a pad. Try the next base. nConsPos = nConsPosOfRightmostPad + 2; } else { ++nConsPos; } } return true; } bool LocatedFragment :: bGetSingleSignalRelativeAreas( mbtValVector*& paRelativeAreaOfUncalledBases ) { FileName filChromatPathname = "../chromat_dir/" + filGetChromatFilename(); if ( !filChromatPathname.bFileByThisNameExists() ) { return false; } // run phred to create a .poly file // run it with the following command line: // phred -if (temp.fof) -dd /tmp // make a unique name: pid_t pid = getpid(); int nPid = pid; RWCString soTempFOF = "/tmp/temp" + RWCString( (long) nPid ) + ".fof"; FILE* pTempFOF = fopen( soTempFOF.data(), "w" ); if ( !pTempFOF ) { THROW_FILE_ERROR( soTempFOF ); } fprintf( pTempFOF, "../chromat_dir/%s\n", filGetChromatFilename().data() ); fclose( pTempFOF ); RWCString soCommand = pCP->filFullPathnameOfPhred_; soCommand += " -if "; soCommand += soTempFOF; soCommand += " -dd /tmp"; int nRetStat = system( soCommand.data() ); if ( nRetStat != 0 ) { RWCString soError = "problem running "; soError += soCommand; soError += " with error "; soError += soGetErrno(); THROW_ERROR( soError ); } // delete the temporary file assert( !remove( soTempFOF.data() ) ); // now read the .poly file FileName filPolyFile = "/tmp/" + filGetChromatFilename() + ".poly"; FILE* pPoly = fopen( filPolyFile.data(), "r" ); if ( !pPoly ) { THROW_FILE_ERROR( filPolyFile ); } // throw away header line assert( fgets( soLine, nMaxLineSize, pPoly ) ); paRelativeAreaOfUncalledBases = new mbtValVector( (size_t) nGetPaddedFragLength(), 1, -666.0, "paRelativeAreaOfUncalledBases" ); int n1UnpaddedReadPos = 0; while( fgets( soLine, nMaxLineSize, pPoly ) ) { soLine.nCurrentLength_ = strlen( soLine.data() ); ++n1UnpaddedReadPos; RWCTokenizer tok( soLine ); // The information on each line consists of the called // base, tok(); // the position of the called base, tok(); //the area of the // called peak, tok(); // the relative area of the called peak, tok(); //the uncalled base, tok(); // the position of the uncalled base, tok(); // the area of the uncalled base, tok(); // the relative area of the uncalled base, RWCString soRelativeAreaOfUncalledBase = tok(); // and the amplitudes of the four traces at // the position of the called base. float fRelativeAreaOfUncalledBase; if ( !bIsNumericFloat( soRelativeAreaOfUncalledBase, fRelativeAreaOfUncalledBase ) ) { RWCString soError = soRelativeAreaOfUncalledBase; soError += " was not numeric float"; THROW_ERROR( soError ); } (*paRelativeAreaOfUncalledBases)[ n1UnpaddedReadPos ] = fRelativeAreaOfUncalledBase; } return true; } bool LocatedFragment :: bGetAlignedHighQualitySegment( int& nAlignedHighQualityStart, int& nAlignedHighQualityEnd ) { if ( bIsWholeReadUnaligned() || bIsWholeReadLowQuality() ) return false; nAlignedHighQualityStart = nGetAlignClipStart(); nAlignedHighQualityEnd = nGetAlignClipEnd(); if (!bIntersect( nAlignedHighQualityStart, nAlignedHighQualityEnd, nGetHighQualityStart(), nGetHighQualityEnd(), nAlignedHighQualityStart, nAlignedHighQualityEnd ) ) return false; return true; } void LocatedFragment :: insertNtideAtConsPos( const int nConsPos, const Ntide nt ) { if ( !bHasPeakPositions() ) { // no peak position insertNtideWithoutPeakAtConsPos( nConsPos, nt ); } else { insertNtideWithPeakAtConsPos( nConsPos, nt ); } } void LocatedFragment :: insertNtideWithoutPeakAtConsPos( const int nConsPos, const Ntide nt ) { makeCopyOfPhredCallsIfNecessary(); Sequence::insertNtideAtPos( nt, nGetFragIndexFromConsPos( nConsPos ) ); } void LocatedFragment :: fixTagCoordinates() { if ( aTags_.length() == 0 ) return; int nNumberOfUnpaddedBases = pSeqPhredCalledBases_->length(); int* pPaddedFromUnpaddedArray = (int*) malloc( sizeof( int) * ( nNumberOfUnpaddedBases + 10 ) ); if ( !pPaddedFromUnpaddedArray ) { RWCString soError = "could not get enough memory " + RWCString( (long) nNumberOfUnpaddedBases ) + soGetName(); THROW_ERROR( soError ); } int nUnpadded = 0; for( int nPaddedSeqPos = nGetStartIndex(); nPaddedSeqPos <= nGetEndIndex(); ++nPaddedSeqPos ) { if ( !ntideGet( nPaddedSeqPos ).bIsPad() ) { ++nUnpadded; pPaddedFromUnpaddedArray[ nUnpadded ] = nPaddedSeqPos; } } int nMaxUnpadded = nUnpadded; if ( bComp() ) { for( int nUnpadded = 1; nUnpadded <= ( nMaxUnpadded / 2); ++nUnpadded ) { int nTemp = pPaddedFromUnpaddedArray[ nUnpadded ]; pPaddedFromUnpaddedArray[ nUnpadded ] = pPaddedFromUnpaddedArray[ nMaxUnpadded + 1 - nUnpadded ]; pPaddedFromUnpaddedArray[ nMaxUnpadded + 1 - nUnpadded ] = nTemp; } } for( int nTag = 0; nTag < aTags_.length(); ++nTag ) { tag* pTag = aTags_[ nTag ]; // if we ever use this routine for anything but addNewReads, we // could replace this assertion with if ( // pTag->bCoordinatesAreUnpadded_ ) ... assert( pTag->bCoordinatesAreUnpaddedRead_ ); int nPaddedReadPosStart = pPaddedFromUnpaddedArray[ pTag->nPaddedConsPosStart_ ]; int nPaddedReadPosEnd = pPaddedFromUnpaddedArray[ pTag->nPaddedConsPosEnd_ ]; // convert these to consensus positions pTag->nPaddedConsPosStart_ = nGetConsPosFromFragIndex( nPaddedReadPosStart ); pTag->nPaddedConsPosEnd_ = nGetConsPosFromFragIndex( nPaddedReadPosEnd ); if ( bComp() ) { int nTemp = pTag->nPaddedConsPosStart_; pTag->nPaddedConsPosStart_ = pTag->nPaddedConsPosEnd_; pTag->nPaddedConsPosEnd_ = nTemp; } pTag->bCoordinatesAreUnpaddedRead_ = false; } free( (void*) pPaddedFromUnpaddedArray ); } bool Fragment :: bIsAShortRead() { return( (( soChemistry_ == "solexa" || soChemistry_ == "454" ) ) ? true : false ); } bool Fragment :: bIsAnUnpairedShortRead() { // Jan 27, 2009 to implement solexa mate pairs (DG) // if ( soChemistry_ == "solexa" ) return true; if ( soChemistry_ == "454" ) { if ( !bIsUniversalPrimerRead() ) return true; } return false; } void Fragment :: writeReadToFasta( FILE* pFasta, FILE* pQual, const bool bUseSeqPhredCalledBases ) { // first write header // traditionally looks like this: // >djs74-1802.s1 CHROMAT_FILE: djs74-1802.s1 PHD_FILE: djs74-1802.s1.phd.1 CHEM: prim DYE: ET TIME: Tue Jul 22 09:50:12 2008 // If the reads are in a phdball, it does not have PHD_FILE and instead // has VERSION: 1 // In either case (phdball or phd file), there can optionally be the // following: // TEMPLATE: djs736a1_fp02q277 DIRECTION: rev RWCString soHeader = ">"; soHeader += soGetName(); RWCString soQualHeader = soHeader; if ( !filGetChromatFilename().bIsNull() && filGetChromatFilename() != "none" ) { soHeader += " CHROMAT_FILE: "; soHeader += filGetChromatFilename(); } if ( !filGetPHDFilename().bIsNull() ) { soHeader += " PHD_FILE: "; soHeader += filGetPHDFilename(); } else { soHeader += " VERSION: "; soHeader += RWCString( (long) nVersion_ ); } if ( !soChemistry_.bIsNull() ) { soHeader += " CHEM: "; soHeader += soChemistry_; } if ( !soDye_.bIsNull() ) { soHeader += " DYE: "; soHeader += soDye_; } soHeader += " TIME: " ; soHeader += soGetTimestamp(); soHeader += "\n"; soQualHeader += "\n"; fputs( soHeader.data(), pFasta ); fputs( soQualHeader.data(), pQual ); int nBasesOnLine = 0; const int nMaxBasesOnLine = 50; if ( bUseSeqPhredCalledBases ) { assert( pSeqPhredCalledBases_ ); if ( bComp() ) { for( int n1Padded = pSeqPhredCalledBases_->nGetEndIndex(); n1Padded >= pSeqPhredCalledBases_->nGetStartIndex(); --n1Padded ) { if ( nBasesOnLine >= nMaxBasesOnLine ) { fputc( '\n', pFasta ); fputc( '\n', pQual ); nBasesOnLine = 0; } fputc( cComplementBaseTable[ pSeqPhredCalledBases_->ntideGet( n1Padded ).cGetBase() ], pFasta ); fprintf( pQual, " %d", pSeqPhredCalledBases_->ntideGet( n1Padded ).qualGetQuality() ); ++nBasesOnLine; } } // if ( bComp() ) { else { for( int n1Padded = pSeqPhredCalledBases_->nGetStartIndex(); n1Padded <= pSeqPhredCalledBases_->nGetEndIndex(); ++n1Padded ) { if ( nBasesOnLine >= nMaxBasesOnLine ) { fputc( '\n', pFasta ); fputc( '\n', pQual ); nBasesOnLine = 0; } fputc( pSeqPhredCalledBases_->ntideGet( n1Padded ).cGetBase(), pFasta ); // note no conversion of 98 and 99 since phrap recognizes // those special qualities fprintf( pQual, " %d", pSeqPhredCalledBases_->ntideGet( n1Padded ).qualGetQuality() ); ++nBasesOnLine; } } } // if ( bUseSeqPhredCalledBases ) { else { if ( bComp() ) { for( int nPadded = nGetEndIndex(); nPadded >= nGetStartIndex(); --nPadded ) { if ( ntideGet( nPadded ).bIsPad() ) continue; if ( nBasesOnLine >= nMaxBasesOnLine ) { fputc('\n', pFasta ); fputc('\n', pQual ); nBasesOnLine = 0; } fputc( cComplementBaseTable[ ntideGet( nPadded ).cGetBase() ], pFasta ); // note no conversion of 98 and 99 since phrap recognizes // those special qualities fprintf( pQual, " %d", ntideGet( nPadded ).qualGetQuality() ); ++nBasesOnLine; } } else { for( int nPadded = nGetStartIndex(); nPadded <= nGetEndIndex(); ++nPadded ) { if ( ntideGet( nPadded ).bIsPad() ) continue; if ( nBasesOnLine >= nMaxBasesOnLine ) { fputc('\n', pFasta ); fputc('\n', pQual ); nBasesOnLine = 0; } // I don't see any need to convert to upper/lower case fputc( ntideGet( nPadded ).cGetBase(), pFasta ); // note no conversion of 98 and 99 since phrap recognizes // those special qualities fprintf( pQual, " %d", ntideGet( nPadded ).qualGetQuality() ); ++nBasesOnLine; } } } fputc('\n', pFasta ); fputc('\n', pQual ); } void LocatedFragment :: transferQualityValues( LocatedFragment* pTargetRead ) { bool bNeedToComplementBack = false; if ( bComp() != pTargetRead->bComp() ) { pTargetRead->complementLocatedFragmentButNotTraces(); bNeedToComplementBack = true; } assert( Sequence::nGetUnpaddedSeqLength() == pTargetRead->Sequence::nGetUnpaddedSeqLength() ); int nTargetSeqPos = 0; // ready for ++ for( int nSeqPos = nGetStartIndex(); nSeqPos <= nGetEndIndex(); ++nSeqPos ) { if ( ntideGet( nSeqPos ).bIsPad() ) continue; ++nTargetSeqPos; while( pTargetRead->ntideGet( nTargetSeqPos ).bIsPad() ) { ++nTargetSeqPos; } // sanity check char cSourceBase = ntideGet( nSeqPos ).cGetBase(); char cTargetBase = pTargetRead->ntideGet( nTargetSeqPos ).cGetBase(); if ( ( nNumberFromBase2[ cSourceBase ] != 4 ) && ( nNumberFromBase2[ cTargetBase ] != 4 ) ) { assert( cSourceBase == cTargetBase ); } pTargetRead->setQualityAtSeqPos( nTargetSeqPos, ntideGet( nSeqPos ).qualGetQuality() ); } pTargetRead->assignQualityAndPeakPositionsToPads( false ); // bAndPeakPositions if ( bNeedToComplementBack ) { pTargetRead->complementLocatedFragmentButNotTraces(); } }