/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #ifndef LOCATEDFRAGMENT_INCLUDED #define LOCATEDFRAGMENT_INCLUDED #include "ntide.h" #include "rwtvalorderedvector.h" #include "rwtptrorderedvector.h" #include "rwcstring.h" #include "sysdepend.h" #include "sequence.h" #include "tracefile.h" #include "filename.h" #include "tag.h" #include #include #include "readTypes.h" #include #include "mbtValVector.h" #include // forward declaration of contig class Contig; class gotoList; class subcloneTTemplate; class GuiColorText; // class wholeReadItem; #include "wholeReadItem.h" inline char* szWholeRead( const bool bIsWholeReadUnaligned ) { if ( bIsWholeReadUnaligned ) return( "whole read is unaligned" ); else return( "not whole read is unaligned" ); } #define INITIALIZE_READ_ARRAYS \ soName_ = "read sequence";\ aTags_.soName_ = "LocatedFragment.aTags_";\ aWholeReadItems_.soName_ = "LocatedFragment.aWholeReadItems_"; const int nReadOrderInAlignedReadsWindowNotYetSet = -2; const int nReadOrderInAlignedReadsWindowAtBottom = INT_MAX; const int nNumberOfSingleSignalArrays = 9; const char cSingleSignalUndetermined = 'u'; const char cSingleSignal = 's'; const char cMultipleSignal = 'm'; typedef mbtValVector* pArrayOfBool; class Fragment : public Sequence { public: // null ctor - required by dynamic arrays Fragment() : bIsComplemented_( false ), pTraceFile_( NULL ), bReadCurrentlyHighlighted_( false ), bGettingTraceFile_( false ), pSeqPhredCalledBases_( NULL ), bReadOnly_( false ), nReadType_( 0 ), // start undefined nReadTypeFromPhdFile_( 0 ), bAlreadyReadPhdFile_( false ), bAlreadyReadPhredCallsInPhdBall_( false ), bIsAFakeRead_( false ), nReadOrderInAlignedReadsWindow_( nReadOrderInAlignedReadsWindowNotYetSet ), pSub_( NULL ), nVersion_( 0 ) { INITIALIZE_READ_ARRAYS } // initialize file name and bComp // (required by LocatedFragment ctor ) Fragment(const RWCString& soFragmentName, bool bIsComp) : Sequence( soFragmentName ), bIsComplemented_(bIsComp), pTraceFile_( NULL ), bReadCurrentlyHighlighted_( false ), bGettingTraceFile_( false ), pSeqPhredCalledBases_( NULL ), bReadOnly_( false ), nReadType_( 0 ), // start undefined nReadTypeFromPhdFile_( 0 ), bAlreadyReadPhdFile_( false ), bAlreadyReadPhredCallsInPhdBall_( false ), bIsAFakeRead_( false ), nReadOrderInAlignedReadsWindow_( nReadOrderInAlignedReadsWindowNotYetSet ), pSub_( NULL ), nVersion_( 0 ) { INITIALIZE_READ_ARRAYS } // The Big Three ~Fragment(); Fragment& operator=( const Fragment& frag ); Fragment( const Fragment& frag ); const RWCString& soGetFragmentName() const { return soSequenceName_; } const RWCString& soGetTemplateName(); const void setDefaultTemplateNameIfNecessary(); FileName filGetChromatFilename() { return filChromatFile_; } void setChromatFilename( const FileName filChromatFile ) { filChromatFile_ = filChromatFile; } bool bHasChromat() { if ( filChromatFile_ == "none" || filChromatFile_ == "no_chromat" || filChromatFile_.bIsNull() ) return( false ); else return( true ); } // This conflicted with Sequence::bHasPeakPositions and caused // a bug that through an exception when trying to get peak // positions when trying to make a join. DG, Sept 2008 // this is *not* the same as bHasChromat. E.g., 454 reads // have peak positions but no chromat // bool bHasPeakPositions() { // if ( soChemistry_ == "solexa" ) // return false; // else // return true; // } FileName filGetPHDFilename() { return filPHDFile_; } FileName filGetCompletePHDPathname(); void setPHDFilename( const FileName filPHDFile ) { filPHDFile_ = filPHDFile; } void setPHDTimestamp( const RWCString& soTimestamp ) { soTimestamp_ = soTimestamp; } RWCString soGetTimestamp() { return soTimestamp_; } void writePHD( const FileName filPHD ); void writePHDAndMore(); // returns boolean indicating whether sequence is complemented // in terms of original read sense bool bComp() const { return bIsComplemented_; } void fixFragmentPointPositions( TraceFile* pTraceFile ); // return the ntide as originally called by the base calling software // indexed by position in original read. note that this is a // Sequence style (1 based) index. Ntide& ntideGetPhredCalledBase(const int nIndex) const { if ( pSeqPhredCalledBases_ ) return pSeqPhredCalledBases_->ntideGet(nIndex); else return Sequence::ntideGet( nIndex ); } int nGetPointPosOfPhredCalledBase( const int nPaddedSeqPos ) { if ( pSeqPhredCalledBases_ ) return( pSeqPhredCalledBases_->nGetPointPos( nPaddedSeqPos ) ); else return( Sequence::nGetPointPos( nPaddedSeqPos ) ); } void getRangeOfPhredCalledPositions(const int nPointMin, const int nPointMax, int& nCalledBaseMin, int& nCalledBaseMax, bool& bError ) const; void checkPointPositions(); FileName filGetPHDFilenameForPhredCalls(); ////////////////////////////////////////////////////////////////////// // // Trace files and "point position" (position of called base on // trace) related members. many of these will throw an exception // when called if the trace file has not been read yet. // ////////////////////////////////////////////////////////////////////// // has the trace file already been read? ask me and be sure. bool bTraceFileAlreadyRead() const { return (pTraceFile_ != NULL); } // this call will create a TraceFile object for this fragment. // can only be called once, calling twice for same frag is // program logic error. The file hasn't changed, why would // you want to read it again?. void readTraces( const bool bAskForPathnameIfMissing ); // clients get access to the TraceFile object TraceFile* pTraceFileGet() { return pTraceFile_; } // null the tracefile--used when copying fragments and you don't // want the new instance to point to the same tracefile (for fear // of the destructor for the tracefile firing twice void setTraceFileNull() { pTraceFile_ = NULL; } // Input: read position (doesn't count pads) // Output: read position (doesn't count pads) in the direction of sequencing int nOrientedUnpaddedFragPosFromUnpaddedFragPos( const int nUnpaddedFragPos ) { if (bIsComplemented_ ) return( nComplementUnpaddedIndex( nUnpaddedFragPos ) ); else return( nUnpaddedFragPos ); } // Input: read position (counts pads) // Output: read position (counts pads) in the direction of sequencing int nOrientedFragPosFromFragPos( const int nFragPos ) const { if (bIsComplemented_ ) return( nComplementPaddedIndex( nFragPos ) ); else return( nFragPos ); } // used for user modifying tag positions int nUnorientedPaddedFromOrientedUnpadded( const int nUnpadded ); // for use in displaying pad positions int nOrientedUnpaddedFromUnorientedPadded( const int nPadded ); tag* pGetTag( const int nTagIndex ) { return( aTags_[ nTagIndex ] ); } int nGetNumberOfTags() { return( aTags_.length() ); } void writeTagsToPhdFile( ofstream& ofsPHD ); void addTag( tag* pTag ); void removeTag( tag* pTag ); void sortTags(); static int compareTags( const tag** ppTag1, const tag** ppTag2 ); bool bTagsAreSorted(); friend class EditOverstrike; friend class editChangeToNs; friend class editChangeToNsToLeft; friend class editChangeToNsToRight; void makeCopyOfPhredCallsIfNecessary(); void dumpTags(); int nGetNumberOfReadTagsToWriteToAceFile(); void writeReadTagsToAceFile( ofstream& ofsAce ); int nGetNumberOfPhredCalledBases(); bool bIsDyePrimerNotDyeTerminator(); bool bIsWholeCloneTemplateNotSubcloneTemplate(); // this refers to which end of a double-stranded template // the read is on. It does *not* refer to which phrap // assembled the read on the top strand or bottom strand (whether // phrap had to complement it). That is bComp() bool bIsForwardNotReverseRead(); // this indicates that the read is not at the ends of the insert bool bIsWalkingNotUniversalPrimerRead(); // this is not the opposite of bIsWalkingNotUniversalPrimerRead // since there are also pcr end reads bool bIsUniversalPrimerRead(); int nGetNumberOfVectorBasesInRead(); void setReadTypeFromReadName(); void writeWholeReadItemsToPhdFile( ofstream& ofsPHD ); void findReadOrderInAlignedReadsWindowFromFile(); bool bIsAShortRead(); bool bIsAnUnpairedShortRead(); void writeReadToFasta( FILE* pFasta, FILE* pQual, const bool bUseSeqPhredCalledBases ); public: bool bReadCurrentlyHighlighted_; bool bGettingTraceFile_; public: // I moved this to sequence.h and called it soSequenceName_ // RWCString soFragmentName_; // this is the ABI filename as given by the .ace file // It contains the full pathname // The name above is really just the fragment name, I believe (DG) FileName filChromatFile_; // the name of the PHD file which contains the base point positions, // the bases, and the quality information. This file is not read // unless the user calls up the traces. FileName filPHDFile_; // used to check if the PHD being read is the same one used in the assembly RWCString soTimestamp_; // boolean indicates fragment is complemented relative to ABI chromatigram // file bool bIsComplemented_; RWCString soABIThumbprint_; RWCString soPhredVersion_; RWCString soCallMethod_; int nQualityLevels_; RWCString soDye_; RWCString soChemistry_; bool bPhdFileHasTraceArrayMinAndMax_; // backing these out since they are already in the // Sequence object. I would like to move them to the // Fragment object instead in the future since they // have no purpose for the Contig (which inherits Sequence). // However, then there will be the issue of the pSeqPhredCalledBases_ // which currently is just a Sequence object but must have // trace information including these. // int nTraceArrayMinIndex_; // int nTraceArrayMaxIndex_; // the TraceFile is created when a user asks to see the // traces for a fragment. NULL if the traces haven't been read TraceFile* pTraceFile_; // MBT rather than RWT allows re-sort // However, I don't believe there is any reason to have this in // a sorted vector. I suggest changing to RWTPtrOrderedVector RWTPtrOrderedVector aTags_; RWTPtrOrderedVector aWholeReadItems_; // this array stores the bases as called by the phred base calling software, // This is a Sequence, i.e. a 1 based array of Ntide // and as such can be complemented with complementSequence() member. // The point positions are stored within the Ntides. Unlike the // Sequence contained within a Fragment, it will never contain pads. // The data in the array remains unedited throughout the life of // the object. See the associated array in tracefile.h which contains // the ABI calls // Note: This is null unless there is a phd file other than .phd.1 // This causes a problem since the array in memory has x's instead of // the actual base calls for sequence matching vector. Thus to get // the actual bases, you must read the phred calls. Sequence* pSeqPhredCalledBases_; RWCString soTemplate_; int nTemplateSize_; RWCString soTemplateType_; subcloneTTemplate* pSub_; // added 4/01, DG to support // finding library object from read efficiently // since this must be done for most reads // for bIsReadNearEndOfContigAndPointingOut RWCString soPrimerSequence_; RWCString soLibrary_; bool bReadOnly_; int nReadType_; // nUniversalForward, nUniversalReverse, nWalk, nPCREnd int nReadTypeFromPhdFile_; // nUniversalForward, nUniversalReverse, nWalk, nPCREnd bool bAlreadyReadPhdFile_; bool bAlreadyReadPhredCallsInPhdBall_; // this just indicates whether a fake WR tag was in the phd file // If the user has set pCP->bFakeReadsSpecifiedByFilenameExtension_, // then this flag will not be used bool bIsAFakeRead_; int nReadOrderInAlignedReadsWindow_; int nVersion_; // used for solexa and 454 reads, takes place of .phd.1, .phd.2, etc. FileName filPhdBall_; // path relative to ../phdball_dir }; // class Fragment class LocatedFragment : public Fragment { public: // null ctor - required by dynamic arrays LocatedFragment() : nAlignStartPos_(0), pContig_( NULL ), bChanged_( false ), bWholeReadIsLowQuality_( false ), bWholeReadIsUnaligned_( false ), bFoundVectorInsertJunction_( false ), bTriedToFindVectorInsertJunction_( false ) {} LocatedFragment(const RWCString& soReadName, int nConsensusStartPos, bool bIsComp, Contig* pContig) : Fragment(soReadName, bIsComp ), nAlignStartPos_(nConsensusStartPos), pContig_( pContig ), bChanged_( false ), bWholeReadIsLowQuality_( false ), bWholeReadIsUnaligned_( false ), bFoundVectorInsertJunction_( false ), bTriedToFindVectorInsertJunction_( false ) {} // // these two operators are for use in the sorted array // of located fragments. sort order is determined // by complement, then alignment position. note that this // operator is what determines the order of fragments from top // to bottom as they appear in the contigwin. // // bool operator < ( const LocatedFragment& locfrag ) const { // bool bLessThan; // // forward sorts before reverse // if ((! bIsComplemented_) && (locfrag.bIsComplemented_)) { // bLessThan = true; // } // else if ((bIsComplemented_) && (! locfrag.bIsComplemented_)) { // bLessThan = false; // } // else { // // otherwise sort by alignment // if (nAlignStartPos_ < locfrag.nAlignStartPos_ ) { // bLessThan = true; // } // else if ( nAlignStartPos_ > locfrag.nAlignStartPos_ ) { // bLessThan = false; // } // else { // // at this point the reads have the same start position // // and the same strand. Define a relation between them. // if ( this < &locfrag ) // bLessThan = true; // else // bLessThan = false; // } // } // return bLessThan; // } bool operator== ( const LocatedFragment& locfrag ) const { return ( this == &locfrag ); } // a located fragment is by definition part of a contig. // return a pointer to the Contig object of which you are // a part Contig* pGetContig() { return( pContig_ ); } // naming is the mother of things. use this instead // of inherited Sequence member function if you can. int nGetPaddedFragLength() const { return (*this).nGetPaddedSeqLength(); } ////////////////////////////////////////////////////////////////////// // // clipping i.e. regions phrap says are too poor to use // ////////////////////////////////////////////////////////////////////// // you need these for writing out an ace file. what are the // boundaries of the clipped region (as currently determined // by phrap. in padded consensus relative positions. int nGetStartUnclippedConsPos() const { return nQualClipStart_; } int nGetHighQualityStart() { return nQualClipStart_; } int nGetEndUnclippedConsPos() const { return nQualClipEnd_; } int nGetHighQualityEnd() { return nQualClipEnd_; } // these are in padded consensus positions int nGetAlignClipStart() { return nAlignClipStart_; } int nGetAlignClipEnd() { return nAlignClipEnd_; } bool bGetAlignedHighQualitySegment( int& nAlignedHighQualityStart, int& nAlignedHighQualityEnd ); // unpadded consensus positions int nGetAlignClipStartUnpadded(); int nGetAlignClipEndUnpadded(); int nGetAlignedLengthUnpadded(); int nGetCenterOfAlignedRegionUnpadded(); int nGetHighQualityStartUnpadded(); int nGetHighQualityEndUnpadded(); // It is possible (has occurred at Fred Hutch) to have one // or the other of nQualClipStart_ or nQualClipEnd_ be -1 // and the read not be all low quality. If both are -1, then // the read is totally low quality. It cannot be the case that // the high quality region is just the base at -1 since Phil // said (990702) that high quality segments must be more than // 1 base. He said this is also true with aligned segments. // Now just avoid problem by having a flag for all low quality. // Note that it is not possible for phrap to output an ace file // QA a b c d // with a = -1 and b != -1, or b = -1 and a != -1, or c = -1 and d != -1, // or d = -1 and c != -1 int nGetHighQualityStartUnlessAllBad() { if ( bWholeReadIsLowQuality_ ) return( nGetAlignStart() ); else return( nQualClipStart_ ); } int nGetHighQualityEndUnlessAllBad() { if ( bWholeReadIsLowQuality_ ) return( nGetAlignEnd() ); else return( nQualClipEnd_ ); } int nGetHighQualitySegmentLengthSortOfUnpadded(); bool bThereIsHighQualitySegmentOffConsensus( const int nWhichEndOfConsensus, int& nLengthOfHighQualityVectorSegment, int& nReadPosHighQualityVectorLeft, int& nReadPosHighQualityVectorRight ); bool bDoesEitherEndOfVectorMatchThisPartOfRead( const int nThisReadProtrudesIntoWhichGap, const RWCString& soVectorSequence, const RWCString& soComplementVectorSequence, const int nReadPosHighQualityVectorLeft, const int nReadPosHighQualityVectorRight, int& nMatchesWhichEndOfVector ); // passed consensus position, returns boolean indicating // whether the fragment base aligned to the consesnsus at // the passed (padded, consensus) position was clipped by // the phrap assembly. [ if so it will appear different on // screen, presumably dim. ] bool bIsClipped( const int nConsPos ) { return( bIsInLowQualityEndsOfRead( nConsPos ) ); } bool bIsInLowQualityEndsOfRead( const int nConsPos ) { if ( bIsWholeReadLowQuality() ) return true; else if ( ( nConsPos < nGetStartUnclippedConsPos() ) || ( nGetEndUnclippedConsPos() < nConsPos ) ) return true; else return false; } bool bIsInHighQualitySegmentOfRead( const int nConsPos ) { if ( bIsWholeReadLowQuality() ) return false; else { if ( nQualClipStart_ <= nConsPos && nConsPos <= nQualClipEnd_ ) return true; else return false; } } bool bIsInAlignedPartOfRead( const int nConsPos ) { if ( bIsWholeReadUnaligned() ) return( false ); else { if ( ( nGetAlignClipStart() <= nConsPos ) && ( nConsPos <= nGetAlignClipEnd() ) ) return true; else return false; } } bool bIsInAlignedPartOfReadAndNotTooCloseToUnalignedPart( const int nConsPos ); // obsolete--for compatibility with old ace format int nGetStartUnclippedFragPos(); int nGetEndUnclippedFragPos(); void calculateHighQualitySegmentOfRead(); bool bGetUserDefinedHighQualitySegment( const double dErrorProbability, int& nHighQualitySegmentLeftUnpaddedConsPos, int& nHighQualitySegmentRightUnpaddedConsPos ); // in cons pos void setQualAndAlignClipping( const int nQualClipStart, const int nQualClipEnd, const int nAlignClipStart, const int nAlignClipEnd ) { nQualClipStart_ = nQualClipStart; nQualClipEnd_ = nQualClipEnd; nAlignClipStart_ = nAlignClipStart; nAlignClipEnd_ = nAlignClipEnd; } void setWholeReadIsLowQuality() { bWholeReadIsLowQuality_ = true; } void setWholeReadIsUnaligned() { bWholeReadIsUnaligned_ = true; } bool bIsWholeReadLowQuality() { return( bWholeReadIsLowQuality_ ); } bool bIsWholeReadUnaligned() { return( bWholeReadIsUnaligned_ ); } int nGetFirstBaseOfInsertUnpadded(); void extendAlignedRegion( const int nConsPos ) { if ( !bIsInAlignedPartOfRead( nConsPos ) ) { if ( bIsWholeReadUnaligned() ) { nAlignClipStart_ = nConsPos; nAlignClipEnd_ = nConsPos; bWholeReadIsUnaligned_ = false; } else { // there are only two ways this base could be off the // aligned region: // b1 aaaaaaaaaa b2 // a = aligned region, // b1, one possibility // b2, another possibility if ( nConsPos < nAlignClipStart_ ) nAlignClipStart_ = nConsPos; else nAlignClipEnd_ = nConsPos; } } } void trimBackAlignedRegion( const int nAlignedRegionLeftConsPos, const int nAlignedRegionRightConsPos ); ////////////////////////////////////////////////////////////////////// // // alignment, indexing, etc. // ////////////////////////////////////////////////////////////////////// // return (current) alignment to the consensus in terms of // padded consensus position const int nGetAlignStart() const { return nAlignStartPos_ ; } const int nGetAlignEnd() const { return nAlignStartPos_ + nGetPaddedSeqLength() - 1; } int nGetConsPosOfBeginningOfRead() { return( bIsComplemented_ ? nGetAlignEnd() : nGetAlignStart() ); } int nGetConsPosOfBeginningOfReadUnpadded() { return( bIsComplemented_ ? nGetAlignEndUnpadded() : nGetAlignStartUnpadded() ); } int nGetAlignStartUnpadded(); int nGetAlignEndUnpadded(); bool bIsInRead( const int nConsPos ) { return( ( ( nGetAlignStart() <= nConsPos ) && ( nConsPos <= nGetAlignEnd() ) ) ); } // convert padded consensus position into padded fragment position int nGetFragIndexFromConsPos(const int nConsPos) const { return nConsPos - nAlignStartPos_ + Sequence::nGetStartIndex(); } // convert fragment index to consensus position // padded read position to padded consensus position // padded read position is left to right regardless of the // direction of the read inline int nGetConsPosFromFragIndex(const int nFragIndex ) const { return( nFragIndex + nAlignStartPos_ - Sequence::nGetStartIndex() ); } // passed the padded consensus position, returns the // unpadded index (1 based) into the fragment. // primarily for benefit of the functions below int nConsPosToUnpaddedFragPos( const int nConsPos ) const { return( nUnpaddedIndex( nGetFragIndexFromConsPos(nConsPos) ) ); } int nGetConsPosFromUnpaddedFragPos( const int nUnpaddedFragPos ) { return( nGetConsPosFromFragIndex( nPaddedIndex( nUnpaddedFragPos ) ) ); } // Input: consensus position ( counts pads ) // Output: fragment index in the direction of sequencing (counts pads) int nOrientedFragPosFromConsPos( const int nConsPos ) const { return( nOrientedFragPosFromFragPos( nGetFragIndexFromConsPos( nConsPos ) ) ); } // Input: consensus position ( counts pads ) // Output: unpadded fragment index in the direction of sequencing // rewritten 1/2000 for better efficiency for bottom strand reads int nOrientedUnpaddedFragPosFromConsPos( const int nConsPos ) { int nFragPos = nGetFragIndexFromConsPos( nConsPos ); if ( !bComp() ) { return( nUnpaddedIndex( nFragPos ) ); } else { int nUnpaddedBases = 0; for( int nPadded = nFragPos; nPadded <= nGetEndIndex(); ++nPadded ) if ( !ntideGet( nPadded ).bIsPad() ) ++nUnpaddedBases; return( nUnpaddedBases ); } } // useful for converting any positions in the phd file (e.g., tags ) int nConsPosFromOrientedUnpaddedFragPos( const int nReadPos ) { return nGetConsPosFromFragIndex( nUnorientedPaddedFromOrientedUnpadded( nReadPos ) ); } // useful for custom navigation or cases in which the phd file position // should be converted for output int nUnpaddedConsPosFromOrientedUnpaddedFragPos( const int nReadPos ); // shifts the alignment of the located frag by one position // by adding 1 to alignment start and stop member data void shiftAlignmentPlusOne() { ++nAlignStartPos_; } // shifts the alignment of the located frag by one position // by subtracting 1 from alignment start and stop member data void shiftAlignmentMinusOne() { --nAlignStartPos_; } ////////////////////////////////////////////////////////////////////// // // retrieving, editing sequence/ntides // ////////////////////////////////////////////////////////////////////// // passed the position in the consensus, returns corresponding // Ntide in fragment. uses inherited (protected) Sequence // class [] operator. using "myFrag[n]" also works. Ntide ntGetFragFromConsPos(const int nConsPos) const { return Sequence :: ntideGet(nGetFragIndexFromConsPos( nConsPos ) ); } RWCString soGetPartOfReadUpperOrLower( const int nConsPosStart, const int nConsPosEnd ); RWCString soGetPartOfReadLower( const int nConsPosStart, const int nConsPosEnd ); // get the nucleotide at a consensus position, then get it's point // position. convenience function. int nGetPointPosByConsPos( const int nConsPos ) { return( nGetPointPos( nGetFragIndexFromConsPos( nConsPos ) ) ); } void setQualityAtConsPos( const int nPadded, const Quality& qual ) { makeCopyOfPhredCallsIfNecessary(); Sequence :: setQualityAtSeqPos( nGetFragIndexFromConsPos( nPadded ), qual ); } void transferQualityValues( LocatedFragment* pTargetRead ); // this call will complement the entire LocatedFragment, including // sequence, clipping locations (and the point positions // and TraceFile object as well if one has been read). void complementLocatedFragment( ); // this is a convenience routine for printing out the PHD file for // a complemented fragment void complementLocatedFragmentButNotTraces(); void complementLocatedFragmentAndMaybeTraces( bool bCompTraces ); void overstrikeNtide(const int nConsPos, const Ntide nt, const bool bAddEditTag ); void overstrikeNtideButLeaveOldPointPosition( const int nConsPos, const char cBase, const unsigned char ucQuality, const bool bAddEditTag ); void insertNtideAtConsPos( const int nConsPos, const Ntide nt ); // insert the passed ntide at the passed location. // inherited Sequence class interpolates point position if need be. // Contig class responsible for fixing BaseSegArray. void insertNtideWithPeakAtConsPos(const int nConsPos, const Ntide nt) { makeCopyOfPhredCallsIfNecessary(); Sequence::insertNtideWithPeakAtPos(nt,nGetFragIndexFromConsPos(nConsPos)); } void insertNtideWithoutPeakAtConsPos( const int nConsPos, const Ntide nt ); // delete the ntide at the passed location. // Contig class responsible for fixing BaseSegArray. Ntide deleteNtide(const int nConsPos) { makeCopyOfPhredCallsIfNecessary(); return Sequence::deleteNtideAtPos(nGetFragIndexFromConsPos(nConsPos), true ); // bDeletePointPos } // passed a consensus-relative position, sets passed-by-reference // nLeftPoint and nRightPoint to be the leftmost and rightmost // point positions "owned" by that base. although the base calling // software only assigns one point position to a base, for the // purposes of intercepting user mouse actions, etc. we need to // know what base they meant to click on, hence need a (somewhat // arbitrary) width, set by interpolating midpoints rather than // any examination of the trace data, peak width, etc. void getPointRangeForBase(const int nConsPos, int& nLeftPoint, int& nRightPoint ); // passed a consensus relative index, returns the point position // between that ntide and the one to its left int nGetLeftPointBoundaryForBase( const int nConsPos ); // passed a point position on the trace, returns the consensus // relative index of the fragment ntide located there int nConsensusPositionFromPoint( const int nPoint ); // passed a point position range, returns the range of // consensus based indices that fall within it, or that // have part of the base within it, even though the peak // may be off screen--useful for drawing bases void getRangeOfConsensusPositions(const int nPointMin, const int nPointMax, int& nConsensusBaseMin, int& nConsensusBaseMax, bool& bError ) const; void makeHighQuality( const int nMinConsensusPosition, const int nMaxConsensusPosition ); void makeLowQuality( const int nMinConsensusPosition, const int nMaxConsensusPosition ); void makeLowQualityToRight( const int nConsensusPosition ); void makeLowQualityToLeft( const int nConsensusPosition ); void changeToXs( const int nMinConsensusPosition, const int nMaxConsensusPosition ); void changeToNs( const int nMinConsensusPosition, const int nMaxConsensusPosition ); void changeToNsToLeft( const int nConsensusPosition ); void changeToNsToRight( const int nConsensusPosition ); void setChanged(); void setUnchanged() { bChanged_ = false; } bool bChanged() { return bChanged_; } // used for inserting a column of pads void moveTagsToRight( const int nMoveToRightAfterThisConsPos ); // used for undoing insert column of pads void moveTagsToLeft( const int nMoveToRightAfterThisConsPos ); int nGetConsPosOfPriorBaseNotAPad( const int nConsPos ); int nGetConsPosOfNextBaseNotAPad( const int nConsPos ); void consolidateEditTags(); void fixEndPointsOfTagsNotOnAPad( const int nConsPosOfDeletedBase ); void removeAnyTagsJustOnThisBase( const int nConsPos ); unsigned char ucGetReadType(); int nGetStrandAndChemistry(); void moveReadAndReadTagsToNewContig( Contig* pNewContig, const int nNewConsPosFromOldConsPos ); void transferTagsToNewContig( Contig* pContig, const int nNewConsPosFromOldConsPos ); void adjustQualAndAlignClippingWhenInsertColumnOfPads( const int nConsInsertPos ); void adjustQualAndAlignClippingWhenRemoveColumnOfPads( const int nConsInsertPos ); bool bIsWithinACompressionOrG_dropoutTag( const int nConsPos ); void setBadPHDFile(); // actually reads the PHD files void readPHDFileAndSetQualitiesAndPeakPositions( ); bool bSomethingIsWrongWithUsingThisReadAsATemplate(); bool bIsThisReadFromTheSameTemplate( LocatedFragment* pLocFrag ); bool bUnalignedHighQualityRegionTooLong( const bool bIncludeTotallyUnalignedReads ); bool bHasSeriousHighQualityDiscrepancies(); bool bIsChimeric(); bool bIsAFakeRead(); bool bIsReadNearEndOfContigAndPointingOut( int& nWhichEnd ); bool bIsReadNearEndOfContig( int& nWhichEnd ); bool bIsFwdRevPairInSameEndOfSameContig( LocatedFragment* pLocFrag, const int nWhichEnd ); void findVectorInsertJunctionsInUniversalPrimerRead( int& nFirstVectorInsertJunctionConsPos, // "first" means the one closest to the // end where sequencing started--the right // end for bottom strand reads, the left // end for top strand reads // This is on the not X base at the // junction int& nSecondVectorInsertJunctionConsPos, bool& bFoundSecondVectorInsertJunction ); void findVectorInsertJunctionInWalkingRead( bool& bFoundXsAfterAligned, int& nLastBaseBeforeVector ); void readPhredCalls(); void readPhredCalls( RWCString& soTimestampFromPHDFile, RWCString& soABIThumbprint, RWCString& soPhredVersion, RWCString& soCallMethod, RWCString& soChemistry, RWCString& soDye, int& nQualityLevels, const bool bIgnoreTagsAndWRItems ); void readPHDFileForAddedRead(); void zeroErrorProbabilitiesWhereChoseMinilibrary(); // this is only used for universal primer reads pointing out of the contig // so this assumes that the insert runs off the end of the contig void getFromStartOfInsertToEndOfContig( int& nUnpaddedLeft, int& nUnpaddedRight ); void convertVectorTagsToXs(); int nUnpaddedBasesNotInChimeraTag( const int nConsPosStart, const int nConsPosEnd ); void addUnalignedHighQualityRegionsToList( gotoList* pGotoList, const bool bIncludeTotallyUnalignedReads ); void maybeAddUnalignedHighQualityRegion( const int nHighQualityUnalignedStartConsPos, const int nHighQualityUnalignedEndConsPos, gotoList* pGotoList ); Contig* pMakeAContigOutOfThisRead(); bool bFoundVectorInsertJunctionForUniversalPrimerRead( int& nVectorInsertJunctionConsPos ); // from end of contig int nHowFarIsReadFromEndOfHighQualitySegment( bool& bThereIsAHighQualitySegment ); // wholeReadItem* pGetContigDirectionWholeReadItem(); bool bOKToShowThisTraceInShowAllTraces( const int nConsPos ); float fGetAverageQualityInWindow( const int nConsPosInCenter, const int nWindowSize ); bool bGetSingleSignalBasesConsensusLength( MBTValOrderedOffsetVector*& pSingleSignalConsensusLength ); // returns "true" for success bool bGetSingleSignalRelativeAreas( mbtValVector*& paRelativeAreaOfUncalledBases ); void fixTagCoordinates(); public: int nGetConsPosOfPriorBaseNotAPad( const int nConsPos, bool& bError ); int nGetConsPosOfNextBaseNotAPad( const int nConsPos, bool& bError ); bool bThereIsAnEditTagCoveringThisBase( tag*& pReturnedTag, const int nConsPos ); bool bThereIsAnEditTagCoveringThisBaseButNotEndingHere( const int nConsPos ); void addEditTag( const bool bOldNtideWasPad, const bool bNewNtideIsPad, const int nConsPos ); void addWideEditTag( const int nConsPosStart, const int nConsPosEnd ); void adjustAllTags( const int nAdjustToRightOfThisConsPos, const int nAdjustByThisManyBases ); public: // alignment against the consensus, in terms of padded consensus // position. may need to be updated after user edit actions // that affect the alignment (e.g. deletion in consensus // before this location). for this reason all edits originate // at the contig level. // // end pos is now _always_ calculated, not saved int nAlignStartPos_; // the fragment contains a pointer back to the contig that // contains it. Contig *pContig_; // indicates that the user has edited this read, and thus // it must be written back to disk when consed exits bool bChanged_; bool bWholeReadIsLowQuality_; bool bWholeReadIsUnaligned_; // these are in consensus positions int nQualClipStart_; int nQualClipEnd_; int nAlignClipStart_; int nAlignClipEnd_; float dErrorRate_; // so far these are only used for (some) universal primer reads bool bTriedToFindVectorInsertJunction_; bool bFoundVectorInsertJunction_; int nVectorInsertJunctionConsPos_; RWCString soReadPrefix_; GuiColorText* pGctForReadPrefix_; }; // LocatedFragment #endif