/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ // // contig.h // // header file for the Contig class // #ifndef CONTIG_INCLUDED #define CONTIG_INCLUDED #include #include "rwcstring.h" #include "rwbtreedictionary.h" #include "ntide.h" #include "locatedFragment.h" #include "assert.h" #include "basesegment.h" #include "sequence.h" #include "unpaddedFromPadded.h" #include "contigMatchTablesType.h" #include "mbtValVector.h" #include "readTypes.h" #include "mbtPtrOrderedVector.h" #include "primerType.h" #include "paddedFromUnpadded.h" #include "subcloneTTemplate.h" #include "experiment.h" #include "mbtPtrVector.h" #include "dictionary.h" #include "distribution.h" #include "mbtValVectorOfBool.h" #include "readsSortedByReadName.h" #include "mbtValOrderedVectorOfRWCString.h" #include "consedParameters.h" #include "readsSortedByLeftEndPosition.h" #include "tag.h" // required to prevent this error when building: // CC -g -DANSI_C -w -I/usr/local/rogue -I/opt/SUNWspro/SC4.0/include/CC -c assembly.cpp // "/usr/local/rogue/rw/tpordvec.cc", line 93: Error: The type "tag" is incomplete. // "assembly.cpp", Where: While instantiating "RWTPtrOrderedVector::index(const tag*) const". // "assembly.cpp", Where: Instantiated from non-template code. // "/usr/local/rogue/rw/tpordvec.cc", line 122: Error: The type "tag" is incomplete. // "assembly.cpp", Where: While instantiating "RWTPtrOrderedVector::occurrencesOf(const tag*) const". // "assembly.cpp", Where: Instantiated from non-template code. // "/usr/local/rogue/rw/tpordvec.cc", line 147: Error: The type "tag" is incomplete. // "assembly.cpp", Where: While instantiating "RWTPtrOrderedVector::removeAll(const tag*)". // "assembly.cpp", Where: Instantiated from non-template code. // 3 Error(s) detected. // *** Error code 3 // C //class experiment; class gotoItem; class ContigView; class ContigWin; class contigEndPair; class bigArrayOfLittleArraysOfDistributions; class readsSortedByReadName; class Assembly; class autoReport; class kimuraBranchProb; class mbtValVectorOfBool; typedef mbtPtrVector arrayOfLocatedFragmentPtrs; typedef RWTValOrderedVector arrayOfTemplates; #define INITIALIZE_CONTIG \ nFirstDisplayableContigPos_(1), /* by default, first pos is 1 */ \ nLastDisplayableContigPos_(0), \ pUnpaddedContig_(0), \ pUnpaddedContigComplemented_(0), \ \ pCumUnpaddedErrors_( NULL ), \ \ dTargetNumberOfErrors_( 0.0 ), \ dExpectedNumberOfErrorsInConsensus_( 0.0 ), \ dExpectedNumberOfErrorsInExtendedContig_( 0.0 ), \ \ pUnpaddedErrorProbabilities_(NULL), \ pOriginalUnpaddedErrorProbabilities_( NULL ), \ pUnpaddedReadTypesCoveringEachBase_( NULL ), \ pCandidateExperiments_( NULL ), \ pChosenExperiments_( NULL ), \ pUniversalPrimerExperimentsForLowQualityRegions_( NULL ), \ \ pPaddedFromUnpadded_( NULL ), \ \ nUnpaddedHighQualitySegmentStart_( -2 ), \ nUnpaddedHighQualitySegmentEnd_( -2 ), \ \ pDistributionsOfChosenExperimentsArray_( NULL ), \ pSavedDistributionsOfChosenExperimentsArray_( NULL ), \ pSavedUnpaddedReadTypesCoveringEachBase_( NULL ),\ pSavedUnpaddedErrorProbabilities_( NULL ), \ bThisContigIsExcludedFromAutofinishFinishing_( false ), \ bLeftEndOfContigExaminedForEndOfClone_( false ), \ bRightEndOfContigExaminedForEndOfClone_( false ), \ pProblemsInUnpaddedContig_( NULL ), \ bChanged_( false ), \ bTagsSayDoNotExtendToLeft_( false ), \ bTagsSayDoNotExtendToRight_( false ), \ bAlreadyExaminedContigForDoNotExtendTags_( false ), \ bOKToUsePaddedIndexFast_( false ), \ bReadDepthArrayCalculated_( false ), \ pReadDepthArray_( NULL ), \ bTagsSayDoNotDoPCRWithLeftEnd_( false ), \ bTagsSayDoNotDoPCRWithRightEnd_( false ), \ bAlreadyExaminedContigForDoNotDoPCRTags_( false ), \ nScaffold_( -1 ), \ bSetStartNumberingUnpaddedConsensusAtUserDefined_( false ), \ nStartNumberingUnpaddedConsensusAtUserDefined_( 1 ), \ bReadsWillExtendLeftEnd_( false ), \ bReadsWillExtendRightEnd_( false ), \ pBranchProbabilities_( 0 ), \ bReadListsNeedFixing_( true ), \ pReferenceSequence_( NULL ), \ nReadsToAdd_( 0 ), \ bDiscrepancyListsCalculated_( false ), \ paDiscrepanciesNotIndels_( NULL ), \ paDiscrepanciesJustIndels_( NULL ), \ pSubcloneTemplatesForFwdRevPairDepth_( NULL ), \ nApproxSizeOfSubcloneTemplatesForFwdRevPairDepth_( 0 ) #define INITIALIZE_CONTIG_ARRAYS \ soName_ = "contig sequence";\ aSubcloneTemplates_.soName_ = "Contig.aSubcloneTemplates_";\ aConsensusTags_.soName_ = "Contig.aConsensusTags_";\ apLocatedFragment_.soName_ = "Contig.apLocatedFragment_";\ apLocatedFragment2_.soName_ = "Contig.apLocatedFragment2_";\ apVeryLongReads_.soName_ = "Contig.apVeryLongReads_";\ pContig_[0] = NULL; \ pContig_[1] = NULL; \ pContig_[2] = NULL; \ bAlreadyCheckedEnd_[0] = false; \ bAlreadyCheckedEnd_[1] = false; \ bAlreadyCheckedEnd_[2] = false; \ pPreviousContig_ = NULL; \ pNextContig_ = NULL; \ aConsistentFwdRevPairDepth_.soName_ = "Contig::aConsistentFwdRevPairDepth_"; \ pArrayOfIndirectContigEndPairs_[0] = 0; \ pArrayOfIndirectContigEndPairs_[1] = 0; \ pArrayOfIndirectContigEndPairs_[2] = 0; class Contig : public Sequence { public: // ctor with no args creates empty contig // Contig() // : // INITIALIZE_CONTIG // , // baseSegArray_( this ), // bComplementedFromWayPhrapCreated_( false ) /* really don't know */ // { // INITIALIZE_CONTIG_ARRAYS // } // ctor used by old ace format (in assembly.cpp ) Contig(const RWCString& soContigName, Assembly* pAssembly ) : Sequence( soContigName ), INITIALIZE_CONTIG , baseSegArray_( this ), bComplementedFromWayPhrapCreated_( false ), /* really don't know */ pAssembly_( pAssembly ) { INITIALIZE_CONTIG_ARRAYS } Contig( char* szContigName, const int nNumberOfReads, const int nNumberOfBaseSegments, const bool bComplementedFromWayPhrapCreated, Assembly* pAssembly ) : Sequence( szContigName ), apLocatedFragment_( (size_t) nNumberOfReads ), baseSegArray_( this, nNumberOfBaseSegments ), bComplementedFromWayPhrapCreated_( bComplementedFromWayPhrapCreated ), pAssembly_( pAssembly ), INITIALIZE_CONTIG { INITIALIZE_CONTIG_ARRAYS } ~Contig(); // the operator semantics are defined only for use by // RWTPtrOrderedVector. USERS BEWARE: this only // tests for equality of the contig NAME. The contents // (i.e. consensus and fragments) are not tested. bool operator==(const Contig& con) const { return( soSequenceName_ == con.soSequenceName_); } // just so that we can have some way of sorting a long // list of contigs bool operator<( const Contig& con) const { return( soSequenceName_ < con.soSequenceName_ ); } // returns the integer number of fragments in the contig int nGetNumberOfFragsInContig() const { return apLocatedFragment_.length(); } int nGetNumberOfReads() const { return apLocatedFragment_.length(); } // returns length of the consensus, including pads. // calles Sequence member function. int nGetPaddedConsLength() const { return nGetPaddedSeqLength(); } // translate padded to unpadded index. additional code around // the inherited Sequence member function accounts for the // overhanging fragment situation int nUnpaddedIndex(const int nConsPos); // this always uses user-defined starting positions (if the // "startNumberingConsensus" tag is there) inline int nUnpaddedIndexStartingAtUserDefinedPosition2( const int nConsPos) { // for efficiency, test bSetStartNumberingUnpaddedConsensusAtUserDefined_ // before making the system call if ( !bSetStartNumberingUnpaddedConsensusAtUserDefined_ ) { setStartNumberingUnpaddedConsensusAtUserDefinedIfNecessary(); } return( nUnpaddedIndex( nConsPos ) + nStartNumberingUnpaddedConsensusAtUserDefined_ - 1 ); } // this allows the user to turn on and off using user-defined starting // positions inline int nUnpaddedIndexStartingAtUserDefinedPosition( const int nConsPos ) { if ( pCP->bNumberUnpaddedConsensusAtUserDefined_ ) { return( nUnpaddedIndexStartingAtUserDefinedPosition2( nConsPos ) ); } else { return( nUnpaddedIndex( nConsPos ) ); } } // this overrides Sequence::nPaddedIndex // It is used by the contig since it allows converting an // unpadded index that is negative (where the contig doesn't have // any bases but there are reads) int nPaddedIndex( const int nUnpaddedConsPos ); // this is much faster, but first you must set pPaddedFromUnpadded_ // by calling setPaddedPositionsArray(); int nPaddedIndexFast( const int nUnpaddedConsPos ) { if ( !pPaddedFromUnpadded_ || !bOKToUsePaddedIndexFast_ ) { if ( !bOKToUsePaddedIndexFast_ ) { // cerr << "had to call setPaddedPositionArray because bOKToUsePaddedIndexFast_ was null in " << soGetName() << endl; setPaddedPositionsArray(); } } return( pPaddedFromUnpadded_->nPaddedFromUnpadded( nUnpaddedConsPos ) ); } // This is a wrapper around the Sequence::setSequence // so that the aUnpaddedPositionsArray_ gets set correctly void setSequence( RWCString& soSequence ) { Sequence::setSequence( soSequence ); setUnpaddedPositionsArray(); } void setSequence2( char* szBases, const int nNumberOfBases ) { Sequence::setSequence2( szBases, nNumberOfBases ); setUnpaddedPositionsArray(); } void setSequence3( char* szBases, const int nNumberOfBases ) { Sequence::setSequence3( szBases, nNumberOfBases ); setUnpaddedPositionsArray(); } // used to printing unpadded positions void setUnpaddedPositionsArray(); // used much less commonly--for autofinish and for force joins void setPaddedPositionsArray(); // first valid pos (i.e. has ntide in consensus, not just overhanging // fragment) in contig consensus sequence int nGetStartConsensusIndex() const { return Sequence::nGetStartIndex(); } // returns last valid pos( i.e. has ntide in consensus ) int nGetEndConsensusIndex() const { return Sequence::nGetEndIndex(); } // overrides Sequence::nGetUnpaddedEndIndex to be more efficient int nGetUnpaddedEndIndex(); // overrides Sequence::nGetUnpaddedSeqLength() to be more efficient // (This assumes that the starting unpadded position is 1.) virtual int nGetUnpaddedSeqLength() { return( Contig::nGetUnpaddedEndIndex() ); } bool bIsOnConsensus( const int nConsPos ) { if ( ( nConsPos < nGetStartConsensusIndex() ) || ( nGetEndConsensusIndex() < nConsPos ) ) return( false ); else return( true ); } // fragments can "overhang" the consensus if part of them // was used to form the beginning or end of the consensus // but an adjacent part was deemed of too poor a quality // to be included. the consensus relative index of the // first and last displayable POSITIONS is returned // by these numbers. They often will be different // then those returned by the two above, // nGetStartConsensusIndex and nGetEndConsensusIndex // E.g., this number may be negative // This may be negative int nGetFirstDisplayableContigPos() const { return nFirstDisplayableContigPos_; } // This may be greater than the last base in the consensus int nGetLastDisplayableContigPos() const { return nLastDisplayableContigPos_; } // since the fragments can overhang the consensus // the displayable length of the Contig is different // than the length of the consensus int nGetDisplayableContigLength() const { return nLastDisplayableContigPos_ - nFirstDisplayableContigPos_ + 1; } void setHighQualitySegment(); bool bIsThereAHighQualitySegment() { if ( nUnpaddedHighQualitySegmentStart_ == -2 || nUnpaddedHighQualitySegmentEnd_ == -2 ) setHighQualitySegment(); if ( nUnpaddedHighQualitySegmentStart_ != -1 && nUnpaddedHighQualitySegmentEnd_ != -1 ) return true; else return false; } int nGetHighQualitySegmentStart() { if ( nUnpaddedHighQualitySegmentStart_ == -2 ) setHighQualitySegment(); if ( nUnpaddedHighQualitySegmentStart_ == -1 ) return( nGetUnpaddedStartIndex() ); else return( nUnpaddedHighQualitySegmentStart_ ); } int nGetHighQualitySegmentEnd() { if ( nUnpaddedHighQualitySegmentEnd_ == -2 ) setHighQualitySegment(); if ( nUnpaddedHighQualitySegmentEnd_ == -1 ) return( nGetUnpaddedEndIndex() ); else return( nUnpaddedHighQualitySegmentEnd_ ); } int nGetHighQualitySegmentLengthUnpadded(); int nGetUnpaddedClippedLowQualityBases( const int nWhichEnd ); // linear transform sets passed consensus position to it's // complemented equivalent. read it before using. // note this does NOT affect data members, only passed args void complement_coordinate( int& nConsensusPosition ); // uses above on both args. used to complement fragment and // clipping start/stop points. // note this does NOT affect data members, only passed args void complementEndPoints( int& nStart, int& nEnd ); ////////////////////////////////////////////////////////////////////// // // members for retrieving part of the contig, fragments or Ntides. // ////////////////////////////////////////////////////////////////////// // wrapper for Sequence class indexing operator // passed 1 based index into padded consensus sequence // use this where possible - it's clearer that the nt // comes from the consensus and not a component frag. // it also allows for overhanging fragments - returns // a blank base at those positions. Ntide ntGetCons(const int nConsPos) const { // there are more "legal" indices than in the consensus array if ( nConsPos < nFirstDisplayableContigPos_ ) { RWCString soErrorMessage = "nConsPos = " + RWCString( (long) nConsPos ) + " < nFirstDisplayableContigPos_ = " + RWCString( (long) nFirstDisplayableContigPos_ ) + " in contig " + soGetName(); THROW_ERROR2( soErrorMessage ); } if ( nConsPos > nLastDisplayableContigPos_ ) { RWCString soErrorMessage = "nConsPos = " + RWCString( (long) nConsPos ) + " > nLastDisplayableContigPos_ = " + RWCString( (long) nLastDisplayableContigPos_ ) + " in contig " + soGetName(); THROW_ERROR2( soErrorMessage ); } if ((nConsPos >= Sequence::nGetStartIndex()) && (nConsPos <= Sequence::nGetEndIndex()) ) { return( Sequence :: ntideGet( nConsPos ) ); } else { Ntide ntTemp(' ', ucQualityMin); // assume null return ntTemp; } } // this is to be used during parsing the ace file for setting // quality values when the nLastDisplayableContigPos_ has not // yet been set Ntide ntGetConsUnsafe(const int nConsPos) const { if ((nConsPos >= Sequence::nGetStartIndex()) && (nConsPos <= Sequence::nGetEndIndex()) ) { return( Sequence :: ntideGet( nConsPos ) ); } else { return( Ntide(' ', ucQualityMin) ); // assume null } } Ntide ntGetConsUnsafeOnConsensus( const int nConsPos ) const { return( Sequence::ntideGet( nConsPos ) ); } // This has been replaced by the form above which does not // use the Ntide::operator= function. Thus it is faster during // startup. // Ntide ntGetConsUnsafe(const int nConsPos) const { // Ntide ntTemp(' ', ucQualityMin); // assume null // if ((nConsPos >= Sequence::nGetStartIndex()) && // (nConsPos <= Sequence::nGetEndIndex()) ) { // ntTemp = Sequence :: ntideGet( nConsPos ); // } // return ntTemp; // } RWCString soGetFancyName(); RWCString soGetAbbreviatedName(); // e.g., 15 instead of Contig15 // Ntide has a (char) conversion operator as well char cGetConsensusBase( const int nConsPos ) { return( ntGetCons( nConsPos ) ); } char cGetConsensusBaseUnsafe( const int nConsPos ) { return( ntGetConsUnsafe( nConsPos ) ); } // get part of consensus as char string. may include // blanks if in fragment overhang region. // caller must delete returned char array // // only returns as many as are avail - may truncate request // at end of contig. chrisa 10-mar-95 char* szGetPartOfConsensus(const int nStartPosition, const int nLength) const; // these are more efficient because they do not require // copying the string to return the string, as do the // soGet... functions void getPartOfConsensus( const int nStartConsPos, const int nLength, RWCString& soPartOfConsensus ); void getPartOfComplementedConsensus( const int nStartConsPos, const int nLength, RWCString& soPartOfConsensus ); void getPartOfUnpaddedConsensus( const int nUnpaddedStart, const int nUnpaddedLength, RWCString& soPartOfUnpaddedConsensus, const bool bComplemented, const bool bTowardsIncreasingUnpaddedPositions ); void getPartOfUnpaddedConsensusForCompareContigs( const int nUnpaddedStart, const int nUnpaddedLength, RWCString& soPartOfUnpaddedConsensus, const bool bComplemented, const bool bToLeftNotToRight ); RWCString soGetPartOfConsensusUpperOrLower( const int nStartPosition, const int nEndPosition ); char* szGetUnpaddedConsensusToRightEnd( const int nConsPosPadded ); char* szGetUnpaddedConsensusToLeftEnd( const int nConsPosPadded ); char* szGetPartUnpaddedConsensusToRight( const int nStartConsPosPadded, const int nUnpaddedLength ); // this will not try to return further back than the beginning of the // sequence, even if nLength is set too large char* szGetPartUnpaddedConsensusToLeft( const int nConsPosPadded, const int nLength ); RWCString soGetUnpaddedConsensus(); char* szGetUnpaddedConsensus( int& nUnpaddedConsensusLength, const int nExtraRoom = 0 ); void appendUnpaddedHighQualitySegment( RWCString& soClone, const bool bComplement ); void appendPartOfUnpaddedHighQualitySegment( RWCString& soAppendToThis, const bool bComplement, const int nFirstOrLastUnpadded, const bool bPreviousArgumentIsFirstNotLast ); void appendPartOfUnpaddedConsensus( RWCString& soAppendToThis, const bool bComplement, const int nLeftUnpadded, const int nRightUnpadded ); LocatedFragment* pGetLocatedFragment( const int nIndex ) { assert ((nIndex >= 0) && (nIndex < apLocatedFragment_.length())); return apLocatedFragment_[nIndex]; } // return a pointer to the located fragment at index LocatedFragment* pLocatedFragmentGet(const int nIndex) { assert ((nIndex >= 0) && (nIndex < apLocatedFragment_.length())); return apLocatedFragment_[nIndex]; } // will return a pointer to located fragment if // one is found with matching name, NULL if not LocatedFragment* pLocFragGetByName(const RWCString); // returns a ptr to newly allocated ContigView // client responsible for deletion. passed a range // (nStart <= n <= nEnd), it bombs if out either param // is not within consensus i.e. !(nEnd < myContig.length()) ContigView* pGetContigView( const int nStart, const int nEnd); ContigView* view(const int nStart, const int nEnd) { return pGetContigView(nStart, nEnd); } ////////////////////////////////////////////////////////////////////// // // building or editing the contig or its members // ////////////////////////////////////////////////////////////////////// // add the passed fragment to the contig, aligning it // at nAlignPos_. The fragment can and probably will be // modified by the Contig, although the fragment will not // modify the file from which it came. // Note (May 2008): probably need to set bReadListsNeedFixing_ to true void addLocatedFragment(LocatedFragment* plocfrag ); // for debugging due to the fact that the HP debugger will not // execute inline functions int nDumpContig(); // this call will complement the entire contig & all it's // located fragments, trace files (if any). void complementContig(); void complementContigIfNecessary( RWCString& soErrorMessage ); // pays attention to contigDirection // whole read items // // edits to any part of the contig, including it's located // fragments, can affect other contig data structures. // hence even ntide level editing passes through here, // allowing the contig the chance to adjust base segments etc. // if need be. // // replace the ntide at passed location with passed ntide // note that if passed a char instead of ntide it gets default // quality, not the quality of the previously existing ntide // throws exception if there is no ntide at that position. void fixConsensusWhenOverstrikeFragNtide(LocatedFragment* pLocFrag, const int nConsPos, const Ntide nt) { // is it in the base segment array? if (baseSegArray_.bIsInBaseSegment(nConsPos, pLocFrag)) { // yes, edit consensus as well Sequence::setNtideAtPos(nConsPos,nt); setUnpaddedPositionsArray(); } } void fixRunsInConsensus(); void maybeFixConsensusInRun( const int nStartOfRunConsPos, const int nEndOfRunConsPos, const char cBaseOfRun, const int nStartOfRunUnpadded, int& nEndOfRunUnpadded ); // removed below Aug 1997 since not used (DG) // same as above, but for consensus sequence - no frags affected // void overstrikeConsNtide(const int nConsPos, // const Ntide nt) { // Sequence::setNtideAtPos(nConsPos,nt); // } // inserts a column of pads into consensus and any frags aligned // just before the passed consensus relative position // Thus the new column of pads becomes at nConsPos and what // used to be at nConsPos now becomes at nConsPos + 1 void insertColOfPads(const int nConsPos); // deletes a column of pads in consensus and any frags aligned // at the passed consensus relative positon. this is only // intended to be used as an undo version of insertColOfPads, // in that the user is never allowed to delete an entire column // simply for it's own sake. void undoInsertColOfPads(const int nConsPos); void deleteColumn(const int nConsDeletePos, const bool bColumnOfPads ); ////////////////////////////////////////////////////////////////////// // // file i/o // ////////////////////////////////////////////////////////////////////// // write a file at passed path with containing consensus // sequence, stripped of pads, in fasta format void writeConsensusToFastaFile(const char* szFilePath) const { // call inherited sequence function for same, // using contig name as comment Sequence::writeUnpaddedSeqToFastaFile(szFilePath, soGetName()); } void writeConsensusToFastaFile2( const bool bWriteBothBasesAndQualFiles, const FileName& soBasesFileName, const FileName& soQualFileName, const bool bWriteWholeContig, const int nStartUnpaddedConsPos, const int nEndUnpaddedConsPos ); void writeConsensusToFastaFile3( FILE *pfilBases, FILE *pfilQualities, const bool bWriteBothBasesAndQualFiles, const FileName& soBasesFileName, const FileName& soQualFileName, const bool bWriteWholeContig, const int nStartUnpaddedConsPos, const int nEndUnpaddedConsPos, const RWCString& soHeaderLine = "" ); void writeConsensusTagsToAceFile( ofstream& ofsAceFile ); void writeConsensusToPhdFile( const FileName& soPhdFileName, const bool bWriteWholeContig, const int nStartUnpaddedConsPos, const int nEndUnpaddedConsPos ); void updateFirstDisplayableContigPos(); void updateLastDisplayableContigPos(); void updateFirstAndLastDisplayableContigPos(); void setChanged(); void setUnchanged() { bChanged_ = false; } bool bChanged() { return( bChanged_ ); } void setContigMatchTables(); void freeContigMatchTables( contigMatchTablesType* pContigMatchTables ); void addConsensusTag( tag* pConsensusTag ); void removeConsensusTag( tag* pConsensusTag ); void dumpConsensusTags(); tag* pGetTag( const int nTagIndex ) { return( aConsensusTags_[ nTagIndex ] ); } int nGetNumberOfTags() { return( aConsensusTags_.length() ); } void complementConsensusTags(); bool bIsComplementedFromWayPhrapCreated() { return( bComplementedFromWayPhrapCreated_ ); } void setUnpaddedReadsThatCouldImproveQualities(); void setUnpaddedErrorProbabilities(); void setUnpaddedErrorProbabilitiesAndMore(); void createCumulativeErrorArray(); void findFinishingReads(); bool bCustomPrimerQualityTest( const int nFirstBaseOfReadUnpadded, const bool bTopStrandNotBottomStrand ); void createUniversalPrimerExperimentCandidatesList(); void createCustomPrimerExperimentCandidatesList(); bool bUniversalSubclonePrimerHere( const int nFirstBaseOfReadUnpadded, const bool bTopStrandNotBottomStrand, LocatedFragment* pLocFrag ); void createExperimentWithCustomPrimer( primerType* pPrimer, const readTypes ucRead, const bool bTopStrandNotBottomStrand, const int nFirstBaseOfReadUnpadded, const bool bCloneTemplateNotSubcloneTemplate, const bool bForCoveringSingleSubcloneRegions, const double dErrorsFixed ); void createExperimentsWithCustomPrimers(); void createExperimentsWithResequencingUniversalPrimers(); void createExperimentWithResequencingUniversalPrimer( LocatedFragment* pDyePrimerLocFrag, subcloneTTemplate* pSubcloneTemplate ); void createExperimentsWithFirstForwardsOrFirstReverses(); // just for emulating 9.66 behavior void createExperimentCandidatesListFor9_66(); void chooseExperimentsToCoverLowQualityRegionsAndGapsFor9_66(); void chooseExperimentsToCoverLowQualityRegionsAndGapsFor9_66_2(); void maybeCreateExperimentWithCustomPrimer( const readTypes ucRead, const bool bTopStrandNotBottomStrand, const int nFirstBaseOfReadUnpadded, const bool bCloneTemplateNotSubcloneTemplate, const bool bForCoveringSingleSubcloneRegions ); bool bMoreWorthwhileExperiments( bool& bMoreAvailableExperiments ); bool bStillTooManyExpectedErrors(); void adjustExpectedNumberOfErrors( experiment* pExperiment ); experiment* pGetBestExperimentCandidate(); experiment* pGetBestSingleSubcloneExperiment(); experiment* pGetBestExperimentCandidateButMightHaveBeenTriedBefore(); experiment* pGetBestSingleSubcloneExperimentButMightHaveBeenTriedBefore(); void setVariousErrorData( experiment* pExp ); void adjustRemainingExperimentCandidates( experiment* pExperimentJustChosen ); void adjustErrorsFixed( experiment* pExperimentToBeAdjusted ); void adjustConsensusDistributionsAndArrays( experiment* pExperimentJustChosen ); void printNoExperiments(); void printWorstRemainingRegion(); void setTargetNumberOfErrorsForThisContig(); void setExpectedNumberOfErrorsForThisContig(); void recalculateCurrentExpectedNumberOfErrorsForThisContig( double& dErrorsInConsensus, double& dErrorsInExtendedContig ); void displayExperimentCandidates(); void guiPopupAndDisplayExperiments(); gotoItem* pCreateGotoItemForOneExperiment( experiment* pExperiment ); void acceptExperiment( experiment* pExperiment ); void chooseExperimentsToCoverLowQualityRegionsAndGaps( const bool bUniversalPrimerNotCustomPrimerReads ); void chooseExperimentsToCoverLowQualityRegionsAndGaps2( const bool bUniversalPrimerNotCustomPrimerReads ); bool bIsThisEndTheEndOfTheClone( const int nWhichEnd ); bool bIsThisEndTheEndOfTheCloneGenomic( const int nWhichEnd ); bool bIsThisEndTheEndOfTheCloneCDNA( const int nWhichEnd ); bool bFindWhichEndOfVectorIsConnectedToContigEnd( const int nWhichEndOfContig, int& nWhichEndOfVector, const RWCString& soVectorSequence, const RWCString& soComplementedVectorSequence ); bool bDoTagsAllowExtendingThisEnd( const int nWhichEnd ) { if ( nWhichEnd == nLeftGap ) { return( !bTagsSayDoNotExtendToLeft_ ); } else { return( !bTagsSayDoNotExtendToRight_ ); } } void setUpForCreatingExperimentsList(); void guiFindFinishingReads(); void batchFindFinishingReads(); void prioritizeCandidateExperiments(); void rejectExperiment( experiment* pExperiment ); void calculateErrorsFixedJustInConsensus( experiment* pExperiment ); void coverSingleSubcloneRegions(); void autoFinishSetSingleSubcloneArrays( const bool bObserveDoNotFinishTags ); void saveCopyOfSingleSubcloneArray(); void autoFinishSetCumulativeSingleSubcloneArray(); void autoFinishFindSingleSubcloneRegionsFromExistingReads(); void autoFinishFindSingleSubcloneRegionsFromSuggestedExperiments(); void autoFinishCreateCandidateExperimentsToCoverSingleSubcloneRegions(); void autoFinishCreateTopStrandCandidateExperimentsForSingleSubcloneRegions(); void autoFinishCreateBottomStrandCandidateExperimentsForSingleSubcloneRegions(); void autoFinishChooseSingleSubcloneExperiments(); void orderTemplatesAndSetSingleSubcloneValues( experiment* pExp ); void sortTemplatesByHowManySingleSubcloneBasesEachFixes( primerType* pPrimer, int nSingleSubcloneBasesFixedByThisTemplate[ nNUMBER_OF_TEMPLATES] ); void eliminateTemplatesThatDoNotSolveAnySingleSubcloneBases( primerType* pPrimer, int nSingleSubcloneBasesFixedByThisTemplate[ nNUMBER_OF_TEMPLATES] ); void adjustSingleSubcloneRegionsInRemainingExperiments( experiment* pChosenExp ); void adjustSingleSubcloneBasesFixedByExperiment( experiment* pExpToBeAdjusted ); void adjustSingleSubcloneBasesFixedBySubcloneExperiment( experiment* pExpToBeAdjusted, const int nConsPosLeft, const int nConsPosRight ); void adjustSingleSubcloneBasesFixedByWholeCloneExperiment( experiment* pExpToBeAdjusted, const int nConsPosLeft, const int nConsPosRight ); void autoFinishAdjustSingleSubcloneArrays( experiment* pChosenExperiment ); void autoFinishAdjustSingleSubcloneArraysDueToPickingSubcloneExperiment( experiment* pChosenExperiment ); void autoFinishAdjustSingleSubcloneArraysDueToPickingWholeCloneExperiment( experiment* pChosenExperiment ); void dealWithOneContigEnd( const int nWhichGap ); void autoFinishCallReversesToFlankGapsOnOneEnd2( const int nWhichGap, const int nNumberOfReversesToCall ); bool bWillThisTemplateWorkForFlankingThisGap( subcloneTTemplate* pTrialSub, const int nWhichGap, int& nReadTypeToSuggest ); void dealWithContigEnds(); void findNeighborForOneGap( contigEndPair*& pPair, const int nWhichGap, int& nNumberOfExistingForwardReversePairs ); void findNeighborForOneGap2( const int nWhichGap, RWTPtrSortedVector aContigsConsideredSoFar, RWTPtrOrderedVector*& pArrayOfContigEndPairs ); void nearGapsSuggestEachMissingReadOfReadPairs(); void suggestEachMissingReadOfReadPairsOnOneEnd( const int nWhichGap ); bool bGetLikelyAlignedRegionOnContig( experiment* pExp, int& nConsPosLeft, int& nConsPosRight ); bool bGetLikelyAlignedRegionOnContig2( const int nFirstBaseOfReadUnpadded, const bool bTopStrandNotBottomStrand, int& nConsPosLeftUnpadded, int& nConsPosRightUnpadded ); int nCountSingleSubcloneBases(); void autoFinishPrintSingleSubcloneRegions(); double dGetExpectedNumberOfErrorsPerMegabaseForConsensus(); bool bFindTemplatesForPrimer( primerType* pPrimer, const readTypes ucRead ); double dGetDepthOfCoverage(); void putReadIntoItsOwnContig( LocatedFragment* pLocFragToRemove, ContigWin* pOldContigWin, const bool bOKToUseGui ); void putReadsIntoTheirOwnContigs( readsSortedByReadName* pArrayofReadsToRemove, const bool bOKToUseGui, const int nOldConsPosDisplayed, RWTPtrOrderedVector* pAdditionalReadsPutIntoOwnContigs ); void putReadIntoItsOwnContigAndLeaveOldContigInOnePiece( LocatedFragment* pLocFrag, ContigWin* pOldContigWin, const bool bOKToUseGui ); void dumpTemplates(); void setErrorsToZeroWithinDoNotFinishTags(); void handleTagsToNotExtendContig(); void determineIfTagsPreventExtendingContig(); void determineIfTagsPreventPCR(); void lookForCloneEndTags(); void dealWithCloneEndTag( tag* pTag ); void dealWithCloneEndTagForSingleSubcloneRegions( tag* pTag ); void autoFinishExamineDoNotFinishTagsForSingleSubcloneRegions(); double dGetOriginalConsensusErrors( const int nUnpaddedLeft, const int nUnpaddedRight ); void getErrorInfo( const int nUnpaddedLeftPos, const int nUnpaddedRightPos, double& dTotalErrors, int& nTotalNonEditedBases, double& dErrorRate ); void getSingleSubcloneInfo( const int nUnpaddedLeftPos, const int nUnpaddedRightPos, int& nSingleSubcloneBases ); int nGetCurrentSingleSubcloneBases( const int nUnpaddedLeftPos, const int nUnpaddedRightPos ); int nGetGapSize( arrayOfTemplates* pArrayOfInformativeTemplates ); double dGetErrorsFixedForWholeCloneRead( const bool bTopStrandNotBottomStrand, const int nFirstBaseOfReadUnpadded, const bool bChooseExperiment, const bool bJustConsensusNotGaps, const bool bFindWindowOfMostErrorsFixed ); double dGetInitialMaxErrorsFixed2( const int nFirstBaseOfReadUnpadded, const bool bTopStrandNotBottomStrand ); double dGetInitialMaxErrorsFixed( const int nUnpaddedLeft, const int nUnpaddedRight ); double dGetErrorsFixedForCustomPrimerSubcloneRead( primerType* pPrimer, const bool bTopStrandNotBottomStrand, const int nFirstBaseOfReadUnpadded, const bool bChooseExperiment, const bool bJustConsensusNotGaps, const bool bFindWindowOfMostErrorsFixed ); double dGetErrorsFixedForSubcloneRead( subcloneTTemplate** ppSubcloneTemplateArray, const int nNumberOfSubclones, const bool bTopStrandNotBottomStrand, const int nFirstBaseOfReadUnpadded, const bool bExcludeVectorBasesAtBeginningOfRead, const bool bChooseExperiment, const bool bJustConsensusNotGaps, const bool bFindWindowOfMostErrorsFixed ); void findWindowOfMostErrorsFixed( experiment* pExp ); void findWindowOfMostSingleSubcloneBasesFixed( experiment* pExp ); void dumpContigErrorProbabilities( const int nFileID ); bool bThereIsAnotherSubcloneCustomPrimerReadTooCloseOnTheSameStrand( experiment* pExperimentCandidate ); bool bThereIsAnotherWholeCloneCustomPrimerReadTooCloseOnTheSameStrand( experiment* pExperimentCandidate ); void possiblyUpdateContigHighQualitySegment( experiment* pChosenExp ); void warnUserIfCouldNotExtendContig(); void createFakeSubcloneTTemplatesForWholeCloneReads(); void zeroErrorProbabilitiesWhereChoseMinilibrary( contigEndPair* pPair ); void resetCumulativeErrorArray(); void suggestMinilibrary( contigEndPair* pPair ); void clearExperimentCandidatesList(); void saveStateOfConsensus(); void restoreStateOfConsensus(); void recalculateConsensusUsingAllChosenUniversalPrimerExperiments(); void recalculateErrorsFixedOfExperimentCandidates(); int nGetPaddedConsPosForMakingPCRPrimers( const int nWhichEnd ); void getUnpaddedRangeForMakingPCRPrimers( const int nWhichEnd, int& nUnpaddedLeft, int& nUnpaddedRight, bool& bProblems ); void findProblemsInContig( const bool bFindSingleSubcloneRegions, const bool bFindHighQualityDiscrepancies, const bool bFindUnalignedHighQualityRegions); void addSingleSubcloneBasesToProblemsArray(); void addHighQualityDiscrepanciesToProblemsArray(); void addUnalignedHighQualityRegionsToProblemsArray(); void addUnalignedHighQualityRegionsToProblemsArray2( LocatedFragment* pLocFrag, const int nHighQualityUnalignedStart, const int nHighQualityUnalignedEnd ); bool bIsThereAProblemHere( const int nUnpaddedLeft, const int nUnpaddedRight, int& nWhichProblem ); void determineWhetherEndOfClone( const int nWhichEnd ); void addMononucleotideRichRegionsToList( gotoList* pGotoList, const int nUnpaddedStartOfRegion, const int nUnpaddedEndOfRegion, const bool bNeedToSetPaddedPositionsArrays, bool& bFoundARegion ); void addDinucleotideRichRegionsToList( gotoList* pGotoList, const int nUnpaddedStartOfRegion, const int nUnpaddedEndOfRegion, const bool bNeedToSetPaddedPositionsArrays, bool& bFoundARegion ); void addStopsCausingLCQs( gotoList* pGotoList, const bool bSetPaddedPositionsArray ); void addRunsAndMicrosatellitesCausingLCQs( gotoList* pGotoList, const bool bSetPaddedPositionsArray); void addEditableSites( gotoList* pEditableSitesList ); void determineLocationsOfRunsAndStops(); void determineWhetherThisExperimentNeedsSpecialChemistry( experiment* pExp ); bool bDoesRegionCrossARunOrStop( const int nUnpaddedLeft, const int nUnpaddedRight, bool& bCrossesRun, bool& bCrossesStop ); bool bDoesPadIndicateAPossibleMisassemblyHere( LocatedFragment* pLocFragOfHQDPad, int nConsPosOfPad ); bool bIAmARealContig(); Contig* pGetNextContigInScaffold( const int nPreviousContigWasConnectedAtWhichEndOfThisContig, bool& bCurrentContigIsComplementedInTheScaffold, int& nThisContigIsConnectedToWhichEndOfNextContig ); // nScaffoldPos is 0-based while nUnpaddedConsPos is 1-based int nGetUnpaddedConsPosFromScaffoldPos( const int nScaffoldPos ); int nGetUnpaddedConsPosFromScaffoldPosUnsafe( const int nScaffoldPos ); int nGetScaffoldPosFromUnpaddedConsPos( const int nUnpadded ); void recalculateConsensusQualitiesNoChange( const int nRegionConsPosLeft, const int nRegionConsPosRight, RWTValOrderedVector& aNewQualities ); void recalculateConsensusQualitiesAndChange( const int nRegionConsPosLeft, const int nRegionConsPosRight ); void appendListOfDifferencesBetweenRecalculatedAndOriginalConsensus( gotoList* pGotoList ); void highlightAllReads(); void highlightAllReads( const bool bHighlight ); void unhighlightAllReads(); void addAllSeriousFalseMatchesInOneContig( primerType* pPrimer, RWTValOrderedVector& aFalseMatchLocations, RWTValOrderedVector& aFalseMatchContigsOfLocations, RWTValOrderedVector& aFalseMatchStrandsOfLocations ); void addAllSeriousFalseMatchesInOneContigOneStrand( primerType* pPrimer, RWTValOrderedVector& aFalseMatchLocations, RWTValOrderedVector& aFalseMatchContigsOfLocations, RWTValOrderedVector& aFalseMatchStrandsOfLocations, const bool bLookInTopStrandNotBottomStrandOfContig ); void figureOutConsistentForwardReversePairDepth(); // really just set up for autoEdit void changeToXsInAllReads( const int nFirstBaseToMakeAnX, const char cChangeToXsRightOrLeft ); void findMultipleHighQualityDiscrepancies( gotoList* pGotoList ); void navigateByHighlyDiscrepantPositions( const int nMinDiscrepantReads, const int nMaxDepthOfCoverage, const int nMinimumQuality, const bool bJustListIndels, const bool bCountOnceReadsStartingAtSameLocation, const bool bIgnoreIfListedBasesInConsensus, const RWCString& soIgnoreIfTheseBasesInConsensus, const bool bGotoListNotArrays, gotoList* pGotoList ); void searchForHighQualityDiscrepanciesWithReference( LocatedFragment* pLocFragRef, gotoList* pGotoList ); void appendListOfQuestionableConsensusBases( gotoList* pGotoList ); // used for writing out ace file void sortReadsByPosition(); void tellPhrapNotToOverlapReadsDiscrepantAtThisLocation( const int nConsPosWhereToSplit ); int nGetGoodPlaceForContigEndPairTag( const int nWhichEnd ); void fixOrientation( RWCString& soErrorMessage ); void unfixOrientation(); int nGetMeanAlignedRegionOfReads(); void calculateReadDepth(); void calculateReadDepthIfNecessary(); void lookForCloneEndTags2( RWTPtrOrderedVector* pCloneEndTags ); void setStartNumberingUnpaddedConsensusAtUserDefinedIfNecessary(); void fixReadLists(); // This is for Green Lab research void printDiscrepantRegions(); void printBasesInDiscrepantRegions( autoReport* pAutoReport ); int nGetNumberOfBasesInContigNotBackbone( const RWCString& soPatternInNameOfBackbone ); int nGetNumberOfPotentialDiscrepantRegions( const int nSizeOfPotentialDiscrepantRegion, const mbtValVectorOfBool& aStretchesOfAgreeingBases, const mbtValVector& aBestQuality, const mbtValVectorOfBool& aPadPositions, int& nPotentialSitesNoPads ); int nGetNumberOfPotentialDiscrepantRegions2( const int nSizeOfDiscrepantRegion ); int nGetNumberOfPotentialDiscrepantRegions3( const int nSizeOfDiscrepantRegion ); int nGetNumberOfPotentialDiscrepantRegions4( const int nSizeOfDiscrepantRegion ); void addToMinimumQualityHistogram( RWTValVector& aMinimumQualityHistogram ); void addNumberOfIsolatedPads( mbtValVector& aIsolatedChimpPads, mbtValVector& aIsolatedHumanPads ); void countPadsForEachSpecies( const RWTValOrderedVector& aSpecies, RWTValOrderedVector& aNumberOfIsolatedPads, RWTValOrderedVector& aNumberOfPadsInHumanButNotInPrimate, int& nPossibleSites ); void printAgreeDisagreeBetweenPairsOfSpecies( autoReport* pAutoReport ); void printAgreeDisagreeBetweenPairsOfSpecies2( autoReport* pAutoReport ); void calculateErrorProbabilitiesByComparingPTroPPan( autoReport* pAutoReport ); void areReadsCorrectlyAligned( autoReport* pAutoReport ); void printLengthsOfUnalignedHighQualitySegments(); void printLengthsOfAlignedSegments(); void compareTopAndBottomStrands( autoReport* pAutoReport ); void compareTopAndBottomStrandsWithHuman( autoReport* pAutoReport ); void compareTopAndBottomStrands2( autoReport* pAutoReport ); void compareTopAndBottomStrands3( autoReport* pAutoReport ); void compareTopAndBottomStrands4( autoReport* pAutoReport ); void singleSignalInfo( autoReport* pAutoReport ); void singleSignalInfo2(); void countColumnsForGroupsOfSpecies( autoReport* pAutoReport ); void compareHQSWithLQS( autoReport* pAutoReport ); void lowQualityBasesInHQS( autoReport* pAutoReport ); void singleSignalOrQuality(autoReport *); void discrepancyRateInFlankedRegions( autoReport* pAutoReport ); void discrepancyRateInFlankedRegions2( autoReport* pAutoReport ); void discrepancyRateInFlankedRegions4( autoReport* pAutoReport ); void discrepancyRateInFlankedRegions5( autoReport* pAutoReport ); void countReadsDueToBugInGoodReads( ); void compareTopAndBottomStrandsNoHuman( autoReport* pAutoReport ); void highQualitySegmentData(); void printFlankedColumns( autoReport* pAutoReport ); void printSingleSignalBases( const int nUnpadded ); bool bGetCombinedReadAtBase( LocatedFragment* pForwardRead, MBTValOrderedOffsetVector* pForwardReadSingleSignalArray, LocatedFragment* pReverseRead, MBTValOrderedOffsetVector* pReverseReadSingleSignalArray, const int nConsPos, char& cCombinedBase, int& nCombinedQuality ); // same as above, but allows flanking bases to // have weaker conditions bool bGetCombinedReadAtBase2( const char cForFlankingOrNotForFlanking, LocatedFragment* pForwardRead, MBTValOrderedOffsetVector* pForwardReadSingleSignalArray, LocatedFragment* pReverseRead, MBTValOrderedOffsetVector* pReverseReadSingleSignalArray, const int nConsPos, char& cCombinedBase, int& nCombinedQuality ); // this differs from bGetCombinedReadAtBase in that it guesses at // the base when the data is bad bool bGetCombinedReadAtBase3( const bool bAddQualityValuesOfStrands, LocatedFragment* pForwardRead, MBTValOrderedOffsetVector* pForwardReadSingleSignalArray, LocatedFragment* pReverseRead, MBTValOrderedOffsetVector* pReverseReadSingleSignalArray, const int nConsPos, char& cCombinedBase, int& nCombinedQuality, bool& bBaseALittleOK ); enum { cForFlanking = 'f', cNotForFlanking = 'n' }; // problems to be returned below enum { cProblemReadsDisagree = 'd', cProblemUnaligned = 'u', cProblemLowQualitySegment = 'l', cProblemMultipleSignal = 'm', cProblemLowQuality = 'q', cProblemNoGoodRead = 'o', cProblemFlankingNotAgreeing = 'f', cProblemFlankingOther = 'h', cProblemNone = 'e' }; bool bGetCombinedReadAtBaseAndReportProblem( const char cForFlankingOrNotForFlanking, LocatedFragment* pForwardRead, MBTValOrderedOffsetVector* pForwardReadSingleSignalArray, LocatedFragment* pReverseRead, MBTValOrderedOffsetVector* pReverseReadSingleSignalArray, const int nConsPos, char& cCombinedBase, int& nCombinedQuality, char& cProblem ); void printFlankedColumns2( autoReport* pAutoReport ); void printFlankedColumns3( autoReport* pAutoReport ); void printFlankedColumns4( autoReport* pAutoReport ); void countAcceptableColumnsWithNoneOnLeft(autoReport* pAutoReport ); enum { cNoCpG = 'n', cDefinitelyCpG = 'y', cMaybeCpG = '?' }; void checkForDeaminationMutationsAmongMultipleTrees( RWTValOrderedVector* pTreesPositionA, RWTValOrderedVector* pTreesPositionB, const int nConsPos, RWTValVector* pChosenTreesPositionA, RWTValVector* pChosenTreesPositionB ); // called by applyFilters void checkForDeaminationMutationsAmongUniqueTrees( RWTValVector& aArrayOfOneTreeAtEachConsPos, RWTValVector& aDeaminationMutationsAtEachConsPos, RWTValVector& aAncestralCpGAtEachConsPos2 ); void countDeaminationMutations( autoReport* pAutoReport ); void countMutationsAtConsPos( const RWCString& soOneTree, const RWCString& soPresentDayBases, const int nConsPos, const RWCString& soDeaminationMutations ); void countMutationsAtConsPosML( RWTValOrderedVector* pTrees, const RWCString& soPresentDayBases, RWTValOrderedVector* pProbabilitiesOfTrees, const int nConsPos, mbtValOrderedVectorOfRWCString* pArrayOfDeaminationMutations, RWTValOrderedVector* pArrayOfProbabilitiesOfDeaminationMutations ); void checkTreesAndRecordDeaminationMutations( RWTValOrderedVector* pTreesPositionA, RWTValOrderedVector* pTreesPositionB, mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionA, mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionB, RWTValOrderedVector*& pArrayOfProbsOfDeaminationMutationsPositionA, RWTValOrderedVector*& pArrayOfProbsOfDeaminationMutationsPositionB ); void checkTreesAndRecordDeaminationMutationsML( RWTValOrderedVector* pTreesPositionA, RWTValOrderedVector* pTreesPositionB, RWTValOrderedVector* pProbabilitiesOfTreesPositionA, RWTValOrderedVector* pProbabilitiesOfTreesPositionB, mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionA, mbtValOrderedVectorOfRWCString*& pArrayOfDeaminationMutationsPositionB, RWTValOrderedVector*& pArrayOfProbsOfDeaminationMutationsPositionA, RWTValOrderedVector*& pArrayOfProbsOfDeaminationMutationsPositionB ); void countAllMutations( autoReport* pAutoReport ); void countAllMutationsML( autoReport* pAutoReport ); void printMutationsWithContext( autoReport* pAutoReport ); typedef MBTValOrderedOffsetVector typeArrayOfChar; void deleteColumnsOfPads( const bool bAddQualityValuesOfStrands, autoReport* pAutoReport, LocatedFragment* pHumanRead, RWTPtrOrderedVector& aGoodReads, RWTPtrOrderedVector& aListOfReadsInSingleSignalArrays, RWTPtrOrderedVector& aListOfSingleSignalArrays ); typedef RWTValOrderedVector arrayOfRWCString; void applyFilters( autoReport* pAutoReport, RWTPtrVector& aCombinedReads, mbtValVector& aSpeciesInAcceptableColumns, RWTValVector& aArrayOfOneTreeAtEachConsPos, RWTValVector& aArrayOfBasesAtEachConsPos, RWTValVector& aArrayOfLenientlyFilteredBasesAtEachConsPos, RWTValVector& aDeaminationMutationsAtEachConsPos, RWTValVector& aAncestralCpGAtEachConsPos2, // aAncestralCpGAtEachConsPos has just // the left position marked for an // ancestral CpG // aAncestralCpGAtEachConsPos2 has both // positions marked for an ancestral CpG LocatedFragment*& pHumanRead ); void printCrudeChimpHumanMutations( autoReport* pAutoReport ); float fGetProbabilityOfTree( const RWCString& soTree ); void adjustReadQualitiesAndRecalculateHighQualitySegments( autoReport* pAutoReport, const RWTPtrOrderedVector& aGoodReads ); void chooseTreesUsingBadData( const RWCString& soColumnOfBadBases, RWTValOrderedVector& aTrees, RWTValVector* pChosenTrees ); void printToCompareToReich( autoReport* pAutoReport ); void deleteReadFromContig( LocatedFragment* pLocFragToDelete ); void getDepthOfCoverage( int& nMinReadDepth, int& nMaxReadDepth, int& nMedianDepth, int& nMeanDepth ); public: // array of BaseSegment objects. these are provided // by the .ace file and indicate which parts of which // reads are used to form the consensus. they // can and will be changed by user edits. BaseSegArray baseSegArray_; contigMatchTablesType* pUnpaddedContig_; contigMatchTablesType* pUnpaddedContigComplemented_; // these are set when the contig is built from the .ace // file. the first displayable position is the leftmost // consensus-relative position at which an aligned // fragment has a base, even if that base was not included // in the consensus and hence no consensus base "exists" // at that position. (ditto rightmost.) // // these will have to be maintained if/when user edits // shift the alignment boundaries // int nFirstDisplayableContigPos_; int nLastDisplayableContigPos_; // array holds attachment objects, i.e. pointer to // frag object and offset into contig readsSortedByReadName apLocatedFragment_; // when reads are added or removed to the contig, don't fix things // right away--just set bReadListsNeedFixing_ bool bReadListsNeedFixing_; readsSortedByLeftEndPosition apLocatedFragment2_; RWTPtrOrderedVector apVeryLongReads_; // this is superceded by sorting apLocatedFragment_ and using // a binary search on it // dictionary dictOfReads_; // this array is indexed by nGetStartIndex to nGetEndIndex // by padded position and gives the unpadded position for // each padded position unpaddedFromPadded unpaddedFromPadded_; // set by setPaddedPositionsArray(); paddedFromUnpadded* pPaddedFromUnpadded_; bool bOKToUsePaddedIndexFast_; // changed bit for prompting user to save the assembly bool bChanged_; // tags on this contig RWTPtrOrderedVector aConsensusTags_; bool bComplementedFromWayPhrapCreated_; // AutoFinish data // these will normally be null unless set by the // finishing program // These arrays do not count gaps as errors. They // are not updated as experiments are accepted. // They are only used initially to decide which // experiments to put in the candidate list. // The index starts at -99 (assuming that // nAutoFinishNumberOfBasesBetweenContigsAssumed_ == 100 // These cumulative values are up to and *including* the // error at the index. E.g., // (*pCumUnpaddedErrorsFixedByTopTerm_)[nUnpaddedConsPos] is // the errors up to and including the error at nUnpaddedConsPos // all are indexed by unpadded positions mbtValVector* pCumUnpaddedErrors_; // This array count gaps as errors. This array // is updated as experiments are accepted. // The index starts at -499 (assuming that // nAutoFinishNumberOfBasesBetweenContigsAssumed_ == 500 // This is indexed by unpadded positions mbtValVector* pUnpaddedErrorProbabilities_; // This array counts gaps as errors. This array is continually // updated as new reads are selected by autofinish. mbtValVector* pOriginalUnpaddedErrorProbabilities_; // this is the same as above, only it is not updated // These arrays go together--they both have the same subscripts // as the contig (the bases--not the protruding reads) // The index is in unpadded consensus position. They do not have // gap bases on the ends. mbtValVector* pNumberOfSubclonesCoveringEachBase_; mbtValVector* pNumberOfSubclonesCoveringEachBaseWithoutSuggestedReads_; arrayOfLocatedFragmentPtrs* pWhichSingleSubcloneIsCoveringEachBase_; // This array has minimum subscript 0 instead of 1 so that // we can subtract 1 from the first base of the high quality region // to get the number of single subclone bases fixed by a read mbtValVector* pCumNumberOfSingleSubcloneBases_; // (*pCumNumOfSingleSubcloneBases_)[ nConsPos ] gives the number // of single subclone bases up to and including position nConsPos // The range is from nGetStartIndex() - 1 to nGetEndIndex() // Note that pCumNumberOfSingleSubcloneBases_ is not updated as // experiments are picked. This means that we can use this // to record the original number of single subclone bases in // the autoFinishExpTag. No, No! It is not created *after* all // weak and extending experiments are picked, and takes them into // account. Thus we cannot use it to record the original number of // single subclone bases in the autoFinishExpTag. Hence we // invented the pNumberOfSubclonesCoveringEachBaseWithoutSuggestedReads_ // array. double dTargetNumberOfErrors_; double dExpectedNumberOfErrorsInConsensus_; double dExpectedNumberOfErrorsInExtendedContig_; // temporary value that is decreased as add experiments mbtValVector* pUnpaddedReadTypesCoveringEachBase_; mbtValVector* pOriginalUnpaddedReadTypesCoveringEachBase_; typedef RWTPtrOrderedVector aArrayOfDistributionType; // this is indexed by the extended consensus bigArrayOfLittleArraysOfDistributions* pDistributionsOfChosenExperimentsArray_; mbtValVector* pSavedUnpaddedReadTypesCoveringEachBase_; bigArrayOfLittleArraysOfDistributions* pSavedDistributionsOfChosenExperimentsArray_; mbtValVector* pSavedUnpaddedErrorProbabilities_; mbtPtrOrderedVector* pCandidateExperiments_; mbtPtrOrderedVector* pChosenExperiments_; mbtPtrOrderedVector* pUniversalPrimerExperimentsForLowQualityRegions_; mbtPtrOrderedVector aSubcloneTemplates_; // these are initialized to -2. If there is not // high quality segment, they are set to -1. int nUnpaddedHighQualitySegmentStart_; int nUnpaddedHighQualitySegmentEnd_; // these are updated as reads are added int nUpdatedUnpaddedHighQualitySegmentStart_; int nUpdatedUnpaddedHighQualitySegmentEnd_; subcloneTTemplate* pFakeSubcloneForWholeCloneReads_; bool bThisContigIsExcludedFromAutofinishFinishing_; bool bLeftEndOfContigExaminedForEndOfClone_; bool bLeftEndOfContigIsEndOfClone_; bool bRightEndOfContigExaminedForEndOfClone_; bool bRightEndOfContigIsEndOfClone_; bool bAlreadyExaminedContigForDoNotExtendTags_; bool bTagsSayDoNotExtendToLeft_; bool bTagsSayDoNotExtendToRight_; bool bAlreadyExaminedContigForDoNotDoPCRTags_; bool bTagsSayDoNotDoPCRWithLeftEnd_; bool bTagsSayDoNotDoPCRWithRightEnd_; bool bReadsWillExtendLeftEnd_; bool bReadsWillExtendRightEnd_; // which contig ends are next to this contig // pContig_[nLeftGap] and nWhichEnd_[nLeftGap] are the contig and // end of that // contig that are connected to the left end of this contig. // pContig_[nRightGap] and nWhichEnd_[nRightGap] are the contig and // end of that // contig that are connected to the right end of this contig. // I don't believe pContig_[0], nWhichEnd_[0], and pCEP_[0] are used Contig* pContig_[3]; int nWhichEnd_[3]; contigEndPair* pCEP_[3]; // these are the array of pairs that connect contigs that are not placed // next to each other in a scaffold RWTPtrOrderedVector* pArrayOfIndirectContigEndPairs_[3]; // here is an alternate format of the above information which is // more convenient to use Contig* pPreviousContig_; Contig* pNextContig_; bool bThisContigIsComplementedInTheScaffold_; // I believe this is // whether it is complemented w.r.t. the way the contig currently // is--not the way phrap created it. // set by // Contig::pGetNextContigInScaffold called by // Assembly :: figureOutContigOrderAndOrientation() called by // Assembly :: getScaffolds called by // assemblyView :: createWindow // Thus I can assume in assemblyView, that this is set. (Jan 2004) // when I make the clone, I want to put the location // of this contig within that scaffold. Note that always // nClonePosLeft_ <= nClonePosRight_, even if // bThisContigIsComplementedInTheScaffold_ is true. // These are zero-based positions. // These are set by Assembly::getScaffoldBases int nClonePosLeft_; int nClonePosRight_; bool bPlacedInAScaffold_; bool bAlreadyCheckedEnd_[3]; bool bAbleToFindEndOfVector_[3]; int nWhichEndOfVector_[3]; int nScaffold_; // note: Used by assemblyView and set by // assemblyView::setContigScaffolds. This gives index into // assemblyView::aScaffolds_ mbtValVector* pProblemsInUnpaddedContig_; // indexed by consensus positions 1 to end--not extended consensus mbtValVectorOfBool* pUnpaddedLocationsOfStops_; mbtValVectorOfBool* pUnpaddedLocationsOfRuns_; // used to record the fwd/rev pair depth at any unpadded base position // in the consensus, this is indexed by unpadded consensus position MBTValOrderedOffsetVector aConsistentFwdRevPairDepth_; // these are temporary variables used in calculating aConsistentFwdRevPairDepth_ int nApproxSizeOfSubcloneTemplatesForFwdRevPairDepth_; mbtPtrOrderedVector* pSubcloneTemplatesForFwdRevPairDepth_; Assembly* pAssembly_; RWTPtrOrderedVector aContigOrientationWholeReadItems_; bool bReadDepthArrayCalculated_; int nReadDepthArrayCalculatedWithQuality_; RWTValVector* pReadDepthArray_; bool bDiscrepancyListsCalculated_; mbtValVectorOfBool* paDiscrepanciesNotIndels_; mbtValVectorOfBool* paDiscrepanciesJustIndels_; int nMinDiscrepantReads_; int nMaxDepthOfCoverage_; int nMinQuality_; int bIgnoreOtherReadsStartingAtSameLocation_; bool bSetStartNumberingUnpaddedConsensusAtUserDefined_; int nStartNumberingUnpaddedConsensusAtUserDefined_; kimuraBranchProb* pBranchProbabilities_; LocatedFragment* pReferenceSequence_; int nReadsToAdd_; // temporary variable used by addNewReads }; // for use by pProblemsInContig_ const unsigned char uSINGLE_SUBCLONE_BASE = 1; const unsigned char uHIGH_QUALITY_DISCREPANCY = 2; const unsigned char uUNALIGNED_HIGH_QUALITY_REGION = 4; #endif // CONTIG_INCLUDED