/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. 
# It may take months for you to notice these other side-effects # which may not seem connected at all. It is not feasible for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ // // basesegment.h // // BaseSegment corresponds to those lines in the .ace file // // Sequence Contig13 // ... // Assembled_from B5A_09_G06.s2 3199 3828 // Assembled_from* B5A_09_G06.s2 3471 4119 // ... // Base_segment 3564 3828 B5A_09_G06.s2 367 631 // Base_segment* 3852 4119 B5A_09_G06.s2 382 649 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // // these lines indicate which segment of the read was used // to determine the consensus. Those segments will (in at // least one display mode) appear differently from the rest // of the read, to give the finisher a visual indication // of what the determining read is. note that the first // pair of indices gives the alignment of the segment to // the consensus, and the second where the segment occurs in // the read. // // // these are now used by the contig object. // much recommenting and renaming required. 
// chrisa 21-mar-95 // // **** new class BaseSegArray added, consists of manipulations // **** broken out of contig object // chrisa 18-apr-95 // // extensive bug fixing and modification dg #ifndef BASESEGMENT_INCLUDED #define BASESEGMENT_INCLUDED #include "rwcstring.h" #include "rwtvalsortedvector.h" #include "rwtptrsortedvector.h" #include "locatedFragment.h" class BaseSegment { public: // default constructor used by dynamic arrays BaseSegment() : pLocFrag_(0), nStartInConsensus_(0), nEndInConsensus_(0) {} BaseSegment(LocatedFragment* pLocFrag, const int nStartInCon, const int nEndInCon) : pLocFrag_(pLocFrag), nStartInConsensus_(nStartInCon), nEndInConsensus_(nEndInCon) {} // this ctor used only for dummy BaseSegment(const int nStartInCon) : nEndInConsensus_(0), nStartInConsensus_(nStartInCon) {} // keep RWTPtrSortedVector happy. sort order is dependent on // start in consensus position ONLY. // Note in June 2010 (DG): looking for a base segment by nConsPos // will always fail since the operator== will always return false because // the pLocFrag_ will not match (we are looking with a dummy base // segment). Thus we really should change this to just be // the nStartInConsensus_. I wonder if it used to be that way... 
bool operator==(const BaseSegment& rBsp) const { return ((nStartInConsensus_ == rBsp.nStartInConsensus_) && (nEndInConsensus_ == rBsp.nEndInConsensus_) && (pLocFrag_ == rBsp.pLocFrag_)); } bool operator<(const BaseSegment& rBsp) const { return (nStartInConsensus_ < rBsp.nStartInConsensus_); } // return the pointer to the located fragment to which you apply LocatedFragment* pGetLocFrag() { return pLocFrag_; } // the name of the read is no longer stored in this object, // but picked up from the located fragment itself const RWCString& soGetReadName() const { return pLocFrag_->soGetFragmentName(); } // this kludge is brought to you by an imported data // structure void setEndInConsensus(const int n) { nEndInConsensus_ = n; } void setStartInConsensus( const int n ) { nStartInConsensus_ = n; } int nGetStartInConsensus() const { return nStartInConsensus_; } int nGetEndInConsensus() const { return nEndInConsensus_; } // for debugging friend ostream& operator << (ostream&, const BaseSegment&); void complementBaseSegment(); // most of the edits to this data structure are going to // come in from the BaseSegArray. let it mess with internals. 
friend class BaseSegArray; public: // only the BaseSegArray gets to do this void shiftEndAlignmentPlusOne() { ++nEndInConsensus_; } void shiftAlignmentPlusOne() { ++nStartInConsensus_; ++nEndInConsensus_; } // only the BaseSegArray gets to do this void shiftEndAlignmentMinusOne() { --nEndInConsensus_; } void shiftAlignmentMinusOne() { --nStartInConsensus_; --nEndInConsensus_; } // pointer to located fragment to which this base segment refers LocatedFragment* pLocFrag_; //RWCString soReadName_; // name of contig of which base seg is a part // RWCString soContigName_; // consensus-based padded indices int nStartInConsensus_; int nEndInConsensus_; }; // this class holds two arrays of base segment that, between // them, can record and enable undo of changes to an array // of base segment class BaseSegChanges { public: BaseSegChanges() {} // note these are values (i.e. copies), not pointers, // since the originals may not exist anymore RWTValSortedVector daInsertedBaseSegs_; RWTValSortedVector daRemovedBaseSegs_; // for debugging friend ostream& operator << (ostream&, const BaseSegChanges&); }; class BaseSegArray { public: BaseSegArray( Contig* pContig, const int nMaxNumberOfBaseSegments ) : pContig_( pContig ), dapBs_( (size_t) nMaxNumberOfBaseSegments ) { dapBs_.soName_ = "BaseSegArray dapBs_"; } // for old ace file format BaseSegArray( Contig* pContig) : pContig_( pContig ) { dapBs_.soName_ = "BaseSegArray dapBs_"; } ~BaseSegArray() { dapBs_.clearAndDestroy(); // deletes pointed to base segments } // add a pointer to the base segment array void addPtrToNewBaseSeg(BaseSegment* pBs) { dapBs_.insert(pBs); } // how many segments are there in the array? int nGetNumSegments() { return dapBs_.length(); } // get pointer to the base segment at index BaseSegment* operator [] (const int nIndex) { return dapBs_[nIndex]; } // passed the (consensus) index and a pointer to located fragment, // returns true if that base is part of a base segment, i.e. 
// a region of the read used to form the consensus bool bIsInBaseSegment(const int nConsPos, const LocatedFragment* pLf) const; BaseSegment* pGetBaseSegmentByConsPos( const int nConsPos ); int nGetBaseSegmentIndexByConsPos( const int nConsPos ); int nGetBaseSegmentAtOrAfterConsPos( const int nConsPos ); // // force the base segment in a passed consensus region to be // the aligned segment of the passed LocatedFragment. // // undo data is returned as a pointer to a BaseSegChanges object // BaseSegChanges* setBaseSegToFragRegion(LocatedFragment*, const int nStartConsPos, const int nEndConsPos); // // passed pointer to array of changes, restores base segment // array to prior state PROVIDING no changes made in between. // the inserted ones get deleted and the deleted ones inserted // void undoBaseSegChanges(BaseSegChanges*); // an artifact of phrap is discontinuity in the base segments // under certain circumstances. this member fun fixes any "gaps" // between base segs. called once, after reading the .ace file. void forceSegsToBeContiguous(); // complement all the consensus relative positions in all // segments in the array. void complementBaseSegArray(); // repair the base segment array after an insertion in the // consensus has caused all frags at or after that pos // to adjust their alignment void adjustSegsForInsertionAtPos(const int nConsInsertPos); // ditto after deletion void adjustSegsForDeletionAtPos(const int nConsDeletePos); // check the data structure for any inconsistencies or // errors. returns false if there's a prob. debugging only. 
bool bGetDataStructureOk( const bool bCheckThatBaseSegmentsGoFromBeginningToEndOfConsensus = false ); void resize( size_t nNewSize ) { dapBs_.resize( nNewSize ); } bool bIsBaseSegArraySorted( int& nBadPosition ); void reverseBaseSegArray(); void removeBaseSegmentsToRight( const int nStartPaddedConsPos ); void fixBaseSegments(); void fixBaseSegment( const int nBadBaseSegment ); void fixBaseSegmentAtBeginning(); void fixBaseSegmentAtEnd(); void findReadsToMatchConsensus( const int nConsPosLeft, const int nConsPosRight ); void makeBsChanges(BaseSegChanges*); void dumpBaseSegments(); public: RWTPtrSortedVector dapBs_; Contig* pContig_; }; // BaseSegArray #endif // BASESEGMENT_INCLUDED