/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "assemblyView.h" #include "rwcstring.h" #include #include "popupErrorMessage2.h" #include "rwctokenizer.h" #include "soGetErrno.h" #include "seqMatch.h" #include "bIsNumericMaybeWithWhitespace.h" #include "consed.h" #include "consedParameters.h" #include #include "bIsNumericFloat.h" #include "clScaffold.h" // crossmatch output lines look like this: // Exact duplicate reads: None. // Maximal single base matches (low complexity regions): // ALIGNMENT 17020 0.23 0.02 0.14 H_NH0396K03.0.Contig434 160859 178133 (33398) H_NH0396K03.0.Contig434 188922 // 206175 (5356) // ALIGNMENT 4903 0.39 0.04 0.35 H_NH0396K03.0.Contig434 178059 183202 (28329) H_NH0396K03.0.Contig434 206011 2 // 11138 (393) * // ALIGNMENT 17020 0.23 0.14 0.02 H_NH0396K03.0.Contig434 188922 206175 (5356) H_NH0396K03.0.Contig434 160859 1 // 78133 (33398) // ALIGNMENT 4903 0.39 0.35 0.04 H_NH0396K03.0.Contig434 206011 211138 (393) H_NH0396K03.0.Contig434 178059 183 // 202 (28329) * // 1 matching entries (first file). #define PARSE_PANIC( message ) \ { RWCString soError( (size_t) 1000); \ soError = message; \ soError += "\nerror in assembly view file "; \ soError += filCrossMatchOutputFile; \ soError += "\nError detected from source file "; \ soError += __FILE__; \ soError += " at line "; \ soError += RWCString( (long) __LINE__ ); \ THROW_ERROR( soError ); \ } #define COMPARE_FOR_QSORT( A, B ) \ if ( A < B ) \ return( -1 ); \ else if ( A > B ) \ return( 1 ); \ else static int nCompareSeqMatches( const seqMatch** ppSeqMatch1, const seqMatch** ppSeqMatch2 ) { if ( (*ppSeqMatch1)->pContig_[0] != (*ppSeqMatch2)->pContig_[0] ) { if ( (*ppSeqMatch1)->pContig_[0]->soGetName() < (*ppSeqMatch2)->pContig_[0]->soGetName() ) return( -1 ); else return( 1 ); } else { // so (*ppSeqMatch1)->pContig_[0] == (*ppSeqMatch2)->pContig_[0] if ( (*ppSeqMatch1)->pContig_[1] != (*ppSeqMatch2)->pContig_[1] ) { if ( (*ppSeqMatch1)->pContig_[1]->soGetName() < (*ppSeqMatch2)->pContig_[1]->soGetName() ) return( -1 ); else return( 1 ); } else { // so both matches have the same origin contigs // and the same destination contigs. // So compare them based on the positions and // complemented flag assert( (*ppSeqMatch1)->pContig_[0] == (*ppSeqMatch2)->pContig_[0] ); assert( (*ppSeqMatch1)->pContig_[1] == (*ppSeqMatch2)->pContig_[1] ); // however, it is not necessarily true that the repeat // is contained within a single contig. // common case in which the seqMatches are the same if ( (*ppSeqMatch1)->nUnpaddedStartConsPos_[0] == (*ppSeqMatch2)->nUnpaddedStartConsPos_[0] && (*ppSeqMatch1)->nUnpaddedEndConsPos_[0] == (*ppSeqMatch2)->nUnpaddedEndConsPos_[0] && (*ppSeqMatch1)->nUnpaddedStartConsPos_[1] == (*ppSeqMatch2)->nUnpaddedStartConsPos_[1] && (*ppSeqMatch1)->nUnpaddedEndConsPos_[1] == (*ppSeqMatch2)->nUnpaddedEndConsPos_[1] && (*ppSeqMatch1)->bComplemented_ == (*ppSeqMatch2)->bComplemented_ ) { return( 0 ); } // // common case of inverted repeats within a single contig // // the same. They could be like this: // // ----------------------------------------- // // | | // // | | // // | -------------- | // // | | | | // // | | | | // // nStart0 nEnd0 nEnd1 nStart1 <-method A // // nEnd1 nStart1 nStart0 nEnd0 <-method B // // // // 2 method of naming the same inverted repeat // However, the seqMatch ctor now makes these both identical // by switching the repeats so that is taken care of. COMPARE_FOR_QSORT( (*ppSeqMatch1)->nUnpaddedStartConsPos_[0], (*ppSeqMatch2)->nUnpaddedStartConsPos_[0] ) { COMPARE_FOR_QSORT( (*ppSeqMatch1)->nUnpaddedStartConsPos_[1], (*ppSeqMatch2)->nUnpaddedStartConsPos_[1] ) { COMPARE_FOR_QSORT( (*ppSeqMatch1)->nUnpaddedEndConsPos_[0], (*ppSeqMatch2)->nUnpaddedEndConsPos_[0] ) { COMPARE_FOR_QSORT( (*ppSeqMatch1)->nUnpaddedEndConsPos_[1], (*ppSeqMatch2)->nUnpaddedEndConsPos_[1] ) { COMPARE_FOR_QSORT( (*ppSeqMatch1)->bComplemented_, (*ppSeqMatch2)->bComplemented_ ) { assert( false ); // should have been caught above } } } } } } } } void assemblyView :: parseCrossMatchOutput() { RWTPtrOrderedVector aTempSeqMatches; aTempSeqMatches.soName_ = "assemblyView::parseCrossMatchOutput aTempSeqMatches"; // filCrossMatchOutput--shall this be a member data or a parameter? // I guess it will probably come from a window textfield (which // will default to something in consedParameters FileName filCrossMatchOutputFile = ConsEd::pGetAssembly()->filGetAceFileFullPathname() + ".aview"; if ( !filCrossMatchOutputFile.bFileByThisNameExists() ) { popupErrorMessage2( widPopupShell_, "Sequence matches will not be shown in Assembly View because there is no file %s. If you want sequence matches to be shown, click on \"What to show: Sequence Matches\" and then \"run crossmatch\"", filCrossMatchOutputFile.data() ); return; } FILE* pCrossMatchOutput = fopen( filCrossMatchOutputFile.data(), "r" ); if ( !pCrossMatchOutput ) { popupErrorMessage2( widPopupShell_, "could not open %s because %s\n", filCrossMatchOutputFile.data(), soGetErrno().data() ); return; } else { cerr << "reading " << filCrossMatchOutputFile << endl; } // parse crossmatch output const int SZLINE_SIZE = 2000; RWCString soLine( (size_t) ( SZLINE_SIZE + 1 ) ); // look at the title line for the name of the fasta file if ( fgets( soLine.data(), SZLINE_SIZE, pCrossMatchOutput ) == NULL ) PARSE_PANIC( "failed reading title line" ); soLine.nCurrentLength_ = strlen( soLine.data() ); RWCTokenizer tok( soLine ); tok(); // get rid of crossmatch name RWCString soFileName( tok() ); FileName filFastaFile( soFileName ); if ( !bFastaFileMatchesAceFile( filFastaFile.soGetBasename() ) ) { // already reported the error return; } while( fgets( soLine.data(), SZLINE_SIZE, pCrossMatchOutput ) != NULL ) { soLine.nCurrentLength_ = strlen( soLine.data() ); if ( !soLine.bStartsWith( "ALIGNMENT" ) ) continue; RWCTokenizer tok( soLine ); // ALIGNMENT 4903 0.39 0.35 0.04 H_NH0396K03.0.Contig434 206011 211138 (393) H_NH0396K03.0.Contig434 178059 183202 (28329) * //or //ALIGNMENT 618 1.84 1.69 0.00 Contig14 57 764 (66) C Contig434 (157490) 54041 53322 RWCString soAlignmentKeyword = tok(); assert( soAlignmentKeyword == "ALIGNMENT" ); RWCString soSWScore = tok(); RWCString soPercentSubstitutions = tok(); RWCString soPercentDeletions = tok(); RWCString soPercentInsertions = tok(); float fPercentSubstitutions; float fPercentDeletions; float fPercentInsertions; if ( !bIsNumericFloat( soPercentSubstitutions, fPercentSubstitutions ) ) { popupErrorMessage2( widPopupShell_, "crossmatch output line has percent substitutions not numeric: %s on line %s", soPercentSubstitutions.data(), soLine.data() ); // let's see if we are just on some goofy line that // starts with ALIGNMENT continue; } if ( !bIsNumericFloat( soPercentDeletions, fPercentDeletions ) ) { popupErrorMessage2( widPopupShell_, "crossmatch output line has percent deletions not numeric: %s on line %s", soPercentDeletions.data(), soLine.data() ); continue; } if ( !bIsNumericFloat( soPercentInsertions, fPercentInsertions ) ) { popupErrorMessage2( widPopupShell_, "crossmatch output line has percent insertions not numeric: %s on line %s", soPercentInsertions.data(), soLine.data() ); continue; } RWCString soContig1Name = tok(); RWCString soStart1 = tok(); RWCString soEnd1 = tok(); RWCString soPastEnd1 = tok(); RWCString soCOrName = tok(); bool bComplemented = ( soCOrName == "C" ? true : false ); RWCString soContig2Name; if ( bComplemented ) soContig2Name = tok(); else soContig2Name = soCOrName; RWCString soPastEnd2; if ( bComplemented ) soPastEnd2 = tok(); RWCString soStart2 = tok(); RWCString soEnd2 = tok(); if ( !bComplemented ) soPastEnd2 = tok(); RWCString soMatchIsContainedWithinAnother = tok(); assert( !soPastEnd2.isNull() ); // convert this into a seqMatch structure. First, we need to find // the contig's corresponding to soSeqName1 and soSeqName2. Assembly* pAssembly = ConsEd::pGetAssembly(); Contig* pContig1 = pAssembly->pGetContigByName( soContig1Name ); Contig* pContig2 = pAssembly->pGetContigByName( soContig2Name ); // throw out matches between contigs that we aren't showing if ( nGetScaffoldForContig( pContig1 ) == -1 ) { continue; } if ( nGetScaffoldForContig( pContig2 ) == -1 ) { continue; } if ( !pContig1 ) { popupErrorMessage2( widPopupShell_, "could not find %s from line %s in assembly\n", soContig1Name.data(), soLine.data() ); return; } if ( !pContig2 ) { popupErrorMessage2( widPopupShell_, "could not find %s from line %s in assembly\n", soContig2Name.data(), soLine.data() ); return; } int nStart1; if ( !bIsNumericMaybeWithWhitespace( soStart1, nStart1 ) ) { popupErrorMessage2( widPopupShell_, "start position %s of %s was not numeric on line %s\n", soStart1.data(), soContig1Name.data(), soLine.data() ); return; } int nEnd1; if ( !bIsNumericMaybeWithWhitespace( soEnd1, nEnd1 ) ) { popupErrorMessage2( widPopupShell_, "end position %s of %s was not numeric on line %s\n", soEnd1.data(), soContig1Name.data(), soLine.data() ); return; } int nStart2; if (!bIsNumericMaybeWithWhitespace( soStart2, nStart2 ) ) { popupErrorMessage2( widPopupShell_, "start position %s of %s was not numeric on line %s\n", soStart2.data(), soContig2Name.data(), soLine.data() ); return; } int nEnd2; if ( !bIsNumericMaybeWithWhitespace( soEnd2, nEnd2 ) ) { popupErrorMessage2( widPopupShell_, "end position %s of %s was not numeric on line %s\n", soEnd2.data(), soContig2Name.data(), soLine.data() ); return; } // throw out perfect matches if ( pContig1 == pContig2 && nStart1 == nStart2 && nEnd1 == nEnd2 ) { continue; } seqMatch* pSeqMatch = new seqMatch( pContig1, pContig2, nStart1, nStart2, nEnd1, nEnd2, bComplemented, 100.0 - fPercentSubstitutions - fPercentDeletions - fPercentInsertions ); aTempSeqMatches.insert( pSeqMatch ); } // remove duplicates. Do this by sorting the temporary list and // then copying only one copy of each repeat. qsort( aTempSeqMatches.data(), aTempSeqMatches.length(), sizeof( seqMatch* ), ( int(*) ( const void*, const void* ) ) nCompareSeqMatches ); aSeqMatches_.clearAndDestroy(); aSeqMatches_.resize( aTempSeqMatches.length() / 2 ); if ( aTempSeqMatches.length() > 0 ) aSeqMatches_.insert( aTempSeqMatches[0] ); for( int nSeqMatch = 1; nSeqMatch < aTempSeqMatches.length(); ++nSeqMatch ) { seqMatch* pSeqMatchNext = aTempSeqMatches[ nSeqMatch ]; seqMatch* pSeqMatchLast = aSeqMatches_.last(); if ( nCompareSeqMatches( (const seqMatch**) &pSeqMatchNext, (const seqMatch**) &pSeqMatchLast ) == 0 ) continue; aSeqMatches_.insert( pSeqMatchNext ); } } // finds scaffold positions void assemblyView :: calculateSeqMatchPositions() { createNewGridTable(); for( int nSeqMatch = 0; nSeqMatch < aSeqMatches_.length(); ++nSeqMatch ) { seqMatch* pSeqMatch = aSeqMatches_[ nSeqMatch ]; if ( !pSeqMatch->bOKToShow() ) continue; calculatePositionOfOneSeqMatch( pSeqMatch ); } } // find scaffold positions void assemblyView :: calculatePositionOfOneSeqMatch( seqMatch* pSeqMatch ) { // will be changed if it isn't in view. pSeqMatch->bInView_[0] = true; pSeqMatch->bInView_[1] = true; // find scaffold number and scaffold position of the 2 endpoints for( int nMatchEnd = 0; nMatchEnd <= 1; ++nMatchEnd ) { int nScaffold = nGetScaffoldForContig( pSeqMatch->pContig_[ nMatchEnd ]); // handle case of contig not being displayed if ( nScaffold == -1 ) { pSeqMatch->bInView_[0] = false; pSeqMatch->bInView_[1] = false; return; } pSeqMatch->nScaffold_[ nMatchEnd ] = nScaffold; pSeqMatch->nScaffoldPosStart_[ nMatchEnd ] = pSeqMatch->pContig_[nMatchEnd]->nGetScaffoldPosFromUnpaddedConsPos( pSeqMatch->nUnpaddedStartConsPos_[ nMatchEnd ] ); pSeqMatch->nScaffoldPosEnd_[ nMatchEnd ] = pSeqMatch->pContig_[nMatchEnd]->nGetScaffoldPosFromUnpaddedConsPos( pSeqMatch->nUnpaddedEndConsPos_[ nMatchEnd ] ); } calculateBezierCoefficientsOfOneSeqMatch( pSeqMatch ); // is this a direct or inverted repeat? Remember, arcs go // from start to start and end to end bool bStartOfArcsAreInOrder = ( pSeqMatch->nScaffoldPosStart_[0] < pSeqMatch->nScaffoldPosEnd_[0] ? true : false ); bool bEndOfArcsAreInOrder = ( pSeqMatch->nScaffoldPosStart_[1] < pSeqMatch->nScaffoldPosEnd_[1] ? true : false ); pSeqMatch->bDirectNotInverted_ = ( bStartOfArcsAreInOrder && bEndOfArcsAreInOrder ) || ( !bStartOfArcsAreInOrder && !bEndOfArcsAreInOrder ); } void assemblyView :: calculateBezierCoefficientsOfOneSeqMatch( seqMatch* pSeqMatch ) { clScaffold* pScaffold0 = aScaffolds_[ pSeqMatch->nScaffold_[0] ]; clScaffold* pScaffold1 = aScaffolds_[ pSeqMatch->nScaffold_[1] ]; if ( pScaffold0->nScaffoldRow_ == pScaffold1->nScaffoldRow_ ) { // this will just be a parabola--not a cubic // arc's go from start to start and from end to end, // regardless whether complemented or not. for( int nArc = 0; nArc <= 1; ++nArc ) { int nRegion1 = nArc; int nRegion2 = ( nArc == 0 ? 1 : 0 ); int nStartOfArcScaffold = pSeqMatch->nScaffold_[ nRegion1 ]; int nEndOfArcScaffold = pSeqMatch->nScaffold_[ nRegion2 ]; int nStartOfArcScaffoldPos; int nEndOfArcScaffoldPos; if ( nArc == 0 ) { nStartOfArcScaffoldPos = pSeqMatch->nScaffoldPosStart_[ nRegion1 ]; nEndOfArcScaffoldPos = pSeqMatch->nScaffoldPosStart_[ nRegion2 ]; } else { // arc 1 nStartOfArcScaffoldPos = pSeqMatch->nScaffoldPosEnd_[ nRegion1 ]; nEndOfArcScaffoldPos = pSeqMatch->nScaffoldPosEnd_[ nRegion2 ]; } int nStartOfArcPixelX = nGetPixelXFromScaffoldPos( nStartOfArcScaffold, nStartOfArcScaffoldPos ); int nEndOfArcPixelX = nGetPixelXFromScaffoldPos( nEndOfArcScaffold, nEndOfArcScaffoldPos ); // how high shall we make this arc? That depends on how // far away the positions are. int nStartOfArcScaffoldRowPos = aScaffolds_[ nStartOfArcScaffold ]->nScaffoldPositionsBeforeItOnScaffoldRow_ + nStartOfArcScaffoldPos; int nEndOfArcScaffoldRowPos = aScaffolds_[ nEndOfArcScaffold ]->nScaffoldPositionsBeforeItOnScaffoldRow_ + nEndOfArcScaffoldPos; int nDistanceInScaffoldPos = ABS( nStartOfArcScaffoldRowPos - nEndOfArcScaffoldRowPos ); int nHeightY = nGetHeightFromRepeatSeparation( nDistanceInScaffoldPos ); int nMidpointX = ( nStartOfArcPixelX + nEndOfArcPixelX ) / 2; pSeqMatch->fAX_[ nArc ] = 0; pSeqMatch->fBX_[ nArc ] = 0; pSeqMatch->fCX_[ nArc ] = nEndOfArcPixelX - nStartOfArcPixelX; pSeqMatch->fDX_[ nArc ] = nStartOfArcPixelX; // since both scaffolds are on the same nScaffoldRow_, it doesn't // matter whether we use nScaffold_[0] or nScaffold_[1] int nYBaseOfArc = nGetPixelYOfConsistentFwdRevPairBaseline( pSeqMatch->nScaffold_[0] ); pSeqMatch->fAY_[ nArc ] = 0; pSeqMatch->fBY_[ nArc ] = 4 * nHeightY; pSeqMatch->fCY_[ nArc ] = -4 * nHeightY; pSeqMatch->fDY_[ nArc ] = nYBaseOfArc; pSeqMatch->nNumberOfPointsForDrawing_[ nArc ] = 10; } // for( int nArc = 0; nArc <= 1; ++nArc ) { } else { // spline curve since starts and ends at different Y pixel position for( int nArc = 0; nArc <= 1; ++nArc ) { int nRegion1 = nArc; int nRegion2 = ( nArc == 0 ? 1 : 0 ); int nStartOfArcScaffold = pSeqMatch->nScaffold_[ nRegion1 ]; int nEndOfArcScaffold = pSeqMatch->nScaffold_[ nRegion2 ]; int nStartOfArcScaffoldPos; int nEndOfArcScaffoldPos; if ( nArc == 0 ) { nStartOfArcScaffoldPos = pSeqMatch->nScaffoldPosStart_[ nRegion1 ]; nEndOfArcScaffoldPos = pSeqMatch->nScaffoldPosStart_[ nRegion2 ]; } else { nStartOfArcScaffoldPos = pSeqMatch->nScaffoldPosEnd_[ nRegion1 ]; nEndOfArcScaffoldPos = pSeqMatch->nScaffoldPosEnd_[ nRegion2 ]; } // convert these starting and ending locations of the // arc to pixel positions int nStartOfArcPixelY = nGetPixelYOfConsistentFwdRevPairBaseline( nStartOfArcScaffold ); int nEndOfArcPixelY = nGetPixelYOfConsistentFwdRevPairBaseline( nEndOfArcScaffold ); int nStartOfArcPixelX = nGetPixelXFromScaffoldPos( nStartOfArcScaffold, nStartOfArcScaffoldPos ); int nEndOfArcPixelX = nGetPixelXFromScaffoldPos( nEndOfArcScaffold, nEndOfArcScaffoldPos ); // this is the place I should a check to see if the arc is on // the screen if ( nStartOfArcPixelX < nGetLeftEdgePixelX() && nEndOfArcPixelX < nGetLeftEdgePixelX() ) { pSeqMatch->bInView_[ nArc ] = false; continue; // next arc } if ( nGetRightEdgePixelX() < nStartOfArcPixelX && nGetRightEdgePixelX() < nEndOfArcPixelX ) { pSeqMatch->bInView_[ nArc ] = false; continue; // next arc } if ( nStartOfArcPixelY < nGetTopEdgePixelY() && nEndOfArcPixelY < nGetTopEdgePixelY() ) { pSeqMatch->bInView_[ nArc ] = false; continue; // next arc } if ( nGetBottomEdgePixelY() < nStartOfArcPixelY && nGetBottomEdgePixelY() < nEndOfArcPixelY ) { pSeqMatch->bInView_[ nArc ] = false; continue; // next arc } // now convert these to bezier coefficients. We will create // control points as follows: // If the top end of the arc is to the left of the bottom // end of the arc, then make the control points in this order: // x0........ // x1......x2 // ........x3 // If the top end of the arc is to the right of the bottom // end of the arc, then make the control points in this order: // ........x0 // x2......x1 // x3........ int nMidHeightPixelY = ( nStartOfArcPixelY + nEndOfArcPixelY )/2; pSeqMatch->fDX_[ nArc ] = nStartOfArcPixelX; // pSeqMatch->fCX_[ nArc ] = 3 * ( nPixelX1 - nStartOfArcPixelX ); // would always be zero pSeqMatch->fCX_[ nArc ] = 0; pSeqMatch->fBX_[ nArc ] = 3 * ( nEndOfArcPixelX - nStartOfArcPixelX ); pSeqMatch->fAX_[ nArc ] = -2*( nEndOfArcPixelX - nStartOfArcPixelX ); pSeqMatch->fDY_[ nArc ] = nStartOfArcPixelY; pSeqMatch->fCY_[ nArc ] = 1.5 * ( nEndOfArcPixelY - nStartOfArcPixelY ); pSeqMatch->fBY_[ nArc ] = -pSeqMatch->fCY_[ nArc ]; pSeqMatch->fAY_[ nArc ] = nEndOfArcPixelY - nStartOfArcPixelY; pSeqMatch->nNumberOfPointsForDrawing_[ nArc ] = 20; } // for( int nArc = 0; nArc <= 1; ++nArc ) { } // if ( nScaffold_[0] == nScaffold_[1] ) { else } void assemblyView :: drawRepeats( bool bFirstCalculatePositions ) { if ( !bAlreadyReadCrossMatchOutput_ ) { bAlreadyReadCrossMatchOutput_ = true; parseCrossMatchOutput(); } if ( !bFirstCalculatePositions && !p2DArrayOfArrayOfSeqMatches_ ) bFirstCalculatePositions = true; if ( bFirstCalculatePositions ) { calculateSeqMatchPositions(); } drawRepeats2( bFirstCalculatePositions ); highlightClickedSeqMatches(); } void assemblyView :: drawRepeats2( const bool bSavePositionsInGrids ) { for( int nSeqMatch = 0; nSeqMatch < aSeqMatches_.length(); ++nSeqMatch ) { seqMatch* pSeqMatch = aSeqMatches_[ nSeqMatch ]; if ( !pSeqMatch->bOKToShow() ) continue; drawOneRepeat( pSeqMatch, bSavePositionsInGrids, false ); // bHighlight } } void assemblyView :: drawOneRepeat( seqMatch* pSeqMatch, const bool bSavePositionsInGrids, const bool bHighlighted ) { for( int nArc = 0; nArc <= 1; ++nArc ) { drawOneRepeatArc( pSeqMatch, nArc, bSavePositionsInGrids, bHighlighted ); } for( int nRegion = 0; nRegion <=1; ++nRegion ) { drawOneRegionBaseline( pSeqMatch, nRegion, bHighlighted ); } } static const int nThicknessOfSequenceMatchBaseline = 5; void assemblyView :: drawOneRegionBaseline( seqMatch* pSeqMatch, const int nRegion, const bool bHighlighted ) { int nPixelYBottom = nGetPixelYOfConsistentFwdRevPairBaseline( pSeqMatch->nScaffold_[nRegion] ); int nPixelYTop = nPixelYBottom - nThicknessOfSequenceMatchBaseline + 1; int nPixelXLeft = nGetPixelXFromScaffoldPos( pSeqMatch->nScaffold_[ nRegion ], pSeqMatch->nScaffoldPosStart_[ nRegion ] ); int nPixelXRight = nGetPixelXFromScaffoldPos( pSeqMatch->nScaffold_[ nRegion ], pSeqMatch->nScaffoldPosEnd_[ nRegion ] ); if ( nPixelXLeft > nPixelXRight ) { // switch them int nTemp = nPixelXLeft; nPixelXLeft = nPixelXRight; nPixelXRight = nTemp; } const GC& gc = ( bHighlighted ? pGctHighlight_ : ( pSeqMatch->bDirectNotInverted_ ? pGctDirectSequenceMatches_ : pGctInvertedSequenceMatches_ ) )->gcGet(); XFillRectangle( XtDisplay( widDrawingArea_ ), XtWindow( widDrawingArea_ ), gc, nPixelXLeft, nPixelYTop, (unsigned int) (nPixelXRight - nPixelXLeft + 1), (unsigned int) (nThicknessOfSequenceMatchBaseline ) ); } const int nMAX_POINTS = 8192; static XPoint xpoint[ nMAX_POINTS ]; void assemblyView :: drawOneRepeatArc( seqMatch* pSeqMatch, const int nArc, const bool bSavePositionsInGrid, const bool bHighlight ) { if ( !pSeqMatch->bInView_[ nArc ] ) return; const GC& gc = ( bHighlight ? pGctHighlight_ : ( pSeqMatch->bDirectNotInverted_ ? pGctDirectSequenceMatches_ : pGctInvertedSequenceMatches_ ))->gcGet(); int nFinalIndex = pSeqMatch->nNumberOfPointsForDrawing_[ nArc ] - 1; const char cLookingForLineSegmentOnScreen = 'l'; const char cExtendingLineSegments = 'e'; int nLastX; int nLastY; bool bLastPointOnScreen; bool bLastPointExists = false; char cState = cLookingForLineSegmentOnScreen; int nNumberOfPointsSoFar = 0; for( int n = 0; n <= nFinalIndex; ++n ) { bool bDrawPoints = false; float fT = (float) n / (float) ( pSeqMatch->nNumberOfPointsForDrawing_[ nArc ] - 1 ); int nX = pSeqMatch->nGetBezierX( nArc, fT ); int nY = pSeqMatch->nGetBezierY( nArc, fT ); bool bCurrentPointOnScreen = bIsOnScreen( nX, nY ); if ( cState == cLookingForLineSegmentOnScreen ) { if ( bLastPointExists && ( bCurrentPointOnScreen || bLastPointOnScreen ) ) { // can start the array xpoint[0].x = nLastX; xpoint[0].y = nLastY; xpoint[1].x = nX; xpoint[1].y = nY; nNumberOfPointsSoFar = 2; cState = cExtendingLineSegments; if ( n == nFinalIndex ) bDrawPoints = true; } } else if ( cState == cExtendingLineSegments ) { if ( bCurrentPointOnScreen || bLastPointOnScreen ) { xpoint[ nNumberOfPointsSoFar ].x = nX; xpoint[ nNumberOfPointsSoFar ].y = nY; ++nNumberOfPointsSoFar; if ( nNumberOfPointsSoFar >= nMAX_POINTS || n == nFinalIndex ) bDrawPoints = true; } else { // the last line segment was not on the screen. bDrawPoints = true; } } if ( bDrawPoints ) { XDrawLines( XtDisplay( widDrawingArea_ ), XtWindow( widDrawingArea_ ), gc, xpoint, nNumberOfPointsSoFar, CoordModeOrigin ); // put line segments into grid if ( bSavePositionsInGrid ) { for( int nTemp = 1; nTemp < nNumberOfPointsSoFar; ++nTemp ) { putLineSegmentIntoGrid( xpoint[nTemp-1].x, xpoint[nTemp-1].y, xpoint[nTemp].x, xpoint[nTemp].y, pSeqMatch ); } } // clear buffer nNumberOfPointsSoFar = 0; cState = cLookingForLineSegmentOnScreen; } nLastX = nX; nLastY = nY; bLastPointOnScreen = bCurrentPointOnScreen; bLastPointExists = true; } } void assemblyView :: clearOldGridTable() { if ( !p2DArrayOfArrayOfSeqMatches_ ) return; // this will delete the arrays of seqMatch's, but not // the seqMatch's p2DArrayOfArrayOfSeqMatches_->clearAndDestroy(); delete p2DArrayOfArrayOfSeqMatches_; p2DArrayOfArrayOfSeqMatches_ = NULL; } void assemblyView :: createNewGridTable() { clearOldGridTable(); // how many grids down and across will this be? int nNumberOfColumns = ceil( nGetAssemblyViewWindowWidth() / pCP->dAssemblyViewGridCellWidthInPixels_); int nNumberOfRows = ceil( nGetAssemblyViewWindowHeight() / pCP->dAssemblyViewGridCellWidthInPixels_ ); p2DArrayOfArrayOfSeqMatches_ = new RWT2DPtrVector( nNumberOfColumns, nNumberOfRows, "assemblyView::p2DArrayOfArrayOfSeqMatches_" ); } void assemblyView :: putLineSegmentIntoGrid( const int nX0, const int nY0, const int nX1, const int nY1, seqMatch* pSeqMatch ) { // first figure out where the endpoints go // this will be truncated (floor) so it will give a 0-based coordinate int nXIndex0 = nX0 / pCP->dAssemblyViewGridCellWidthInPixels_; int nYIndex0 = nY0 / pCP->dAssemblyViewGridCellWidthInPixels_; int nXIndex1 = nX1 / pCP->dAssemblyViewGridCellWidthInPixels_; int nYIndex1 = nY1 / pCP->dAssemblyViewGridCellWidthInPixels_; if ( nXIndex0 == nXIndex1 && nYIndex0 == nYIndex1 ) { if ( !p2DArrayOfArrayOfSeqMatches_->bIsSubscriptInBounds( nXIndex0, nYIndex0 ) ) { return; } // the line segment lies within a single grid cell. RWTPtrOrderedVector* pSeqMatchArray = p2DArrayOfArrayOfSeqMatches_->pGetElement( nXIndex0, nYIndex0 ); if ( !pSeqMatchArray ) { RWCString soName = RWCString( (long) nXIndex0 ) + " " + RWCString( (long) nYIndex0 ) + " assemblyView::p2DArrayOfArrayOfSeqMatches_"; pSeqMatchArray = new RWTPtrOrderedVector( 64, soName ); p2DArrayOfArrayOfSeqMatches_->pGetElement( nXIndex0, nYIndex0 ) = pSeqMatchArray; } if ( pSeqMatchArray->index( pSeqMatch ) == RW_NPOS ) { pSeqMatchArray->insert( pSeqMatch ); } } else { // this line goes through more than one square, but we don't know // precisely which ones. // ABC // DEF // Let's say the line segment goes through D and C. We need to check // the other squares and see which of them it goes through. int nXIndexMin; int nXIndexMax; if ( nXIndex0 < nXIndex1 ) { nXIndexMin = nXIndex0; nXIndexMax = nXIndex1; } else { nXIndexMin = nXIndex1; nXIndexMax = nXIndex0; } int nYIndexMin; int nYIndexMax; if ( nYIndex0 < nYIndex1 ) { nYIndexMin = nYIndex0; nYIndexMax = nYIndex1; } else { nYIndexMin = nYIndex1; nYIndexMax = nYIndex0; } for( int nXIndex = nXIndexMin; nXIndex <= nXIndexMax; ++nXIndex ) { for( int nYIndex = nYIndexMin; nYIndex <= nYIndexMax; ++nYIndex ) { // some parts of some line segments will be off the screen if ( !p2DArrayOfArrayOfSeqMatches_->bIsSubscriptInBounds( nXIndex, nYIndex ) ) { continue; } if ( bIsLineSegmentInThisSquare( nX0, nY0, nX1, nY1, nXIndex, nYIndex ) ) { RWTPtrOrderedVector* pSeqMatchArray = p2DArrayOfArrayOfSeqMatches_->pGetElement( nXIndex, nYIndex ); if ( !pSeqMatchArray ) { RWCString soName = RWCString( (long) nXIndex ) + " " + RWCString( (long) nYIndex ) + " array in assemblyView::p2DArrayOfArrayOfSeqMatches_"; pSeqMatchArray = new RWTPtrOrderedVector( 64, soName ); p2DArrayOfArrayOfSeqMatches_->pGetElement( nXIndex, nYIndex ) = pSeqMatchArray; } if ( pSeqMatchArray->index( pSeqMatch ) == RW_NPOS ) { pSeqMatchArray->insert( pSeqMatch ); } } } } } } void assemblyView :: unhighlightCurrentSeqMatches() { for( int nSeqMatch = 0; nSeqMatch < aCurrentlyHighlightedSeqMatches_.length(); ++nSeqMatch ) { seqMatch* pSeqMatch = aCurrentlyHighlightedSeqMatches_[ nSeqMatch ]; drawOneRepeat( pSeqMatch, false, // bSavePositionsInGrids false ); // bHighlighted } aCurrentlyHighlightedSeqMatches_.clear(); } void assemblyView :: highlightSeqMatch( seqMatch* pSeqMatch ) { drawOneRepeat( pSeqMatch, false, // put position in grid true ); // bHighlight } int assemblyView :: nGetHeightFromRepeatSeparation( const int nSeparationInScaffoldPos ) { // convert size of insert into a vertical height // 0 -> 0 // infinity -> nHeightOfAreaForFwdRevPairsInSameScaffold_ // The formula that will do this is: 2/pi * nVertical.. arctan( insert) double dHeight = atan( nSeparationInScaffoldPos/50000.0 ) * 2.0 / 3.14159264 * ( nHeightOfAreaForFwdRevPairsInSameScaffold_ / 2 ); return( (int) dHeight ); } static RWCString soErrorInstructions( "\nThis may be caused by editing of the assembly since crossmatch was run. To fix this problem, click on \"What to show: sequence matches\" and then \"Run Crossmatch\"\n" ); bool assemblyView :: bFastaFileMatchesAceFile( const FileName& filFastaFile ) { FILE* pFastaFile = fopen( filFastaFile.data(), "r" ); if ( !pFastaFile ) { popupErrorMessage2( widPopupShell_, "could not open %s because %s\n", filFastaFile.data(), soGetErrno().data() ); return( false ); } const int SZLINE_SIZE = 2000; RWCString soLine( (size_t) ( SZLINE_SIZE + 1 ) ); RWCString soDNA; Contig* pLastContig = NULL; int nLine = 0; bool bFirstTime = true; bool bContinue = true; while( bContinue ) { bool bCheckLastContig = false; bool bNewContigStarting = false; if ( fgets( soLine.data(), SZLINE_SIZE, pFastaFile ) == NULL ) { bContinue = false; if ( bFirstTime ) { popupErrorMessage2( widPopupShell_, "could not read first line of fasta file %s because %s\n", filFastaFile.data(), soGetErrno().data() ); return( false ); } else { bCheckLastContig = true; } } ++nLine; soLine.nCurrentLength_ = strlen( soLine.data() ); if ( soLine.length() > 0 && soLine[0] == '>' ) { bNewContigStarting = true; if ( bFirstTime ) bFirstTime = false; else bCheckLastContig = true; } else { // normal line of bases soLine.stripTrailingWhitespaceFast(); soLine.toLower(); soDNA += soLine; } if ( bCheckLastContig ) { if ( ! ( pLastContig->soGetUnpaddedConsensus() == soDNA ) ) { // error! try to be descriptive RWCString soError( (size_t) 200 ); soError += "Cannot show sequence matches because "; soError += pLastContig->soGetName(); soError += " in Consed does not match that in the fasta file "; soError += filFastaFile; // let's see if we can determine where it went wrong RWCString soContig = pLastContig->soGetUnpaddedConsensus(); if ( soContig.length() != soDNA.length() ) { soError += " unpadded length of contig in consed is "; soError += RWCString( (long) soContig.length() ); soError += " unpadded length of contig in fasta file is "; soError += RWCString( (long) soDNA.length() ); } else { soError += " both have length "; soError += RWCString( (long) soDNA.length() ); } soError += "\n"; int nDiscrepancies = 0; int nMinLength = MIN( soContig.length(), soDNA.length() ); for( int nBase = 0; nBase < nMinLength; ++nBase ) { if ( soContig[nBase] != soDNA[nBase] ) { ++nDiscrepancies; soError += "at position "; soError += RWCString( (long) nBase ); soError += " consed says "; soError += soContig[nBase]; soError += " but fasta file says "; soError += soDNA[nBase]; soError += "\n"; if ( nDiscrepancies >= 10 ) break; } } soError += soErrorInstructions; popupErrorMessage2( widPopupShell_, "%s", soError.data() ); return( false ); } } if ( bNewContigStarting ) { RWCString soContigName = soLine.soGetRestOfString( 1 ); soContigName.stripTrailingWhitespaceFast(); pLastContig = ConsEd::pGetAssembly()->pGetContigByName( soContigName ); if ( !pLastContig ) { RWCString soError = "fasta file had contig "; soError += soContigName; soError += " but couldn't find this contig in Consed"; soError += soErrorInstructions; popupErrorMessage2( widPopupShell_, "%s", soError.data() ); return( false ); } soDNA = ""; soDNA.increaseButNotDecreaseMaxLength( pLastContig->nGetUnpaddedSeqLength() ); } } // while( bContinue ); fclose( pFastaFile ); return( true ); }