/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "autoReport.h" #include "contig.h" #include "locatedFragment.h" #include "rwcstring.h" #include "assembly.h" #include "consed.h" #include "consedParameters.h" #include "terminateIfNoPhdDir.h" #include "soAddCommas.h" #include "filename.h" #include "rwctokenizer.h" #include "fileDefines.h" #include "openLogFiles.h" #include "printAutoFinishMiscInfo.h" #include "soLine.h" #include "soGetErrno.h" #include "listOfLibraries.h" #include "lowconsqual.h" #include "highQualityDiscrepancyGotoList.h" #include "singleSubcloneRegionsGotoList.h" #include #include "primateSpecies.h" #include "fwdRevPair2.h" #include "fwdRevPair.h" autoReport :: autoReport( const FileName& filAceFileToOpen ) : filAceFileToOpen_( filAceFileToOpen ) {} void autoReport :: doIt() { pCP->bOKToUseGui_ = false; ConsEd* pConsed = new ConsEd(); pConsed->pAutoReport_ = this; openLogFiles( filAceFileToOpen_ ); // this uses the output file to report the error // so it must have openAutoFinishOutputFile before it terminateIfNoPhdDir(); printAutoFinishMiscInfo( filAceFileToOpen_, "" ); // I want to read the list of libraries before reading the ace file // to make debugging by the user easier pCP->pListOfLibraries_ = new listOfLibraries(); pCP->pListOfLibraries_->aLibraries_.soName_ = "pCP->pListOfLibraries_->aLibraries_ in autoEdit.cpp"; pCP->pListOfLibraries_->parseLibraryFile(); parseSpecies(); // tried moving this to above to before openAutoFinishOutputFiles // so that, if // the user specifies a bogus ace file, such as -ace -ace, // we don't create several zero-length files that have filenames // that the shell can't handle. However, if I do that, then any // error encountered by openAssemblyFile will try to write to // pAO and give a segmentation fault // this does "new Assembly" ConsEd::pGetConsEd()->openAssemblyFile( filAceFileToOpen_ ); Assembly* pAssembly = ConsEd::pGetAssembly(); pAssembly->setAllPaddedPositionsArrays(); checkPrimateSpeciesAndConstants( this ); if ( pCP->bAutoReportPrintSpeciesAlignment_ ) { dumpSpeciesAlignment(); } if ( pCP->bAutoReportPrintReadAlignment_ ) { printReadAlignment(); } if ( pCP->bAutoReportPrintDiscrepantRegions_ ) { printDiscrepantRegions(); } if ( pCP->bAutoReportPrintBasesInDiscrepantRegions_ ) { printBasesInDiscrepantRegions(); } if ( pCP->bAutoReportPrintMinimumQualityHistogram_ ) { printMinimumQualityHistogram(); } if ( pCP->bAutoReportPrintNumberOfIsolatedPads_ ) { printNumberOfIsolatedPads(); } if ( pCP->bAutoReportPrintNumberOfIsolatedPadsForEachSpecies_ ) { printNumberOfIsolatedPadsForEachSpecies(); } if ( pCP->bAutoReportPrintAgreeDisagreeBetweenPairsOfSpecies_ ) { printAgreeDisagreeBetweenPairsOfSpecies(); } if ( pCP->bAutoReportPrintAgreeDisagreeBetweenPairsOfSpecies2_ ) { printAgreeDisagreeBetweenPairsOfSpecies2(); } if ( pCP->bAutoReportPrintScaffolds_ ) { printScaffolds(); } if ( pCP->bAutoReportCalculateErrorProbabilitiesByComparingPTroPPan_ ) { calculateErrorProbabilitiesByComparingPTroPPan(); } if ( pCP->bAutoReportPrintIfReadsAreCorrectlyAligned_ ) { printIfReadsAreCorrectlyAligned(); } if ( pCP->bAutoReportPrintLengthsOfUnalignedHighQualitySegmentsOfReads_ ) { printLengthsOfUnalignedHighQualitySegments(); } if ( pCP->bAutoReportPrintLengthsOfAlignedSegmentsOfReads_ ) { printLengthsOfAlignedSegments(); } if ( pCP->bAutoReportCompareTopAndBottomStrands_ ) { compareTopAndBottomStrands(); } if ( pCP->bAutoReportCompareTopAndBottomStrands2_ ) { compareTopAndBottomStrands2(); } if ( pCP->bAutoReportCompareTopAndBottomStrands3_ ) { compareTopAndBottomStrands3(); } if ( pCP->bAutoReportCompareTopAndBottomStrands4_ ) { compareTopAndBottomStrands4(); } if ( pCP->bAutoReportSingleSignalInfo_ ) { singleSignalInfo(); } if ( pCP->bAutoReportSingleSignalInfo2_ ) { singleSignalInfo2(); } if ( pCP->bAutoReportCompareTopAndBottomStrandsWithHuman_ ) { compareTopAndBottomStrandsWithHuman(); } if ( pCP->bAutoReportCountColumnsForGroupsOfSpecies_ ) { countColumnsForGroupsOfSpecies(); } if ( pCP->bAutoReportCompareHQSWithLQS_ ) { compareHQSWithLQS(); } if ( pCP->bAutoReportLowQualityBasesInHQS_ ) { lowQualityBasesInHQS(); } if ( pCP->bAutoReportSingleSignalOrQuality_ ) { singleSignalOrQuality(); } if ( pCP->bAutoReportDiscrepancyRateInFlankedRegions_ ) { discrepancyRateInFlankedRegions(); } if ( pCP->bAutoReportDiscrepancyRateInFlankedRegions2_ ) { discrepancyRateInFlankedRegions2(); } if ( pCP->bAutoReportDiscrepancyRateInFlankedRegions4_ ) { discrepancyRateInFlankedRegions4(); } if ( pCP->bAutoReportDiscrepancyRateInFlankedRegions5_ ) { discrepancyRateInFlankedRegions5(); } if ( pCP->bAutoReportGoodReadsBug_ ) { countReadsDueToBugInGoodReads(); } if ( pCP->bAutoReportCompareTopAndBottomStrandsNoHuman_ ) { compareTopAndBottomStrandsNoHuman(); } if ( pCP->bAutoReportHighQualitySegmentData_ ) { highQualitySegmentData(); } if ( pCP->bAutoReportPrintFlankedColumns_ ) { printFlankedColumns(); } if ( pCP->bAutoReportPrintFlankedColumns2_ ) { printFlankedColumns2(); } if ( pCP->bAutoReportPrintFlankedColumns3_ ) { printFlankedColumns3(); } if ( pCP->bAutoReportPrintFlankedColumns4_ ) { printFlankedColumns4(); } if ( pCP->bAutoReportCountAcceptableColumnsWithNoneOnLeft_ ) { countAcceptableColumnsWithNoneOnLeft(); } if ( pCP->bAutoReportCountAllMutations_ ) { countAllMutations(); } if ( pCP->bAutoReportCountAllMutationsML_ ) { countAllMutationsML(); } if ( pCP->bAutoReportPrintHighQualityDiscrepancies_ ) { printHighQualityDiscrepancies(); } if ( pCP->bAutoReportPrintLowConsensusQualityRegions_ ) { printLowConsensusQualityRegions(); } if ( pCP->bAutoReportPrintSingleSubcloneRegions_ ) { printSingleSubcloneRegions(); } if ( pCP->bAutoReportPrintMutationsWithContext_ ) { printMutationsWithContext(); } if ( pCP->bAutoReportPrintCrudeChimpHumanMutations_ ) { printCrudeChimpHumanMutations(); } if ( pCP->bAutoReportPrintToCompareToReich_ ) { printToCompareToReich(); } if ( pCP->bAutoReportPrintLinkingForwardReversePairs_ ) { printLinkingForwardReversePairs(); } if ( pCP->bAutoReportPrintFilteredInconsistentForwardReversePairs_ ) { printFilteredInconsistentForwardReversePairs(); } if ( pCP->bAutoReportPrintHighlyDiscrepantRegions_ ) { printHighlyDiscrepantRegions(); } if ( pCP->bAutoReportPrintReadNamesInRegion_ ) { ConsEd::pGetAssembly()->printReadNamesInRegion(); } if ( pCP->bAutoReportPrintAssemblySummary_ ) { ConsEd::pGetAssembly()->printAssemblySummary(); } fclose( pAO ); cerr << "see " << pCP->filAutoFinishFullOutput_ << endl; } void autoReport :: dumpSpeciesAlignment() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); nContig++) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); dumpSpeciesAlignmentForOneContig( pContig ); } } void autoReport :: dumpSpeciesAlignmentForOneContig( Contig* pContig ) { // create arrays for each species with length equal to this // contig length (unpadded ) aArrayOfPtrsToArraysOfBases_.clearAndDestroy(); aArrayOfPtrsToArraysOfQualities_.clearAndDestroy(); aArrayOfPtrsToArraysOfHighQualitySegmentFlags_.clearAndDestroy(); aArrayOfPtrsToArraysOfLocatedFragments_.clearAndDestroy(); aArrayOfPtrsToArraysOfReadPositionsInDirectionOfSequencing_.clearAndDestroy(); int nUnpaddedLength = pContig->nGetUnpaddedSeqLength(); int nSpecies; for( nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { RWCString soSpecies = aArrayOfSpecies_[ nSpecies ]; RWTValVectorOfBases* pArrayOfBases = new RWTValVector( nUnpaddedLength, '0' ); RWTValVectorOfQualities* pArrayOfQualities = new RWTValVector( nUnpaddedLength, -1 ); RWCString soArrayName = soSpecies + " bit array of high quality segment"; RWTValVectorOfHighQualitySegmentFlags* pArrayOfHighQualitySegmentFlags = new RWTValVector( nUnpaddedLength, 0 ); pArrayOfHighQualitySegmentFlags->soName_ = soArrayName; RWTPtrVectorOfLocatedFragment* pArrayOfReads = new RWTPtrVector( nUnpaddedLength, NULL ); RWTValVectorOfUnpaddedReadPositionsInDirectionOfSequencing* pArrayOfUnpaddedReadPositionsInDirectionOfSequencing = new RWTValVector( nUnpaddedLength, -1 ); pArrayOfUnpaddedReadPositionsInDirectionOfSequencing->soName_ = soSpecies + " RWTValVectorOfUnpaddedReadPositionsInDirectionOfSequencing"; aArrayOfPtrsToArraysOfBases_.insert( pArrayOfBases ); aArrayOfPtrsToArraysOfQualities_.insert( pArrayOfQualities ); aArrayOfPtrsToArraysOfHighQualitySegmentFlags_.insert( pArrayOfHighQualitySegmentFlags ); aArrayOfPtrsToArraysOfLocatedFragments_.insert( pArrayOfReads ); aArrayOfPtrsToArraysOfReadPositionsInDirectionOfSequencing_.insert( pArrayOfUnpaddedReadPositionsInDirectionOfSequencing ); // find the reads for these species for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); if ( !pLocFrag->soGetName().bContains( soSpecies ) ) continue; // if reached here, the read is of the species we want if ( pLocFrag->bIsWholeReadUnaligned() ) continue; // figure out the unpadded read position in direction of // sequencing int nUnpaddedReadPosInDirectionOfSequencing = -666; bool bFirstTime = true; for( int nConsPos = pLocFrag->nGetAlignClipStart(); nConsPos <= pLocFrag->nGetAlignClipEnd(); ++nConsPos ) { Ntide nt = pLocFrag->ntGetFragFromConsPos( nConsPos ); // keeping track of read position if ( bFirstTime ) { bFirstTime = false; nUnpaddedReadPosInDirectionOfSequencing = pLocFrag->nOrientedUnpaddedFragPosFromConsPos( pLocFrag->nGetAlignClipStart() ); } else { if ( !nt.bIsPad() ) { if ( pLocFrag->bComp() ) { --nUnpaddedReadPosInDirectionOfSequencing; } else { ++nUnpaddedReadPosInDirectionOfSequencing; } } } // if the human sequence has a pad in this position, ignore // this position. Since the consensus is the same as the // human sequence, use it. if ( pContig->ntGetCons( nConsPos ).bIsPad() ) continue; int n0UnpaddedPos = pContig->nUnpaddedIndex( nConsPos ) - 1; bool bReplaceBase = false; if ( (*pArrayOfReads)[ n0UnpaddedPos ] == NULL ) { // this is the first read of this species bReplaceBase = true; } else { // case in which this is the 2nd read of this species. LocatedFragment* pLocFragB = (*pArrayOfReads)[ n0UnpaddedPos ]; if ( nt.cGetBase() == (*pArrayOfBases)[ n0UnpaddedPos ] ) { // update various arrays // Note that this means that the // RWTPtrVectorOfLocatedFragment array will not necessarily // contain a read of this quality and high quality segment // since one read may be in the high quality segment // and the other higher quality. We will report // the best of both. // update quality array if ( nt.qualGetQualityWithout98or99() > (*pArrayOfQualities)[ n0UnpaddedPos ] ) { (*pArrayOfQualities)[ n0UnpaddedPos ] = nt.qualGetQualityWithout98or99(); } // update the high quality segment array if ( pLocFrag->bIsInHighQualitySegmentOfRead( nConsPos ) && ( (*pArrayOfHighQualitySegmentFlags)[ n0UnpaddedPos ] == false ) ) { (*pArrayOfHighQualitySegmentFlags)[ n0UnpaddedPos ] = true; } } else { // case in which the 2 reads disagree // choose the one that is in the high quality segment bool bReadAInHighQualitySegment = pLocFrag->bIsInHighQualitySegmentOfRead( nConsPos ); bool bReadBInHighQualitySegment = pLocFragB->bIsInHighQualitySegmentOfRead( nConsPos ); if ( bReadAInHighQualitySegment && !bReadBInHighQualitySegment ) { bReplaceBase = true; } else if ( !bReadAInHighQualitySegment && bReadBInHighQualitySegment ) { continue; // keep using the existing read } else { assert( bReadAInHighQualitySegment == bReadBInHighQualitySegment ); // both reads are in high quality segment or // both reads are in low quality segment // choose the read based on the average quality // in a window const int nWindowSize = 11; float fQualityReadA = pLocFrag->fGetAverageQualityInWindow( nConsPos, nWindowSize ); float fQualityReadB = pLocFragB->fGetAverageQualityInWindow( nConsPos, nWindowSize ); if ( fQualityReadA > fQualityReadB ) { bReplaceBase = true; } } } // // if ( nt.cGetBase() != ... else } // if ( (*pArrayOfReads)[ n0UnpaddedPos ] == NULL ) else if ( bReplaceBase ) { (*pArrayOfBases)[ n0UnpaddedPos ] = nt.cGetBase(); (*pArrayOfQualities)[ n0UnpaddedPos ] = nt.qualGetQualityWithout98or99(); (*pArrayOfHighQualitySegmentFlags)[ n0UnpaddedPos ] = pLocFrag->bIsInHighQualitySegmentOfRead( nConsPos ); (*pArrayOfReads)[ n0UnpaddedPos ] = pLocFrag; (*pArrayOfUnpaddedReadPositionsInDirectionOfSequencing)[ n0UnpaddedPos ] = nUnpaddedReadPosInDirectionOfSequencing; } } // for( int nConsPos = pLocFrag->nGetAlignClipStart(); } // for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); } // for( int nSpecies = 0; nSpecies < aArrayOfSpecies.length(); ++nSpecies ) { printThisContig( pContig ); } void autoReport :: parseSpecies() { aArrayOfSpecies_.clear(); RWCTokenizer tok( pCP->soAutoReportSpecies_ ); RWCString soSpecies; while( !( soSpecies = tok() ).isNull() ) { aArrayOfSpecies_.insert( soSpecies ); } } void autoReport :: printThisContig( Contig* pContig ) { pContig->setStartNumberingUnpaddedConsensusAtUserDefinedIfNecessary(); // print headings fprintf( pAO, "\n\nAutoReportPrintSpeciesAlignment {\n" ); fprintf( pAO, "CONTIG: %s\n\n", pContig->soGetName().data() ); for( int nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { fprintf( pAO, " %10s", aArrayOfSpecies_[ nSpecies ].data() ); } fprintf( pAO, "\n" ); RWCString soLineToOutput( (size_t) 1000 ); for( int n0Unpadded = 0; n0Unpadded < pContig->nGetUnpaddedSeqLength(); ++n0Unpadded ) { soLineToOutput = pCP->soAutoReportPrefix_; soLineToOutput += " "; soLineToOutput += soAddCommas( n0Unpadded + pContig->nStartNumberingUnpaddedConsensusAtUserDefined_ ); soLineToOutput.padOnLeft( 12 ); int nSpecies; for( nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { RWTValVectorOfBases* pArrayOfBases = aArrayOfPtrsToArraysOfBases_[ nSpecies ]; soLineToOutput += " "; if ( (*pArrayOfBases)[ n0Unpadded ] == '*' ) soLineToOutput += "-"; else soLineToOutput += (*pArrayOfBases)[ n0Unpadded ]; } for( nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { RWTValVectorOfQualities* pArrayOfQualities = aArrayOfPtrsToArraysOfQualities_[ nSpecies ]; soLineToOutput += " "; RWCString soTemp( (long) (*pArrayOfQualities)[ n0Unpadded ] ); soTemp.padOnLeft( 2 ); soLineToOutput += soTemp; } for( nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { RWTValVectorOfHighQualitySegmentFlags* pArrayOfHighQualitySegmentFlags = aArrayOfPtrsToArraysOfHighQualitySegmentFlags_[ nSpecies ]; soLineToOutput += " "; soLineToOutput += ( (*pArrayOfHighQualitySegmentFlags)[ n0Unpadded ] ? "1" : "0" ); } if ( pCP->bAutoReportPrintReadPositions_ ) { for( nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { RWTValVectorOfUnpaddedReadPositionsInDirectionOfSequencing* pArrayOfUnpaddedReadPositionsInDirectionOfSequencing = aArrayOfPtrsToArraysOfReadPositionsInDirectionOfSequencing_[ nSpecies ]; soLineToOutput += " "; int nReadPositionInDirectionOfSequencing = (*pArrayOfUnpaddedReadPositionsInDirectionOfSequencing)[ n0Unpadded ]; soLineToOutput += RWCString( (long) nReadPositionInDirectionOfSequencing ); } } if ( pCP->bAutoReportPrintChosenReadName_ ) { for( nSpecies = 0; nSpecies < aArrayOfSpecies_.length(); ++nSpecies ) { RWTPtrVectorOfLocatedFragment* pArrayOfReads = aArrayOfPtrsToArraysOfLocatedFragments_[ nSpecies ]; LocatedFragment* pLocFrag = (*pArrayOfReads)[ n0Unpadded]; RWCString soLastPartOfReadName; if ( pLocFrag ) { soLastPartOfReadName = pLocFrag->soGetName().soGetLastPart( pCP->nAutoReportNumbersOfCharactersOfChosenReadNameToBePrinted_ ); } else { soLastPartOfReadName = "0"; } soLineToOutput += " "; soLineToOutput += soLastPartOfReadName; } } fprintf( pAO, "%s\n", soLineToOutput.data() ); } fprintf( pAO, "\n} AutoReportPrintSpeciesAlignment\n\n" ); } void autoReport :: printReadAlignment() { readReadFile(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); nContig++) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); printReadAlignmentForOneContig( pContig ); } } void autoReport :: printReadAlignmentForOneContig( Contig* pContig ) { // print headings fprintf( pAO, "\n\nAutoReportPrintReadAlignment {\n" ); fprintf( pAO, "CONTIG: %s\n\n", pContig->soGetName().data() ); fprintf( pAO, "Columns {\n" ); fprintf( pAO, "prefix (typically chromosome)\n" ); fprintf( pAO, "user-defined position\n" ); fprintf( pAO, "bases (0 if unaligned)\n" ); fprintf( pAO, "qualities (-1 if unaligned)\n" ); fprintf( pAO, "high quality segment flags\n" ); fprintf( pAO, "read position in direction of sequencing (-1 if unaligned)\n" ); fprintf( pAO, "} Columns\n" ); // should print the list of reads RWTPtrOrderedVector aArrayOfReadsToPrintInThisContig; findListOfReadsInThisContig( pContig, &aArrayOfReadsToPrintInThisContig ); fprintf( pAO, "Reads {\n" ); int nRead; for( nRead = 0; nRead < aArrayOfReadsToPrintInThisContig.length(); ++nRead ) { LocatedFragment* pLocFrag = aArrayOfReadsToPrintInThisContig[ nRead ]; fprintf( pAO, "%s\n", pLocFrag->soGetName().data() ); } fprintf( pAO, "} Reads\n" ); // no point in printing out all of the positions before the first aligned // read and after the last aligned read int nFirstAlignedConsPos; int nLastAlignedConsPos; bool bFirst = true; for( nRead = 0; nRead < aArrayOfReadsToPrintInThisContig.length(); ++nRead ) { LocatedFragment* pLocFrag = aArrayOfReadsToPrintInThisContig[ nRead ]; if ( bFirst ) { bFirst = false; nFirstAlignedConsPos = pLocFrag->nGetAlignClipStart(); nLastAlignedConsPos = pLocFrag->nGetAlignClipEnd(); } else { nFirstAlignedConsPos = MIN( nFirstAlignedConsPos, pLocFrag->nGetAlignClipStart() ); nLastAlignedConsPos = MAX( nLastAlignedConsPos, pLocFrag->nGetAlignClipEnd() ); } } RWTValVector aLastReadPosition( aArrayOfReadsToPrintInThisContig.length(), -666 ); aLastReadPosition.soName_ = "autoReport::aLastReadPositions"; RWTValVector aLastReadPositionSet( aArrayOfReadsToPrintInThisContig.length(), false ); aLastReadPositionSet.soName_ = "autoReport::aLastReadPositionSet"; for( int nConsPos = nFirstAlignedConsPos; nConsPos <= nLastAlignedConsPos; ++nConsPos ) { // problem here with the read positions. Need to think of efficient // way to print them. Perhaps we can keep an array of the current // read positions for each read. Then as move through each // consensus position, could go through each read and recover its "state" // (past read position) and get its new position based on whether // it has a pad or not and its orientation. soLine = pCP->soAutoReportPrefix_; soLine += " "; soLine += soAddCommas( pContig->nUnpaddedIndexStartingAtUserDefinedPosition( nConsPos ) ); int nRead; // bases for( nRead = 0; nRead < aArrayOfReadsToPrintInThisContig.length(); ++nRead ) { LocatedFragment* pLocFrag = aArrayOfReadsToPrintInThisContig[ nRead ]; if ( pLocFrag->bIsInAlignedPartOfRead( nConsPos ) ) { Ntide nt = pLocFrag->ntGetFragFromConsPos( nConsPos ); soLine += " "; soLine += nt.cGetBase(); } else { soLine += " 0"; } } // qualities for( nRead = 0; nRead < aArrayOfReadsToPrintInThisContig.length(); ++nRead ) { LocatedFragment* pLocFrag = aArrayOfReadsToPrintInThisContig[ nRead ]; if ( pLocFrag->bIsInAlignedPartOfRead( nConsPos ) ) { Ntide nt = pLocFrag->ntGetFragFromConsPos( nConsPos ); soLine += " "; RWCString soTemp = RWCString( (long) nt.qualGetQualityWithout98or99() ); soTemp.padOnLeft( 2 ); soLine += soTemp; } else { soLine += " -1"; } } // high quality segment for( nRead = 0; nRead < aArrayOfReadsToPrintInThisContig.length(); ++nRead ) { LocatedFragment* pLocFrag = aArrayOfReadsToPrintInThisContig[ nRead ]; // can't decide whether this should be aligned as well if ( pLocFrag->bIsInHighQualitySegmentOfRead( nConsPos ) && pLocFrag->bIsInAlignedPartOfRead( nConsPos ) ) { soLine += " 1"; } else { soLine += " 0"; } } for( nRead = 0; nRead < aArrayOfReadsToPrintInThisContig.length(); ++nRead ) { LocatedFragment* pLocFrag = aArrayOfReadsToPrintInThisContig[ nRead ]; if ( pLocFrag->bIsInAlignedPartOfRead( nConsPos ) ) { Ntide nt = pLocFrag->ntGetFragFromConsPos( nConsPos ); if ( aLastReadPositionSet[ nRead ] ) { if ( !nt.bIsPad() ) { if ( pLocFrag->bComp() ) { --aLastReadPosition[ nRead ]; } else { ++aLastReadPosition[ nRead ]; } } } else { aLastReadPositionSet[ nRead ] = true; aLastReadPosition[ nRead ] = pLocFrag->nOrientedUnpaddedFragPosFromConsPos( nConsPos ); } // if this is a pad, then shouldn't we print something // else other than the most recently printed position? // I guess that any program reading this file would // know it was a pad by looking at the read base, but still... soLine += " "; soLine += RWCString( (long) aLastReadPosition[ nRead ] ); } else { // the read is unaligned at this position soLine += " -1"; } } // for( nRead = fprintf( pAO, "%s\n", soLine.data() ); } // for( int nConsPos = nFirstAlignedConsPos; fprintf( pAO, "} AutoReportPrintReadAlignment\n\n" ); } void autoReport :: readReadFile() { aArrayOfReads_.clear(); FILE* pReadFile = fopen( pCP->filAutoReportPrintTheseReads_.data(), "r" ); if ( pReadFile == NULL ) { RWCString soErrorMessage = "could not open consed.autoReportPrintTheseReads: "; soErrorMessage += pCP->filAutoReportPrintTheseReads_; soErrorMessage += soGetErrno(); THROW_ERROR( soErrorMessage ); } while( fgets( soLine.data(), nMaxLineSize, pReadFile ) != NULL ) { soLine.nCurrentLength_ = strlen( soLine.data() ); soLine.stripAllWhitespaceExceptInternal(); aArrayOfReads_.insert( soLine ); } fclose( pReadFile ); if ( aArrayOfReads_.length() == 0 ) { RWCString soErrorMessage = "there are no reads in consed.autoReportPrintTheseReads: "; soErrorMessage += pCP->filAutoReportPrintTheseReads_; THROW_ERROR( soErrorMessage ); } // might want these printed in a particular order // aArrayOfReads.removeDuplicates(); } void autoReport :: findListOfReadsInThisContig( Contig* pContig, RWTPtrOrderedVector* pArrayOfReadsInThisContigToPrint ) { pArrayOfReadsInThisContigToPrint->clear(); for( int nRead = 0; nRead < pContig->nGetNumberOfFragsInContig(); ++nRead ) { LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet( nRead ); for( int nRead2 = 0; nRead2 < aArrayOfReads_.length(); ++nRead2 ) { if ( pLocFrag->soGetName() == aArrayOfReads_[ nRead2 ] ) { pArrayOfReadsInThisContigToPrint->insert( pLocFrag ); } } } } void autoReport :: printDiscrepantRegions() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); nContig++) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printDiscrepantRegions( ); } } void autoReport :: printMinimumQualityHistogram() { RWTValVector aMinimumQualityHistogram( 101, // capacity 0 ); // initial value for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->addToMinimumQualityHistogram( aMinimumQualityHistogram ); } fprintf( pAO, "MINIMUM_QUALITY_HISTOGRAM {\n" ); for( int nQuality = 0; nQuality <= 99; ++nQuality ) { fprintf( pAO, "%d %d\n", nQuality, aMinimumQualityHistogram[ nQuality ] ); } fprintf( pAO, "} MINIMUM_QUALITY_HISTOGRAM\n" ); } void autoReport :: printNumberOfIsolatedPads() { mbtValVector aIsolatedPrimatePads( 101, 0, 0 ); // nCapacity, nOffset, initialValue mbtValVector aIsolatedHumanPads( 101, 0, 0 ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->addNumberOfIsolatedPads( aIsolatedPrimatePads, aIsolatedHumanPads ); } fprintf( pAO, "ISOLATED_PRIMATE_PADS {\n" ); int nQuality; for( nQuality = 0; nQuality <= 99; ++nQuality ) { fprintf( pAO, "%d %d\n", nQuality, aIsolatedPrimatePads[ nQuality ] ); } fprintf( pAO, "} ISOLATED_PRIMATE_PADS\n" ); fprintf( pAO, "ISOLATED_HUMAN_PADS {\n" ); for( nQuality = 0; nQuality <= 99; ++nQuality ) { fprintf( pAO, "%d %d\n", nQuality, aIsolatedHumanPads[ nQuality ] ); } fprintf( pAO, "} ISOLATED_HUMAN_PADS\n" ); } void autoReport :: printNumberOfIsolatedPadsForEachSpecies() { RWTValOrderedVector aSpecies; RWTValOrderedVector aNumberOfIsolatedPadsInPrimateButNotInHuman; RWTValOrderedVector aNumberOfIsolatedPadsInHumanButNotInPrimate; RWCString soSpecies; RWCTokenizer tok( pCP->soAutoReportSpecies_ ); while( !( soSpecies = tok() ).isNull() ) { aSpecies.insert( soSpecies ); } aNumberOfIsolatedPadsInPrimateButNotInHuman.clear(); aNumberOfIsolatedPadsInHumanButNotInPrimate.clear(); int nSpecies; for( nSpecies = 0; nSpecies <= aSpecies.length(); ++nSpecies ) { aNumberOfIsolatedPadsInPrimateButNotInHuman.insert( 0 ); aNumberOfIsolatedPadsInHumanButNotInPrimate.insert( 0 ); } int nPossibleSites = 0; for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->countPadsForEachSpecies( aSpecies, aNumberOfIsolatedPadsInPrimateButNotInHuman, aNumberOfIsolatedPadsInHumanButNotInPrimate, nPossibleSites ); } fprintf( pAO, "POSSIBLE_SITES: %d\n", nPossibleSites ); fprintf( pAO, "ISOLATED_PADS_FOR_EACH_SPECIES {\n" ); for( nSpecies = 0; nSpecies < aSpecies.length(); ++nSpecies ) { fprintf( pAO, "%s: %d %d %d\n", aSpecies[ nSpecies ].data(), aNumberOfIsolatedPadsInPrimateButNotInHuman[ nSpecies ], aNumberOfIsolatedPadsInHumanButNotInPrimate[ nSpecies ], aNumberOfIsolatedPadsInPrimateButNotInHuman[ nSpecies ] - aNumberOfIsolatedPadsInHumanButNotInPrimate[ nSpecies ] ); } fprintf( pAO, "} ISOLATE_PADS_FOR_EACH_SPECIES\n" ); } void autoReport :: printBasesInDiscrepantRegions() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printBasesInDiscrepantRegions( this ); } } void autoReport :: printAgreeDisagreeBetweenPairsOfSpecies() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printAgreeDisagreeBetweenPairsOfSpecies( this ); } } void autoReport :: printAgreeDisagreeBetweenPairsOfSpecies2() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printAgreeDisagreeBetweenPairsOfSpecies2( this ); } } void autoReport :: calculateErrorProbabilitiesByComparingPTroPPan() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->calculateErrorProbabilitiesByComparingPTroPPan( this ); } } void autoReport :: printScaffolds() { RWCString soContigMap = ConsEd::pGetAssembly()->soGetContigMap(); soContigMap.translate( ",", "\n" ); fprintf( pAO, "%s\n", soContigMap.data() ); } void autoReport :: printIfReadsAreCorrectlyAligned() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->areReadsCorrectlyAligned( this ); } } void autoReport :: printLengthsOfUnalignedHighQualitySegments() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printLengthsOfUnalignedHighQualitySegments(); } } void autoReport :: printLengthsOfAlignedSegments() { fprintf( pAO, "AlignedSegmentsOfReads{\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printLengthsOfAlignedSegments(); } fprintf( pAO, "}AlignedSegmentsOfReads\n" ); } void autoReport :: compareTopAndBottomStrands() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareTopAndBottomStrands( this ); } } void autoReport :: singleSignalInfo() { fprintf( pAO, "singleSignalInfo {\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->singleSignalInfo( this ); } fprintf( pAO, "} singleSignalInfo\n" ); } void autoReport :: singleSignalInfo2() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->singleSignalInfo2(); } } void autoReport :: compareTopAndBottomStrandsWithHuman() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareTopAndBottomStrandsWithHuman( this ); } } void autoReport :: compareTopAndBottomStrands2() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareTopAndBottomStrands2( this ); } } void autoReport :: compareTopAndBottomStrands3() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareTopAndBottomStrands3( this ); } } void autoReport :: compareTopAndBottomStrands4() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareTopAndBottomStrands4( this ); } } void autoReport :: countColumnsForGroupsOfSpecies() { parseSpecies(); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->countColumnsForGroupsOfSpecies( this ); } } void autoReport :: compareHQSWithLQS() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareHQSWithLQS( this ); } } void autoReport :: lowQualityBasesInHQS() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->lowQualityBasesInHQS( this ); } } void autoReport :: singleSignalOrQuality() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->singleSignalOrQuality( this ); } } void autoReport :: discrepancyRateInFlankedRegions() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->discrepancyRateInFlankedRegions( this ); } } void autoReport :: discrepancyRateInFlankedRegions2() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->discrepancyRateInFlankedRegions2( this ); } } void autoReport :: discrepancyRateInFlankedRegions4() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->discrepancyRateInFlankedRegions4( this ); } } void autoReport :: discrepancyRateInFlankedRegions5() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->discrepancyRateInFlankedRegions5( this ); } } void autoReport :: countReadsDueToBugInGoodReads() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->countReadsDueToBugInGoodReads(); } } void autoReport :: compareTopAndBottomStrandsNoHuman() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->compareTopAndBottomStrandsNoHuman( this ); } } void autoReport :: highQualitySegmentData() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->highQualitySegmentData(); } } void autoReport :: printFlankedColumns() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printFlankedColumns( this ); } } void autoReport :: printFlankedColumns2() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printFlankedColumns2( this ); } } void autoReport :: printFlankedColumns3() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printFlankedColumns3( this ); } } void autoReport :: printFlankedColumns4() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printFlankedColumns4( this ); } } void autoReport :: countAcceptableColumnsWithNoneOnLeft() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->countAcceptableColumnsWithNoneOnLeft( this ); } } void autoReport :: countAllMutations() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->countAllMutations( this ); } } void autoReport :: countAllMutationsML() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->countAllMutationsML( this ); } } void autoReport :: printHighQualityDiscrepancies() { fprintf( pAO, "highQualityDiscrepancies {\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); highQualityDiscrepancyGotoList myGotoList( pContig, pCP->bAutoReportHighQualityDiscrepanciesExcludeCompressionOrG_dropoutTags_, pCP->bAutoReportHighQualityDiscrepanciesExcludeMostPads_ ); for( int nGotoItem = 0; nGotoItem < myGotoList.nGetNumGotoItems(); ++nGotoItem ) { gotoItem* pGotoItem = myGotoList.pGetGotoItem( nGotoItem ); fprintf( pAO, "%s\n", pGotoItem->soFullDescription_.data() ); } } fprintf( pAO, "} highQualityDiscrepancies\n" ); } void autoReport :: printLowConsensusQualityRegions() { fprintf( pAO, "lowConsensusQualityRegions {\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); LowConsQualGotoList myGotoList( pContig ); for( int nGotoItem = 0; nGotoItem < myGotoList.nGetNumGotoItems(); ++nGotoItem ) { gotoItem* pGotoItem = myGotoList.pGetGotoItem( nGotoItem ); fprintf( pAO, "%s\n", pGotoItem->soFullDescription_.data() ); } } fprintf( pAO, "} lowConsensusQualityRegions\n" ); } void autoReport :: printSingleSubcloneRegions() { fprintf( pAO, "singleSubcloneRegions {\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); singleSubcloneRegionsGotoList myGotoList( pContig ); for( int nGotoItem = 0; nGotoItem < myGotoList.nGetNumGotoItems(); ++nGotoItem ) { gotoItem* pGotoItem = myGotoList.pGetGotoItem( nGotoItem ); fprintf( pAO, "%s\n", pGotoItem->soFullDescription_.data() ); } } fprintf( pAO, "} singleSubcloneRegions\n" ); } void autoReport :: printLinkingForwardReversePairs() { fprintf( pAO, "linkingForwardReversePairs {\n" ); ConsEd::pGetAssembly()->figureOutContigOrderAndOrientation(); fprintf( pAO, "} linkingForwardReversePairs\n" ); } void autoReport :: printFilteredInconsistentForwardReversePairs() { Assembly* pAssembly = ConsEd::pGetAssembly(); pAssembly->setContigTemplateArrays(); pAssembly->filterInconsistentFwdRevPairs(); fprintf( pAO, "filteredInconsistentFwdRevPairs {\n" ); fprintf( pAO, "read1 read2 contig1 contig2 (-> or <- for read1) (-> or <- for read2) (why inconsistent)\n" ); for( int nPair = 0; nPair < pAssembly->pArrayOfConfirmedInconsistentFwdRevPairs_->length(); ++nPair ) { fwdRevPair* pFwdRevPair = (*(pAssembly->pArrayOfConfirmedInconsistentFwdRevPairs_))[ nPair ]; // how shall we print these? // read1 read2 contig1 contig2 -> <- problem RWCString soArrow1; if ( pFwdRevPair->pLocFrag1_->bComp() ) { soArrow1 = "<-"; } else { soArrow1 = "->"; } RWCString soArrow2; if ( pFwdRevPair->pLocFrag2_->bComp() ) { soArrow2 = "<-"; } else { soArrow2 = "->"; } // convert pFwdRevPair->cProblem_ to text fwdRevPair2 dummy( pFwdRevPair->pLocFrag1_, pFwdRevPair->pLocFrag2_, NULL, // pGctToUse ' ', // cType pFwdRevPair->cProblem_ ); RWCString soProblem = dummy.soGetProblem(); if ( soProblem.bContains( "->" ) || soProblem.bContains( "<-" ) ) { soProblem = "wrong orientation"; } fprintf( pAO, "%s %s %s %s %s %s %s\n", pFwdRevPair->pLocFrag1_->soGetName().data(), pFwdRevPair->pLocFrag2_->soGetName().data(), pFwdRevPair->pContigOfRead1_->soGetName().data(), pFwdRevPair->pContigOfRead2_->soGetName().data(), soArrow1.data(), soArrow2.data(), soProblem.data() ); } fprintf( pAO, "} filteredInconsistentFwdRevPairs\n" ); } void autoReport :: printMutationsWithContext() { fprintf( pAO, "printMutationsWithContext {\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printMutationsWithContext( this ); } fprintf( pAO, "} printMutationsWithContext\n" ); } void autoReport :: printCrudeChimpHumanMutations() { for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printCrudeChimpHumanMutations( this ); } } void autoReport :: printToCompareToReich() { fprintf( pAO, "printToCompareToReich {\n" ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); pContig->printToCompareToReich( this ); } fprintf( pAO, "} printToCompareToReich\n" ); } void autoReport :: printHighlyDiscrepantRegions() { fprintf( pAO, "printHighlyDiscrepantRegions {\n" ); RWCString soTitle; RWCString soFirstLine; RWCString soSecondLine; soTitle = "Highly Discrepant Positions"; soFirstLine = "min # of discrepant reads: " + RWCString( (long) pCP->nNavigateByHighlyDiscrepantPositionsMinDiscrepantReads_ ) + " min quality: " + RWCString( (long) pCP->nNavigateByHighlyDiscrepantPositionsIgnoreBasesBelowThisQuality_ ) + " \"r\": base of reference seq"; soSecondLine = "max depth of coverage: " + RWCString( (long) pCP->nNavigateByHighlyDiscrepantPositionsMaxDepthOfCoverage_ ); if ( pCP->bNavigateByHighlyDiscrepantPositionsJustListIndels_ ) { soSecondLine += " just indels"; } soSecondLine += " and ignoring reference seq"; RWCString soThirdLine = " A C G T * pos contig"; fprintf( pAO, "%s\n", soTitle.data() ); fprintf( pAO, "%s\n", soFirstLine.data() ); fprintf( pAO, "%s\n", soSecondLine.data() ); fprintf( pAO, "%s\n", soThirdLine.data() ); for( int nContig = 0; nContig < ConsEd::pGetAssembly()->nNumContigs(); ++nContig ) { Contig* pContig = ConsEd::pGetAssembly()->pGetContig( nContig ); gotoList* pGotoList = new gotoList(); pContig->navigateByHighlyDiscrepantPositions( pCP->nNavigateByHighlyDiscrepantPositionsMinDiscrepantReads_, pCP->nNavigateByHighlyDiscrepantPositionsMaxDepthOfCoverage_, pCP->nNavigateByHighlyDiscrepantPositionsIgnoreBasesBelowThisQuality_, pCP->bNavigateByHighlyDiscrepantPositionsJustListIndels_, pCP->bNavigateByHighlyDiscrepantPositionsIgnoreOtherReadsStartingAtSameLocation_, pCP->bNavigateByHighlyDiscrepantPositionsIgnoreIfListedBasesInConsensus_, pCP->soNavigateByHighlyDiscrepantPositionsIgnoreIfTheseBasesInConsensus_, true, // bGotoListNotArrays pGotoList ); // print out for( int nGotoItem = 0; nGotoItem < pGotoList->nGetNumGotoItems(); ++nGotoItem ) { gotoItem* pGotoItem = pGotoList->pGetGotoItem( nGotoItem ); fprintf( pAO, "%s\n", pGotoItem->soFullDescription_.data() ); } } fprintf( pAO, "} printHighlyDiscrepantRegions\n" ); }