/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ // // assembly.cpp // // implementation for assembly.h // // Currently based on parsing the output // file from phrap, using the PhrapFile object. // this is a temporary approach, used for the // first cut. // // chrisa 21-Dec-94 // #include #include #include #include "rwcstring.h" #include "rwctokenizer.h" #include "sysdepend.h" #include "locatedFragment.h" #include "contig.h" #include "assembly.h" #include "basesegment.h" #include "mbt_exception.h" #include "filename.h" #include "listOfFragments.h" #include "listOfFragmentsAndEndPositions.h" #include "guiapp.h" #include "consed.h" #include "deleteFileIfItExists.h" #include "baseSegmentsToSort.h" #include "tag.h" extern bool bNoBaseSeg; extern bool bNoSetSequence; extern bool bNoFragments; extern bool bNoListOfFragments; // the _minimum_ storage allocated for string objects that // will hold DNA sequence - avoids needless alloc'ing static const RWSize_2T sizeMinDnaStringObjectSize = 10240; // max chars on input line static const int nLineBufLen = 2040; static const RWSize_2T nMaxLineChars = nLineBufLen; // // macros format message, form and throw exception objects // #define PARSE_PANIC(file,line,message) \ { ostrstream ost; \ ost << "Corrupted ace file detected by consed source file " \ << __FILE__ << " at " << __LINE__ <" line ExpectFragDna, // reading lines of fragment DNA sequence ExpectAsm, // reading Assembled_from* lines ExpectBase, // reading Base_segment* lnes ExpectClip, // reading Clipping lines ExpectAnything, // allow unrecognized bases ExpectBaseQualityTitleLine, // for phrap quality ExpectBaseQualityNumberLine, ExpectDescription } InputState; // // if true we have a buffered line that was "put back" // while parsing. look, Dave, globals! See, I'm not // a slave to religious dogma after all. I can write // totally crufty code when I want to. %-) // bool bHavePutBackLine; // initial storage for max line len RWCString soPutBackLine(nMaxLineChars); // // this procedure returns buffered line if exists, otherwise // reads from file. returns true if has a line (from either source). // static bool bGetNextLine(ifstream& ifsInFile, int& nCurLine, RWCString& soNextLine) { if (bHavePutBackLine) { soNextLine = soPutBackLine; bHavePutBackLine = false; return true; // return with buffered data } char szLineBuf[nLineBufLen]; // buffer for fstream getline() if (! ifsInFile.getline(szLineBuf, (nLineBufLen-1)) ) { soNextLine = ""; // clear passed arg return false; } // bump line counter - actually read one nCurLine++; // convert to string object soNextLine = szLineBuf; // return with new data return true; } // // utility to remove ".comp" from filename // returns true if extension was present // static bool removeCompFromFilename(RWCString& so) { size_t nFindIndex = so.index(".comp"); if (nFindIndex != RW_NPOS) { so.remove(nFindIndex); // clear to end of name return true; } return false; } // // buffer the passed line // static void putBackLine(RWCString& soLine) { bHavePutBackLine = true; soPutBackLine = soLine; } void Assembly :: tryParsingUsingAceFileFormat1(const char* szPhrapAceFileName, bool& bHasConsensusTags, long& lPositionOfConsensusTags) { bHasConsensusTags = false; // will be changed, if there are any baseSegmentsToSortType* pBaseSegmentsToSort; ifstream ifsInFile(szPhrapAceFileName, ios::in); // file open? if (!ifsInFile) { ostrstream ost; ost << "Unable to open file " << szPhrapAceFileName << endl << ends; SysRequestFailed srf(ost.str()); srf.includeErrnoDescription(); throw srf; } int nLinesInAceFile; int nNumberOfBaseSegments = 0; cout << "first pass through ace file" << endl; listOfFragments listOfFragmentss; if (!bNoListOfFragments ) { firstPassThroughAceFile( ifsInFile, listOfFragmentss, nLinesInAceFile, nNumberOfBaseSegments ); ifsInFile.close(); nNumberOfReadsInAssembly_ = listOfFragmentss.daFragmentName_.length(); // grab memory that we will use to store all base segments pBaseSegmentsToSort = (baseSegmentsToSortType*) malloc( sizeof( baseSegmentsToSortType ) * nNumberOfBaseSegments ); // rewind the file and parse again ifsInFile.open(szPhrapAceFileName, ios::in); // file open? if (!ifsInFile) { ostrstream ost; ost << "Unable to open file " << szPhrapAceFileName << endl << ends; SysRequestFailed srf(ost.str()); srf.includeErrnoDescription(); throw srf; } // create this array at the beginning of parsing and delete it when // parsing is done pListOfFragmentsAndEndPositions = new ListOfFragmentsAndEndPositions(); } cout << "second pass through ace file" << endl; // this string object will hold DNA seqs as they're read from // multiple lines RWCString soDna(sizeMinDnaStringObjectSize); // clear flags for line buffering bHavePutBackLine = false; soPutBackLine = ""; // hold name of fragment while reading related lines RWCString soCurFragName; // pointer to current located fragment LocatedFragment* pNewLocFrag = 0; LocatedFragment* pCurLocFrag = 0; // hold name of current contig while reading related lines RWCString soCurContigName; // hold pointer to current contig Contig* pNewContig = 0; // will be used to store phrap quality values int nLastNonPadBasePosition; // flag indicates that "Clipping*" line found after // "Sequence* " bool bHaveClippingLineForThisFrag = false; // used for sorting the base segments int nNumberOfBaseSegmentsInContig = 0; // read all lines in the file, keeping count int nCurLine = 0; int nAsmLines = 0; int nBaseLines = 0; // holds the current input file line RWCString soLine; // the first thing we expect is the contig consensus seq InputState inputState = ExpectContig; bool bAtBeginningOfConsensusTags = false; // read the entire file within this while loop while ( bGetNextLine(ifsInFile, nCurLine, soLine) ) { if ( ( nCurLine % 5000 ) == 0 ) { int nPerCentDone = (( nCurLine * 100 ) / nLinesInAceFile) / 2; cout << nPerCentDone << "% done. nCurLine = " << nCurLine << endl; } // cout << "Current state = "<< inputState // << " line = '" << soLine << "' " << endl; cout.flush(); // tokenize input line RWCTokenizer tokNext(soLine); // pull off the first token RWCString soFirstToken = tokNext(); switch (inputState) { case ExpectContig: if (soFirstToken.isNull()) break; // blank line ignored if (soFirstToken != "DNA") { PARSE_PANIC_TOKEN(soFileName_, nCurLine, soFirstToken); } // get contig name, check for format error soCurContigName = tokNext(); if (soCurContigName.isNull()) { PARSE_PANIC(soFileName_,nCurLine,"'DNA' not followed by contig name"); } if (!bNoFragments ) { // create a new contig object pNewContig = new Contig(soCurContigName, ConsEd::pGetAssembly() ); // add it to assembly dapContigs_.insert(pNewContig); } // prepare for adding base segments nNumberOfBaseSegmentsInContig = 0; // what follows should be the consensus sequence of the contig inputState = ExpectContDna; // clear out the dna holder soDna = ""; break; case ExpectContDna: if (soFirstToken.isNull()) { // there should be some DNA in the buffer by now if (soDna.length() == 0) { PARSE_PANIC(soFileName_, nCurLine, "0 length DNA sequence"); } // end of dna. add accumulated sequence to array if (! bNoSetSequence) pNewContig->setSequence(soDna); // end of DNA. "Sequence" line should follow inputState = ExpectAnything; } else { // not null, should be DNA sequence (ACTG's, etc.) soDna += soFirstToken; } break; // case ExpectContDna case ExpectAnything: if (soFirstToken == "Sequence" ) { putBackLine( soLine ); inputState = ExpectContSeq; } else if (soFirstToken == "BaseQuality" ) { putBackLine( soLine ); inputState = ExpectBaseQualityTitleLine; } else { // skip over unrecognized lines } break; case ExpectBaseQualityTitleLine: { RWCString soContigName = tokNext(); assert( pNewContig->soGetName() == soContigName ); } nLastNonPadBasePosition = pNewContig->nGetStartIndex() - 1; inputState = ExpectBaseQualityNumberLine; break; case ExpectBaseQualityNumberLine: if (soFirstToken.isNull() ) { if ( pNewContig->ntGetConsUnsafe( pNewContig->nGetEndConsensusIndex() ).bIsPad() ) { if ( nLastNonPadBasePosition > pNewContig->nGetEndConsensusIndex() ) { PARSE_PANIC( soFileName_, nCurLine, "There should be the same number of quality values as unpadded bases" ); } } else { if ( nLastNonPadBasePosition != pNewContig->nGetEndConsensusIndex() ) { PARSE_PANIC( soFileName_, nCurLine, "There should be the same number of quality values as unpadded bases" ); } } // we need to do this since the ace file only // contains quality for non-pads pNewContig->assignQualityAndPeakPositionsToPads( false ); // false for bAndPeakPositions inputState = ExpectContSeq; break; } processBaseQualityLine( soLine, pNewContig, nLastNonPadBasePosition ); break; case ExpectContSeq: if (soFirstToken.isNull()) break; // blank line ignored if (soFirstToken != "Sequence") { PARSE_PANIC(soFileName_, nCurLine, "Expected 'Sequence' lines for contig"); } // next token should be the contig name { // local variable in switch statement requires braces RWCString soTempContigName = tokNext(); // check for format error if (soTempContigName.isNull()) { PARSE_PANIC(soFileName_,nCurLine, "'Sequence' not followed by read name"); } } // next up are the assembled from lines inputState = ExpectAsm; nAsmLines = 0; break; case ExpectAsm: // blank line means no base segment lines - error if (soFirstToken.isNull()) { PARSE_PANIC(soFileName_, nCurLine, "Missing 'Base_segment' lines for contig"); } // end of asm lines should mean start of base lines if ((soFirstToken == "Base_segment") || (soFirstToken == "Base_segment*")) { inputState = ExpectBase; putBackLine(soLine); break; } // discard these "unpadded" (i.e. no "*") versions if (soFirstToken == "Assembled_from") break; // only one thing left it can legally be if (soFirstToken != "Assembled_from*") { PARSE_PANIC_TOKEN(soFileName_, nCurLine, soFirstToken); } nAsmLines++; // got one { RWCString soReadName = tokNext(); if (soReadName.isNull()) { PARSE_PANIC(soFileName_, nCurLine, "Missing filename on Assembled_from* line"); } bool bComp = removeCompFromFilename(soReadName); // get alignment of this fragment RWCString soStartPos = tokNext(); RWCString soEndPos = tokNext(); int nStart, nEnd, nResult; nResult = sscanf(soStartPos,"%d",&nStart); if (nResult != 1) { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soStartPos); } nResult = sscanf(soEndPos,"%d",&nEnd); if (nResult != 1) { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soEndPos); } // has a fragment of this name already been added to // the assembly? if (!bNoFragments ) { LocatedFragment* pOldLocF = pNewContig->pLocFragGetByName(soReadName); if (pOldLocF) { ostrstream ost; ost << "Error in file " << soFileName_ << " at line " << nCurLine << ": " << endl << "Fragment " << soReadName << " has already been included in this contig" << endl << ends; InputDataError ide(ost.str()); throw ide; } } if (!bNoFragments ) { // new up a located fragment // note that the dna sequence and clipping come later LocatedFragment* pLf = new LocatedFragment(soReadName, nStart, // start aligned at this pos in cons bComp, pNewContig); // add it to current contig pNewContig->addLocatedFragment(pLf); FragmentAndEndPosition *pFragAndEnd = new FragmentAndEndPosition( soReadName, nEnd ); pListOfFragmentsAndEndPositions->daFragmentsAndEndPositions_.insert( pFragAndEnd ); } } break; case ExpectBase: // blank line means expect DNA sequence of read if (soFirstToken.isNull()) { // sort all of the base segments for this contig qsort( pBaseSegmentsToSort, nNumberOfBaseSegmentsInContig, sizeof( baseSegmentsToSortType ), ( int(*)(const void*, const void*) )&nCompareBaseSegmentsToSort ); pNewContig->baseSegArray_.resize( (size_t) nNumberOfBaseSegmentsInContig ); for( int n = 0; n < nNumberOfBaseSegmentsInContig; ++n ) { baseSegmentsToSortType* pOneBaseSegmentToSort = pBaseSegmentsToSort + n; pNewContig->baseSegArray_.addPtrToNewBaseSeg( pOneBaseSegmentToSort->pBaseSegment ); } inputState = ExpectFragName; // after blank lines, // will be DNA break; } if (soFirstToken == "Base_segment") break; // ignore these if (soFirstToken != "Base_segment*") { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soFirstToken); } // create a BaseSegment object nBaseLines++; // braces around local vars in switch statement case { RWCString soStartInCon = tokNext(); RWCString soEndInCon = tokNext(); RWCString soReadName = tokNext(); // all of these must be present, rest of line discarded if (soStartInCon.isNull() || soEndInCon.isNull() || soReadName.isNull()) { PARSE_PANIC(soFileName_,nCurLine, "Missing token on Base_segment* line"); } // convert strings to integers int nStartInCon, nEndInCon, nResult; nResult = sscanf(soStartInCon,"%d",&nStartInCon); if (nResult != 1) { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soStartInCon); } nResult = sscanf(soEndInCon,"%d",&nEndInCon); if (nResult != 1) { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soEndInCon); } // reality check alignments if (nStartInCon > nEndInCon) { PARSE_PANIC(soFileName_, nCurLine, "Bad alignment in Base_segment* line"); } // remove ".comp" from readname (void )removeCompFromFilename(soReadName); // find the corresponding LocatedFragment, which // _must_ already have been added to the contig // // the input file has Base_segment* and Assembled_from* // lines that refer to the same fragment, identified // only by file name, and separated by an indeterminate // number of lines relating to other fragments. hence // we have to search the contig's located fragments // by name to get the located fragment's pointer. LocatedFragment* pLocF; if (!bNoFragments ) { pLocF = pNewContig->pLocFragGetByName(soReadName); if (! pLocF) { // this means we couldn't find a fragment of that name // since the base segments are assumed to follow // the assembled from lines, this is an error PARSE_PANIC(soFileName_, nCurLine, "Unrecognized fragment name in Base_segment* line"); } } // add a new base segment to the contig's array of them if (!bNoBaseSeg ) { BaseSegment* pNewBaseSeg = new BaseSegment(pLocF, nStartInCon, nEndInCon); // pBaseSegmentsToSort starts pointing to the // position of the first base segment to add // Hence don't increment nNumberOfBaseSegmentsInContig // until after you have loaded the base segment // into the array baseSegmentsToSortType* pOneBaseSegmentToSort = pBaseSegmentsToSort + nNumberOfBaseSegmentsInContig; ++nNumberOfBaseSegmentsInContig; pOneBaseSegmentToSort->nStartPosition = nStartInCon; pOneBaseSegmentToSort->pBaseSegment = pNewBaseSeg; } } break; case ExpectFragName: if (soFirstToken.isNull()) break; // blank line ignored if (soFirstToken == "CT{" ) { bHasConsensusTags = true; bAtBeginningOfConsensusTags = true; lPositionOfConsensusTags = ifsInFile.tellg(); lPositionOfConsensusTags -= 1; // trial and error lPositionOfConsensusTags -= soLine.length(); break; } if (soFirstToken != "DNA") { PARSE_PANIC(soFileName_, nCurLine, "Expected DNA lines for fragment"); } // second token is fragment name--braces make soFragName disappear // when } is encountered { // pick up the fragment name RWCString soFragName = tokNext(); if (soFragName.isNull()) { PARSE_PANIC(soFileName_, nCurLine, "Missing read name on DNA line"); } // // check if the 'fragment' is really a contig // if (! bNoListOfFragments) { if (listOfFragmentss.daFragmentName_.index( &soFragName ) == RW_NPOS ) { putBackLine(soLine); inputState = ExpectContig; // start fresh break; } } // it's a fragment. fix the name. (void )removeCompFromFilename(soFragName); // pick up the corresponding fragment name and save // because clip data actually comes on later lines if (!bNoFragments ) { pCurLocFrag = pNewContig->pLocFragGetByName(soFragName); if (! pCurLocFrag) { // this means we couldn't find a fragment of that name PARSE_PANIC(soFileName_, nCurLine, "Unrecognized fragment name in Sequence line"); } } } // and set state to expect Fragment dna lines soDna = ""; // clear out the dna holder inputState = ExpectFragDna; break; case ExpectFragDna: if (soFirstToken.isNull() ) { // there should be some DNA in the buffer by now if (soDna.length() == 0) { PARSE_PANIC(soFileName_, nCurLine, "Zero length DNA sequence"); } // end of dna. add accumulated sequence to array if (! bNoSetSequence) pCurLocFrag->setSequence(soDna); // end of DNA. "Sequence" line should follow inputState = ExpectFragSeq; } else { // not null, should be DNA sequence (ACTG's, etc.) soDna += soFirstToken; } break; // case ExpectFragDna case ExpectFragSeq: if (soFirstToken.isNull()) break; // blank line ignored if (soFirstToken != "Sequence") { PARSE_PANIC_TOKEN(soFileName_, nCurLine, soFirstToken); } { // pick up the fragment name RWCString soFragName = tokNext(); (void )removeCompFromFilename(soFragName); // pick up the corresponding fragment name and save // because clip data actually comes on later lines if (!bNoFragments ) { pCurLocFrag = pNewContig->pLocFragGetByName(soFragName); if (! pCurLocFrag) { // this means we couldn't find a fragment of that name PARSE_PANIC(soFileName_, nCurLine, "Unrecognized fragment name in Sequence line"); } } // set flag indicating that we don't yet have a clipping line // for the current located fragment bHaveClippingLineForThisFrag = false; // set the state to expect the Clipping* lines inputState = ExpectClip; } break; case ExpectClip: if (soFirstToken.isNull() ) break; // ignore unpadded clipping lines if (soFirstToken == "Clipping") break; // at this point there is only one legal alternative if (soFirstToken != "Clipping*") { PARSE_PANIC(soFileName_, nCurLine, "Missing Clipping* line for fragment"); } // bracket local variables in switch statement { // pick up the two expected numeric tokens RWCString soStartClip = tokNext(); RWCString soEndClip = tokNext(); if (soStartClip.isNull() || soEndClip.isNull()) { PARSE_PANIC(soFileName_, nCurLine, "Expected start and end fragment clip positions"); } // convert strings to integers int nStartClip, nEndClip, nResult; nResult = sscanf(soStartClip,"%d",&nStartClip); if (nResult != 1) { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soStartClip); } nResult = sscanf(soEndClip,"%d",&nEndClip); if (nResult != 1) { PARSE_PANIC_NUMERIC_TOKEN(soFileName_, nCurLine, soEndClip); } // set flag indicating that we have a clipping line // for the current located fragment bHaveClippingLineForThisFrag = true; // now set the located fragments members. // assumes pCurLocFrag was set to pointer to relevant // LocatedFragment object when we encountered // a Sequence line 2 (?) lines ago if (!bNoFragments ) { int nStartConsPos = pCurLocFrag->nGetConsPosFromFragIndex( nStartClip ); int nEndConsPos = pCurLocFrag->nGetConsPosFromFragIndex( nEndClip ); // this is a kludge just to allow people to read from // the old ace file format // This values really aren't the qual and align clipping // values, but they are close. To get the true values, // rephrap with the new phrap pCurLocFrag->setQualAndAlignClipping( nStartConsPos, nEndConsPos, nStartConsPos, nEndConsPos ); } } inputState = ExpectDescription; break; case ExpectDescription: // allow blank lines if (soFirstToken.isNull() ) break; // Description lines are not required if consed is being // run -nophd (wanted by Sequana) if (soFirstToken != "Description" ) { if ( ConsEd::pGetConsEd()->bUsingPhdFiles_ ) { PARSE_PANIC( soFileName_, nCurLine, "There must be a Description line following the Clipping* line in the ace file. This may be missing because you didn't use phredPhrap and phd2fasta. These programs came with consed" ); } else { putBackLine( soLine ); inputState = ExpectFragName; break; } } if (!bNoFragments ) parseDescriptionLine( soLine, pCurLocFrag, nCurLine ); inputState = ExpectFragName; break; default: assert(false); // program error break; } // switch // all break's end up here, and then loop around // in while statement if ( bAtBeginningOfConsensusTags ) break; } // while not end of file // handle end-of-file condition if (inputState == ExpectContDna) { if (soDna.length() == 0) { PARSE_PANIC(soFileName_, nCurLine, "Zero length DNA sequence"); } else { // end of dna. add accumulated sequence to contig if (! bNoSetSequence) pNewContig->setSequence(soDna); } } // handle end of file with dna seq still unstored if (inputState == ExpectFragDna) { if (soDna.length() == 0) { PARSE_PANIC(soFileName_, nCurLine, "Zero length DNA sequence"); } else { // end of dna. add accumulated sequence to contig if (! bNoSetSequence) pCurLocFrag->setSequence(soDna); } } // handle end of file with dna seq still unstored free( (char*) pBaseSegmentsToSort ); cleanUpAfterParsingAceFileFormat1(); } // ctor for PhrapAceFile void Assembly :: cleanUpAfterParsingAceFileFormat1() { // clean up any discontiguous base segment arrays for (int nCtg = 0; nCtg < dapContigs_.length(); nCtg++) { dapContigs_[nCtg]->baseSegArray_.forceSegsToBeContiguous(); } int nContig; // loop through all fragments in assembly for (nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); pContig->updateLastDisplayableContigPos(); } // check that the phrap end align positions (in the Assembled_from* lines) // match those calculated by using the start align position and the // sequence length for (nContig = 0; nContig < nNumContigs(); nContig++) { Contig* pContig = pGetContig( nContig ); int nNumFrags = pContig->nGetNumberOfFragsInContig(); for (int nFrag = 0; nFrag < nNumFrags; nFrag++) { // get pointer to this located frag from contig LocatedFragment* pLocFrag = pContig->pLocatedFragmentGet(nFrag); RWCString soFragmentName = pLocFrag->soGetFragmentName(); FragmentAndEndPosition fragAndEndPosition( soFragmentName ); FragmentAndEndPosition* pFragAndEnd = pListOfFragmentsAndEndPositions->daFragmentsAndEndPositions_.find( &fragAndEndPosition ); assert( pLocFrag->nGetAlignEnd() == ( pFragAndEnd->nEndPosition_ ) ); } } // Now that the check is completed, free up the memory. delete pListOfFragmentsAndEndPositions; } #define PARSE_PANIC2( file, nLine, soMessage, soLine ) \ { ostrstream ost; \ ost << "Corrupted ace file detected by consed source file " \ << __FILE__ << " at " << __LINE__ <setChromatFilename( filChromatFile ); } else if (soNextToken == "PHD_FILE:" ) { RWCString soTemp = tokNext(); FileName filPHDFile = soTemp; pCurLocFrag->setPHDFilename( filPHDFile ); bFoundPhdFile = true; } else if (soNextToken == "TIME:" ) { RWCString soDayOfWeek = tokNext(); RWCString soMonth = tokNext(); RWCString soDayOfMonth = tokNext(); RWCString soTime = tokNext(); RWCString soTimeZone = tokNext(); RWCString soYear = tokNext(); pCurLocFrag->setPHDTimestamp( soDayOfWeek + " " + soMonth + " " + soDayOfMonth + " " + soTime + " " + soTimeZone + soYear ); bFoundTime = true; } } if (!bFoundPhdFile && ConsEd::pGetConsEd()->bUsingPhdFiles_ ) { PARSE_PANIC2( soFileName_, nCurLine, "There must be a PHD_FILE: (filename) on this description line. This may be missing because you didn't use phredPhrap and phd2fasta. These programs came with consed", soLine ); } if (! bFoundTime && ConsEd::pGetConsEd()->bUsingPhdFiles_ ) { PARSE_PANIC2( soFileName_, nCurLine, "There must be a TIME: (timestamp) on this description line. This may be missing because you didn't use phredPhrap and phd2fasta. These programs came with consed", soLine ); } } void Assembly :: firstPassThroughAceFile( ifstream& ifs, listOfFragments& listOfFragmentss, int& nLinesInAceFile, int& nBaseSegments ) { const RWSize_2T rwsizeInitialLineSize = 200; RWCString soLine( rwsizeInitialLineSize ); nLinesInAceFile = 0; while( soLine.readLine( ifs, false ) ) { ++nLinesInAceFile; // tokenize input line RWCTokenizer tokNext(soLine); // pull off the first token RWCString soFirstToken = tokNext(); if (soFirstToken == "Assembled_from*" ) { RWCString* psoFragmentName = new RWCString( tokNext() ); listOfFragmentss.daFragmentName_.insert( psoFragmentName ); } else if (soFirstToken == "Base_segment*" ) ++nBaseSegments; } } void Assembly :: processBaseQualityLine( const RWCString& soLine, Contig* pCurrentContig, int& nLastBasePosition ) { // tokenize input line RWCTokenizer tokNext(soLine); RWCString soQuality; while( ! (soQuality = tokNext()).isNull() ) { unsigned char ucQuality = atoi( soQuality.data() ); ++nLastBasePosition; // move past any pads while( pCurrentContig->ntGetConsUnsafe( nLastBasePosition ).bIsPad() ) ++nLastBasePosition; pCurrentContig->setQualityAtSeqPos( nLastBasePosition, ucQuality ); } }