/***************************************************************************** # Copyright (C) 1994-2008 by David Gordon. # All rights reserved. # # This software is part of a beta-test version of the Consed/Autofinish # package. It should not be redistributed or # used for any commercial purpose, including commercially funded # sequencing, without written permission from the author and the # University of Washington. # # This software is provided ``AS IS'' and any express or implied # warranties, including, but not limited to, the implied warranties of # merchantability and fitness for a particular purpose, are disclaimed. # In no event shall the authors or the University of Washington be # liable for any direct, indirect, incidental, special, exemplary, or # consequential damages (including, but not limited to, procurement of # substitute goods or services; loss of use, data, or profits; or # business interruption) however caused and on any theory of liability, # whether in contract, strict liability, or tort (including negligence # or otherwise) arising in any way out of the use of this software, even # if advised of the possibility of such damage. # # Building Consed from source is error prone and not simple which is # why I provide executables. Due to time limitations I cannot # provide any assistance in building Consed. Even if you do not # modify the source, you may introduce errors due to using a # different version of the compiler, a different version of motif, # different versions of other libraries than I used, etc. For this # reason, if you discover Consed bugs, I can only offer help with # those bugs if you first reproduce those bugs with an executable # provided by me--not an executable you have built. # # Modifying Consed is also difficult. Although Consed is modular, # some modules are used by many other modules. Thus making a change # in one place can have unforeseen effects on many other features. # It may takes months for you to notice these other side-effects # which may not seen connected at all. It is not feasable for me to # provide help with modifying Consed sources because of the # potentially huge amount of time involved. # #*****************************************************************************/ #include "testReadPhdBall.h" #include using namespace std; #include "mbt_exception.h" #include "soLine.h" #include "readsSortedByReadName.h" #include #include "locatedFragment.h" #include #include "findQueryWithinSubject.h" void testReadPhdBall :: doIt() { RWCString soQuery = "ttcaaagcaccat"; // RWCString soSubject = "ttcaaggcaccat"; RWCString soSubject = "ttcaaagcacttcacaatgggtcccacacacctcctaattcaaggcaccattttcagagtccaacccccagcgggctgcatcctgccctggggcccactaagtagctacttcaaagcaccat"; bool bFoundMatch = false; int n0SubjectStart; int n0SubjectEnd; int nScore; findQueryWithinSubject( soQuery, soSubject, 3, // gap 1, // match 2, // mismatch 5, // minscore 10, // max indels bFoundMatch, n0SubjectStart, n0SubjectEnd, nScore ); cerr << "bFoundMatch = " << szPrintBool( bFoundMatch ) << " n0SubjectStart = " << n0SubjectStart << " n0SubjectEnd = " << n0SubjectEnd << " nScore = " << nScore << endl; _exit( 0 ); readReadList(); time_t timeStart = time( NULL ); pPhdBall_ = fopen( filPhdBall_.data(), "r" ); if ( !pPhdBall_ ) { THROW_FILE_ERROR( filPhdBall_ ); } nLine_ = 0; justReadForBeginSequence(); fclose( pPhdBall_ ); time_t timeEnd = time( NULL ); double dTotalTime = difftime( timeEnd, timeStart ); cerr << "time: " << dTotalTime << endl; } void testReadPhdBall :: justReadForBeginSequence() { int nNumberOfReadsRead = 0; int nNumberOfReadsFound = 0; while( fgets( soLine.data(), nMaxLineSize, pPhdBall_ ) != NULL ) { if ( memcmp( soLine.data(), "BEGIN_SEQUENCE", 14 ) == 0 ) { soLine.nCurrentLength_ = strlen( soLine.data() ); size_t nPos = 15; RWCString soReadName = soLine.soGetNextToken( nPos ); ++nNumberOfReadsRead; if ( aReadList_.bContains( soReadName ) ) ++nNumberOfReadsFound; // if ( nNumberOfReads % 100 == 0 ) { // cout << soReadName << endl; // } } } cerr << "read read: " << nNumberOfReadsRead << " found: " << nNumberOfReadsFound << endl; } void testReadPhdBall :: readReadList() { FileName filReadList = "read_list.txt"; FILE* pReadList = fopen( filReadList.data(), "r" ); if ( !pReadList ) { THROW_FILE_ERROR( filReadList ); } while( fgets( soLine.data(), nMaxLineSize, pReadList ) != NULL ) { soLine.nCurrentLength_ = strlen( soLine.data() ); soLine.stripTrailingWhitespaceFast(); aReadList_.insert( soLine ); } fclose( pReadList ); aReadList_.resort(); cerr << "done sorting read list" << endl; } void testReadPhdBall :: timeSort() { const int nNumberOfReads = 10000000; readsSortedByReadName aManyReads( (size_t) nNumberOfReads ); RWCString soReadName( (size_t) 10 ); for( int nRead = 0; nRead < nNumberOfReads; ++nRead ) { int nRandom = rand(); soReadName.nCurrentLength_ = sprintf( soReadName.data(), "fixed%.4d_%d", nRandom, nRead ); LocatedFragment* pLocFrag = new LocatedFragment( soReadName, 0, false, NULL ); if ( nRead % 1000000 == 0 ) { cerr << "created " << nRead << " reads " << endl; cerr.flush(); } aManyReads.insert( pLocFrag ); } cerr << "about to sort" << endl; cerr.flush(); time_t timeStart = time( NULL ); aManyReads.resort(); time_t timeEnd = time( NULL ); double dTotalTime = difftime( timeEnd, timeStart ); cerr << "time: " << dTotalTime << endl; cerr.flush(); }