/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.
#
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
#
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "nextPhredPipeline.h"
#include    "soLine.h"
// System headers for the libc/POSIX calls made in this file:
// fopen/fgets/fputs/fdopen/snprintf, strlen/strerror, system, errno,
// opendir/readdir, open, flock, close/sleep/_exit.
#include    <stdio.h>
#include    <string.h>
#include    <stdlib.h>
#include    <errno.h>
#include    <sys/types.h>
#include    <dirent.h>
#include    <fcntl.h>
#include    <sys/file.h>
#include    <unistd.h>
#include    "mbt_exception.h"
#include    "rwctokenizer.h"
#include    "bIsNumericMaybeWithWhitespace.h"
#include    "mbtValVectorOfBool.h"
#include    "mbtValOrderedVectorOfInt.h"
#include    "soGetDateTime.h"
#include    "soGetErrno.h"
#include    "mbtValOrderedVectorOfRWCString.h"


#ifdef SOLARIS

// When SOLARIS is defined every method below is an empty stub and every
// check returns true.

void nextPhredPipeline :: addFlowcells( FileName& filAddFlowcells ) {}

void nextPhredPipeline :: readControlFile() {}

void nextPhredPipeline :: unlockAndCloseControlFile() {}

void nextPhredPipeline :: writeToControlFile( const RWCString& soLineToWrite ) {}

void nextPhredPipeline :: openAndLockControlFile() {}

bool nextPhredPipeline :: bCheckTilesPerLane( const RWCString& soTilesPerLane ) {
    return true;
}

bool nextPhredPipeline :: bCheckCrefFile( const FileName& filCrefFile ) {
    return true;
}

bool nextPhredPipeline :: bCheckFirstReadLength( const RWCString& soFirstReadLength ) {
    return true;
}

bool nextPhredPipeline :: bCreateRunDirectory( const FileName& filRunDirectory ) {
    return true;
}

bool nextPhredPipeline :: bCheckImagesAndLanes( FileName& filImages,
                                                const RWCString& soListOfLanes ) {
    return true;
}

#else


// Reads flowcell descriptions from the file filAddFlowcells and appends
// each one that passes validation to the control file, prefixed with the
// current date/time.
//
// The control file is opened and exclusively locked for the whole
// operation.  Lines starting with "#" are skipped.  Lines that do not
// start with the keyword "Flowcell", that name a flowcell already present
// in the control file, or that fail any of the bCheck*/bCreate* checks
// are reported on cerr and skipped.
//
// Executes THROW_FILE_ERROR if filAddFlowcells cannot be opened (the
// control file lock is released first).

void nextPhredPipeline :: addFlowcells( FileName& filAddFlowcells ) {

    cerr << "in nextPhredPipeline::addFlowcells" << endl;

    openAndLockControlFile();
    readControlFile();

    FILE* pAddFlowcells = fopen( filAddFlowcells.data(), "r" );
    if ( !pAddFlowcells ) {
        // Do not leave the control file locked when bailing out.
        // errno is preserved in case THROW_FILE_ERROR reports it.
        const int nSavedErrno = errno;
        unlockAndCloseControlFile();
        errno = nSavedErrno;
        THROW_FILE_ERROR( filAddFlowcells );
    }

    while( fgets( soLine.data(), nMaxLineSize, pAddFlowcells ) ) {
        soLine.nCurrentLength_ = strlen( soLine.data() );

        if ( soLine.bStartsWith( "#" ) ) {
            continue;
        }

        // if reached here, the line should look like this:
        //
        // Flowcell (name) (list of lanes in form 3,4 or 1-8) (full path of images) (full path of run directory) (1st read length) (cref file) (# of tiles/lane)
        // 0        1      2                                  3                     4                            5                 6           7

        RWCTokenizer tok( soLine );

        RWCString soFlowcellKeyword = tok();   // "Flowcell"
        if ( soFlowcellKeyword != "Flowcell" ) {
            cerr << "line doesn't start with Flowcell:\n" << soLine << endl;
            continue;
        }

        RWCString soFlowcell = tok();
        if ( aFlowcellsAlreadyInControlFile_.bContains( soFlowcell ) ) {
            cerr << "control file already contains flowcell "
                 << soFlowcell << " so not adding line " << soLine << endl;
            continue;
        }

        RWCString soListOfLanes = tok();
        FileName filImages = (FileName) tok();
        FileName filRunDirectory = (FileName) tok();
        RWCString soFirstReadLength = tok();
        FileName filCrefFile = (FileName) tok();
        RWCString soTilesPerLane = tok();

        // check that images exist (this may append "/" to filImages)
        if ( !bCheckImagesAndLanes( filImages, soListOfLanes ) ) {
            continue;
        }

        if ( !bCreateRunDirectory( filRunDirectory ) ) {
            continue;
        }

        if ( !bCheckFirstReadLength( soFirstReadLength ) ) {
            continue;
        }

        if ( !bCheckCrefFile( filCrefFile ) ) {
            continue;
        }

        if ( !bCheckTilesPerLane( soTilesPerLane ) ) {
            continue;
        }

        // if reached here, everything is ok

        RWCString soLineToWrite( (size_t) soLine.length() );
        soLineToWrite = soGetDateTime( nColonInMiddle ) + " "
            "Flowcell " + soFlowcell + " " +
            soListOfLanes + " " +
            filImages + " " +
            filRunDirectory + " " +
            soFirstReadLength + " " +
            filCrefFile + " " +
            soTilesPerLane + "\n";

        writeToControlFile( soLineToWrite );

        // Remember the flowcell so that a second line for the same
        // flowcell later in this same input file is not written again.
        aFlowcellsAlreadyInControlFile_.insert( soFlowcell );

        cerr << "wrote line " << soLineToWrite;
    }

    fclose( pAddFlowcells );

    unlockAndCloseControlFile();
}



// Validates a lane list and the image directory tree for those lanes.
//
// soListOfLanes is a comma-separated list whose items are either a single
// lane number or an inclusive range "a-b"; every lane must be between
// 1 and 8 and ranges must be in ascending order.
//
// filImages is the top-level image directory.  A trailing "/" is appended
// to it if missing (the caller's object is modified).  For each selected
// lane the directory <filImages>L<3-digit lane> must exist, and each of
// its entries whose name starts with "C" must be a readable directory
// named like C29.1.
//
// Returns false (after writing a message to cerr) on a malformed lane
// list, an unopenable lane or cycle directory, or a badly named cycle
// directory.  Inconsistent tiff counts between cycles, gaps in the cycle
// numbers, and differing cycle counts between lanes are only reported on
// cerr; they do not change the return value.

bool nextPhredPipeline :: bCheckImagesAndLanes( FileName& filImages,
                                                const RWCString& soListOfLanes ) {

    // NOTE(review): presumably 8 entries indexed 1..8 and initially
    // false -- confirm against mbtValVectorOfBool.
    mbtValVectorOfBool aLanesToUse( (size_t) 8, 1, "aLanesToUse" );

    RWCTokenizer tok( soListOfLanes );
    RWCString soCommaSeparatedGroup;
    while( !( soCommaSeparatedGroup = tok( ',' ) ).bIsNull() ) {

        if ( soCommaSeparatedGroup.bContains( "-" ) ) {
            // range of lanes:  a-b
            RWCTokenizer tokRange( soCommaSeparatedGroup );
            RWCString soDigit1 = tokRange( '-' );
            RWCString soDigit2 = tokRange( '-' );
            RWCString soShouldBeNull = tokRange( '-' );

            if ( soDigit1.bIsNull() ||
                 soDigit2.bIsNull() ||
                 !soShouldBeNull.bIsNull() ) {
                cerr << "lanes format has a hyphen in it but is not \\d-\\d "
                     << soListOfLanes << endl;
                return false;
            }

            int nDigit1;
            int nDigit2;
            if ( !bIsNumericMaybeWithWhitespace( soDigit1, nDigit1 ) ) {
                cerr << "first digit in " << soListOfLanes
                     << " is not numeric" << endl;
                return false;
            }

            if ( !bIsNumericMaybeWithWhitespace( soDigit2, nDigit2 ) ) {
                cerr << "second digit in " << soListOfLanes
                     << " is not numeric" << endl;
                return false;
            }

            if ( ! ( ( 1 <= nDigit1 ) &&
                     ( nDigit1 <= nDigit2 ) &&
                     ( nDigit2 <= 8 ) ) ) {
                cerr << "digit in " << soListOfLanes
                     << " must be in order between 1 and 8" << endl;
                return false;
            }

            // The range is inclusive: "3-5" selects lanes 3, 4 and 5.
            for( int nLane = nDigit1; nLane <= nDigit2; ++nLane ) {
                aLanesToUse.setValue( nLane, true );
            }
        }
        else {
            // single digit
            int nLane;
            if ( !bIsNumericMaybeWithWhitespace( soCommaSeparatedGroup, nLane ) ) {
                cerr << "digit " << soCommaSeparatedGroup << " in "
                     << soListOfLanes << " is not numeric" << endl;
                return false;
            }

            if ( ! ( ( 1 <= nLane ) && ( nLane <= 8 ) ) ) {
                cerr << "digit " << nLane
                     << " must be between 1 and 8" << endl;
                return false;
            }

            aLanesToUse.setValue( nLane, true );
        }
    } // while( !( soCommaSeparatedGroup = tok( ',' ) ).bIsNull() ) {

    // if reached here, the lanes are fine

    // now check the images

    if ( !filImages.bEndsWith( "/" ) ) {
        filImages += "/";
    }

    // -666 means "not yet known"; each is set from the first cycle / lane
    // examined and later ones are compared against it.
    int nNumberOfTiffFilesEachCycle = -666;
    int nNumberOfCyclesEachLane = -666;

    const size_t nMaxPathLength = 2000;

    for( int nLane = 1; nLane <= 8; ++nLane ) {
        if ( !aLanesToUse[ nLane ] ) continue;

        // build <filImages>L<nnn> directly in the string's buffer
        FileName filImageLane( nMaxPathLength );
        const int nCharsWritten = snprintf( filImageLane.data(),
                                            nMaxPathLength,
                                            "%sL%03d",
                                            filImages.data(),
                                            nLane );
        if ( nCharsWritten < 0 ||
             (size_t) nCharsWritten >= nMaxPathLength ) {
            cerr << "image directory path is too long for lane " << nLane
                 << " under " << filImages << endl;
            return false;
        }
        filImageLane.nCurrentLength_ = nCharsWritten;

        DIR* pLaneDir = opendir( filImageLane.data() );
        if ( !pLaneDir ) {
            cerr << "couldn't open directory " + filImageLane << endl;
            return false;
        }

        mbtValOrderedVectorOfInt aCyclesThisLane( (size_t) 200 );
        aCyclesThisLane.soName_ = "aCyclesThisLane";

        struct dirent* pDirent;
        while( ( pDirent = readdir( pLaneDir ) ) ) {

            FileName filCycle( pDirent->d_name );
            if ( !filCycle.bStartsWith( "C" ) ) continue;

            // strip the leading "C" and trailing ".1" to get the cycle number
            FileName filCycleTemp( filCycle );
            filCycleTemp.bStartsWithAndRemove( "C" );
            filCycleTemp.bEndsWithAndRemove( ".1" );

            int nCycle;
            if ( !bIsNumericMaybeWithWhitespace( filCycleTemp, nCycle ) ) {
                cerr << "subdirectory " << filCycle
                     << " is not of form C29.1" << endl;
                closedir( pLaneDir );
                return false;
            }

            aCyclesThisLane.insert( nCycle );

            // now count the tiff files in the cycle subdirectory

            FileName filCycleDir = filImageLane + "/" + filCycle;

            DIR* pCycleDir = opendir( filCycleDir.data() );
            if ( !pCycleDir ) {
                cerr << "couldn't open cycle directory "
                     << filCycleDir << endl;
                closedir( pLaneDir );
                return false;
            }

            int nNumberOfTiffFiles = 0;
            struct dirent* pDirTiff;
            while( ( pDirTiff = readdir( pCycleDir ) ) ) {
                RWCString soFile( pDirTiff->d_name );
                if ( soFile.bEndsWith( ".tif" ) )
                    ++nNumberOfTiffFiles;
            }

            closedir( pCycleDir );

            if ( nNumberOfTiffFilesEachCycle == -666 ) {
                nNumberOfTiffFilesEachCycle = nNumberOfTiffFiles;
            }
            else if ( nNumberOfTiffFiles != nNumberOfTiffFilesEachCycle ) {
                // reported only; does not fail the check
                cerr << "cycle " << filCycleDir << " has "
                     << nNumberOfTiffFiles
                     << " but other cycles have "
                     << nNumberOfTiffFilesEachCycle << endl;
            }
        } // while( ( pDirent = readdir( pLaneDir ) ) ) {

        closedir( pLaneDir );

        aCyclesThisLane.resort();

        // check there are no missing cycles (reported only)

        for( int nCycle = 1; nCycle < aCyclesThisLane.length(); ++nCycle ) {
            if ( aCyclesThisLane[ nCycle - 1 ] + 1 != aCyclesThisLane[ nCycle ] ) {
                cerr << "missing cycle " << aCyclesThisLane[ nCycle - 1 ]
                     << " in lane " << filImageLane << endl;
            }
        }

        if ( nNumberOfCyclesEachLane == -666 ) {
            nNumberOfCyclesEachLane = aCyclesThisLane.length();
        }
        else if ( nNumberOfCyclesEachLane != aCyclesThisLane.length() ) {
            // reported only; does not fail the check
            cerr << "lane " << nLane << " has "
                 << aCyclesThisLane.length()
                 << " while earlier lanes have "
                 << nNumberOfCyclesEachLane
                 << " cycles " << endl;
        }
    } // for( int nLane = 1; nLane <= 8; ++nLane ) {

    return true;
} // bool nextPhredPipeline :: bCheckImagesAndLanes( FileName& filImages,



// Makes sure the run directory exists, creating it (and any missing
// parents) with "mkdir -p" if it does not.
//
// Returns true if the directory already existed or exists after the
// mkdir; otherwise writes a message to cerr and returns false.
//
// NOTE(review): the path is pasted unquoted into a shell command, so
// shell metacharacters in filRunDirectory are interpreted by the shell.

bool nextPhredPipeline :: bCreateRunDirectory( const FileName& filRunDirectory ) {

    if ( filRunDirectory.bFileByThisNameExists() ) {
        return true;
    }

    RWCString soCommand = "mkdir -p " + filRunDirectory;

    int nRetStat = system( soCommand.data() );
    if ( nRetStat != 0 ) {
        // capture errno before any further library calls can change it
        const int nSystemErrno = errno;

        // how will we display this error message?

        cerr << "when creating run directory " << filRunDirectory
             << " error " << nRetStat;

        // system() returns a wait status, not an errno value; errno is
        // only meaningful when system() itself failed and returned -1.
        if ( nRetStat == -1 ) {
            cerr << " which means " << strerror( nSystemErrno );
        }
        cerr << " when running mkdir. See xterm for details." << endl;
        return false;
    }

    // check that the directory now exists

    if ( !filRunDirectory.bFileByThisNameExists() ) {
        cerr << "tried to create run directory " << filRunDirectory
             << " but was unsuccessful" << endl;
        return false;
    }

    return true;
}



// Returns true if soFirstReadLength is numeric and between 20 and 150
// inclusive; otherwise writes a message to cerr and returns false.

bool nextPhredPipeline :: bCheckFirstReadLength( const RWCString& soFirstReadLength ) {

    int nFirstReadLength;
    if ( !bIsNumericMaybeWithWhitespace( soFirstReadLength, nFirstReadLength ) ) {
        cerr << "first read length: " << soFirstReadLength
             << " is not numeric" << endl;
        return false;
    }

    if ( nFirstReadLength < 20 || nFirstReadLength > 150 ) {
        cerr << "first read length, " << nFirstReadLength
             << " is either too small or too large" << endl;
        return false;
    }

    return true;
}



// filCrefFile is a file name prefix.  Returns true if both
// <filCrefFile>.cref and <filCrefFile>.phast exist; otherwise writes a
// message to cerr and returns false.

bool nextPhredPipeline :: bCheckCrefFile( const FileName& filCrefFile ) {

    FileName filCrefFileCref = filCrefFile + ".cref";
    FileName filCrefFilePhast = filCrefFile + ".phast";

    bool bOK = filCrefFileCref.bFileByThisNameExists() &&
               filCrefFilePhast.bFileByThisNameExists();

    if ( !bOK ) {
        cerr << "cref file " << filCrefFileCref << " does not exist or "
             << filCrefFilePhast << " does not exist" << endl;
        return false;
    }

    return true;
}



// Returns true if soTilesPerLane is numeric and between 1 and 120
// inclusive; otherwise writes a message to cerr and returns false.

bool nextPhredPipeline :: bCheckTilesPerLane( const RWCString& soTilesPerLane ) {

    int nTilesPerLane;
    if ( !bIsNumericMaybeWithWhitespace( soTilesPerLane, nTilesPerLane ) ) {
        cerr << "tiles per lane " << soTilesPerLane
             << " is not numeric" << endl;
        return false;
    }

    if ( nTilesPerLane < 1 || nTilesPerLane > 120 ) {
        cerr << "tiles per lane " << nTilesPerLane
             << " is either too large or too small" << endl;
        return false;
    }

    return true;
}



// Opens filControlFile_ read/write (creating it if necessary), takes an
// exclusive flock() on it, and wraps the descriptor in the stdio stream
// pControlFile_.
//
// Executes THROW_FILE_ERROR if the file cannot be opened or the stream
// cannot be created.  If flock() keeps failing, the process is terminated
// with _exit( 1 ).

void nextPhredPipeline :: openAndLockControlFile() {

    // open() requires the mode argument whenever O_CREAT is given;
    // 0666 is further restricted by the process umask.
    nFDControlFile_ = open( filControlFile_.data(), O_CREAT | O_RDWR, 0666 );
    if ( nFDControlFile_ == -1 ) {
        THROW_FILE_ERROR( filControlFile_ );
    }

    int nTimesTried = 0;
    while( flock( nFDControlFile_, LOCK_EX ) != 0 ) {
        if ( nTimesTried > 3 ) {
            cerr << "giving up" << endl;
            _exit( 1 );
        }

        cerr << "failed to get lock-=will try 3 times" << endl;
        ++nTimesTried;
        sleep( 1 );
    }

    pControlFile_ = fdopen( nFDControlFile_, "r+" );
    if ( !pControlFile_ ) {
        // Closing the descriptor also drops the lock just taken.
        // errno is preserved in case THROW_FILE_ERROR reports it.
        const int nSavedErrno = errno;
        close( nFDControlFile_ );
        nFDControlFile_ = -1;
        errno = nSavedErrno;
        THROW_FILE_ERROR( filControlFile_ );
    }
}



// Appends soLineToWrite to the end of the control file stream opened by
// openAndLockControlFile().  Executes THROW_ERROR2 if the stream cannot
// be positioned at end of file.

void nextPhredPipeline :: writeToControlFile( const RWCString& soLineToWrite ) {

    // position to end of file

    if ( fseek( pControlFile_, 0, SEEK_END ) != 0 ) {
        THROW_ERROR2( "couldn't seek to end of control file" );
    }

    fputs( soLineToWrite.data(), pControlFile_ );
}



// Flushes pending writes, releases the flock() taken by
// openAndLockControlFile(), and closes the control file.  Failures are
// reported on cerr.

void nextPhredPipeline :: unlockAndCloseControlFile() {

    // Push buffered data into the file while the lock is still held, so
    // another process cannot lock the file and miss these lines.
    if ( fflush( pControlFile_ ) != 0 ) {
        cerr << "failed to flush control file " << filControlFile_
             << " " << soGetErrno() << endl;
    }

    if ( flock( nFDControlFile_, LOCK_UN ) != 0 ) {
        cerr << "failed to unlock control file " << filControlFile_
             << " " << soGetErrno() << endl;
    }

    // fclose() also closes the descriptor the stream was created from
    // with fdopen(), so nFDControlFile_ must not be close()d again.
    fclose( pControlFile_ );
    pControlFile_ = NULL;
    nFDControlFile_ = -1;
}



// Rebuilds aFlowcellsAlreadyInControlFile_ from the control file stream,
// reading from the stream's current position to end of file.
//
// Lines starting with "#" and lines whose second token is not "Flowcell"
// are ignored.  Only the flowcell name (third token) is recorded.

void nextPhredPipeline :: readControlFile() {

    aFlowcellsAlreadyInControlFile_.clear();
    aFlowcellsAlreadyInControlFile_.soName_ = "aFlowcellsAlreadyInControlFile_";

    while( fgets( soLine.data(), nMaxLineSize, pControlFile_ ) ) {
        soLine.nCurrentLength_ = strlen( soLine.data() );

        if ( soLine.bStartsWith( "#" ) ) {
            continue;
        }

        // if reached here, the line should look like this:
        //
        // date:time Flowcell (name) (list of lanes in form 3,4 or 1-8) (full path of images) (full path of run directory) (1st read length) (cref file) (# of tiles/lane)
        // 0         1        2      3                                  4                     5                            6                 7           8

        RWCTokenizer tok( soLine );

        RWCString soDateTime = tok();
        RWCString soFlowcellKeyword = tok();   // "Flowcell"
        if ( soFlowcellKeyword != "Flowcell" ) {
            continue;
        }

        RWCString soFlowcell = tok();
        aFlowcellsAlreadyInControlFile_.insert( soFlowcell );

        // the remaining fields (3-8) are not needed here
    }
}

#endif