/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "nextPhredPipeline.h"
#include    "soLine.h"
#include    <dirent.h>
#include    <errno.h>
#include    "mbt_exception.h"
#include    "rwctokenizer.h"
#include    "bIsNumericMaybeWithWhitespace.h"
#include    "mbtValVectorOfBool.h"
#include    "mbtValOrderedVectorOfInt.h"
#include    "soGetDateTime.h"
#include    <sys/file.h>
#include    <sys/wait.h>
#include    <unistd.h>
#include    "soGetErrno.h"
#include    "mbtValOrderedVectorOfRWCString.h"

#ifdef SOLARIS

// SOLARIS build: every nextPhredPipeline member defined in this file is
// compiled as a stub.  The void members do nothing, and the bCheck... /
// bCreate... members return true without examining their arguments.

void nextPhredPipeline :: addFlowcells( FileName& filAddFlowcells ) {}
void nextPhredPipeline :: readControlFile() {}
void nextPhredPipeline :: unlockAndCloseControlFile() {}
void nextPhredPipeline :: writeToControlFile( const RWCString& soLineToWrite ) {}
void nextPhredPipeline :: openAndLockControlFile() {}
bool nextPhredPipeline :: bCheckTilesPerLane( const RWCString& soTilesPerLane ) {
   return true;}
bool nextPhredPipeline :: bCheckCrefFile( const FileName& filCrefFile ) {
   return true; }
bool nextPhredPipeline :: bCheckFirstReadLength( const RWCString& soFirstReadLength ) {
   return true; }
bool nextPhredPipeline :: bCreateRunDirectory( const FileName& filRunDirectory ) {
   return true; }
bool nextPhredPipeline :: bCheckImagesAndLanes( FileName& filImages, 
                                                const RWCString& soListOfLanes ) {
   return true; }



#else


// Reads flowcell descriptions from the file filAddFlowcells and appends
// every valid flowcell that is not already known to the control file.
//
// Each input line that does not start with "#" must look like:
//
// Flowcell (name) (list of lanes in form 3,4 or 1-8) (full path of images) (full path of run directory) (1st read length) (cref file) (# of tiles/lane)
// 0         1     2                                  3                     4                            5                 6           7
//
// A line is skipped (with a message on cerr from this function or from the
// failing check) when it does not start with "Flowcell", when its flowcell
// name is already recorded, or when any of the bCheck.../bCreate... checks
// returns false.  Accepted lines are written to the control file prefixed
// with the current date/time.  filImages is handed to bCheckImagesAndLanes
// by non-const reference, so the images path that is written is whatever
// that call leaves in it.
//
// The control file is locked for the duration of the call.  If the input
// file cannot be opened THROW_FILE_ERROR is raised before the control file
// is touched; if anything throws afterwards, the input file is closed and
// the control file is unlocked and closed before the exception propagates.
void nextPhredPipeline :: addFlowcells( FileName& filAddFlowcells ) {

   cerr << "in nextPhredPipeline::addFlowcells" << endl;

   // Open the input first so that a missing input file is reported
   // without ever taking the control-file lock.

   FILE* pAddFlowcells = fopen( filAddFlowcells.data(), "r" );
   if ( !pAddFlowcells ) {
      THROW_FILE_ERROR( filAddFlowcells );
   }

   bool bControlFileLocked = false;

   try {

      openAndLockControlFile();
      bControlFileLocked = true;

      readControlFile();

      while( fgets( soLine.data(), nMaxLineSize, pAddFlowcells ) ) {
         soLine.nCurrentLength_ = strlen( soLine.data() );

         if ( soLine.bStartsWith( "#" ) ) {
            continue;
         }

         // if reached here, should start with Flowcell

         RWCTokenizer tok( soLine );
         RWCString soFlowcellKeyword = tok();  // "Flowcell"
         if ( soFlowcellKeyword != "Flowcell" ) {
            cerr << "line doesn't start with Flowcell:\n" << 
               soLine << endl;
            continue;
         }

         RWCString soFlowcell = tok();

         if ( aFlowcellsAlreadyInControlFile_.bContains( soFlowcell ) ) {
            cerr << "control file already contains flowcell " << soFlowcell <<
               " so not adding line " << soLine << endl;
            continue;
         }

         RWCString soListOfLanes = tok();
         FileName filImages = (FileName) tok();
         FileName filRunDirectory = (FileName) tok();
         RWCString soFirstReadLength = tok();
         FileName filCrefFile = (FileName) tok();
         RWCString soTilesPerLane = tok();

         // each check reports its own problem on cerr

         if ( !bCheckImagesAndLanes( filImages, soListOfLanes ) ) {
            continue;
         }

         if ( !bCreateRunDirectory( filRunDirectory ) ) {
            continue;
         }

         if ( !bCheckFirstReadLength( soFirstReadLength ) ) {
            continue;
         }

         if ( !bCheckCrefFile( filCrefFile ) ) {
            continue;
         }

         if ( !bCheckTilesPerLane( soTilesPerLane ) ) {
            continue;
         }

         // if reached here, everything is ok

         RWCString soLineToWrite( (size_t) soLine.length() );

         soLineToWrite = 
            soGetDateTime( nColonInMiddle ) + " Flowcell " + 
            soFlowcell + " " + 
            soListOfLanes + " " +
            filImages + " " +
            filRunDirectory + " " +
            soFirstReadLength + " " +
            filCrefFile + " " +
            soTilesPerLane + "\n";

         writeToControlFile( soLineToWrite );
         cerr << "wrote line " << soLineToWrite;

         // remember it so that a second line for the same flowcell later
         // in this same input file is rejected rather than written twice

         aFlowcellsAlreadyInControlFile_.insert( soFlowcell );
      }
   }
   catch( ... ) {

      // don't leave the input open or the control file locked

      fclose( pAddFlowcells );
      if ( bControlFileLocked ) {
         unlockAndCloseControlFile();
      }
      throw;
   }

   fclose( pAddFlowcells );

   unlockAndCloseControlFile();
}




// Validates the lane list of a flowcell and inspects its image directories.
//
// soListOfLanes is a comma-separated list whose elements are either a
// single lane number or a range "first-last"; every lane number must be
// between 1 and 8 and a range must be in increasing order.
//
// filImages is the directory that holds one subdirectory per lane, named
// L001 .. L008.  If filImages does not end with "/" one is appended in
// place, so the caller sees the modified path.
//
// For every requested lane the lane directory is scanned for entries whose
// name starts with "C" (cycle directories such as C29.1), and the files
// ending in ".tif" inside each of them are counted.
//
// Returns false, after writing a message to cerr, when the lane list is
// malformed, when a lane or cycle directory cannot be opened, when the
// lane path does not fit the path buffer, or when a "C" entry is not of
// the form C<number>.1.  Differing tiff counts between cycles, gaps in the
// cycle numbering, and differing cycle counts between lanes are reported
// on cerr but do not make the function fail.
bool nextPhredPipeline :: bCheckImagesAndLanes( FileName& filImages,
                                                const RWCString& soListOfLanes ) {

   // NOTE(review): presumably 8 elements indexed from 1 -- confirm against
   // mbtValVectorOfBool.  The code below indexes it with lane numbers 1..8.
   mbtValVectorOfBool aLanesToUse( (size_t) 8, 1, "aLanesToUse" );

   RWCTokenizer tok( soListOfLanes );

   RWCString soCommaSeparatedGroup;
   while( !( soCommaSeparatedGroup = tok(',' ) ).bIsNull() ) {

      if ( soCommaSeparatedGroup.bContains( "-" ) ) {

         // range of lanes:  first-last

         RWCTokenizer tokRange( soCommaSeparatedGroup );

         RWCString soDigit1 = tokRange('-' );
         RWCString soDigit2 = tokRange('-' );
         RWCString soShouldBeNull = tokRange('-');
         if ( soDigit1.bIsNull() ||
              soDigit2.bIsNull() ||
              !soShouldBeNull.bIsNull() ) {

            cerr << "lanes format has a hyphen in it but is not \\d-\\d " << 
               soListOfLanes << endl;

            return false;
         }

         int nDigit1;
         int nDigit2;

         if ( !bIsNumericMaybeWithWhitespace( soDigit1, nDigit1 ) ) {
            cerr << "first digit in " << soListOfLanes << " is not numeric" << endl;
            return false;
         }

         if ( !bIsNumericMaybeWithWhitespace( soDigit2, nDigit2 ) ) {
            cerr << "second digit in " << soListOfLanes << " is not numeric" << endl;
            return false;
         }

         if ( ! ( 
                 ( 1 <= nDigit1 ) &&
                 ( nDigit1 <= nDigit2 ) &&
                 ( nDigit2 <= 8 ) ) ) {
            cerr << "digit in " << soListOfLanes << " must be in order between 1 and 8" << endl;
            return false;
         }

         // the range is inclusive:  1-8 means lanes 1 through 8

         for( int nLane = nDigit1; nLane <= nDigit2; ++nLane ) {
            aLanesToUse.setValue( nLane, true );
         }
      }
      else {

         // single digit 

         int nLane;
         if ( !bIsNumericMaybeWithWhitespace( soCommaSeparatedGroup,
                                              nLane ) ) {

            cerr << "digit " << soCommaSeparatedGroup << " in " <<
               soListOfLanes << " is not numeric" << endl;
            return false;
         }

         if ( ! (
                 ( 1 <= nLane ) &&
                 ( nLane <= 8 ) ) ) {
            cerr << "digit " << nLane << " must be between 1 and 8" << endl;
            return false;
         }

         aLanesToUse.setValue( nLane, true );
      }
   } //    while( !( soCommaSeparatedGroup = tok(',' ) ).bIsNull() ) {

   // if reached here, the lanes are fine

   // now check the images

   if ( !filImages.bEndsWith( "/" ) ) {
      filImages += "/";
   }

   // -666 means "not yet known"; the first cycle / lane seen sets the
   // value that all later ones are compared against

   int nNumberOfTiffFilesEachCycle = -666;
   int nNumberOfCyclesEachLane = -666;

   const int nImageLanePathCapacity = 2000;

   for( int nLane = 1; nLane <= 8; ++nLane ) {
      if ( !aLanesToUse[ nLane ] ) continue;

      FileName filImageLane( (size_t) nImageLanePathCapacity );

      // bounded so that an overlong images path cannot overrun the buffer

      const int nPathLength = snprintf( filImageLane.data(),
                                        nImageLanePathCapacity,
                                        "%sL%03d",
                                        filImages.data(),
                                        nLane );

      if ( nPathLength < 0 || nPathLength >= nImageLanePathCapacity ) {
         cerr << "images directory " << filImages << " is too long" << endl;
         return false;
      }

      filImageLane.nCurrentLength_ = nPathLength;

      DIR* pLaneDir = opendir( filImageLane.data() );
      if ( !pLaneDir ) {
         cerr << "couldn't open directory " + filImageLane << endl;
         return false;
      }

      mbtValOrderedVectorOfInt aCyclesThisLane( (size_t) 200 );
      aCyclesThisLane.soName_ = "aCyclesThisLane";

      struct dirent* pDirent;

      while( ( pDirent = readdir( pLaneDir ) ) != 0 ) {
         FileName filCycle( pDirent->d_name );

         if ( !filCycle.bStartsWith("C" ) ) continue;

         // strip the leading C and trailing .1 to get the cycle number

         FileName filCycleTemp( filCycle );
         filCycleTemp.bStartsWithAndRemove( "C" );
         filCycleTemp.bEndsWithAndRemove( ".1" );

         int nCycle;
         if ( !bIsNumericMaybeWithWhitespace( filCycleTemp, nCycle ) ) {
            cerr << "subdirectory " << filCycle << " is not of form C29.1" 
                 << endl;
            closedir( pLaneDir );
            return false;
         }

         aCyclesThisLane.insert( nCycle );

         // now count the tiff files in this cycle directory

         FileName filCycleDir = filImageLane + "/" + filCycle;

         DIR* pCycleDir = opendir( filCycleDir.data() );
         if ( !pCycleDir ) {
            cerr << "couldn't open cycle directory " << filCycleDir <<
               endl;
            closedir( pLaneDir );
            return false;
         }

         int nNumberOfTiffFiles = 0;
         struct dirent* pDirTiff;
         while( ( pDirTiff = readdir( pCycleDir ) ) != 0 ) {
            RWCString soFile( pDirTiff->d_name );
            if ( soFile.bEndsWith(".tif" ) )
               ++nNumberOfTiffFiles;
         }

         closedir( pCycleDir );

         if ( nNumberOfTiffFilesEachCycle == -666 ) {
            nNumberOfTiffFilesEachCycle = nNumberOfTiffFiles;
         }
         else if ( nNumberOfTiffFiles != nNumberOfTiffFilesEachCycle ) {
            cerr << "cycle " << filCycleDir << " has " << 
               nNumberOfTiffFiles << " but other cycles have " <<
               nNumberOfTiffFilesEachCycle << endl;
         }
      } //  while( ( pDirent = readdir( pLaneDir ) ) != 0 ) {

      closedir( pLaneDir );

      aCyclesThisLane.resort();

      // check there are no missing cycles

      for( int nCycle = 1; nCycle < aCyclesThisLane.length(); ++nCycle ) {

         if ( aCyclesThisLane[ nCycle - 1 ] + 1 != 
              aCyclesThisLane[ nCycle ] ) {

            // the first absent cycle is the one after the last present one

            cerr << "missing cycle " <<  aCyclesThisLane[ nCycle - 1 ] + 1 <<
               " in lane " <<  filImageLane << endl;
         }
      }

      if ( nNumberOfCyclesEachLane == -666 ) {
         nNumberOfCyclesEachLane = aCyclesThisLane.length();
      }
      else if ( nNumberOfCyclesEachLane != aCyclesThisLane.length() ) {
         cerr << "lane " << nLane << " has " << aCyclesThisLane.length() <<
            " while earlier lanes have " << nNumberOfCyclesEachLane << 
            " cycles " << endl;
      }
   } //     for( int nLane = 1; nLane <= 8; ++nLane ) {

   return true;
} //  bool nextPhredPipeline :: bCheckImagesAndLanes( FileName& filImages, 





bool nextPhredPipeline :: bCreateRunDirectory( const FileName& filRunDirectory ) {
   
   if ( !filRunDirectory.bFileByThisNameExists() ) {

      RWCString soCommand = "mkdir -p " + filRunDirectory;

      int nRetStat = system( soCommand.data() );

      if ( nRetStat != 0 ) {
         // how will we display this error message?  
         cerr << "when creating run directory " <<
            filRunDirectory << " error " << nRetStat;
         if ( strerror( nRetStat ) ) {
            cerr << " which means " << strerror( nRetStat );
         }
         cerr << " when running phredPhrap.  See xterm for details." << endl;
         return false;
      }

      // check that the directory now exists

      if ( !filRunDirectory.bFileByThisNameExists() ) {
         cerr << "tried to create run directory " <<
            filRunDirectory << " but was unsuccessful" << endl;
         return false;
      }

   }

   return true;
}

      



// Checks the "1st read length" field of a flowcell line.
//
// Returns true when soFirstReadLength is numeric (as judged by
// bIsNumericMaybeWithWhitespace) and its value is between 20 and 150
// inclusive.  Otherwise writes the reason to cerr and returns false.
bool nextPhredPipeline :: bCheckFirstReadLength( const RWCString& soFirstReadLength ) {

   int nParsedLength;
   const bool bNumeric =
      bIsNumericMaybeWithWhitespace( soFirstReadLength, nParsedLength );

   if ( !bNumeric ) {
      cerr << "first read length: " << soFirstReadLength
           << " is not numeric" << endl;
      return false;
   }

   const bool bInRange =
      ( 20 <= nParsedLength ) && ( nParsedLength <= 150 );

   if ( bInRange ) {
      return true;
   }

   cerr << "first read length, " << nParsedLength
        << " is either too small or too large" << endl;
   return false;
}



bool nextPhredPipeline :: bCheckCrefFile( const FileName& filCrefFile ) {

   FileName filCrefFileCref  = filCrefFile + ".cref";
   FileName filCrefFilePhast = filCrefFile + ".phast";

   bool bOK =  filCrefFileCref.bFileByThisNameExists() &&
      filCrefFilePhast.bFileByThisNameExists();

   if ( !bOK ) {
      cerr << "cref file " << filCrefFileCref << " does not exist or " << 
         filCrefFilePhast << " does not exist" << endl;
      return false;
   }

   return true;

}


// Checks the "# of tiles/lane" field of a flowcell line.
//
// Returns true when soTilesPerLane is numeric (as judged by
// bIsNumericMaybeWithWhitespace) and its value is between 1 and 120
// inclusive.  Otherwise writes the reason to cerr and returns false.
bool nextPhredPipeline :: bCheckTilesPerLane( const RWCString& soTilesPerLane ) {

   int nParsedTiles;
   const bool bNumeric =
      bIsNumericMaybeWithWhitespace( soTilesPerLane, nParsedTiles );

   if ( !bNumeric ) {
      cerr << "tiles per lane " << soTilesPerLane
           << " is not numeric" << endl;
      return false;
   }

   const bool bInRange =
      ( 1 <= nParsedTiles ) && ( nParsedTiles <= 120 );

   if ( bInRange ) {
      return true;
   }

   cerr << "tiles per lane " << nParsedTiles
        << " is either too large or too small" << endl;
   return false;
}

      
   
   

// Opens the control file filControlFile_ for reading and writing (creating
// it if it does not exist), takes an exclusive flock() on it, and wraps
// the descriptor in the stdio stream pControlFile_.
//
// On return nFDControlFile_ and pControlFile_ refer to the open, locked
// file.  Raises THROW_FILE_ERROR if the file cannot be opened or the
// stream cannot be created; in the latter case the descriptor is closed
// first, which also drops the lock.  Terminates the process with
// _exit( 1 ) if flock() keeps failing.
void nextPhredPipeline :: openAndLockControlFile() {

   // open() requires the mode argument whenever O_CREAT is given;
   // without it a newly created file gets indeterminate permissions.
   // 0666 is still filtered by the process umask.

   nFDControlFile_ = open( filControlFile_.data(), O_CREAT | O_RDWR, 0666 );
   if ( nFDControlFile_ == -1 ) {
      THROW_FILE_ERROR( filControlFile_ );
   }

   // LOCK_EX without LOCK_NB waits for the lock, so a non-zero return is
   // an error rather than "somebody else holds it".  As written, the loop
   // gives up on the fifth consecutive failure.

   int nTimesTried = 0;
   while( flock( nFDControlFile_, LOCK_EX ) != 0 ) {
      if ( nTimesTried > 3 ) {
         cerr << "giving up" << endl;
         _exit( 1 );
      }
      cerr << "failed to get lock-=will try 3 times" << endl;
      ++nTimesTried;
      sleep( 1 );
   }

   pControlFile_ = fdopen( nFDControlFile_, "r+" );
   if ( !pControlFile_ ) {

      // don't leak the locked descriptor; keep fdopen's errno intact in
      // case the error macro reports it

      const int nSavedErrno = errno;
      close( nFDControlFile_ );
      nFDControlFile_ = -1;
      errno = nSavedErrno;

      THROW_FILE_ERROR( filControlFile_ );
   }
}



// Appends soLineToWrite to the open control file stream pControlFile_.
//
// The stream is positioned at end of file, the text is written as given
// (no newline is added), and the stream is flushed so that the data is
// handed to the file before this function returns rather than whenever
// the stream is eventually closed.
//
// Raises THROW_ERROR2 if the seek, the write, or the flush fails.
void nextPhredPipeline :: writeToControlFile( const RWCString& soLineToWrite ) {

   // position to end of file

   if ( fseek( pControlFile_, 0, SEEK_END ) != 0 ) {
      THROW_ERROR2( "couldn't seek to end of control file" );
   }

   if ( fputs( soLineToWrite.data(), pControlFile_ ) == EOF ) {
      THROW_ERROR2( "couldn't write to control file" );
   }

   if ( fflush( pControlFile_ ) != 0 ) {
      THROW_ERROR2( "couldn't flush control file" );
   }
}


   

// Flushes, unlocks and closes the control file.
//
// Failures are reported on cerr and otherwise ignored.  Afterwards
// pControlFile_ is null and nFDControlFile_ is -1.
void nextPhredPipeline :: unlockAndCloseControlFile() {

   // Push buffered output to the file while the lock is still held, so
   // that nothing of ours is written after the lock has been released.

   if ( fflush( pControlFile_ ) != 0 ) {
      cerr << "failed to flush control file " << filControlFile_ << " " <<
         soGetErrno() << endl;
   }

   if ( flock( nFDControlFile_, LOCK_UN ) != 0 ) {
      cerr << "failed to unlock control file " << filControlFile_ << " " <<
         soGetErrno() << endl;
   }

   // The stream was created with fdopen() on nFDControlFile_, so fclose()
   // closes that descriptor as well.  It must not be passed to close()
   // again afterwards.

   if ( fclose( pControlFile_ ) != 0 ) {
      cerr << "failed to close control file " << filControlFile_ << " " <<
         soGetErrno() << endl;
   }

   pControlFile_ = 0;
   nFDControlFile_ = -1;
}

      
   
void nextPhredPipeline :: readControlFile() {


   aFlowcellsAlreadyInControlFile_.clear();
   aFlowcellsAlreadyInControlFile_.soName_ = "aFlowcellsAlreadyInControlFile_";
   
   while( fgets( soLine.data(), nMaxLineSize, pControlFile_ ) ) {
      soLine.nCurrentLength_ = strlen( soLine.data() );

      if ( soLine.bStartsWith( "#" ) ) {
         continue;
      }

      // if reached here, should start with Flowcell

      // should look like this:
      //     # looks like:
      // date:time Flowcell (name) (list of lanes in form 3,4 or 1-8) (full path of images) (full path of run directory) (1st read length) (cref file) (# of tiles/lane)
      // 0         1         2      3                                 4                     5                            6                 7           8          
      RWCTokenizer tok( soLine );
      RWCString soDateTime = tok();
      RWCString soFlowcellKeyword = tok();  // "Flowcell"
      if ( soFlowcellKeyword != "Flowcell" ) {
         continue;
      }

      RWCString soFlowcell = tok();

      aFlowcellsAlreadyInControlFile_.insert( soFlowcell );

      RWCString soListOfLanes = tok();
      FileName filImages = (FileName) tok();
      FileName filRunDirectory = (FileName) tok();
      RWCString soFirstReadLength = tok();
      FileName filCrefFile = (FileName) tok();
      RWCString soTilesPerLane = tok();
   }
}


      
      
#endif