/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
//
// filename.cpp
//




#include <ctype.h>
#include <stdio.h>
#include <sys/stat.h>
#include <string.h>
#include "rwcstring.h"
#include "rwcregexp.h"
#include <dirent.h>

#include "sysdepend.h"
#include "assert.h"
#include "filename.h"
#include <sys/stat.h>



// Builds the name of the version that follows this one.  Versions are
// marked by a trailing ".<integer>": "file.ext" is followed by
// "file.ext.1", then "file.ext.2", and so on.  The result always starts
// with the directory part from soGetDirectory(), which is "./" when
// this name contains no '/'.
FileName FileName::filGetNextVersion() const {

   // nGetVersion() reports -1 for a name without a version suffix, in
   // which case the successor is version 1
   const int nCurrentVersion = nGetVersion();
   const int nSuccessor = ( nCurrentVersion == -1 ) ? 1 : nCurrentVersion + 1;

   // text of the new suffix, e.g. ".7"; the buffer is far larger than
   // any formatted int can need
   char szSuffix[128];
   sprintf( szSuffix, ".%d", nSuccessor );

   // directory + basename stripped of any old version + new suffix
   FileName filSuccessor = soGetDirectory() + soGetBasenameWithoutVersion();
   filSuccessor.append( szSuffix );

   return filSuccessor;
}



// versions of a file are indicated by an appended integer
// number.  if the initial version is "file.ext", the next
// version is "file.ext.1", the next "file.ext.2", etc.
// this function returns a string object containing the
// next version up from this one
int FileName:: nGetVersion() const {

   //
   // does this filename already have a version extension on it?
   //
   bool bHasVersion;

   // what is the location of the last '.' in the name (if any)
   size_t nLastDotPos = this->last('.');
   if (nLastDotPos == RW_NPOS) {
      bHasVersion = false;
   }
   else {
      //
      // are all chars following the last '.' numeric?
      //
      bHasVersion = true;  // true until non-numeric encountered
      int nPos = nLastDotPos + 1;
      while ( (bHasVersion) && (nPos < this->length()) ) {
         bHasVersion = isdigit((*this)[nPos++]);
      }
   }

   if (!bHasVersion) return(-1);


   // there already is a version number in this name
   // extract its integer value
   size_t nOldNumStringLen = this->length() - nLastDotPos - 1;
   
   // test for no number, just terminating '.'
   if (nOldNumStringLen == 0) {
      return( -1 );
   } 
   else {
      // extract a substring containing everything after '.'
      RWCString soOldNumString = (*this)(nLastDotPos + 1, nOldNumStringLen);
   
      // convert to integer
      int nIntVal;
      int nResult = sscanf(soOldNumString,"%d",&nIntVal);
      assert (nResult == 1);      // paranoia
   
      // int val is cool
      return( nIntVal );
   }
}



// Returns this name unchanged when no file by this name can be stat'ed.
// Otherwise returns the name produced by
// filFindOneHigherThanHighestVersion() for the versions present in
// filDirectory.
//
// The existence test is made on this name exactly as stored, while the
// version scan is made in filDirectory.
FileName FileName::filThisOrOneHigherThanHighestVersion( const FileName filDirectory ) const {

   // version handed to the directory scan for the case in which it
   // finds no versioned entry at all
   const int nVersionIfNoneExists = 2;

   if ( bFileByThisNameExists() ) {
      // the name is taken, so move past the highest existing version
      return( filFindOneHigherThanHighestVersion( filDirectory,
                                                 nVersionIfNoneExists ) );
   }

   return *this;
}



// Convenience form of filFindOneHigherThanHighestVersion2() for callers
// that want only the resulting name and have no use for the version
// number that was chosen.
FileName    FileName :: filFindOneHigherThanHighestVersion( 
                          const  FileName filDirectory,
                          const int nVersionIfNoneExists ) const {

   // filled in by the callee and then deliberately dropped
   int nDiscardedVersion;

   FileName filResult =
      filFindOneHigherThanHighestVersion2( filDirectory,
                                           nVersionIfNoneExists,
                                           nDiscardedVersion );

   return( filResult );
}
   

// Scans filDirectory for versioned copies of this file and returns the
// basename (no directory part) carrying a version one higher than the
// highest version found.
//
//   filDirectory          directory whose entries are examined
//   nVersionIfNoneExists  version to use when no versioned copy exists
//   nNewVersion           (out) the version number put in the result
//
// A directory entry counts as a versioned copy only if it is exactly
// "<basename without version>.<digits>".  Earlier code used an
// unanchored regular expression built from the unescaped basename,
// which also accepted unrelated entries such as "xfoo.7" or "foo.3.5"
// when looking for versions of "foo", and which could not match a
// basename containing regular expression metacharacters.
//
// Throws SysRequestFailed if the directory cannot be opened.
FileName    FileName :: filFindOneHigherThanHighestVersion2( 
                          const  FileName filDirectory,
                          const int nVersionIfNoneExists,
                          int& nNewVersion ) const {

   int  nHighestVersion = 0;

   RWCString soBasename = soGetBasenameWithoutVersion();

   DIR *dirFile;
   struct dirent *pDirEntry;


   if( ( dirFile = opendir( filDirectory.data() ) ) == NULL ) {
      ostrstream    ost;
      ost << "error:  unable to open directory: " << filDirectory <<
        endl << ends;
      SysRequestFailed srf(ost.str());
      // add the errno text so the user can see why the open failed
      srf.includeErrnoDescription();
      throw srf;
   }

   bool bFoundOne = false;
   while( ( pDirEntry = readdir( dirFile ) ) != NULL )
   {

      FileName filEntry( pDirEntry->d_name );

      // entries without a numeric version suffix are of no interest
      int    nVersion = filEntry.nGetVersion();
      if ( nVersion == -1 ) continue;

      // the part in front of the version must be our basename exactly
      if ( filEntry.soGetBasenameWithoutVersion() != soBasename ) continue;

      bFoundOne = true;
      if (nVersion > nHighestVersion) {
         nHighestVersion = nVersion;
      }
   }

   closedir( dirFile );

   int nHigherVersion = nHighestVersion + 1;

   if ( !bFoundOne )
      nHigherVersion = nVersionIfNoneExists;

   
   // format the integer as a string; the buffer is far larger than any
   // formatted int can need
   char szNewNumStr[128];
   sprintf( szNewNumStr, ".%d", nHigherVersion );

   nNewVersion = nHigherVersion;

   return( soBasename + szNewNumStr );
}



// Returns the full path (directory + basename + version) of the next
// free version of this file, looking for existing versions in this
// file's own directory.  Version 1 is used when no version exists yet.
FileName FileName :: filFindOneHigherThanHighestVersion3() {

   // soGetDirectory() keeps the trailing '/', so the directory and the
   // versioned basename can simply be concatenated
   const FileName filDirectory = soGetDirectory();

   // required by the callee's interface; the value is not needed here
   int nUnusedVersion;

   return( filDirectory +
           filFindOneHigherThanHighestVersion2( filDirectory,
                                                1, // version if none exists
                                                nUnusedVersion ) );
}
                                         


// Returns the directory part of this name, up to and including the
// last '/'.  A name that contains no '/' yields "./".
RWCString FileName::soGetDirectory() const {

   const size_t nSlashPos = this->last('/');

   if ( nSlashPos != RW_NPOS ) {
      // everything from the start through the slash itself
      return( (*this)( 0, nSlashPos + 1 ) );
   }

   // no path information at all: use the current directory
   return( "./" );
}


// Returns the part of this name that follows the last '/'.  A name
// that contains no '/' is returned whole.
RWCString FileName::soGetBasename() const {

   const size_t nSlashPos = this->last('/');

   // no path information: the whole name is the basename
   if ( nSlashPos == RW_NPOS ) {
      return( *this );
   }

   // everything from just past the slash to the end of the name
   const size_t nFirstChar = nSlashPos + 1;
   return( (*this)( nFirstChar, this->length() - nFirstChar ) );
}


// Returns the basename of this file with any trailing version suffix
// (".<digits>") removed: "dir/file.ext.3" gives "file.ext", and
// "dir/file.ext" gives "file.ext" unchanged.
//
// A basename that ends in a bare '.' has that dot removed as well,
// because no non-digit follows it.
RWCString   FileName :: soGetBasenameWithoutVersion() const {
   RWCString soBasename = soGetBasename();

   // without a '.' there cannot be a version suffix
   size_t nLastDotPos = soBasename.last('.');
   if (nLastDotPos == RW_NPOS) {
      return ( soBasename );
   }

   // if any char after the last '.' is not a digit, the tail is an
   // ordinary extension and the basename is returned untouched.  The
   // cast to unsigned char matters: passing a negative char value to
   // isdigit() is undefined behavior.
   const int nLength = static_cast<int>( soBasename.length() );
   for ( int nPos = static_cast<int>( nLastDotPos ) + 1;
         nPos < nLength;
         ++nPos ) {
      if ( !isdigit( static_cast<unsigned char>( soBasename[nPos] ) ) ) {
         return( soBasename );
      }
   }

   // drop the '.' and everything after it
   return( soBasename(0, nLastDotPos ) );
}



// Returns a copy of this name in which everything after the last '.'
// of the basename is replaced by soNewExtension (given without the
// leading '.').  If the basename has no '.', the extension is added.
//
// The directory part is kept, except that the "./" reported by
// soGetDirectory() is not put in front of the result.
FileName FileName :: filReplaceExtension( const RWCString& soNewExtension ) {

   FileName filNew = soGetBasenameWithoutExtension() + "." + soNewExtension;

   RWCString soDir = soGetDirectory();

   if ( soDir != "./" ) {
      // soGetDirectory() already ends in '/', so no separator is
      // added here; adding one produced "dir//file.ext", and
      // "//file.ext" for a file in the root directory
      filNew = soDir + filNew;
   }

   return filNew;
}





// Returns the basename of this file cut off just before its last '.'.
// A basename that contains no '.' is returned whole.
RWCString   FileName :: soGetBasenameWithoutExtension() const {

   RWCString soName = soGetBasename();

   const size_t nDotPos = soName.last( '.' );

   if ( nDotPos != RW_NPOS ) {
      // keep only what precedes the final '.'
      return( soName( 0, nDotPos ) );
   }

   return( soName );
}






// Reports whether stat() succeeds on this name.  Any stat() failure,
// whatever its cause, is reported as "does not exist".
bool FileName::bFileByThisNameExists() const {

   // required by stat(); its contents are not examined
   struct stat statInfo;

   // the string object is handed to stat() as a const char*
   return( stat( (*this), &statInfo ) == 0 );
}




// Reports whether this name can be stat'ed and has the S_IEXEC bit set
// in its mode (presumably the owner-execute permission bit; check the
// platform's <sys/stat.h>).  Returns false if stat() fails.
bool FileName :: bIsExecutable() {

   struct stat statInfo;

   // a file that cannot be stat'ed is treated as not executable
   if ( stat( (*this), &statInfo ) != 0 ) {
      return( false );
   }

   return( ( statInfo.st_mode & S_IEXEC ) != 0 );
}
      

// Returns whatever follows the last '.' in this name, or an empty
// string if there is no '.' or the name ends with one.
//
//    "abc.s1" gives "s1"      ".a" gives "a"      "abc." gives ""
//
// NOTE(review): the whole name is searched, not just the basename, so
// a '.' in a directory component is found when the basename has none.
RWCString FileName :: soGetExtension() {

   // kept as size_t, the type the position is compared against below,
   // rather than being narrowed to int
   const size_t nLastDot = last('.' );
   if ( nLastDot == RW_NPOS ) 
      return( "" ); 

   // if the name ends with a dot, no extension
   const size_t nFirstExtChar = nLastDot + 1;
   if ( nFirstExtChar == length() )
      return( "" );

   // everything from just past the dot to the end of the name
   RWCString soExtension = (*this)( nFirstExtChar, length() - nFirstExtChar );
   return( soExtension );
}




// Returns the part of this name that precedes the ".phd." located by
// index(), e.g. "read1" for "read1.phd.2".  Returns an empty string
// when the name contains no ".phd." or begins with it (as in
// ".phd.1").
RWCString FileName :: soGetReadNameFromPhdFileName() const {

   // kept as size_t so the comparison with RW_NPOS does not depend on
   // a round trip through int
   const size_t nIndex = index( ".phd." );

   // no ".phd." at all, or nothing in front of it
   if ( nIndex == RW_NPOS || nIndex == 0 ) {
      RWCString soEmpty;
      return( soEmpty );
   }

   return( (*this)( 0, nIndex ) );
}


// Opens the file of this name with fopen() mode "w" and writes a
// single space to it.  Returns true only if the open, the write and
// the close all succeeded.
bool FileName :: bCreateFile() {

   FILE* pTemp = fopen( data(), "w" );
   if ( !pTemp ) {
      return false;
   }

   // a buffered write error may not show up until the stream is
   // flushed, so check both the write and the close.  The stream is
   // closed even when the write failed.
   const bool bWritten = ( putc( ' ', pTemp ) != EOF );
   const bool bClosed = ( fclose( pTemp ) == 0 );

   return ( bWritten && bClosed );
}


// Derives a project name from the basename of an ace file.  The rules
// are tried in this order and the first one that applies wins:
//   1. cut ".fasta.screen." and everything after it
//   2. cut a final ".ace"
//   3. cut a final ".ace.<digits>"
// If none applies, the basename is returned unchanged.
RWCString FileName :: soGetProjectFromAceFilename() {

   RWCString soAceFileName = soGetBasename();

   // rule 1 is applied to a copy so that comparing it with the
   // original afterwards shows whether anything was clipped
   RWCString soProjectName = soAceFileName;
   RWCRegexp regFastaScreenTail("\\.fasta\\.screen\\..*$");
   soProjectName( regFastaScreenTail ) = "";

   if ( soProjectName != soAceFileName ) {
      return( soProjectName );
   }

   // there was no ".fasta.screen." so at least remove a final .ace

   RWCRegexp regAceTail( "\\.ace$" );
   if ( soAceFileName( regAceTail ).length() ) {
      soAceFileName( regAceTail ) = "";
      return( soAceFileName );
   }

   // ... or a final .ace.(number)

   RWCRegexp regVersionedAceTail( "\\.ace\\.[0-9]+$" );
   if ( soAceFileName( regVersionedAceTail ).length() ) {
      soAceFileName( regVersionedAceTail ) = "";
   }

   // either clipped by rule 3, or the name does not end in .ace or
   // .ace.(number) at all and is returned as it is
   return( soAceFileName );
}


// Returns the name of the directory that contains this file's
// "edit_dir" directory; for ".../myproject/edit_dir/x.ace" the
// intended result is "myproject".
//
// The directory part of this name is expected to end in "/edit_dir/";
// that expectation is checked with assert().
RWCString FileName :: soGetProjectBeforeEditDir() {

   RWCString soProject = soGetDirectory();

   // get rid of the final /edit_dir/ (bEndsWithAndRemove() presumably
   // strips the suffix when present and reports whether it did).
   //
   // The call must stay outside the assert(): an expression inside
   // assert() is not evaluated at all when NDEBUG is defined, so the
   // suffix would then never be removed.
   const bool bHadEditDir = soProject.bEndsWithAndRemove("/edit_dir/" );
   assert( bHadEditDir );
   (void) bHadEditDir;   // avoids an unused-variable warning under NDEBUG

   // what is left after the last remaining '/' is the project directory
   RWCRegexp regLastDirectory("[^/]+$");

   soProject = soProject( regLastDirectory );

   return soProject;
}



// Returns the st_size that stat() reports for this name, or -1 if
// stat() fails.
long FileName :: lGetFileSize() {

   struct stat statInfo;

   if ( stat( data(), &statInfo ) == 0 ) {
      return( statInfo.st_size );
   }

   // the file could not be stat'ed
   return( -1 );
}