/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
#include    "phdBall2Fasta.h"
#include    <stdio.h>
#include    "mbt_exception.h"
#include    "rwcstring.h"
#include    <unistd.h>
#include    "rwtvalorderedvector.h"
#include    "consedParameters.h"


void phdBall2Fasta :: doIt() {



   FILE* pPhdBall = fopen( filPhdBall_.data(), "r" );
   if ( !pPhdBall ) {
      THROW_FILE_ERROR( filPhdBall_ );
   }

   nLine_ = 0;

   FILE* pFasta;
   FILE* pQualities;
   FILE* pFastq;

   if ( !filFasta_.bIsNull() ) {

      pFasta = fopen( filFasta_.data(), "w" );
      if ( !pFasta ) {
         THROW_FILE_ERROR( filFasta_ );
      }

      filQualities_ = filFasta_ + ".qual";

      pQualities = fopen( filQualities_.data(), "w" );
      if ( !pQualities ) {
         THROW_FILE_ERROR( filQualities_ );
      }
   }

   if ( !filFastq_.bIsNull() ) {

      pFastq = fopen( filFastq_.data(), "w" );
      if ( !pFastq ) {
         THROW_FILE_ERROR( filFastq_ );
      }

   }


   const int nPrettyLargeSequence = 2000;
   
   RWCString soBases( (size_t) nPrettyLargeSequence );
   RWTValOrderedVector<int> aQualities( (size_t) nPrettyLargeSequence,
                                        "phdBall2Fasta::aQualities" );

   const int nPrettyLargeHeader = 2000;
   RWCString soFastaHeader( (size_t) nPrettyLargeHeader );
   RWCString soFastaQualHeader( (size_t) nPrettyLargeHeader );


   bool bEndOfFile = false;


   readFirstBeginSequenceLine( pPhdBall, bEndOfFile );

   if ( bEndOfFile ) {
      RWCString soError = "file ";
      soError += filPhdBall_;
      soError += " does not have a BEGIN_SEQUENCE line in it.\n";
      soError += "Is it a phd ball???";
      THROW_ERROR( soError );
   }


   while( !bEndOfFile ) {
      readFileOfPhdFilesForPhdBall2Fasta( pPhdBall,
                                          soFastaHeader,
                                          soFastaQualHeader,
                                          soBases,
                                          aQualities,
                                          bEndOfFile );


      if ( pCP->bPhdBall2FastaIgnoreLowQualityReads_ ) {

         float fMeanQuality = fGetMeanQuality( aQualities );
         if ( fMeanQuality < pCP->nPhdBall2FastaLowestAverageQuality_ ) {
            cerr << "ignoring read " << soFastaHeader << " because mean quality is " << fMeanQuality << " which is too low" << endl;
            
            continue;
         }
      }
         

      // we are guaranteed to have found a phd block even if bEndOfFile is true
      // since that just means there isn't another phd block


      if ( !filFasta_.bIsNull() ) {

         fprintf( pFasta, ">%.*s\n", soFastaHeader.length(), soFastaHeader.data() );
         fprintf( pQualities, ">%.*s\n", soFastaQualHeader.length(), soFastaQualHeader.data() );

         const int nMaxBasesOnLine = 50;
         for( int n0Pos = 0; n0Pos < soBases.length(); 
              n0Pos += nMaxBasesOnLine ) {

            int nNumberOfBasesToWrite = nMaxBasesOnLine;
            if ( n0Pos + nNumberOfBasesToWrite > soBases.length() ) {
               nNumberOfBasesToWrite = soBases.length() - n0Pos;
            }

            fprintf( pFasta, "%.*s\n", nNumberOfBasesToWrite, &soBases[ n0Pos ] );

            for( int nBase = 0; nBase < nNumberOfBasesToWrite; ++nBase ) {
               fprintf( pQualities, " %d", aQualities[ nBase + n0Pos ] );
            }
            fputc( '\n', pQualities );

         } // for( int n0Pos; ...
      } //  if ( !filFasta_.bIsNull() ) {

      if ( !filFastq_.bIsNull() ) {
         
         fprintf( pFastq, "@%.*s\n", 
                  soFastaQualHeader.length(), soFastaQualHeader.data() );
         fprintf( pFastq, "%.*s\n", soBases.length(), soBases.data() );

         RWCString soFastqQualities( (size_t) aQualities.length() );
         soFastqQualities.nCurrentLength_ = aQualities.length();

         for( int nBase = 0; nBase < aQualities.length(); ++nBase ) {
            char cQuality = aQualities[ nBase ] + '!';
            soFastqQualities.setBaseUnsafe( nBase, cQuality );
         }

         fprintf( pFastq, "+\n%.*s\n", 
                  soFastqQualities.length(),
                  soFastqQualities.data() );

      } //  if ( !filFastq_.bIsNull() ) {

   } //  while( !bEndOfFile ) {

   fclose( pPhdBall );

   if ( !filFasta_.bIsNull() ) {
      fclose( pFasta );
      fclose( pQualities );
   }

   if ( !filFastq_.bIsNull() ) {
      fclose( pFastq );
   }

}

   



float phdBall2Fasta :: fGetMeanQuality( const RWTValOrderedVector<int>& 
                                        aQualities ) {

   float fSum = 0.0;
   
   for( int n = 0; n < aQualities.length(); ++n ) {
      fSum += aQualities[ n ];
   }

   if ( aQualities.length() == 0 )
      return 0.0;
   else 
      return fSum / aQualities.length();
}