/*****************************************************************************
#   Copyright (C) 1994-2008 by David Gordon.
#   All rights reserved.                           
#                                                                           
#   This software is part of a beta-test version of the Consed/Autofinish
#   package.  It should not be redistributed or
#   used for any commercial purpose, including commercially funded
#   sequencing, without written permission from the author and the
#   University of Washington.
#   
#   This software is provided ``AS IS'' and any express or implied
#   warranties, including, but not limited to, the implied warranties of
#   merchantability and fitness for a particular purpose, are disclaimed.
#   In no event shall the authors or the University of Washington be
#   liable for any direct, indirect, incidental, special, exemplary, or
#   consequential damages (including, but not limited to, procurement of
#   substitute goods or services; loss of use, data, or profits; or
#   business interruption) however caused and on any theory of liability,
#   whether in contract, strict liability, or tort (including negligence
#   or otherwise) arising in any way out of the use of this software, even
#   if advised of the possibility of such damage.
#
#   Building Consed from source is error prone and not simple which is
#   why I provide executables.  Due to time limitations I cannot
#   provide any assistance in building Consed.  Even if you do not
#   modify the source, you may introduce errors due to using a
#   different version of the compiler, a different version of motif,
#   different versions of other libraries than I used, etc.  For this
#   reason, if you discover Consed bugs, I can only offer help with
#   those bugs if you first reproduce those bugs with an executable
#   provided by me--not an executable you have built.
# 
#   Modifying Consed is also difficult.  Although Consed is modular,
#   some modules are used by many other modules.  Thus making a change
#   in one place can have unforeseen effects on many other features.
#   It may takes months for you to notice these other side-effects
#   which may not seen connected at all.  It is not feasable for me to
#   provide help with modifying Consed sources because of the
#   potentially huge amount of time involved.
#
#*****************************************************************************/
/* Written by David Gordon.  Parts of it written by James Knight and gratefully used with permission. */


/*
 * checksff
 *
 * Usage:  checksff sfffile
 *
 * This program extracts the fields from an SFF file and outputs a text
 * version of the data to standard output.
 */

#include <stdio.h>
#include <stdlib.h>

/* solaris does not have stdint.h but time.h includes sys/types.h
   which includes sys/int_types.h which takes the place of stdint.h */

#ifndef SOLARIS
#include <stdint.h>
#endif

#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <time.h>
#include "rwtptrorderedvector.h"
#include "basesAndQualitiesAndPeaks.h"
#include "mbtValOrderedVectorOfRWCString.h"


#define SFF_MAGIC_NUMBER 0x2E736666
#define SFF_VERSION "\0\0\0\1"

#define MANIFEST_INDEX_MAGIC_NUMBER ( (((unsigned int) '.') << 24) + (((unsigned int) 'm') << 16) + (((unsigned int) 'f') << 8) + ((unsigned int) 't') )
#define SORT_INDEX_MAGIC_NUMBER ( (((unsigned int) '.') << 24) + (((unsigned int) 's') << 16) + (((unsigned int) 'r') << 8) + ((unsigned int) 't') )
#define INDEX_VERSION "1.00"

#define TEXT_MODE 0
#define FASTA_MODE 1
#define QUAL_MODE 2
#define FLOWGRAM_MODE 3
#define MANIFEST_MODE 4
#define ACCNO_MODE 5

/*
 * Table of run records taken from an SFF manifest.  parseManifest() stores
 * one entry for every <run> element that has a non-empty <path>, and
 * getManifestInfo() looks entries up by accession prefix.  Each string is
 * a separately heap-allocated copy.
 */
struct {
    char *accnoPrefix;   /* text of <accession_prefix>, assigned only when non-empty */
    char *runName;       /* text of <run_name>, assigned only when non-empty */
    char *analysisName;  /* text of <analysis_name>, assigned only when non-empty */
    char *runPath;       /* text of <path>; an entry is only stored when this is non-empty */
} manifestList[10000];

/* Number of entries of manifestList currently in use (reset by readOneSffFile). */
int numManifest;

/*
 * Forward references.
 */

/* Manifest handling: fill and query the global manifestList table. */
void parseManifest(char *buf);
int getManifestInfo(char *accno, char *runNameOut,
                    char *analysisNameOut, char *runPathOut);

/* Accession-number helpers. */
int getUAccnoInfo(char *accno, char *timeOut, int *regionOut, char *xyOut);
unsigned int lookupAccno(char *accno, unsigned char *indexBuf, int indexSize);

/* Low-level file and string helpers. */
void getBytes(FILE *fp, unsigned int size, unsigned char *buf, char *filename);
int prefixCheck(char *s, char *t, int minlen);

/* Decoders for the fixed-width integer fields of an SFF file. */
uint32_t getuint8(unsigned char *b);
uint32_t getuint16(unsigned char *b);
uint32_t getuint32(unsigned char *b);
unsigned int getuint4_255(unsigned char *b);
uint64_t getuint64(unsigned char *b);

/* NOTE(review): no definition with this parameter list is visible in this
   file; the readOneSffFile defined below takes a read vector, a filter
   flag and a list of desired reads.  Confirm whether this prototype is
   still needed. */
void readOneSffFile( char* szSffFileFullPath, FILE* fpPhdBall,
                     char* szTimeStamp );
void removeTrailingWhitespace( char* szLine, int nInitialLength, int* pFinalLength );

void getBasename( char* szFullPath, char* szBasename );

 
/*
 * readOneSffFile
 *
 * Read the SFF file szSffFileFullPath and append one newly allocated
 * basesAndQualitiesAndPeaks object per read to aArrayOfReads.
 *
 * For each read that is kept, the object receives the read name, the
 * lower-cased bases, the quality scores, the clip points (the tighter of
 * the quality and adapter clips, with the left clip at least 1) and
 * synthetic peak positions spaced 19 samples apart (base i is placed at
 * i * 19 + 15).
 *
 * When bWantOnlyCertainReads is true, only reads whose names are contained
 * in aDesiredReads are kept, and the final read-count check is skipped.
 *
 * numManifest is reset to 0 on entry.  If the file carries a manifest
 * (".mft") index, the manifest text is handed to parseManifest().
 *
 * Every problem (file cannot be opened, wrong magic number or version, a
 * length field that does not fit the fixed-size buffers, a short read, a
 * read-count mismatch) is reported on stderr and ends the program through
 * exit(-1).  All length fields come from the file, so each one is checked
 * before it is used as a copy or read size.
 */
void readOneSffFile( char* szSffFileFullPath,
                     RWTPtrOrderedVector<basesAndQualitiesAndPeaks>& aArrayOfReads,
                     const bool bWantOnlyCertainReads,
                     mbtValOrderedVectorOfRWCString& aDesiredReads ) {

    /* flowgram_format_code 1 stores each flow value in 2 bytes */
    const uint32_t flowgram_bytes = 2;
    /* size of the fixed part of the common header */
    const uint32_t common_header_fixed_length = 31;
    /* size of the fixed part of each read header */
    const uint32_t read_header_fixed_length = 16;

    int i;
    uint32_t magic_number, index_length, number_of_reads;
    uint32_t header_length, number_of_flows_per_read;
    uint32_t flowgram_format_code, number_reads_found;
    uint32_t read_header_length, name_length, number_of_bases;
    uint32_t clip_qual_left, clip_qual_right, clip_adapter_left;
    uint32_t clip_adapter_right;
    uint32_t clip_left, clip_right, index_mft_length;
    uint64_t index_offset, cur_offset;
    uint64_t read_data_length, eight_byte_padding;
    char szReadName[1000];
    unsigned char buf[50000];
    FILE* fpSFF;


    /* SFF is a binary format, so open the file in binary mode. */
    fpSFF = fopen( szSffFileFullPath, "rb" );
    if ( !fpSFF ) {
       fprintf( stderr, "fatal: could not open sff file %s due to error %s (%d)\n",
                szSffFileFullPath,
                strerror( errno ),
                errno );
       exit( -1 );
    }

    fprintf( stderr, "Now processing sff file %s\n", szSffFileFullPath );

    numManifest = 0;

    /*
     * Read the beginning of the common header, check that the magic number
     * is 0x2E736666 and the version bytes are 0,0,0,1, then extract the
     * other fields of the header.
     */
    getBytes(fpSFF, common_header_fixed_length, buf, szSffFileFullPath);

    magic_number = getuint32(buf);
    if (magic_number != SFF_MAGIC_NUMBER) {
        fprintf(stderr, "Error:  File is not an SFF file:  %s\n", szSffFileFullPath);
        exit(-1);
    }

    /* memcmp, not strncmp: the version starts with NUL bytes, so a string
       comparison would stop after the first byte. */
    if (memcmp(buf + 4, SFF_VERSION, 4) != 0) {
        fprintf(stderr, "Error:  Unsupported SFF file version number.\n");
        exit(-1);
    }

    index_offset = getuint64(buf+8);
    index_length = getuint32(buf+16);
    number_of_reads = getuint32(buf+20);

    aArrayOfReads.resize( number_of_reads );

    fprintf( stderr, "number_of_reads = %u\n", (unsigned int) number_of_reads );

    header_length = getuint16(buf+24);
    number_of_flows_per_read = getuint16(buf+28);
    flowgram_format_code = getuint8(buf+30);
    if (flowgram_format_code != 1) {
        fprintf(stderr, "Error:  Invalid flowgram_format_code:  %d\n",
                (int) flowgram_format_code);
        exit(-1);
    }

    /*
     * Consume the variable part of the common header (flow chars, key
     * sequence, padding) so the file position ends up at the first read.
     */
    if (header_length < common_header_fixed_length ||
        header_length > sizeof(buf)) {
        fprintf(stderr, "Error:  Invalid header_length %u in SFF file:  %s\n",
                (unsigned int) header_length, szSffFileFullPath);
        exit(-1);
    }
    getBytes(fpSFF, header_length - common_header_fixed_length,
             buf + common_header_fixed_length, szSffFileFullPath);

    /*
     * If the file has an index section in the ".mft" format, read its
     * manifest text and pass it to parseManifest(), then return to the
     * first read.
     */
    if (index_length > 0) {

        if (fseek(fpSFF, (long) index_offset, SEEK_SET) != 0) {
            fprintf(stderr, "Error:  Unable to read SFF file:  %s\n",
                    szSffFileFullPath);
            exit(-1);
        }
        getBytes(fpSFF, 8, buf, szSffFileFullPath);
        magic_number = getuint32(buf);

        index_mft_length = 0;
        if (magic_number == MANIFEST_INDEX_MAGIC_NUMBER &&
            memcmp(buf + 4, INDEX_VERSION, 4) == 0) {
            getBytes(fpSFF, 8, buf, szSffFileFullPath);
            index_mft_length = getuint32(buf);
        }

        if (index_mft_length > 0) {
            unsigned char* manifestText;

            /* reject lengths that cannot be a real manifest before they
               are used as an allocation and read size */
            if (index_mft_length > 0x7FFFFFFFu) {
                fprintf(stderr, "Error:  Invalid manifest length in SFF file:  %s\n",
                        szSffFileFullPath);
                exit(-1);
            }

            manifestText = (unsigned char*) malloc((size_t) index_mft_length + 1);
            if (manifestText == NULL) {
                fprintf(stderr, "Error:  Ran out of memory.\n");
                exit(-1);
            }
            getBytes(fpSFF, index_mft_length, manifestText, szSffFileFullPath);
            manifestText[index_mft_length] = '\0';

            parseManifest( (char*) manifestText );

            free(manifestText);
        }

        if (fseek(fpSFF, (long) header_length, SEEK_SET) != 0) {
            fprintf(stderr, "Error:  Unable to read SFF file:  %s\n",
                    szSffFileFullPath);
            exit(-1);
        }
    }

    /*
     * Scan the entire file, one read record at a time.
     */
    number_reads_found = 0;
    cur_offset = header_length;

    while (1) {

       /*
        * If the current file pointer is at the beginning of the index,
        * skip the index section.
        */
       if (cur_offset == index_offset) {
          if (fseek(fpSFF, (long) index_length, SEEK_CUR) != 0) {
             fprintf(stderr, "Error:  Unable to read SFF file:  %s\n",
                     szSffFileFullPath);
             exit(-1);
          }
          cur_offset += index_length;
       }

       /*
        * Read the first 16 bytes of the next entry,
        * and check for end of file.
        */
       if (fread(buf, 1, read_header_fixed_length, fpSFF) != read_header_fixed_length) {
          break;
       }

        /*
         * Parse the initial read header bytes, and read the rest of the
         * read header.
         */
        read_header_length = getuint16(buf);
        name_length = getuint16(buf+2);
        number_of_bases = getuint32(buf+4);
        clip_qual_left = getuint16(buf+8);
        clip_qual_right = getuint16(buf+10);
        clip_adapter_left = getuint16(buf+12);
        clip_adapter_right = getuint16(buf+14);

        if (read_header_length < read_header_fixed_length + name_length ||
            read_header_length > sizeof(buf) ||
            name_length >= sizeof(szReadName)) {
            fprintf(stderr, "Error:  Invalid read header in SFF file:  %s\n",
                    szSffFileFullPath);
            exit(-1);
        }

        getBytes(fpSFF, read_header_length - read_header_fixed_length,
                 buf + read_header_fixed_length, szSffFileFullPath);
        memcpy(szReadName, buf + read_header_fixed_length, name_length);
        szReadName[name_length] = '\0';

        cur_offset += read_header_length;

        /*
         * Read the read data section: the flowgram values, then one flow
         * index byte, one base and one quality byte per base, padded to a
         * multiple of 8 bytes.  The size is computed in 64 bits so a huge
         * number_of_bases cannot wrap around.
         */
        read_data_length =
            (uint64_t) number_of_flows_per_read * flowgram_bytes +
            (uint64_t) number_of_bases * 3;
        eight_byte_padding = (read_data_length % 8 > 0
                              ? 8 - read_data_length % 8
                              : 0);
        if (read_data_length + eight_byte_padding > sizeof(buf)) {
            fprintf(stderr,
                    "Error:  Read %s in SFF file %s is too long (%u bases).\n",
                    szReadName, szSffFileFullPath,
                    (unsigned int) number_of_bases);
            exit(-1);
        }
        getBytes(fpSFF, (unsigned int) (read_data_length + eight_byte_padding),
                 buf, szSffFileFullPath);

        cur_offset += read_data_length + eight_byte_padding;

        // added April 2010 for selected reads

        if ( bWantOnlyCertainReads ) {
           RWCString soReadName( szReadName );

           if ( !aDesiredReads.bContains( soReadName ) ) {
              continue;
           }
        }
        // end new section

        /*
         * Locate the bases and qualities inside the data section.  They
         * follow the flowgram values and the per-base flow index bytes,
         * neither of which is needed here.
         */
        const unsigned char* pBases =
            buf + number_of_flows_per_read * flowgram_bytes + number_of_bases;
        const unsigned char* pQualities = pBases + number_of_bases;

        /*
         * Compute the left and right clip points.
         */
        clip_left = 1;
        clip_left = (clip_qual_left > clip_left ? clip_qual_left : clip_left);
        clip_left = (clip_adapter_left > clip_left
                     ? clip_adapter_left : clip_left);

        clip_right = number_of_bases;
        if (clip_qual_right > 0 && clip_qual_right < clip_right) {
            clip_right = clip_qual_right;
        }
        if (clip_adapter_right > 0 && clip_adapter_right < clip_right) {
            clip_right = clip_adapter_right;
        }

        /*
         * Store the read.
         */
        basesAndQualitiesAndPeaks* pSeq = new basesAndQualitiesAndPeaks();
        pSeq->soSequenceName_ = szReadName;
        aArrayOfReads.insert( pSeq );

        pSeq->n1ClipLeft_ = clip_left;
        pSeq->n1ClipRight_ = clip_right;


        pSeq->soBases_.increaseMaxLength( number_of_bases );
        pSeq->aQualities_.resize( number_of_bases );

        // pLittlePeakPositions_ can be used for positions up to size
        // about 32,000 which corresponds (since each base takes about
        // 19 positions), up to about 1700 bases.  454 reads might
        // someday be this large so let's be smart about whether we
        // use pLittlePeakPositions_ or pBigPeakPositions_


        pSeq->createPointPosArray( true, // bFoundTraceArrayMaxIndex,
                                   // well, we don't really know
                                   // nTraceArrayMaxIndex_ but we have
                                   // a pretty good estimate:
                                   ( number_of_bases - 1 ) * 19 + 15 + 100,
                                   // this is the largest peak position
                                   // + 100 for good measure
                                   number_of_bases, // nInitialSizeOfArray
                                   false ); // bFillUpArray

        for( i = 0; (uint32_t) i < number_of_bases; i++ ) {

           int nQuality = pQualities[i];

           pSeq->soBases_.append( tolower( pBases[i] ) );
           pSeq->aQualities_.append( nQuality );

           pSeq->appendPointPos( i * 19 + 15 );

        }

        // numSamples = number_of_bases * 19 + 12 from studying
        // sff2scf.c

        pSeq->nTraceArrayMaxIndex_ = number_of_bases * 19 + 11;

        number_reads_found++;

        // progress indicator: one dot per 10000 stored reads
        if ( number_reads_found % 10000 == 0 ) {
           cerr << ".";
           cerr.flush();
        }

    } /*     while (1) { */

    fclose(fpSFF);

    /*
     * Check to make sure the file contains the correct number of reads.
     * (Not meaningful when reads were filtered by name.)
     */
    if (!bWantOnlyCertainReads && number_reads_found != number_of_reads) {
        fprintf(stderr, "Error:  Number of reads found in SFF file does not equal number_of_reads field.\n");
        exit(-1);
    }

} /* void readOneSffFile( char* szSffFileFullPath... */


/*
 * parseManifest
 *
 * Scan the manifest text for the run information.
 */
void parseManifest(char *buf)
{
    char *s, *s2, *s3, *s4, *t, accnoPrefix[1000], runName[1000];
    char analysisName[1000], runPath[1000];

    s = buf;
    while ((s2 = strstr(s, "<run>")) && (s4 = strstr(s2, "</run>"))) {
        *s4 = '\0';

        accnoPrefix[0] = '\0';
        runName[0] = '\0';
        analysisName[0] = '\0';
        runPath[0] = '\0';

        if ((s3 = strstr(s2, "<accession_prefix>"))) {
            s3 += 18;
            for (t=accnoPrefix; *s3 && strncmp(s3, "</accession", 11) != 0; s3++,t++) {
                *t = *s3;
            }
            *t = '\0';
        }

        if ((s3 = strstr(s2, "<run_name>"))) {
            s3 += 10;
            for (t=runName; *s3 && strncmp(s3, "</run_name>", 11) != 0; s3++,t++) {
                *t = *s3;
            }
            *t = '\0';
        }

        if ((s3 = strstr(s2, "<analysis_name>"))) {
            s3 += 15;
            for (t=analysisName; *s3 && strncmp(s3, "</analysis_", 11) != 0; s3++,t++) {
                *t = *s3;
            }
            *t = '\0';
        }

        if ((s3 = strstr(s2, "<path>"))) {
            s3 += 6;
            for (t=runPath; *s3 && strncmp(s3, "</path>", 7) != 0; s3++,t++) {
                *t = *s3;
            }
            *t = '\0';
        }

        if (runPath[0] != '\0') {
            if (accnoPrefix[0] != '\0') {
                manifestList[numManifest].accnoPrefix = strdup(accnoPrefix);
            }
            if (runName[0] != '\0') {
                manifestList[numManifest].runName = strdup(runName);
            }
            if (analysisName[0] != '\0') {
                manifestList[numManifest].analysisName = strdup(analysisName);
            }
            manifestList[numManifest].runPath = strdup(runPath);
            numManifest++;
        }

        *s4 = '<';
        s = s4 + 1;
    }
}

/*
 * getManifestInfo
 *
 * Look up the run that an accno belongs to and copy its run name,
 * analysis name and run path into the three output buffers (a field that
 * is not set in the table is returned as an empty string).
 *
 * With exactly one manifest entry that entry is always used; otherwise
 * the first entry whose accession prefix is a prefix of accno is used.
 *
 * Returns 1 when an entry was found, or 0 (leaving the output buffers
 * untouched) when the table is empty or no prefix matches.
 */
int getManifestInfo(char *accno, char *runNameOut,
                    char *analysisNameOut, char *runPathOut)
{
    int match = 0;

    if (numManifest == 0) {
        return 0;
    }

    if (numManifest != 1) {
        while (match < numManifest) {
            const char *prefix = manifestList[match].accnoPrefix;

            if (prefix != NULL &&
                strncmp(accno, prefix, strlen(prefix)) == 0) {
                break;
            }
            match++;
        }
        if (match == numManifest) {
            return 0;
        }
    }

    /* start from empty strings, then fill in whatever the entry has */
    *runNameOut = '\0';
    *analysisNameOut = '\0';
    *runPathOut = '\0';

    if (manifestList[match].runName != NULL) {
        strcpy(runNameOut, manifestList[match].runName);
    }
    if (manifestList[match].analysisName != NULL) {
        strcpy(analysisNameOut, manifestList[match].analysisName);
    }
    if (manifestList[match].runPath != NULL) {
        strcpy(runPathOut, manifestList[match].runPath);
    }

    return 1;
}

/*
 * lookupAccno
 *
 * Find accno in the index held in indexBuf (indexSize bytes) and return
 * the file offset stored with it, or 0 when the accno is not present.
 *
 * As read by this function, an index record is the NUL-terminated accno,
 * followed by a 4-byte base-255 offset (see getuint4_255) and a byte with
 * the value 255 that ends the record.  A malformed record (missing 255
 * terminator) is reported on stderr and ends the program with exit(1).
 */
unsigned int lookupAccno(char *accno, unsigned char *indexBuf, int indexSize)
{
    int lo = 0;
    int hi = indexSize;

    /*
     * Binary search on byte positions, narrowing [lo, hi) until it is
     * small enough to scan linearly.  This assumes that no accession
     * number is near 200 characters.
     */
    while (hi - lo > 200) {
        int probe = (lo + hi) / 2;
        int cmp;

        /* back up from the byte midpoint to the start of its record,
           i.e. just after the previous record's 255 terminator */
        while (probe > lo && indexBuf[probe - 1] != 255) {
            probe--;
        }

        cmp = strcmp(accno, (char *) indexBuf + probe);
        if (cmp == 0) {
            break;
        }
        if (cmp < 0) {
            hi = probe;
        } else {
            lo = probe;
        }
    }

    /*
     * Walk the remaining region record by record.
     */
    while (lo < hi) {
        char *record = (char *) indexBuf + lo;

        if (strcmp(accno, record) != 0) {
            /* no match: advance past this record's 255 terminator */
            while (lo < hi && indexBuf[lo] != 255) {
                lo++;
            }
            lo++;
            continue;
        }

        {
            /* the offset field starts right after the accno's NUL byte */
            unsigned char *field = indexBuf + lo + strlen(record) + 1;
            unsigned int offset = getuint4_255(field);

            if (field[4] != 255) {
                fprintf(stderr, "Error:  Invalid TVF index format.\n");
                exit(1);
            }
            return offset;
        }
    }

    return 0;
}


/*
 * uaccnoDigitValue (helper for getUAccnoInfo)
 *
 * Map one accno character to its base-36 digit: letters of either case
 * are 0-25 and decimal digits are 26-35.  Returns -1 for anything else.
 * The character is converted to unsigned char first because the <ctype.h>
 * functions are undefined for negative arguments.
 */
static int uaccnoDigitValue(char ch)
{
    unsigned char uch = (unsigned char) ch;
    int value;

    if (isalpha(uch)) {
        value = toupper(uch) - 'A';
    } else if (isdigit(uch)) {
        value = 26 + (uch - '0');
    } else {
        return -1;
    }

    return (value >= 0 && value < 36) ? value : -1;
}

/*
 * decodeUAccnoField (helper for getUAccnoInfo)
 *
 * Interpret the first length characters of field as a base-36 number and
 * store it in *totalOut.  Returns 1 on success, 0 on an invalid character.
 */
static int decodeUAccnoField(const char *field, int length,
                             unsigned int *totalOut)
{
    unsigned int total = 0;
    int i;

    for (i = 0; i < length; i++) {
        int value = uaccnoDigitValue(field[i]);

        if (value < 0) {
            return 0;
        }
        total = total * 36u + (unsigned int) value;
    }

    *totalOut = total;
    return 1;
}

/*
 * getUAccnoInfo
 *
 * Extract the run timestamp and the XY position information from
 * a universal accno.
 *
 * A universal accno is exactly 14 characters: a letter, alphanumerics,
 * and decimal digits at positions 7 and 8.  Characters 0-5 are a base-36
 * timestamp, characters 7-8 the two-digit region and characters 9-13 a
 * base-36 value holding the X and Y coordinates.
 *
 * On success returns 1 and writes "R_yyyy_mm_dd_hh_mm_ss_" to timeOut
 * (23 bytes including the terminator), the region to *regionOut and
 * "xxxx_yyyy" to xyOut (10 bytes including the terminator).  Returns 0
 * when accno does not have the expected form.
 */
int getUAccnoInfo(char *accno, char *timeOut, int *regionOut, char *xyOut)
{
    unsigned int total;
    int i;

    if (strlen(accno) != 14) {
        return 0;
    }

    for (i = 0; i < 14; i++) {
        unsigned char uch = (unsigned char) accno[i];
        int ok;

        if (i == 0) {
            ok = isalpha(uch);
        } else if (i == 7 || i == 8) {
            ok = isdigit(uch);
        } else {
            ok = isalnum(uch);
        }
        if (!ok) {
            return 0;
        }
    }

    if (!decodeUAccnoField(accno, 6, &total)) {
        return 0;
    }

    /* the timestamp is packed as seconds with 60 s/min, 60 min/h, 24 h/day,
       32 days/month and 13 months/year, counted from the year 2000 */
    sprintf(timeOut, "R_%04u_%02u_%02u_%02u_%02u_%02u_",
            2000 + total / (13 * 32 * 24 * 60 * 60),
            (total / (32 * 24 * 60 * 60)) % 13,
            (total / (24 * 60 * 60)) % 32,
            (total / (60 * 60)) % 24,
            (total / 60) % 60,
            total % 60);

    *regionOut = (accno[7] - '0') * 10 + (accno[8] - '0');

    if (!decodeUAccnoField(accno + 9, 5, &total)) {
        return 0;
    }

    /* X and Y are the two base-4096 digits of the decoded value */
    sprintf(xyOut, "%04u_%04u",
            (total / 4096) % 4096,
            total % 4096);

    return 1;
}


/*
 * prefixCheck
 *
 * Compare two strings case-insensitively and report that they are equal
 * if they match to the end of one of the two, and the matched length is
 * at least minlen chars.  Returns 1 for such a match and 0 otherwise.
 */
int prefixCheck(char *s, char *t, int minlen)
{
    int matched = 0;

    /* toupper() is only defined for unsigned char values (and EOF), so
       convert before calling it */
    while (*s && *t &&
           toupper((unsigned char) *s) == toupper((unsigned char) *t)) {
        matched++;
        s++;
        t++;
    }

    /* a match requires that one of the strings was used up completely */
    return ((!*s || !*t) && matched >= minlen);
}


/*
 * getBytes
 *
 * Read a region of the file of length size into
 * the buffer specified by buf.
 *
 * fread() is called repeatedly until all size bytes have arrived.  If it
 * delivers nothing before that (end of file or read error), an error
 * naming filename is printed on stderr and the program ends with
 * exit(-1).  A size of 0 reads nothing.  buf must have room for size
 * bytes.
 */
void getBytes(FILE *fpSFF, unsigned int size, unsigned char *buf, char *filename)
{
    /* size_t counters: an int would turn sizes above INT_MAX into a
       negative count and silently skip the read */
    size_t numread = 0;
    size_t numleft = size;

    while (numleft > 0) {
        size_t bytes = fread(buf + numread, 1, numleft, fpSFF);

        if (bytes == 0) {
            fprintf(stderr, "Error:  Unable to read SFF file:  %s\n",
                    filename);
            exit(-1);
        }
        numread += bytes;
        numleft -= bytes;
    }
}


/*
 * getuint8
 *
 * Return the single byte at b as an unsigned integer (0-255).
 */
uint32_t getuint8(unsigned char *b)
{
    uint32_t value = *b;

    return value;
}


/*
 * getuint16
 *
 * Decode the big endian 2-byte value stored at b: the first byte is the
 * high-order byte.
 */
uint32_t getuint16(unsigned char *b)
{
    uint32_t high = b[0];
    uint32_t low = b[1];

    return (high << 8) | low;
}


/*
 * getuint32
 *
 * Decode the big endian 4-byte value stored at b: the first byte is the
 * most significant one.
 */
uint32_t getuint32(unsigned char *b)
{
    uint32_t value = 0;
    int k;

    /* shift in one byte at a time, most significant first */
    for (k = 0; k < 4; k++) {
        value = (value << 8) | (uint32_t) b[k];
    }

    return value;
}

/*
 * getuint4_255
 *
 * Decode a 4-byte index offset whose bytes are the digits of a base-255
 * number, most significant digit first.  This is used to store the index
 * offsets.
 */
unsigned int getuint4_255(unsigned char *b)
{
    unsigned int value = 0;
    int k;

    /* Horner's scheme: multiply by the base, then add the next digit */
    for (k = 0; k < 4; k++) {
        value = value * 255u + (unsigned int) b[k];
    }

    return value;
}

/*
 * getuint64
 *
 * Decode the big endian 8-byte value stored at b: the first byte is the
 * most significant one.
 */
uint64_t getuint64(unsigned char *b)
{
    uint64_t value = 0;
    int k;

    /* shift in one byte at a time, most significant first */
    for (k = 0; k < 8; k++) {
        value = (value << 8) | (uint64_t) b[k];
    }

    return value;
}


/*
 * removeTrailingWhitespace
 *
 * Cut off the whitespace at the end of szLine.  nInitialLength is the
 * current length of the line; the line is terminated in place after its
 * last non-whitespace character and the new length is stored in
 * *pFinalLength (0 when the line is empty or all whitespace).
 */
void removeTrailingWhitespace( char* szLine, int nInitialLength, int* pFinalLength ) {
   int nLast;

   /* isspace() is only defined for unsigned char values (and EOF), so
      convert before calling it */
   nLast = nInitialLength - 1;
   while( nLast >= 0 && isspace( (unsigned char) szLine[ nLast ] ) ) {
      --nLast;
   }

   szLine[ nLast + 1 ] = 0;

   *pFinalLength = nLast + 1;
}


/*
 * getBasename
 *
 * Copy the part of szFullPath that follows its last '/' into szBasename,
 * including the terminating NUL.  A path without any '/' is copied whole;
 * a path that ends in '/' gives an empty string.  szBasename must be
 * large enough for the result.
 */
void getBasename( char* szFullPath, char* szBasename ) {

   const char* pSource;
   char* pTarget;
   const char* pLastSlash;

   pLastSlash = strrchr( szFullPath, '/' );
   pSource = ( pLastSlash != NULL ) ? pLastSlash + 1 : szFullPath;

   /* copy up to and including the terminating null */
   pTarget = szBasename;
   do {
      *pTarget++ = *pSource;
   } while( *pSource++ != '\0' );

}