/* * Copyright (c) 2005-2008, 2013 Genome Research Ltd. * Author(s): James Bonfield * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger * Institute nor the names of its contributors may be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H #include "io_lib_config.h" #endif #include #include #include #include #include #include #include #include #include #ifdef _MSC_VER #include #endif #include "io_lib/os.h" #include "io_lib/mFILE.h" #include "io_lib/vlen.h" #ifdef HAVE_MMAP #include #endif //Moved from mFILE.h to avoid conflicting with WinUser.h #define MF_READ 1 #define MF_WRITE 2 #define MF_APPEND 4 #define MF_BINARY 8 #define MF_TRUNC 16 #define MF_MODEX 32 #define MF_MMAP 64 /* * This file contains memory-based versions of the most commonly used * (by io_lib) stdio functions. * * Actual file IO takes place either on opening or closing an mFILE. * * Coupled to this are a bunch of rather scary macros which can be obtained * by including stdio_hack.h. It is recommended though that you use mFILE.h * instead and replace fopen with mfopen (etc). This is more or less * mandatory if you wish to use both FILE and mFILE structs in a single file. */ static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */ /* * Reads the entirety of fp into memory. If 'fn' exists it is the filename * associated with fp. This will be used for more optimal reading (via a * stat to identify the size and a single read). Otherwise we use successive * reads until EOF. * * Returns a malloced buffer on success of length *size * NULL on failure */ static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) { struct stat sb; char *data = NULL; size_t allocated = 0, used = 0; int bufsize = 8192; #ifdef _WIN32 if (binary) _setmode(_fileno(fp), _O_BINARY); else _setmode(_fileno(fp), _O_TEXT); #endif if (fn && -1 != stat(fn, &sb)) { data = malloc(allocated = sb.st_size); bufsize = sb.st_size; } else { fn = NULL; } do { size_t len; if (used + bufsize > allocated) { allocated += bufsize; data = realloc(data, allocated); } len = fread(data + used, 1, allocated - used, fp); if (len > 0) used += len; } while (!feof(fp) && (fn == NULL || used < sb.st_size)); *size = used; return data; } #ifdef HAVE_MMAP /* * mmaps in the file, but only for reading currently. * * Returns 0 on success * -1 on failure */ int mfmmap(mFILE *mf, FILE *fp, const char *fn) { struct stat sb; if (stat(fn, &sb) != 0) return -1; mf->size = sb.st_size; mf->data = mmap(NULL, mf->size, PROT_READ, MAP_SHARED, fileno(fp), 0); if (!mf->data) return -1; mf->alloced = 0; return 0; } #endif /* * Creates and returns m_channel[0]. * We initialise this on the first attempted read, which then slurps in * all of stdin until EOF is met. */ mFILE *mstdin(void) { if (m_channel[0]) return m_channel[0]; m_channel[0] = mfcreate(NULL, 0); if (NULL == m_channel[0]) return NULL; m_channel[0]->fp = stdin; return m_channel[0]; } static void init_mstdin(void) { static int done_stdin = 0; if (done_stdin) return; m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1); m_channel[0]->mode = MF_READ; done_stdin = 1; } /* * Creates and returns m_channel[1]. This is the fake for stdout. It starts as * an empty buffer which is physically written out only when mfflush or * mfclose are called. */ mFILE *mstdout(void) { if (m_channel[1]) return m_channel[1]; m_channel[1] = mfcreate(NULL, 0); if (NULL == m_channel[1]) return NULL; m_channel[1]->fp = stdout; m_channel[1]->mode = MF_WRITE; return m_channel[1]; } /* * Stderr as an mFILE. * The code handles stderr by returning m_channel[2], but also checking * for stderr in fprintf (the common usage of it) to auto-flush. */ mFILE *mstderr(void) { if (m_channel[2]) return m_channel[2]; m_channel[2] = mfcreate(NULL, 0); if (NULL == m_channel[2]) return NULL; m_channel[2]->fp = stderr; m_channel[2]->mode = MF_WRITE; return m_channel[2]; } /* * For creating existing mFILE pointers directly from memory buffers. */ mFILE *mfcreate(char *data, int size) { mFILE *mf = (mFILE *)malloc(sizeof(*mf)); if (NULL == mf) return NULL; mf->fp = NULL; mf->data = data; mf->alloced = size; mf->size = size; mf->eof = 0; mf->offset = 0; mf->flush_pos = 0; mf->mode = MF_READ | MF_WRITE; return mf; } /* * Recreate an existing mFILE to house new data/size. * It also rewinds the file. */ void mfrecreate(mFILE *mf, char *data, int size) { if (mf->data) free(mf->data); mf->data = data; mf->size = size; mf->alloced = size; mf->eof = 0; mf->offset = 0; mf->flush_pos = 0; } /* * Creates a new mFILE to contain the contents of the FILE pointer. * This mFILE is purely for in-memory operations and has no links to the * original FILE* it came from. It also doesn't close the FILE pointer. * Consider using mfreopen() is you need different behaviour. * * Returns mFILE * on success * NULL on failure. */ mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) { mFILE *mf; /* Open using mfreopen() */ if (NULL == (mf = mfreopen(path, mode_str, fp))) return NULL; /* Disassociate from the input stream */ mf->fp = NULL; return mf; } /* * Converts a FILE * to an mFILE *. * Use this for wrapper functions to turn external prototypes requring * FILE * as an argument into internal code using mFILE *. */ mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { mFILE *mf; int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; /* Parse mode: * r = read file contents (if truncated => don't read) * w = write on close * a = position at end of buffer * x = position at same location as the original fp, don't seek on flush * + = for update (read and write) * m = mmap (read only) */ if (strchr(mode_str, 'r')) r = 1, mode |= MF_READ; if (strchr(mode_str, 'w')) w = 1, mode |= MF_WRITE | MF_TRUNC; if (strchr(mode_str, 'a')) w = a = 1, mode |= MF_WRITE | MF_APPEND; if (strchr(mode_str, 'b')) b = 1, mode |= MF_BINARY; if (strchr(mode_str, 'x')) x = 1; if (strchr(mode_str, '+')) { w = 1, mode |= MF_READ | MF_WRITE; if (a) r = 1; } #ifdef HAVE_MMAP if (strchr(mode_str, 'm')) if (!w) mode |= MF_MMAP; #endif if (r) { mf = mfcreate(NULL, 0); if (NULL == mf) return NULL; if (!(mode & MF_TRUNC)) { #ifdef HAVE_MMAP if (mode & MF_MMAP) { if (mfmmap(mf, fp, path) == -1) { mf->data = NULL; mode &= ~MF_MMAP; } } #endif if (!mf->data) { mf->data = mfload(fp, path, &mf->size, b); mf->alloced = mf->size; if (!a) fseek(fp, 0, SEEK_SET); } } } else if (w) { /* Write - initialise the data structures */ mf = mfcreate(NULL, 0); if (NULL == mf) return NULL; } else { fprintf(stderr, "Must specify either r, w or a for mode\n"); return NULL; } mf->fp = fp; mf->mode = mode; if (x) { mf->mode |= MF_MODEX; } if (a) { mf->flush_pos = mf->size; fseek(fp, 0, SEEK_END); } return mf; } /* * Opens a file. If we have read access (r or a+) then it loads the entire * file into memory. If We have write access then the pathname is stored. * We do not actually write until an mfclose, which then checks this pathname. */ mFILE *mfopen(const char *path, const char *mode) { FILE *fp; char mode2[11]; int i1 = 0, i2 = 0; /* Remove the 'm' mmap symbol from mode before calling fopen() as * MS Visual Studio dislikes it remaining. */ while (i1 < 10 && mode[i1]) { if (mode[i1] != 'm') mode2[i2++] = mode[i1]; i1++; } mode2[i2] = 0; if (NULL == (fp = fopen(path, mode2))) return NULL; return mfreopen(path, mode, fp); } /* * Closes an mFILE. If the filename is known (implying write access) then this * also writes the data to disk. * * Stdout is handled by calling mfflush which writes to stdout if appropriate. */ int mfclose(mFILE *mf) { if (!mf) return -1; mfflush(mf); #ifdef HAVE_MMAP if ((mf->mode & MF_MMAP) && mf->data) { /* Mmaped */ munmap(mf->data, mf->size); mf->data = NULL; } #endif if (mf->fp) fclose(mf->fp); mfdestroy(mf); return 0; } /* * Closes the file pointer contained within the mFILE without destroying * the in-memory data. * * Attempting to do this on an mmaped buffer is an error. */ int mfdetach(mFILE *mf) { if (!mf) return -1; mfflush(mf); if (mf->mode & MF_MMAP) return -1; if (mf->fp) { fclose(mf->fp); mf->fp = NULL; } return 0; } /* * Destroys an mFILE structure but does not flush or close it */ int mfdestroy(mFILE *mf) { if (!mf) return -1; if (mf->data) free(mf->data); free(mf); return 0; } /* * Steals that data out of an mFILE. The mFILE itself will be closed. * It is up to the caller to free the stolen buffer. If size_out is * not NULL, mf->size will be stored in it. * This is more-or-less the opposite of mfcreate(). * * Note, we cannot steal the allocated buffer from an mmaped mFILE. */ void *mfsteal(mFILE *mf, size_t *size_out) { void *data; if (!mf) return NULL; data = mf->data; if (NULL != size_out) *size_out = mf->size; if (mfdetach(mf) != 0) return NULL; mf->data = NULL; mfdestroy(mf); return data; } /* * Seek/tell functions. Nothing more than updating and reporting an * in-memory index. NB we can seek on stdin or stdout even provided we * haven't been flushing. */ int mfseek(mFILE *mf, long offset, int whence) { switch (whence) { case SEEK_SET: mf->offset = offset; break; case SEEK_CUR: mf->offset += offset; break; case SEEK_END: mf->offset = mf->size + offset; break; default: errno = EINVAL; return -1; } mf->eof = 0; return 0; } long mftell(mFILE *mf) { return mf->offset; } void mrewind(mFILE *mf) { mf->offset = 0; mf->eof = 0; } /* * mftruncate is not directly a translation of ftruncate as the latter * takes a file descriptor instead of a FILE *. It performs the analogous * role though. * * If offset is -1 then the file is truncated to be the current file * offset. */ void mftruncate(mFILE *mf, long offset) { mf->size = offset != -1 ? offset : mf->offset; if (mf->offset > mf->size) mf->offset = mf->size; } int mfeof(mFILE *mf) { return mf->eof; } /* * mFILE read/write functions. Basically these turn fread/fwrite syntax * into memcpy statements, with appropriate memory handling for writing. */ size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) { size_t len; char *cptr = (char *)ptr; if (mf == m_channel[0]) init_mstdin(); if (mf->size <= mf->offset) return 0; len = size * nmemb <= mf->size - mf->offset ? size * nmemb : mf->size - mf->offset; if (!size) return 0; memcpy(cptr, &mf->data[mf->offset], len); mf->offset += len; if (len != size * nmemb) { mf->eof = 1; } return len / size; } size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) { if (!(mf->mode & MF_WRITE)) return 0; /* Append mode => forced all writes to end of file */ if (mf->mode & MF_APPEND) mf->offset = mf->size; /* Make sure we have enough room */ while (size * nmemb + mf->offset > mf->alloced) { size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; void * new_data = realloc(mf->data, new_alloced); if (NULL == new_data) return 0; mf->alloced = new_alloced; mf->data = new_data; } /* Record where we need to reflush from */ if (mf->offset < mf->flush_pos) mf->flush_pos = mf->offset; /* Copy the data over */ memcpy(&mf->data[mf->offset], ptr, size * nmemb); mf->offset += size * nmemb; if (mf->size < mf->offset) mf->size = mf->offset; return nmemb; } int mfgetc(mFILE *mf) { if (mf == m_channel[0]) init_mstdin(); if (mf->offset < mf->size) { return (unsigned char)mf->data[mf->offset++]; } mf->eof = 1; return -1; } int mungetc(int c, mFILE *mf) { if (mf->offset > 0) { mf->data[--mf->offset] = c; return c; } mf->eof = 1; return -1; } char *mfgets(char *s, int size, mFILE *mf) { int i; if (mf == m_channel[0]) init_mstdin(); *s = 0; for (i = 0; i < size-1;) { if (mf->offset < mf->size) { s[i] = mf->data[mf->offset++]; if (s[i++] == '\n') break; } else { mf->eof = 1; break; } } s[i] = 0; return i ? s : NULL; } /* * Flushes an mFILE. If this is a real open of a file in write mode then * mFILE->fp will be set. We then write out any new data in mFILE since the * last flush. We cannot tell what may have been modified as we don't keep * track of that, so we typically rewrite out the entire file contents between * the last flush_pos and the end of file. * * For stderr/stdout we also reset the offsets so we cannot modify things * we've already output. */ int mfflush(mFILE *mf) { if (!mf->fp) return 0; /* FIXME: only do this when opened in write mode */ if (mf == m_channel[1] || mf == m_channel[2]) { if (mf->flush_pos < mf->size) { size_t bytes = mf->size - mf->flush_pos; if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) return -1; if (0 != fflush(mf->fp)) return -1; } /* Stdout & stderr are non-seekable streams so throw away the data */ mf->offset = mf->size = mf->flush_pos = 0; } /* only flush when opened in write mode */ if (mf->mode & MF_WRITE) { if (mf->flush_pos < mf->size) { size_t bytes = mf->size - mf->flush_pos; if (!(mf->mode & MF_MODEX)) { fseek(mf->fp, mf->flush_pos, SEEK_SET); } if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) return -1; if (0 != fflush(mf->fp)) return -1; } if (ftell(mf->fp) != -1 && ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1) return -1; mf->flush_pos = mf->size; } return 0; } /* * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to * estimate how many additional bytes of storage will be required for the * vsprintf to work. */ int mfprintf(mFILE *mf, char *fmt, ...) { int ret; size_t est_length; va_list args; va_start(args, fmt); est_length = vflen(fmt, args); va_end(args); while (est_length + mf->offset > mf->alloced) { size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; void * new_data = realloc(mf->data, new_alloced); if (NULL == new_data) return -1; mf->alloced = new_alloced; mf->data = new_data; } va_start(args, fmt); ret = vsprintf(&mf->data[mf->offset], fmt, args); va_end(args); if (ret > 0) { mf->offset += ret; if (mf->size < mf->offset) mf->size = mf->offset; } if (mf->fp == stderr) { /* Auto-flush for stderr */ if (0 != mfflush(mf)) return -1; } return ret; } /* * Converts an mFILE from binary to ascii mode by replacing all * cr-nl with nl. * * Primarily used on windows when we've uncompressed a binary file which * happens to be a text file (eg Experiment File). Previously we would have * seeked back to the start and used _setmode(fileno(fp), _O_TEXT). * * Side effect: resets offset and flush_pos back to the start. */ void mfascii(mFILE *mf) { size_t p1, p2; for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) { if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') { p2--; /* delete the \r */ } mf->data[p2] = mf->data[p1]; } mf->size = p2; mf->offset = mf->flush_pos = 0; }