/* * Copyright (c) 2005, 2007, 2010 Genome Research Ltd. * Author(s): James Bonfield * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger * Institute nor the names of its contributors may be used to endorse * or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Author(s): Simon Dear, James Bonfield * * Copyright (c) 1991-1992, 1997-1998, 2001 MEDICAL RESEARCH COUNCIL * All rights reserved * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1 Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2 Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF * MOLECULAR BIOLOGY nor the names of its contributors may be used * to endorse or promote products derived from this software without * specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) Medical Research Council 1994. All rights reserved. * * Permission to use, copy, modify and distribute this software and its * documentation for any purpose is hereby granted without fee, provided that * this copyright and notice appears in all copies. * * This file was written by James Bonfield, Simon Dear, Rodger Staden, * as part of the Staden Package at the MRC Laboratory of Molecular * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. * * MRC disclaims all warranties with regard to this software. */ /* * Title: seqIOALF * * File: seqIOALF.c * Purpose: IO of ALF sequences * Last update: 9th September 1994 */ /* * Change Log :- * 14.01.91 SD * when complimenting the sequence with an odd number of bases, * the middle base position was not adjusted. * 15.01.91 SD Put StLouis stuff on compilation flag * 15.01.91 SD New include file (opp.h) * 02.08.91 SD Changes the mapping of uncertainty codes so that we * now only generate A C G T and - * Previously... bug in interpreting ALF integer fields. * We now treat them as unsigned. * 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS * and AUTO_CLIP * 25.10.91 SD Machine independent I/O...removed BIGENDIAN flag * 25.11.91 SD There was a hard limit (of 1024) for allocation of * space for number of bases, yet program would * read in more if there were any, causing nasties to happen. * * 11.11.92 LFW added section to actually check that the trace it * is trying to open is an ALF file using traceType sub * * 10.11.92 SD SCF comments now stored in seq data structure * 09.09.94 JKB Update to use Read instead of Seq library. * 04.03.98 JKB Look for "Raw data" when "Processed data" is not found. */ /* RMD I made substantial changes to this file 12/28/90 so as to * read sequence data more freely (necessary when reading data from * multiple trace files). * The affected area is indicated by comments starting RMD, like * this one. */ /* This file was adapted by LFW from seqIOABI.c. * The ALF results file is a concatenation of many files with an * index structure at the beginning, consisting of a 512 byte * block that we ignore, followed by 128 byte blocks describing * each file. All files, including the header region, are rounded * up to a multiple of 512 bytes long. * The getIndexEntry routines identify the 128 byte index component * of interest by matching 4 chars of its ASCII label, then extract * the field of choice from that entry. * * Note that the SUN and PC are of opposite endian-ness, so that * we have to provide special routines to read words and longwords * from the results file. Luckily the floating point numbers are * written out in ASCII. */ /* ---- Imports ---- */ #ifdef HAVE_CONFIG_H #include "io_lib_config.h" #endif #include #include #include #include "io_lib/stdio_hack.h" #include "io_lib/Read.h" #include "io_lib/mach-io.h" #include "io_lib/xalloc.h" /* ---- Constants ---- */ #define BasesPerLine 50 /* For output formatting */ #define IndexEntryLength ((off_t)128) /* * Here are some labels we will be looking for, four chars packed * into a long word. */ #define EntryLabel ((uint_4) ((((('A'<<8)+'L')<<8)+'F')<<8)+' ') #define BaseEntryLabel ((uint_4) ((((('S'<<8)+'e')<<8)+'q')<<8)+'u') #define DataEntryLabel ((uint_4) ((((('P'<<8)+'r')<<8)+'o')<<8)+'c') #define RawDataEntryLabel ((uint_4) ((((('R'<<8)+'a')<<8)+'w')<<8)+' ') /* RMD make enough space for bases - hard limit */ #define BASELIMIT 4096 /* ---- Internal functions ---- */ /* * From the ALF results file connected to `fp' whose index starts * at byte offset `indexO', return in `val' the `lw'th long word * from the entry labelled `label'. * The result is 0 for failure, 1 for success. */ static int getIndexEntryLW(FILE *fp, off_t indexO, uint_4 label, int lw, uint_4 *val) { off_t entryNum=-1; int i; uint_4 entryLabel; do { entryNum++; if (fseek(fp, indexO+(entryNum*IndexEntryLength), 0) != 0) return 0; if (!be_read_int_4(fp, &entryLabel)) return 0; } while (!(entryLabel == label)); for(i=2; ibase[numBases] = ch; read->prob_A[numBases] = 0; read->prob_C[numBases] = 0; read->prob_G[numBases] = 0; read->prob_T[numBases] = 0; read->basePos[numBases] = bp; ++numBases; } } read->base[numBases] = 0; read->NBases = numBases; } /************************************************************* * Read the trace information *************************************************************/ if (sections & READ_SAMPLES) { /* * Traces are stored as 2 byte integers in records in the order of * A C G T A C G T ... */ if (fseek(fp, (off_t)(dataO+header_size), 0) != 0) goto bail_out; num_points = 0; for (i=0; i < read->NPoints; i++) { if (!le_read_int_2(fp, &(read->traceA[i]))) goto bail_out; if (read->maxTraceVal < read->traceA[i]) read->maxTraceVal = read->traceA[i]; if (!le_read_int_2(fp, &(read->traceC[i]))) goto bail_out; if (read->maxTraceVal < read->traceC[i]) read->maxTraceVal = read->traceC[i]; if (!le_read_int_2(fp, &(read->traceG[i]))) goto bail_out; if (read->maxTraceVal < read->traceG[i]) read->maxTraceVal = read->traceG[i]; if (!le_read_int_2(fp, &(read->traceT[i]))) goto bail_out; if (read->maxTraceVal < read->traceT[i]) read->maxTraceVal = read->traceT[i]; if (read->traceA[i]==0 && read->traceT[i]==0 && read->traceC[i]==0 && read->traceG[i]==0 && i > (numPoints-64)) break; num_points++; } } /* SUCCESS */ read->format = TT_ALF; return(read); /* FAILURE */ bail_out: if (read) read_deallocate(read); return NULLRead; } /* * Read the ALF format sequence with name `fn' into a Read structure. * All printing characters (as defined by ANSII C `isprint') * are accepted, but `N's are translated to `-'s. In this respect we * are adhering (more or less) to the CSET_DEFAULT uncertainty code set. * * Returns: * Read * - Success, the Read structure read. * NULLRead - Failure. */ Read *read_alf(char *fn) { FILE *fp; Read *read; /* Open file */ if ((fp = fopen(fn, "rb")) == NULL) return NULLRead; read = fread_alf(fp); fclose(fp); if (read && (read->trace_name = (char *)xmalloc(strlen(fn)+1))) strcpy(read->trace_name, fn); return read; } /* * Write to an ALF file - unsupported. */ /* ARGSUSED */ int write_alf(char *fn, Read *read) { fprintf(stderr, "ALF write support is unavailable\n"); return -1; } /* * Write to an ALF file - unsupported. */ /* ARGSUSED */ int fwrite_alf(FILE *fp, Read *read) { fprintf(stderr, "ALF write support is unavailable\n"); return -1; }