/*
 * Title: seqIOALF
 *
 * File: seqIOALF.c
 * Purpose: IO of ALF sequences
 * Last update: 9th September 1994
 */

/*
 * Change Log :-
 * 14.01.91 SD
 * when complementing the sequence with an odd number of bases,
 * the middle base position was not adjusted.
 * 15.01.91 SD Put StLouis stuff on compilation flag
 * 15.01.91 SD New include file (opp.h)
 * 02.08.91 SD Changes the mapping of uncertainty codes so that we
 * now only generate A C G T and -
 * Previously... bug in interpreting ALF integer fields.
 * We now treat them as unsigned.
 * 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS
 * and AUTO_CLIP
 * 25.10.91 SD Machine independent I/O...removed BIGENDIAN flag
 * 25.11.91 SD There was a hard limit (of 1024) for allocation of
 * space for number of bases, yet program would
 * read in more if there were any, causing nasties to happen.
 *
 * 11.11.92 LFW added section to actually check that the trace it
 * is trying to open is an ALF file using traceType sub
 *
 * 10.11.92 SD SCF comments now stored in seq data structure
 * 09.09.94 JKB Update to use Read instead of Seq library.
 */

/* RMD I made substantial changes to this file 12/28/90 so as to
 * read sequence data more freely (necessary when reading data from
 * multiple trace files).
 * The affected area is indicated by comments starting RMD, like
 * this one.
 */

/* This file was adapted by LFW from seqIOABI.c.
 * The ALF results file is a concatenation of many files with an
 * index structure at the beginning, consisting of a 512 byte
 * block that we ignore, followed by 128 byte blocks describing
 * each file. All files, including the header region, are rounded
 * up to a multiple of 512 bytes long.
 * The getIndexEntry routines identify the 128 byte index component
 * of interest by matching 4 chars of its ASCII label, then extract
 * the field of choice from that entry.
 *
 * Note that the SUN and PC are of opposite endian-ness, so that
 * we have to provide special routines to read words and longwords
 * from the results file.
Luckily the floating point numbers are * written out in ASCII. */ /* ---- Imports ---- */ #include #include #include #include "Read.h" #include "mach-io.h" #include "xalloc.h" /* ---- Constants ---- */ #define BasesPerLine 50 /* For output formatting */ #define IndexEntryLength ((off_t)128) /* * Here are some labels we will be looking for, four chars packed * into a long word. */ #define EntryLabel ((uint_4) ((((('A'<<8)+'L')<<8)+'F')<<8)+' ') #define BaseEntryLabel ((uint_4) ((((('S'<<8)+'e')<<8)+'q')<<8)+'u') #define DataEntryLabel ((uint_4) ((((('P'<<8)+'r')<<8)+'o')<<8)+'c') /* RMD make enough space for 1024 bases - hard limit */ #define BASELIMIT 1024 /* ---- Internal functions ---- */ /* * From the ALF results file connected to `fp' whose index starts * at byte offset `indexO', return in `val' the `lw'th long word * from the entry labelled `label'. * The result is 0 for failure, 1 for success. */ static int getIndexEntryLW(FILE *fp, off_t indexO, uint_4 label, int lw, uint_4 *val) { off_t entryNum=-1; int i; uint_4 entryLabel; do { entryNum++; if (fseek(fp, indexO+(entryNum*IndexEntryLength), 0) != 0) return 0; if (!be_read_int_4(fp, &entryLabel)) return 0; } while (!(entryLabel == label)); for(i=2; ibase[numBases] = ch; read->prob_A[numBases] = 0; read->prob_C[numBases] = 0; read->prob_G[numBases] = 0; read->prob_T[numBases] = 0; if (bp < last_bp) bp = last_bp; read->basePos[numBases] = bp; ++numBases; } } read->base[numBases] = 0; read->NBases = numBases; } /************************************************************* * Read the trace information *************************************************************/ /* * Traces are stored as 2 byte integers in records in the order of * A C G T A C G T ... 
*/
    /* Position the stream at dataO + header_size before reading samples. */
    if (fseek(fp, (off_t)(dataO+header_size), 0) != 0) goto bail_out;

    num_points = 0;

    /*
     * Read one A, C, G, T sample group per iteration, for at most
     * read->NPoints groups.  Any failed read abandons the whole Read.
     * maxTraceVal is raised whenever a sample exceeds the current maximum.
     * NOTE(review): le_read_int_2 is presumably a little-endian 2-byte
     * reader from mach-io.h - confirm.
     */
    for (i=0; i < read->NPoints; i++) {
        if (!le_read_int_2(fp, &(read->traceA[i]))) goto bail_out;
        if (read->maxTraceVal < read->traceA[i])
            read->maxTraceVal = read->traceA[i];

        if (!le_read_int_2(fp, &(read->traceC[i]))) goto bail_out;
        if (read->maxTraceVal < read->traceC[i])
            read->maxTraceVal = read->traceC[i];

        if (!le_read_int_2(fp, &(read->traceG[i]))) goto bail_out;
        if (read->maxTraceVal < read->traceG[i])
            read->maxTraceVal = read->traceG[i];

        if (!le_read_int_2(fp, &(read->traceT[i]))) goto bail_out;
        if (read->maxTraceVal < read->traceT[i])
            read->maxTraceVal = read->traceT[i];

        /*
         * Stop early on a group whose four samples are all zero, but only
         * once i is past numPoints-64.
         */
        if (read->traceA[i]==0 &&
            read->traceT[i]==0 &&
            read->traceC[i]==0 &&
            read->traceG[i]==0 &&
            i > (numPoints-64)) break;

        /*
         * NOTE(review): num_points counts the groups kept, but it is not
         * read again in the visible part of this function - confirm whether
         * read->NPoints was meant to be updated from it.
         */
        num_points++;
    }

    /* SUCCESS */
    read->format = TT_ALF;
    return(read);

    /* FAILURE */
    /* Release the partially filled Read (if any) and report failure. */
 bail_out:
    if (read)
        read_deallocate(read);

    return NULLRead;
}

/*
 * Read the ALF format sequence with name `fn' into a Read structure.
 * All printing characters (as defined by ANSI C `isprint')
 * are accepted, but `N's are translated to `-'s. In this respect we
 * are adhering (more or less) to the CSET_DEFAULT uncertainty code set.
 *
 * The file is opened in binary mode, parsed by fread_alf() and closed
 * again.  On success a copy of `fn' is stored in read->trace_name; if
 * that allocation returns NULL the copy is skipped and the Read is
 * still returned.
 *
 * Returns:
 * Read * - Success, the Read structure read.
 * NULLRead - Failure (the file could not be opened, or fread_alf()
 *            failed).
 */
Read *read_alf(char *fn) {
    FILE *fp;
    Read *read;

    /* Open file */
    if ((fp = fopen(fn, "rb")) == NULL)
        return NULLRead;

    read = fread_alf(fp);
    /* The stream is closed whether or not parsing succeeded. */
    fclose(fp);

    /* Record the file name in the Read; needs strlen(fn)+1 bytes. */
    if (read && (read->trace_name = (char *)xmalloc(strlen(fn)+1)))
        strcpy(read->trace_name, fn);

    return read;
}

/*
 * Write to an ALF file - unsupported.
 *
 * Neither `fn' nor `read' is used.  A message is printed to stderr and
 * -1 is always returned.
 */
/* ARGSUSED */
int write_alf(char *fn, Read *read) {
    fprintf(stderr, "ALF write support is unavailable\n");
    return -1;
}

/*
 * Write to an ALF file pointer - unsupported.
 *
 * Neither `fp' nor `read' is used.  A message is printed to stderr and
 * -1 is always returned.
 */
/* ARGSUSED */
int fwrite_alf(FILE *fp, Read *read) {
    fprintf(stderr, "ALF write support is unavailable\n");
    return -1;
}