/* * Copyright (c) Medical Research Council 1994. All rights reserved. * * Permission to use, copy, modify and distribute this software and its * documentation for any purpose is hereby granted without fee, provided that * this copyright and notice appears in all copies. * * This file was written by James Bonfield, Simon Dear, Rodger Staden, * as part of the Staden Package at the MRC Laboratory of Molecular * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. * * MRC disclaims all warranties with regard to this software. */ /* Title: seqIOPlain File: seqIOPlain.c Purpose: IO of plain sequences Last update: Tuesday Jan 15 1991 Change log: 28.11.90 SD put undesirables under STLOUIS compilation flag 15.01.91 SD new include file (opp.h) 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS and AUTO_CLIP 08.09.94 JKB Plain files now also uses the ';<' and ';>' lines. 09.09.94 JKB Update to use Read instead of Seq library. */ #define LINE_LENGTH 60 /* * This module should be regarded as part of `read' since it is * privy to the internal structure of `Read'. * * This library also requires use of the mach-io code for the endian * independent machine IO. * * Any references to the writing or reading of edited sequences, * or to the bottom strand were added by lfw */ /* ---- Imports ---- */ #include /* IMPORT: fopen, fclose, fseek, ftell, fgetc */ #include /* IMPORT: isprint */ #include #include "misc.h" #include "plain.h" #include "Read.h" #include "xalloc.h" #include "traceType.h" /* ---- Constants ---- */ #define BasesPerLine 50 /* For output formatting */ /* ---- Exports ---- */ /* * Read the plain format sequence from FILE *fp into a Read structure. * All printing characters (as defined by ANSII C `isprint') * are accepted, but `N's are translated to `-'s. * * Returns: * Read * - Success, the Read structure read. * NULLRead - Failure. */ Read *fread_pln(FILE *fp) { Read *read = NULLRead; off_t fileLen; int ch; char *leftc, *rightc, *leftcp, *rightcp; int first = 1; /* * Find the length of the file. * Use this as an overestimate of the length of the sequence. */ fseek(fp, (off_t) 0, 2); if ((fileLen = ftell(fp)) > MAXINT2) goto bail_out; fseek(fp, (off_t) 0, 0); /* Allocate the sequence */ if (NULLRead == (read = read_allocate(0, fileLen))) goto bail_out; if (NULL == (leftc = (char *)xmalloc(fileLen))) goto bail_out; if (NULL == (rightc = (char *)xmalloc(fileLen))) goto bail_out; leftcp = leftc; rightcp = rightc; /* Read in the bases */ read->NBases = 0; read->format = TT_PLN; while ((ch = fgetc(fp)) != EOF) { if (ch==';') { /* * ;< is left cutoff, * ;> is right cutoff. * Any other ';'s we can treat as a comments. */ ch = fgetc(fp); if (first == 1 && ch != '<' && ch != '>') { int d; char type[5], name[17], line[1024]; line[0] = ch; fgets(&line[1], 1022, fp); if (5 == sscanf(line, "%6d%6d%6d%4c%s", &d, &d, &d, type, name)) { char * p; if ((p = strchr(type, ' '))) *p = 0; read->format = trace_type_str2int(type); read->trace_name = (char *)xmalloc(strlen(name)+1); if (read->trace_name) strcpy(read->trace_name, name); } } else if (ch == '<') { ch = fgetc(fp); while (ch != '\n') { *leftcp++ = ch; ch = fgetc(fp); } } else if (ch == '>') { ch = fgetc(fp); while (ch != '\n') { *rightcp++ = ch; ch = fgetc(fp); } } else { while(ch != '\n' && ch != EOF) ch = fgetc(fp); } } else if (isprint(ch)) { read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch); } first = 0; } *leftcp = *rightcp = 0; read->leftCutoff = strlen(leftc); read->rightCutoff = read->leftCutoff + read->NBases + 1; memmove(&read->base[read->leftCutoff], read->base, read->NBases); memmove(read->base, leftc, read->leftCutoff); memmove(&read->base[read->leftCutoff + read->NBases], rightc, strlen(rightc)); read->NBases += read->leftCutoff + strlen(rightc); read->base[read->NBases] = 0; xfree(leftc); xfree(rightc); /* SUCCESS */ return(read); /* FAILURE */ bail_out: if (read) read_deallocate(read); return NULLRead; } /* * Read the plain format sequence with name `fn' into a Read structure. * All printing characters (as defined by ANSII C `isprint') * are accepted, but `N's are translated to `-'s. * * Returns: * Read * - Success, the Read structure read. * NULLRead - Failure. */ Read *read_pln(char *fn) { FILE *fp; Read *read; /* Open file */ if ((fp = fopen(fn, "r")) == NULL) return NULLRead; read = fread_pln(fp); fclose(fp); if (read && read->trace_name == NULL && (read->trace_name = (char *)xmalloc(strlen(fn)+1))) strcpy(read->trace_name, fn); return read; } /* * Write to a Plain file */ int fwrite_pln(FILE *fp, Read *read) { int i, err = 0; int i_start, i_end; i_start = read->leftCutoff; i_end = read->rightCutoff ? read->rightCutoff-1 : read->NBases; for (i = i_start; i < i_end; i += LINE_LENGTH) if (-1 == fprintf(fp, "%.*s\n", i_end - i > LINE_LENGTH ? LINE_LENGTH : i_end - i, &read->base[i])) err = 1; return err ? -1 : 0; } int write_pln(char *fn, Read *read) { FILE *fp; if ((fp = fopen(fn,"w")) == NULL) return -1; if (fwrite_pln(fp, read)) { fclose(fp); return -1; } fclose(fp); return 0; }