/*
 * Reads sequence files in which each entry consists of one or more comment
 * lines (starting with ';'), a line whose first word is the locus name, and
 * then the sequence itself, and prints every entry to standard output as
 *
 *     <locus name> TAB <sequence>
 *
 * Files are taken from the command line; with no arguments standard input
 * is read instead.
 *
 * NOTE(review): the header names on the original #include lines were lost.
 * <stdio.h> is what the code below requires; <errno.h> is needed for the
 * errno save/restore around perror().
 */
#include <errno.h>
#include <stdio.h>

#define TRUE 1
#define FALSE 0
#define MAX_SEQ 200000          /* capacity of the sequence buffer          */
#define MAX_LINE 300            /* capacity of a header line / locus name   */
#define SEQ_CHUNK 100           /* capacity of one sequence line chunk      */
#define SEQ_MARGIN 70           /* stop reading this close to the limit     */
#define FORM_FEED 0xc           /* also accepted as an entry separator      */
#define min(x, y) (((x) < (y)) ? (x) : (y))

void do_file(FILE *finp);
int next_locus(FILE *finp, char *locus);
int get_sequence(char *inseq, FILE *finp, int *len, int max);

/*
 * Processes every file named on the command line, or standard input when
 * no file is given.  Returns 0 on success and 1 as soon as a file cannot
 * be opened (remaining files are not processed).
 */
int main(int argc, char *argv[])
{
    FILE *finp;
    int saved_errno;

    if (argc > 1) {
        for (argv++; *argv; argv++) {
            finp = fopen(*argv, "r");
            if (!finp) {
                /* fprintf may modify errno, so keep fopen's value for perror */
                saved_errno = errno;
                fprintf(stderr, "Could not read \"%s\" ", *argv);
                errno = saved_errno;
                perror("because");
                return 1;
            }
            do_file(finp);
        }
    } else {
        do_file(stdin);
    }
    return 0;
}

/*
 * Prints one "name TAB sequence" line for every entry found on finp and
 * closes finp when the end of the input is reached.
 *
 * A single trailing '1' or '.' is removed from each sequence before it is
 * printed.  Entries whose sequence is empty are reported on stderr and
 * skipped.
 */
void do_file(FILE *finp)
{
    char locus_name[MAX_LINE], seq[MAX_SEQ];
    int len;

    while (next_locus(finp, locus_name)) {
        len = 0;
        get_sequence(seq, finp, &len, MAX_SEQ);
        seq[len] = '\0';

        /* Only look at the last character when there is one: with an
           empty sequence seq[len - 1] would be read before the buffer. */
        if (len > 0 && (seq[len - 1] == '1' || seq[len - 1] == '.')) {
            len--;
            seq[len] = '\0';
        }

        if (len == 0) {
            fprintf(stderr,
                    "ERROR: seq \"%s\" has length = 0; Continuing ...\n",
                    locus_name);
        } else {
            printf("%s\t%s\n", locus_name, seq);
        }
        /* process (locus_name, seq, len); */
    }
    fclose(finp);
}

/*
process (locus_name, seq, len)
char locus_name[20], seq[10000];
int len;
{
    printf("%s\t%s\n", locus_name, seq);
}
*/

/* --------------------------------------------------------------------
   The functions 'next_locus' and 'get_sequence' are from
   D.V. Faulkner's C-library
   --------------------------------------------------------------------- */

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */
/*STARTDOC*/
int next_locus(FILE *finp, char *locus)

/*
 advances pointer on finp to point to the next sequence.  pointer is left
 pointing to the first line past the locus line.
 locus is returned containing the locus name: the leading run of
 characters greater than ' ' on the locus line, NUL terminated.  The
 buffer behind locus must hold at least MAX_LINE bytes.

 returns TRUE if a locus was found, FALSE if the end of the file was met
 before a locus was found

 ASSUMES that there will be at least one comment line before the locus
 name appears
*/
/*ENDDOC*/
{
    char line[MAX_LINE];
    const char *src;

    /* search till we find a comment line (or a form feed) */
    do {
        if (fgets(line, sizeof line, finp) == NULL)
            return FALSE;
    } while (line[0] != ';' && line[0] != FORM_FEED);

    /* skip further comment lines and lines starting with a space or
       control character */
    do {
        if (fgets(line, sizeof line, finp) == NULL)
            return FALSE;
    } while (line[0] == ';' || line[0] <= ' ');

    /* line now holds the first line that begins with neither a control
       character, a space, nor ';' -- assume it starts with the locus name */
    for (src = line; *src > ' '; src++)
        *locus++ = *src;
    *locus = '\0';

    return TRUE;                /* return saying we found a locus */
}
/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */

/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */
/*STARTDOC*/
int get_sequence(char *inseq, FILE *finp, int *len, int max)

/*
 reads the remainder of a sequence into inseq, where max is the size of
 the buffer behind inseq.  Every character greater than ' ' is stored and
 counted in *len (which the caller initialises); blanks and control
 characters are dropped.  No terminating NUL is written, but at most
 max - 1 characters are ever stored so the caller can append one.

 reading stops at end of file, or at a line starting with ';' or a form
 feed (that character is pushed back so next_locus sees it), or once
 more than max - SEQ_MARGIN characters have been collected, or when the
 buffer is full.

 returns TRUE if some sequence was read or reading stopped because of the
 length limit, FALSE if no sequence information was found
*/
/*ENDDOC*/
{
    char line[SEQ_CHUNK];
    const int capacity = max;   /* real buffer size, kept for the hard cap */
    const int start = *len;
    int ch, i;

    max -= SEQ_MARGIN;          /* soft limit, checked after every line */

    for (;;) {
        ch = getc(finp);
        if (ch == EOF)
            return start != *len;       /* return FALSE if nothing read */
        if (ch == ';' || ch == FORM_FEED) {
            ungetc(ch, finp);           /* belongs to the next entry */
            return start != *len;
        }
        if (ch == '\n')
            continue;                   /* empty line */

        /* Read the rest of the line behind the character already taken.
           The size passed to fgets accounts for line[0], and line[1] is
           pre-terminated so that a final character without a newline
           (fgets returns NULL and leaves the buffer alone) still counts. */
        line[0] = (char)ch;
        line[1] = '\0';
        (void)fgets(line + 1, (int)sizeof line - 1, finp);

        for (i = 0; line[i] != '\0'; i++) {
            if (line[i] > ' ') {
                if (*len >= capacity - 1)
                    return TRUE;        /* buffer full: hard overrun stop */
                *inseq++ = line[i];
                (*len)++;
            }
        }

        if (*len > max)                 /* we have an overrun */
            return TRUE;
    }
}
/* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */