/*
 * Filter for cluster records.
 *
 * Each input record is either a parenthesised binary tree of the form
 * "(left score right)", where left/right are a locus name or another
 * tree, or an arbitrary line of text that is copied through unchanged.
 *
 * Command-line arguments:
 *   -n                  do not read a leading ID token from each record
 *   -i ID               prefix every record with "ID.<n>\t", n counting from 1
 *                       (like -n, no leading ID token is read from the input)
 *   -m                  do not echo trees; in the ID-reading mode print
 *                       "locus\tscore\tlocus" of the highest-scoring leaf pair
 *   -r LOCUS            print a leaf pair whose second locus is LOCUS with
 *                       its two loci swapped
 *   -s L1 SCORE L2 ID   print ID in place of the leaf pair (L1 SCORE L2)
 *   anything else       name of the input file (default: stdin)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TRUE 1
#define FALSE 0
#define MAX_LINE 20000

/* Longest identifier / locus name that is stored, excluding the NUL. */
#define ID_MAX 479
#define LOCUS_MAX 59

/* Turn a numeric macro into a string so it can be used as a scanf width. */
#define STRINGIFY_(x) #x
#define STRINGIFY(x) STRINGIFY_(x)

char cluster_id[ID_MAX + 1], ref_locus[ID_MAX + 1];
char msp_locus_1[ID_MAX + 1], msp_locus_2[ID_MAX + 1];
char sub_locus_1[ID_MAX + 1], sub_locus_2[ID_MAX + 1];
char sub_cluster_id[ID_MAX + 1];

FILE *fp;               /* input stream shared by main() and read_node() */
int ref_true = FALSE;   /* set by -r */
int not_msp = TRUE;     /* cleared by -m */
int sub_true = FALSE;   /* set by -s */
float msp_score = -1;   /* best leaf-pair score seen in the current tree */
float sub_score;        /* score given with -s */

void read_node(void);

/*
 * Copy a command-line argument into a fixed-size buffer, terminating the
 * program instead of overflowing the buffer when the argument is too long.
 */
static void copy_arg(char *dst, size_t dst_size, const char *src)
{
    if (strlen(src) >= dst_size) {
        fprintf(stderr, "ERROR: argument %s too long !!\n", src);
        exit(1);
    }
    strcpy(dst, src);
}

/*
 * Read one whitespace-delimited token and the whitespace after it (the
 * bounded equivalent of "%s ").  At most LOCUS_MAX characters are stored;
 * the remainder of an over-long token is discarded so the stream stays
 * aligned.  locus is left empty when no token could be read.
 */
static void read_locus_token(char *locus)
{
    locus[0] = '\0';
    if (fscanf(fp, "%" STRINGIFY(LOCUS_MAX) "s", locus) != 1)
        return;
    fscanf(fp, "%*[^ \t\n\v\f\r]");     /* drop overflow of the token */
    fscanf(fp, " ");                    /* skip trailing whitespace */
}

/*
 * Read everything up to (not including) the next ')' into locus, then
 * consume that ')'.  At most LOCUS_MAX characters are stored; any excess
 * is discarded.  locus is left empty when ')' is the very next character.
 */
static void read_locus_to_paren(char *locus)
{
    locus[0] = '\0';
    fscanf(fp, "%" STRINGIFY(LOCUS_MAX) "[^)]", locus);
    fscanf(fp, "%*[^)]");               /* drop overflow up to the ')' */
    getc(fp);                           /* get closed paren */
}

/*
 * Parse the command line, then copy the input to stdout record by record,
 * handing every record that starts with '(' to read_node().
 * Returns 0; exits with status 1 on a bad command line or unreadable file.
 */
int main(int argc, char *argv[])
{
    int no_id = FALSE;
    int new_id = FALSE;
    int do_stdin = TRUE;
    int n = 1;
    int next_char;                      /* int so that EOF is representable */
    char new_cluster_id[ID_MAX + 1];
    char cluster[MAX_LINE];

    (void) argc;                        /* argv is NULL-terminated */

    argv++;
    while (*argv) {
        if (!strcmp(*argv, "-n")) {
            no_id = TRUE;
            argv++;
        } else if (!strcmp(*argv, "-m")) {
            not_msp = FALSE;
            argv++;
        } else if (!strcmp(*argv, "-i")) {
            argv++;
            if (!*argv) {
                fprintf(stderr, "ERROR: cluster ID not found !!\n");
                exit(1);
            }
            copy_arg(new_cluster_id, sizeof new_cluster_id, *argv);
            new_id = TRUE;
            argv++;
        } else if (!strcmp(*argv, "-r")) {
            argv++;
            if (!*argv) {
                fprintf(stderr, "ERROR: ref_locus not found !!\n");
                exit(1);
            }
            copy_arg(ref_locus, sizeof ref_locus, *argv);
            ref_true = TRUE;
            argv++;
        } else if (!strcmp(*argv, "-s")) {
            /* -s takes four operands; && short-circuits at the first NULL. */
            if (!argv[1] || !argv[2] || !argv[3] || !argv[4]) {
                fprintf(stderr,
                        "ERROR: -s needs locus_1, score, locus_2 and cluster ID !!\n");
                exit(1);
            }
            copy_arg(sub_locus_1, sizeof sub_locus_1, argv[1]);
            sub_score = (float) atof(argv[2]);
            copy_arg(sub_locus_2, sizeof sub_locus_2, argv[3]);
            copy_arg(sub_cluster_id, sizeof sub_cluster_id, argv[4]);
            sub_true = TRUE;
            argv += 5;
        } else {
            do_stdin = FALSE;
            if (fp)
                fclose(fp);             /* a later file name replaces an earlier one */
            if (!(fp = fopen(*argv, "r"))) {
                fprintf(stderr, "ERROR: input file %s not found !!\n", *argv);
                exit(1);
            }
            argv++;
        }
    }

    if (do_stdin)
        fp = stdin;

    if (no_id || new_id) {
        /* Records carry no ID token: peek at the first character of each. */
        while ((next_char = getc(fp)) != EOF) {
            ungetc(next_char, fp);
            if (new_id)
                printf("%s.%d\t", new_cluster_id, n);
            if (next_char == '(') {
                read_node();
                printf("\n");
                getc(fp);               /* get CR */
            } else if (fgets(cluster, MAX_LINE, fp)) {
                printf("%s", cluster);
            }
            n++;
        }
    } else {
        /* Every record starts with an ID token followed by whitespace. */
        while (fscanf(fp, "%" STRINGIFY(ID_MAX) "s ", cluster_id) == 1) {
            printf("%s\t", cluster_id);
            next_char = getc(fp);
            ungetc(next_char, fp);      /* no-op when next_char is EOF */
            if (next_char == '(') {
                read_node();
                if (not_msp) {
                    printf("\n");
                } else {
                    printf("%s\t%.2f\t%s\n", msp_locus_1, msp_score, msp_locus_2);
                    msp_score = -1;     /* reset for the next tree */
                }
            } else if (fgets(cluster, MAX_LINE, fp)) {
                /* Only print when a line was actually read (not at EOF). */
                printf("%s", cluster);
            }
        }
    }

    if (!do_stdin)
        fclose(fp);
    return 0;
}

/*
 * Read one node "(left score right)" from fp, including its closing paren.
 * left and right are each either a locus name or a nested node, which is
 * handled recursively.
 *
 * When not_msp is set the node is echoed to stdout with scores formatted
 * as %.2f, applying the -r swap and -s substitution to leaf pairs.
 * Otherwise nothing is printed and the highest-scoring leaf pair is
 * recorded in msp_score / msp_locus_1 / msp_locus_2.
 */
void read_node(void)
{
    char locus_1[LOCUS_MAX + 1] = "";
    char locus_2[LOCUS_MAX + 1] = "";
    float score = 0.0f;     /* stays 0 if the score cannot be parsed */
    int next_char_1, next_char_2;

    getc(fp);                           /* get open paren */
    next_char_1 = getc(fp);
    ungetc(next_char_1, fp);            /* peek only; no-op on EOF */

    if (next_char_1 == '(') {
        /* Left operand is a nested node. */
        if (not_msp)
            printf("(");
        read_node();
        fscanf(fp, "%f ", &score);
        if (not_msp)
            printf(" %.2f ", score);

        next_char_2 = getc(fp);
        ungetc(next_char_2, fp);
        if (next_char_2 == '(') {
            read_node();
            getc(fp);                   /* get closed paren */
            if (not_msp)
                printf(")");
        } else {
            read_locus_to_paren(locus_2);
            if (not_msp)
                printf("%s)", locus_2);
            fflush(stdout);
        }
    } else {
        /* Left operand is a locus name. */
        read_locus_token(locus_1);
        fscanf(fp, "%f ", &score);

        next_char_2 = getc(fp);
        ungetc(next_char_2, fp);
        if (next_char_2 == '(') {
            if (not_msp) {
                printf("(%s ", locus_1);
                printf("%.2f ", score);
                fflush(stdout);
            }
            read_node();
            getc(fp);                   /* get closed paren */
            if (not_msp)
                printf(")");
        } else {
            /* Leaf pair: both operands are locus names. */
            read_locus_to_paren(locus_2);
            if (not_msp) {
                if (ref_true && !strcmp(locus_2, ref_locus)) {
                    /* -r: print the pair with its loci swapped. */
                    printf("(%s %.2f %s)", locus_2, score, locus_1);
                } else if (sub_true && !strcmp(locus_1, sub_locus_1)
                           && !strcmp(locus_2, sub_locus_2)
                           && (score == sub_score)) {
                    /* -s: exact float match against the parsed -s score. */
                    printf("%s", sub_cluster_id);
                } else {
                    printf("(%s %.2f %s)", locus_1, score, locus_2);
                }
                fflush(stdout);
            } else if (score > msp_score) {
                msp_score = score;
                strcpy(msp_locus_1, locus_1);
                strcpy(msp_locus_2, locus_2);
            }
        }
    }
}