/****************************************************************************** #### ##### ## # # #### # # # # # # # # # # # # # # # # # # # # # # ##### ###### # ## # ### # # # # # # # ## ## ### # # ### # # # # # # # ### #### ******************************************************************************/ /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute for Biomedical Research. All rights reserved. See READ.ME for license. */ /* QTL raw data */ #define INC_LIB #define INC_SHELL #define INC_QLOWLEVEL #define INC_CALLQCTM #define INC_QTOPLEVEL #include "qtl.h" /* External statics */ RAW raw; int **make_interval_genotype; int *left_genotype, *right_genotype; real *map_space; int *order; int segregation_distortion; bool altered_chroms; bool already_printed; /* Internal interfaces */ char name_chars[70]; int nam_len; #define LEFT 0 #define RIGHT 1 #define F2VERSION 3 void getdataln(); void read_map_locus(); void read_quant_trait(); real interval_likelihood(); real pheno_given_geno(); void make_count_recs_etc(); bool missing_data(); /* OBSOLETE, NOT USED */ void initial_prob(); void condition(); void altered_chroms_message(); #define MAXEXCEED "max # indiv or inter exceeded" #define FILE_MISMATCH "\ The '.data' and '.trait' files were not prepared from the same raw\n\ data file. You will need to re-prepare your data in MAPMAKER." #define OLD_FILENUM "\ This file was prepared using an older version of MAPMAKER. You will\n\ need to re-prepare it using the 'prepare data' command in MAPMAKER." /********** PRELIMINARIES **********/ void raw_init() { raw.n_loci= 0; raw.n_traits= 0; raw.trait= NULL; raw.trait_name= NULL; raw.locus= NULL; raw.locus_name= NULL; raw.map_dist= NULL; raw.data_type= NO_DATA; raw.f3=FALSE; raw.left_cond_prob= NULL; raw.right_cond_prob= NULL; raw.file[0]='\0'; raw.trait_eqn= NULL; strcpy(default_backcross_chars, "ABH-"); strcpy(default_intercross_chars,"ABCDH-"); altered_chroms = FALSE; already_printed = FALSE; make_count_recs_etc(); } void make_count_recs_etc() { int **x, *y; matrix(make_interval_genotype,MAX_LOCUS_GENOTYPES,MAX_LOCUS_GENOTYPES,int); array(left_genotype,MAX_INTERVAL_GENOTYPES,int); array(right_genotype,MAX_INTERVAL_GENOTYPES,int); x=make_interval_genotype; x[A][A]=AA; x[A][H]=AH; x[A][B]=AB; x[H][A]=HA; x[H][H]=HH; x[H][B]=HB; x[B][A]=BA; x[B][H]=BH; x[B][B]=BB; y=left_genotype; y[AA]= y[AB]= y[AH]= A; y[HA]= y[HB]= y[HH]= H; y[BA]= y[BB]= y[BH]= B; y=right_genotype; y[AA]= y[BA]= y[HA]= A; y[AH]= y[BH]= y[HH]= H; y[AB]= y[BB]= y[HB]= B; } void free_raw() /* alloced by read_data - NEVER BEEN TESTED!! */ { unmatrix(raw.locus, raw.n_indivs, char); unmatrix(raw.locus_name, raw.n_loci, char); unmatrix(raw.trait, raw.n_indivs, real); unmatrix(raw.trait_name, raw.max_traits, char); unmatrix(raw.trait_eqn, raw.max_traits, char); unmatrix(raw.left_cond_prob, raw.n_indivs, LOCUS_GENOTYPE_PROBS); unmatrix(raw.right_cond_prob, raw.n_indivs, LOCUS_GENOTYPE_PROBS); unarray(raw.map_dist, real); raw.n_loci= raw.n_traits= raw.n_indivs= raw.name_len= 0; raw.file[0]='\0'; } int data_loaded() { return(raw.file[0]!='\0'); } /********** THE DATA READER **********/ void getdataln(fp) /* get next nonblank/noncomment data file line */ FILE *fp; { do { fgetln(fp); BADDATA_line_num++; } while(nullstr(ln) || ln[0]=='#'); BADDATA_ln= ln; } real read_map_distance(); void read_map_locus(); void read_quant_trait(); #define DUMMY_LOCI 1 void read_data(fpa,fpb,fpm,temp,number_of_file) int number_of_file; FILE *fpa, *fpb, *fpm; char *temp; { int k=0,v,l,i,seq_size,e,hist_size,num_chrom,num_loc=0,t_loc=0,j=0,loc; int mapnum,name_len=80, checker, mapm_loci; int n_indivs, n_loci, n_traits, random_check1, random_check2, num_entries; real ma=0,rf; char *size_of_sequence,number_of_lines[TOKLEN+1],*name,*err; char num_of_chroms[TOKLEN+1],pointer_to_lines[TOKLEN+1]; char all_str[5*SEQ_LEN], current_chrom[SEQ_LEN]; char default_intercross_chars[10], default_backcross_chars[10]; name = NULL; run { getdataln(fpa); sscanf(ln,"%*ld %*d %d\n",&mapm_loci); mapm_loci *= 2; if(mapm_loci < 1) error("insufficient genetic data for analysis\n"); array(map_space,mapm_loci,real); array(order,mapm_loci,int); array(name, NAME_LEN+1, char); strcpy(default_backcross_chars, "ABH-"); strcpy(default_intercross_chars,"ABCDH-"); getdataln(fpm); while(nstrcmp(ln, "*Chromosomes:", 13) != 0) getdataln(fpm); /***** DON'T BOTHER WITH THESE MAPM VARIABLES - JUST USE QTL DEFAULTS *****/ /********** while(strcmp(ln, "*Print names: 0") != 0 && strcmp(ln,"*Print names: 1") != 0) getdataln(fpa); sscanf(ln,"%*s %*s %d",&print_names); fscanf(fpa,"%*s %*s %*d\n"); fscanf(fpa,"%*s %*s %d\n",&print_maps); fscanf(fpa,"%*s %*s %d\n",&segregation_distortion); fscanf(fpa,"%*s %d\n",&mapnum); fscanf(fpa,"%*s %*s %d\n",&wizard_mode); getdataln(fpa); getdataln(fpa); getdataln(fpa); **********/ stoken(&ln, sREQUIRED, num_of_chroms); /* get rid of "*Chromosomes:" */ itoken(&ln, iREQUIRED, &num_chrom); if(num_chrom == 0) error("no linkage maps saved...QTL cannot use this data yet"); raw.n_chroms = num_chrom; array(raw.chrom_start,num_chrom,int); array(raw.chrom_n_loci,num_chrom,int); array(raw.original_locus,mapm_loci,int); e=0; /* Now I loop through the chromosomes in order to get all the important loci, and the map distances between them */ strcpy(all_str,""); dum_loc=0; for(i=0;i0 && itoken(&ln,iREQUIRED,&n_indivs) && n_indivs>0 && itoken(&ln,iREQUIRED,&n_loci) && n_loci>0)) send(BADDATA); if (random_check1%10 != F2VERSION) error(OLD_FILENUM); raw.n_indivs = n_indivs; /*if(max_individuals<0) { if (max_individuals>MAX_INDIVIDUALS) { why_(MAXEXCEED); send(BADDATA); } else max_individuals= imaxf(n_indivs+1,0-max_individuals); } else if (max_individuals>0 && n_indivs>max_individuals) { why_(MAXEXCEED); send(BADDATA); }*/ matrix(raw.locus_name,t_loc,name_len,char); matrix(raw.locus,n_indivs,t_loc,char); array(raw.map_dist,t_loc,real); matrix(raw.left_cond_prob, n_indivs, t_loc, LOCUS_GENOTYPE_PROBS); matrix(raw.right_cond_prob, n_indivs, t_loc, LOCUS_GENOTYPE_PROBS); for(i=0;i= .49)) altered_chroms_message(); } } if (DUMMY_LOCI) k++; k++; getdataln(fpb);getdataln(fpb); } } fscanf(fpb,"%*s %*s %*s %d\n",&num_contexts); fscanf(fpb,"%*s %*s %d\n",&active_context); for(i = 0; i < num_contexts; i++) { fscanf(fpb,"%*s %*d\n"); fscanf(fpb,"%*s %d\n", &trait); if(!altered_chroms) { fscanf(fpb,"%*s %*s %d\n",&num_entries); load_table(context[i]->named_sequences, fpb,INDEX_BY_NAME,num_entries); fscanf(fpb,"%*s %*s %d\n",&num_entries); load_table(context[i]->sequence_history, fpb,INDEX_BY_NUMBER,num_entries); context[i]->seq_history_num= context[i]->sequence_history->next_entry_num; } else { context[i]->seq_history_num = 0; } } } on_exit { unarray(name, char); relay_messages; } return; } void save_traitfile(fp) FILE *fp; { char *name,mapnum; int i,j,loci_tot,map_tot,usenum; run { sf(ps,"%d mapmaker trait data\n",raw.filenumber); fpr(fp); sf(ps,"%d\n", raw.n_traits); fpr(fp); for(i=0;inamed_sequences)); fpr(fp); save_table(context[i]->named_sequences,fp,INDEX_BY_NAME); sf(ps,"*Sequence history: %d\n", count_table_entries(context[i]->sequence_history)); fpr(fp); save_table(context[i]->sequence_history,fp,INDEX_BY_NUMBER); } } on_exit { relay_messages; } } void read_map_locus(fp,indivs,t_loc,order,n_loci) FILE *fp; int indivs, n_loci, *order; /* could send BADDATA or IOERROR */ { char **temp_set,c, nam[TOKLEN+1], *namp; int j,k,i,name_len=80,t,num_of_terms = 0; matrix(temp_set,n_loci,80,char); for(i=0;i 0.5) { map_dist= unhaldane_cm(map_dist); } /* sf(ps,"%-3d %lf\n",num,raw.map_dist[num]); print(ps); */ rrange(&map_dist,MIN_REC_FRAC,MAX_REC_FRAC); return(map_dist); } void read_quant_trait(fp,num,indivs) FILE *fp; int num, indivs; /* could send IOERROR or BADDATA */ { char tok[TOKLEN+1], c; int i; getdataln(fp); /* read the locus name */ if (!nstoken(&ln, sREQUIRED, tok, NAME_LEN)) { send(BADDATA); } strcpy(raw.trait_name[num],&tok[1]); raw.trait_eqn[num][0]='\0'; if (parse_char(&ln,"=",TRUE,&c)) { if (nullstr(ln)) send(BADDATA); nstrcpy(raw.trait_eqn[num],ln,TRAIT_EQN_LEN); ln = NULL; } /* read its data */ for (i=0; i0 && itoken(&ln,iREQUIRED,&n_loci) && n_loci>0 && itoken(&ln,iREQUIRED,&n_traits) && n_traits>0 && itoken(&ln,iREQUIRED,&nam_len) && nam_len>0 && nam_len<=TOKLEN)) { why_("bad header or header entries"); send(BADDATA); } raw.n_loci= n_loci; raw.n_traits= n_traits; raw.n_indivs= n_indivs; raw.name_len= nam_len; raw.left_cond_prob= NULL; raw.right_cond_prob=NULL; matrix(raw.locus, n_indivs, n_loci, char); matrix(raw.locus_name, n_loci, nam_len, char); array(raw.map_dist, n_loci, real); raw.max_traits=MAX_TRAITS(raw.n_traits); matrix(raw.trait,n_indivs,raw.max_traits,real); matrix(raw.trait_name,raw.max_traits,nam_len,char); matrix(raw.left_cond_prob, n_indivs, n_loci, LOCUS_GENOTYPE_PROBS); matrix(raw.right_cond_prob, n_indivs, n_loci,LOCUS_GENOTYPE_PROBS); for (j=0; j 0.5) { bar= (int) (raw.map_dist[num] + 0.499); raw.map_dist[num]= unkosambi((real) bar); } /* sf(ps,"%-3d %lf\n",num,raw.map_dist[num]); print(ps); */ rrange(&raw.map_dist[num],MIN_REC_FRAC,MAX_REC_FRAC); } /*** THE NEW AND IMPROVED RAW DATA CRUNCHER - USED AFTER LOADING DATA ****/ #define PROBS_NOT_EQ \ "*** WARNING: left and right probs unequal: indiv %d locus %d genotype %d\n" #define PROBS_NOT_1 \ "*** WARNING: interval genotype probs not 1: indiv %d interval %d\n" void crunch_data() /* side effects the raw data struct */ { int i, k, first, last; real r[3], l[3], right, left; INTERVAL_GENOTYPE_PROBS *indiv_probs; int geno, foo; first=0; last=raw.n_loci-1; for (i=0; i=first; k--) condition(raw.right_cond_prob[i],k,k+1,raw.locus[i][k], raw.map_dist[k],RIGHT); } /* DEBUGGING STUFF: We now check the L-R- conditioning */ array(indiv_probs,2,INTERVAL_GENOTYPE_PROBS); for (i=0; i=0.01) { sf(ps,PROBS_NOT_EQ,i+1,k+2,geno); pr(); } left+=l[geno]; right+=r[geno]; } if (fabs(left-1.0)>=0.01) { sf(ps,PROBS_NOT_1,i+1,k+1); pr(); } if (fabs(right-1.0)>=0.01) { sf(ps,PROBS_NOT_1,i+1,k+2); pr(); } /* sf(ps,"Indiv = %d Locus = %d left=%lf right=%lf\n",i+1,k+2, left,right); pr();*/ /* for_interval_genotypes(raw.data_type,foo) { sf(ps,"geno %d: left_int_prob=%lf right_int_prob=%lf\n", foo,indiv_probs[0][foo],indiv_probs[1][foo]); pr(); } */ /* for_locus_genotypes(raw.data_type,foo) { sf(ps,"geno %d lcp=%lf rcp=%lf\n",foo, raw.left_cond_prob[i][k+1][foo], raw.right_cond_prob[i][k+1][foo]); pr(); } */ } } void initial_prob(prob,locus,this_genotype) LOCUS_GENOTYPE_PROBS *prob; int locus; char this_genotype; { int i; prob[locus][A]= prob[locus][B]= prob[locus][H]= 0.0; if (raw.data_type==BACKCROSS) switch (this_genotype) { case 'A': prob[locus][A]=1.0; break; case 'B': case 'H': prob[locus][H]=1.0; break; case '-': prob[locus][A]=0.5; prob[locus][H]=0.5; break; default: send(CRASH); } else if (raw.data_type==INTERCROSS && !raw.f3) switch (this_genotype) { case 'A': prob[locus][A]=1.0; break; case 'B': prob[locus][B]=1.0; break; case 'H': prob[locus][H]=1.0; break; case 'C': prob[locus][H]=2.0/3.0; prob[locus][B]=1.0/3.0; break; case 'D': prob[locus][H]=2.0/3.0; prob[locus][A]=1.0/3.0; break; case '-': prob[locus][A]=0.25; prob[locus][B]=0.25; prob[locus][H]=0.5; break; default: send(CRASH); } else if (raw.data_type==INTERCROSS && raw.f3) switch (this_genotype) { case 'A': prob[locus][A]=1.0; break; case 'B': prob[locus][B]=1.0; break; case 'H': prob[locus][H]=1.0; break; case 'C': prob[locus][H]=0.4; prob[locus][B]=0.6; break; case 'D': prob[locus][H]=0.4; prob[locus][A]=0.6; break; case '-': prob[locus][A]=3.0/8.0; prob[locus][B]=3.0/8.0; prob[locus][H]=0.25; break; default: send(CRASH); } else send(CRASH); } real pheno_given_geno(observation,genotype) char observation; /* eg the 'phenotype' */ int genotype; { if (raw.data_type==BACKCROSS) switch (observation) { case 'A': return((genotype==A) ? 1.0 : 0.0); case 'B': case 'H': return((genotype==H) ? 1.0 : 0.0); case '-': return(1.0); default: send(CRASH); } else if (raw.data_type==INTERCROSS && !raw.f3) switch (observation) { case 'A': return((genotype==A) ? 1.0 : 0.0); case 'B': return((genotype==B) ? 1.0 : 0.0); case 'H': return((genotype==H) ? 1.0 : 0.0); case 'C': return((genotype!=A) ? 1.0 : 0.0); case 'D': return((genotype!=B) ? 1.0 : 0.0); case '-': return(1.0); default: send(CRASH); } else if (raw.data_type==INTERCROSS && raw.f3) /* no different??? */ switch (observation) { case 'A': return((genotype!=B) ? 1.0 : 0.0); case 'B': return((genotype!=A) ? 1.0 : 0.0); case 'H': return((genotype==H) ? 1.0 : 0.0); case 'C': return((genotype!=A) ? 1.0 : 0.0); case 'D': return((genotype!=B) ? 1.0 : 0.0); case '-': return(1.0); default: send(CRASH); } else send(CRASH); } void condition(prob,locus,prev_locus,observation,rec_frac,side) LOCUS_GENOTYPE_PROBS *prob; int locus, prev_locus; char observation; real rec_frac; int side; { int i, geno_was, geno_is, max_recs,geno1,geno2; real rec, norec, total; for (i=0; iMAX_REC_FRAC) return(MAX_REC_FRAC); else total_cm+= haldane_cm(raw.map_dist[i]); if (total_cm>MAX_CM) return(MAX_REC_FRAC); } total_rf= unhaldane_cm(total_cm); rrange(&total_rf,MIN_REC_FRAC,MAX_REC_FRAC); return(total_rf); } void indiv_interval_probs(prob,data_indiv_num,raw_indiv_num,left_locus_num, right_locus_num,theta) INTERVAL_GENOTYPE_PROBS *prob; /* [num][geno-code] => real */ int data_indiv_num, raw_indiv_num; int left_locus_num, right_locus_num; real theta; /* provided as an argument for efficiency */ { real sum; int interval_geno, left, right; sum= 0.0; for_interval_genotypes(raw.data_type,interval_geno) { left= left_genotype[interval_geno]; right=right_genotype[interval_geno]; sum+= prob[data_indiv_num][interval_geno]= raw.left_cond_prob[raw_indiv_num][left_locus_num][left] * transition_prob(raw.data_type,left,right,theta) * raw.right_cond_prob[raw_indiv_num][right_locus_num][right]; } for_interval_genotypes(raw.data_type,interval_geno) { if(sum == 0.0) print("sum = 0\n"); prob[data_indiv_num][interval_geno]/= sum; } } real apriori_prob(data_type,geno) int data_type, geno; { if (data_type==BACKCROSS) return(0.5); else switch(geno) { case A: return(0.25); case B: return(0.25); case H: return(0.50); } } void altered_chroms_message() { if(!already_printed) { print("The linkage map data in the '.data' file has been altered since its\n"); print("last usage. The saved scan results and saved names are obsolete and\n"); print("will not be loaded.\n"); already_printed = TRUE; } altered_chroms = TRUE; }