/****************************************************************************** #### #### ##### # # #### # # # # # ## ## # # # # # # # ## # # # # # # # # # ### # # # # # # # # ### # # ### # #### # # # ### #### ******************************************************************************/ /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute for Biomedical Research. All rights reserved. See READ.ME for license. */ #define INC_LIB #define INC_MISC #define INC_CALLQCTM #define INC_QLOWLEVEL #include "qtl.h" /* Globals available to the outside world: */ real like_tolerance, pos_tolerance, mat_tolerance; bool print_iter, print_rec_mat, bag_qctm, brute_force, print_brute_force; bool debug_newton; int max_intervals, max_genotype_vars, max_continuous_vars; /* functions alloc_qctm_globals(), free_qctm_globals(), qctm_globals_avail(), qctm_init(), qtl_conv_to_map(); */ /* Local to this file: */ void qctm(); void set_qctm_globals(); void fill_in_qtl_map(); real E_step(); void likelihood(); void make_rec_probs(); void ML_qtl_weight(); void kill_entry(); void ML_qtl_pos(); real do_brute_force(); real pos_like(); real variance(); real normal_density(); bool debug_qctm; int n_individuals, n_intervals, n_qtl_genotypes; int n_genotype_vars, n_continuous_vars; /* WARNING: Make sure that the (wrong) globals from qtop.c, num_continuous_vars and num_intervals, aren't used in this file! */ /* All of these variables are set & alloced by routines in qdata.c */ int max_interx_genotypes, max_backx_genotypes; int **lookup_genotype; /* [bit-vect][interval#]=> A,B,H */ real **lookup_coded_genotype; /* [bit-vect][geno-var#]=> real# */ /* State variables for the QCTM E-M loop */ real **expected_genotype; /* [indivs][geno_vars+1] */ real *int_like; /* [intervals] */ real **S_matrix, **S_inverse; /* [geno_vars+1][geno_vars+1] */ real **indiv_S_matrix; /* ditto */ real *qctm_qtl_pos; /* [intervals] */ real *null_qtl_weight, *qctm_qtl_weight; /* [geno_vars+1] */ real *fix_qtl_weight, *temp_row; /* [geno_vars+1] see ML_qtl_weight */ GENO_PROBS *indiv_rec_like; /* [interval][int-geno][qtl-geno] */ GENO_PROBS *expected_recs, *trans_prob; /* ditto */ INTERVAL_GENOTYPE_PROBS *rec_like; /* [interval][int-geno] */ bool epistasis_kludge; #define SMALL_VARIANCE 1e-5 #define TINY_VARIANCE 0.001 #define TINY_LIKELIHOOD 1e-40 #define SQRT_2_PI 2.506628 #define BIGGEST_EXPONENT 1e20 #define NOREC 0 #define REC 1 #define RECS 2 /****************************** PRELIMINARIES ******************************/ void qctm_init() { max_interx_genotypes= max_backx_genotypes= 0; max_intervals= max_genotype_vars= max_continuous_vars= 0; pos_tolerance= like_tolerance= mat_tolerance= -1.0; debug_qctm= FALSE; lookup_genotype= NULL; lookup_coded_genotype= NULL; int_like= NULL; temp_row= NULL; expected_genotype= NULL; S_matrix= S_inverse= indiv_S_matrix= NULL; expected_recs= indiv_rec_like= trans_prob= NULL; rec_like= NULL; qctm_qtl_weight= qctm_qtl_pos= NULL; null_qtl_weight= NULL; } void set_qctm_globals(data,map) /* called only by qctm itself */ DATA *data; QTL_MAP *map; { int i, j, n, k, last; int n1, n2, g1, g2; real het_additive_coeff, a, b, c; /* debug_qctm=TRUE; */ n_individuals= data->num_individuals; n_intervals= data->num_intervals; n_genotype_vars= (raw.data_type==INTERCROSS ? 2:1) * n_intervals; if (n_intervals==0) n_qtl_genotypes=0; else n_qtl_genotypes= ipow((raw.data_type==INTERCROSS ? 3:2),n_intervals); n_continuous_vars= data->num_continuous_vars; epistasis_kludge= FALSE; /* Paranoia: Check if call to QCTM is OK? */ /* The n_inter=n_individuals || !qctm_globals_avail() || n_intervals>max_intervals || n_continuous_vars>max_continuous_vars) send(CRASH); if (raw.data_type==BACKCROSS) { /* send(CRASH); Needs to be modified for epistasis and lookups */ for (i=0; iqtl_weight[i]; fix_qtl_weight[i]= map->constraint[i].backx_weight; } for (k=0; kcont_var_weight[k]; fix_qtl_weight[n_intervals+k]= map->fix_cont_var_weight[k]; } fix_qtl_weight[n_intervals+n_continuous_vars]= DONT_FIX; } else if (raw.data_type==INTERCROSS) { for (j=0, n=0; jqtl_weight[j]; qctm_qtl_weight[n+1]= map->qtl_dominance[j]; a=map->constraint[j].a; b=map->constraint[j].b; c=map->constraint[j].c; if (fix_weight_kludge) { /* for scan weight KLUDGE */ fix_qtl_weight[n]= map->qtl_weight[j]; fix_qtl_weight[n+1]=0.0; } else if (a==0.0 && b==0.0) { /* nothing fixed */ fix_qtl_weight[n]= DONT_FIX; fix_qtl_weight[n+1]= DONT_FIX; } else if (b==0.0) { /* fix additive term */ fix_qtl_weight[n]= c/a; fix_qtl_weight[n+1]= DONT_FIX; } else { /* b!=0.0 -> fix dominance term */ fix_qtl_weight[n]= DONT_FIX; fix_qtl_weight[n+1]= c/b; } het_additive_coeff= (b!=0.0 ? (b-a)/b : 1.0); for (i=0; icont_var_weight[k]; fix_qtl_weight[last+k]= map->fix_cont_var_weight[k]; } if (n_continuous_vars>=1 && map->cont_var[0]==EPISTASIS_TERM) { if (n_intervals<2) send(CRASH); n1= n_intervals-2; n2= n_intervals-1; /* the 2 epistatic loci */ for (i=0; imu; fix_qtl_weight[last+n_continuous_vars]= DONT_FIX; } else send(CRASH); } void fill_in_qtl_map(map,new_like,qtl_weight) QTL_MAP *map; real new_like, *qtl_weight; { int j, k, n, num, last; real a, b, c; map->abs_log_like= new_like; map->log_like= new_like - map->null_log_like; map->var_explained= 1.0 - (map->sigma_sq/map->null_sigma_sq); map->chi_sq= 2.0 * map->log_like * log(10.0); if (raw.data_type==BACKCROSS) { for (j=0; jqtl_weight[j]= qtl_weight[j]; map->qtl_dominance[j]= 0.0; } for (k=0; kcont_var_weight[k]= qctm_qtl_weight[n_intervals+k]; } } else if (raw.data_type== INTERCROSS) { for (j=0, n=0; jconstraint[j].a; b=map->constraint[j].b; c=map->constraint[j].c; map->qtl_weight[j]= qtl_weight[n]; if (b==0.0 || fix_weight_kludge) map->qtl_dominance[j]= qtl_weight[n+1]; else map->qtl_dominance[j]= (c - a*qtl_weight[n])/b; } last= (!epistasis_kludge ? n_genotype_vars: n_genotype_vars-1); num= (!epistasis_kludge ? n_continuous_vars: n_continuous_vars+1); for (k=0; kcont_var_weight[k]= qctm_qtl_weight[last+k]; } } else send(CRASH); } /************************** QTL_CONV_TO_MAP (QCTM) **************************/ void qtl_conv_to_map(data,map) DATA *data; QTL_MAP *map; /* many parts of this are side-effected */ { real old_like, new_like; int i; bool done; /* use externs null_qtl_weight, qctm_qtl_weight, qctm_qtl_pos */ set_qctm_globals(data,map); map->chi_sq= map->var_explained= map->log_like= 0.0; /*** BAG_QCTM - SPEED THINGS UP FOR DEBUGGING OTHER PARTS OF THE CODE ***/ if (bag_qctm) { map->log_like= map->abs_log_like= map->chi_sq= 1.0; map->null_log_like= map->no_data_like= 0.0; map->var_explained= 0.001; return; } /*** COMPUTE THE NULL QTL MAPS ***/ map->null_log_like= E_step(map->qtl_pos,null_qtl_weight,&map->null_mu,&map->null_sigma_sq, data,expected_genotype,S_matrix,expected_recs); map->no_data_like= 0.0; /* was no_data_like(data,qctm_qtl_weight,mu,sigma_sq) - see Obsolete.c */ if (print_iter) print_null_iteration(map); /*** COMPUTE THE ACTUAL QTL MAP ***/ new_like= VERY_UNLIKELY; i=0; done=FALSE; do { old_like=new_like; new_like= E_step(map->qtl_pos,qctm_qtl_weight,&map->mu,&map->sigma_sq,data, expected_genotype,S_matrix,expected_recs); if ((fabs(new_like-old_like) < like_tolerance) || (map->sigma_sq < SMALL_VARIANCE) || (fix_weight_kludge && i==200)) done=TRUE; /* converged! */ else if (new_like < old_like) { print("*** warning: likelihood decreased, quitting...\n"); done=TRUE; } if (print_iter && (!done || print_rec_mat)) { fill_in_qtl_map(map,new_like,qctm_qtl_weight); print_iteration(i,map,(i==0 ? 0.0 : new_like-old_like)); if (print_rec_mat) do_print_E_step(expected_genotype,S_matrix,expected_recs, n_individuals,n_genotype_vars,n_continuous_vars,n_intervals); } if (done) break; ML_qtl_weight(S_matrix,expected_genotype,data->phenotype, fix_qtl_weight,&map->mu,qctm_qtl_weight,&map->sigma_sq); ML_qtl_pos(expected_recs,data->interval_len,map->fix_pos,map->qtl_pos); i++; /* #iterations counter */ } while(TRUE); /* break is in the middle of the loop */ fill_in_qtl_map(map,new_like,qctm_qtl_weight); } /********************************** E-STEP **********************************/ real E_step(qtl_pos,qtl_weight,mu,sigma_sq,data,expected_genotype, S_matrix,expected_recs) /* return the log-likelihood */ real *qtl_pos, *qtl_weight; real *mu, *sigma_sq; /* just ptrs to single reals */ DATA *data; real **expected_genotype, **S_matrix; /* both side-effected */ GENO_PROBS *expected_recs; /* side-effected */ { int *poss_genotype; real *genotype_contribution; real sum_exp_genotype, prediction; real like, indiv_like, int_like, sum_int_like, total_log_like; int i, j, n, k; /* use i=individuals; j=intervals; n=genotype_vars */ int x, y, z, geno, qtl, last, c, d, entry; /* externs side-effected: indiv_S_matrix, indiv_rec_like, trans_prob, and rec_like */ if (debug_qctm) { /* DEBUGGING CODE */ sf(ps,"qtl_pos: %lf\n",qtl_pos[0]); pr(); sf(ps,"qtl_weight: %lf\n",qtl_weight[0]); pr(); sf(ps,"mu: %lf\n",*mu); pr(); sf(ps,"sigma_sq: %lf\n",*sigma_sq); pr(); } /*** Init expected_genotype, expected_recs, S_matrix and rec_probs ***/ for (j=0; jinterval_len,trans_prob); /*** Now compute expected genotypes, recs, S_matrix and likelihood ***/ total_log_like = 0.0; for (i=0; igenotype_prob[i][j][y]); pr(); z++; if (z%4==0 && y!=0) nl(); } if (z%4!=0) nl(); } } /*** For each indiv we calculate indiv_like, indiv_rec_like and indiv_S_matrix. These are then used in the running computation of expected_genotype, S_matrix, expected_recs and the log like. First, we initialize the indiv... variables. ***/ indiv_like=0.0; sum_int_like=0.0; for (j=0; jnum_genotypes[i][j]; y++) { geno= data->genotype[i][j][y]; indiv_rec_like[j][geno][poss_genotype[j]]+=rec_like[j][geno]; } for (n=0; n0) { if (fabs(sum_int_like-1.0)>0.01) print("*** warning: sum_int_like in E_step is not 1.0\n"); if (indiv_like > VERY_SMALL) { total_log_like+= log10(indiv_like); for (j=0; j VERY_SMALL) */ } else { /* n_genotype_vars==0 */ for (prediction= *mu, c=0; ccont_var[c][i] * qtl_weight[c]; indiv_like= normal_density((data->phenotype[i]-prediction),sigma_sq); if (indiv_like > VERY_SMALL) total_log_like+=log10(indiv_like); } if (debug_qctm) { /* DEBUGGING CODE */ int g, q; print("expected_geno: "); for (n=0; ncont_var[c][i]; for (n=0; ncont_var[c][i] * expected_genotype[i][n]; S_matrix[n][entry]=S_matrix[entry][n]= sum_exp_genotype; } for (sum_exp_genotype=0.0, i=0; icont_var[c][i]; S_matrix[entry][last]=S_matrix[last][entry]= sum_exp_genotype; x=n_genotype_vars+c; for (d=0; d<=c; d++) { y=n_genotype_vars+d; for (sum_exp_genotype=0.0, i=0; icont_var[c][i] * data->cont_var[d][i]; S_matrix[x][y]= S_matrix[y][x]= sum_exp_genotype; } } for (n=0; ngenotype_prob[indiv]; int *num_int_genos= data->num_genotypes[indiv]; INT_POSS_GENOTYPES *poss_geno= data->genotype[indiv]; /* poss_geno[interval#][poss-geno#]=> genotype code */ /* int_like is used as a temp in here */ *total_int_like= 1.0; for (j=0; jcont_var[c][indiv] * qtl_weight[n_genotype_vars+c]; normal_like= normal_density((data->phenotype[indiv]-prediction),sigma_sq); *total_like= *total_int_like * normal_like; for (j=0; jVERY_SMALL ? *total_like/int_like[j] : 0.0); for (y=0; y0.01) print("*** warning: the sum of the trans_probs is not 1.0\n"); } } if (debug_qctm) { for (j=0; jmax_like) { max_like=like; best_pos=pos; } } return(best_pos); } real pos_like(left_theta,interval_theta,expected_recs,i) GENO_PROBS *expected_recs; real left_theta, interval_theta; int i; /* the interval# */ { real event_like, pos_like, right_theta; int interval_geno, left_geno, right_geno, qtl_geno; right_theta= (interval_theta-left_theta)/(1.0-2.0*left_theta); pos_like=0.0; for_interval_genotypes(raw.data_type,interval_geno) { left_geno= left_genotype[interval_geno]; right_geno= right_genotype[interval_geno]; for_locus_genotypes(raw.data_type,qtl_geno) { event_like= apriori_prob(raw.data_type,left_geno) * transition_prob(raw.data_type,left_geno,qtl_geno,left_theta) * transition_prob(raw.data_type,qtl_geno,right_geno,right_theta); pos_like+= log(event_like) * expected_recs[i][interval_geno][qtl_geno]; } } return(pos_like); } /* real left_recs, right_recs, left_norecs, right_norecs; left_recs= left_norecs= right_recs= right_norecs= 0.0; if (raw.data_type!=BACKCROSS) send(CRASH); if (left_geno==qtl_geno) left_norecs+= expected_recs[i][interval_geno][qtl_geno]; else left_recs+= expected_recs[i][interval_geno][qtl_geno]; if (right_geno==qtl_geno) right_norecs+= expected_recs[i][interval_geno][qtl_geno]; else right_recs+= expected_recs[i][interval_geno][qtl_geno]; like= left_recs*log(left_theta) + left_norecs*log(1.0-left_theta) + + right_recs*log(right_theta) + right_norecs*log(1.0-right_theta); */ #ifdef VERY_OLD_CODE start= 0.0; inc= (haldane_cm(interval_rf[i])/(DIVS-1)); if (print_brute_force) { sf(ps,PrBF1,i,interval_rf[i],qtl_pos[i]); print(ps); } max_pos=do_brute_force(start,inc,interval_rf[i], expected_recs,i,print_brute_force,&unimodal); if (!unimodal) { print("*** warning: pos_likes is not unimodal...\n"); qtl_pos[i]= max_pos; } else { /* unimodal */ /* Do it again, with interval= max_pos+/-inc */ start= haldane_cm(max_pos)-inc; /* rrange takes care */ inc= (2*inc)/(DIVS-1); /* of fencepost error */ /* if (print_brute_force) { print("second pass:\n"); } */ qtl_pos[i]=do_brute_force(start,inc,interval_rf[i], expected_recs,i,FALSE,&unimodal); } if (print_iter && !print_brute_force) { sf(ps,PrI,i,interval_rf[i], guess_pos(expected_recs,i,interval_rf[i]), qtl_pos[i]); print(ps); } real do_brute_force(start,inc,theta,expected_recs,interval,do_print,unimodal) real start, inc, theta; GENO_PROBS *expected_recs; int interval, do_print, *unimodal; { real pos, cm, max_pos, max_like, d, d2, f, prev_f; int j, max_j, dip, got_f; max_like= VERY_UNLIKELY; dip= FALSE; *unimodal=TRUE; for (j=0, cm=start; jmax_like) { max_like=f; max_pos=pos; max_j=j; if (dip) *unimodal= FALSE; } else if (fchi_sq= map->var_explained= map->log_like= 0.0; /*** COMPUTE THE NULL QTL MAPS ***/ map->null_log_like= E_step(map->qtl_pos,null_qtl_weight,&map->null_mu,&map->null_sigma_sq, data,expected_genotype,S_matrix,expected_recs); map->no_data_like= 0.0; /*** COMPUTE THE ACTUAL QTL MAP ***/ /* qctm_qtl_weight[] was initialized from the map->qtl_weight[] and map->qtl_dominance[] entries by set_qctm_globals() */ new_like= E_step(map->qtl_pos,qctm_qtl_weight,&map->mu,&map->sigma_sq,data, expected_genotype,S_matrix,expected_recs); fill_in_qtl_map(map,new_like,qctm_qtl_weight); } #ifdef DEBUGGING_CODE printf("Expected Genotype:\n"); for (i=0; i