/****************************************************************************** ##### # # #### #### # # ##### #### #### # # # # # # # ## ## # # # # # # # # # # # # ## # # # #### # # # ## # # # # # # # # # ### # # ## ## # # # # # # # # # # ### # # # # # #### ####### #### # # ##### #### ### #### ******************************************************************************/ /* This file is part of MAPMAKER 3.0b, Copyright 1987-1992, Whitehead Institute for Biomedical Research. All rights reserved. See READ.ME for license. */ #define INC_LIB #define INC_SHELL #define INC_MISC #include "mapm.h" /* internal procedures */ bool print_biglod(); void print_lodtable(); #define PRINT_HALF_LODTABLE 1 #define PRINT_FULL_LODTABLE 2 void print_2pt_criteria(); void parse_2pt_criteria(); void print_lod_line(); void print_2pt_criteria(str,lod,theta) char *str; real lod, theta; { sf(ps,"%s at min LOD %.2lf, max Distance %s",str,lod,rag(rf2str(theta))); pr(); } void parse_2pt_criteria(argptr,lod,theta) char **argptr; real *lod, *theta; { if (!rtoken(argptr,default_lod,lod) || !rrange(lod,0.0,1000.0)) error("bad value for min LOD score"); if (!rtoken(argptr,default_theta,theta) || !input_dist(theta)) error("bad value for max distance"); } command two_point() { int a, b, num_loci, count, total, *locus=NULL; mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL); nomore_args(0); run { count=total=0; alloc_list_of_all_loci(seq,&locus,&num_loci); crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ); total=(num_loci*(num_loci-1))/2; for_all_locus_pairs (locus,num_loci,a,b) { keep_user_amused("pair",count++,total); compute_two_pt(a,b); } print("two-point data are available.\n"); } on_exit { unarray(locus,int); relay_messages; } } #define GROUP_DIVIDER "-------\n" command group() { real lodbound, thetabound; int *loci=NULL, num_loci, *linkage_group=NULL, group_size, num_left; int *really_unlinked=NULL, num_unlinked, num_linkage_groups, i; mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL); parse_2pt_criteria(&args,&lodbound,&thetabound); nomore_args(2); run { alloc_list_of_all_loci(seq,&loci,&num_loci); crunch_locus_list(loci,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ); array(linkage_group,num_loci,int); array(really_unlinked,num_loci,int); print_2pt_criteria("Linkage Groups",lodbound,thetabound); nl(); num_groups= 0; /* The global - set to 0 just in case something dies */ num_linkage_groups= num_unlinked= 0; for (i=0; i=2) { inv_isort(linkage_group,group_size); if (num_linkage_groups>0) print(GROUP_DIVIDER); else nl(); sf(ps,"group%d= ",num_linkage_groups+1); pr(); for (i=0; i0); if (num_unlinked>0) { if (num_linkage_groups>0) print(GROUP_DIVIDER); else nl(); print("unlinked= "); for (i=0; i=2) { sort_loci(linkage_group,group_size); do { find_haplo_group(linkage_group,&group_size,haplo_group, &num_haplo,obs1,obs2); if (num_haplo>=2) { setup_haplo_group(haplo_group,num_haplo); sf(ps,"Haplotype Group %2d: %s= [", ++num_haplo_groups, locname(haplo_first[haplo_group[0]],TRUE)); pr(); for (i=0; i2 */ } while(group_size>=1); num_linkage_groups++; } /* group_size>2 */ } while (num_left>0); nl(); if (num_linkage_groups==0) print(HAP_NO_GROUPS); else if (num_haplo_groups==0) print(HAP_NO_HAPLOS); } if (num_changed>0) { if (!find_em) nl(); sort_loci(changed,num_changed); bash_mapping_data(changed,num_changed); bash_order_info(changed,num_changed); } for (any=FALSE, i=0; i0) { sort_loci(changed,num_changed); bash_mapping_data(changed,num_changed); bash_order_info(changed,num_changed); } } on_exit { unarray(loci,int); unarray(changed,int); relay_messages; } } command list_haplotypes() { int num_loci, *locus=NULL; mapm_ready(ANY_DATA,MAYBE_SEQ,UNCRUNCHED_LIST,NULL); run { if (!nullstr(args)) { parse_locus_args(&locus,&num_loci); /* error if fails */ if (num_loci==0) error("no loci in arguments\n"); } else { if (!alloc_list_of_all_loci(seq,&locus,&num_loci)) error(NEED_SEQ_OR_ARGS); } nl(); crunch_locus_list(locus,&num_loci,SILENTLY,ANY_CHROMS,0); hold(more_mode) print_haplo_summary(locus,num_loci); } on_exit { unarray(locus,int); relay_messages; } } command list_loci() { int source, num_loci, *locus=NULL; mapm_ready(ANY_DATA,MAYBE_SEQ,UNCRUNCHED_LIST,NULL); run { if (!nullstr(args)) { parse_locus_args(&locus,&num_loci); /* error if fails */ if (num_loci==0) error("no loci in arguments\n"); source=IN_ARGS; } else { if (!alloc_list_of_all_loci(seq,&locus,&num_loci)) error(NEED_SEQ_OR_ARGS); source=IN_SEQ; } crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,source); nl(); hold(more_mode) print_locus_summary(locus,num_loci,use_haplotypes); } on_exit { unarray(locus,int); relay_messages; } } #define NOSEX_BIGLODS "\n Marker-1 Marker-2 Theta LOD cM\n" #define SEX_BIGLODS \ "\n Marker-1 Marker-2 Theta(M) Theta(F) LOD cM(M) cM(F)\n" command biglods() { int a, b, n, num_loci, *loci=NULL; real lodbound, thetabound; mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL); parse_2pt_criteria(&args,&lodbound,&thetabound); nomore_args(2); run { alloc_list_of_all_loci(seq,&loci,&num_loci); crunch_locus_list(loci,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ); print_2pt_criteria("Linked Marker Pairs",lodbound,thetabound); nl(); if (!sex_specific) print(NOSEX_BIGLODS); else print(SEX_BIGLODS); n=0; hold(more_mode) for_all_locus_pairs(loci,num_loci,a,b) if (a!=b && print_biglod(a,b,lodbound,thetabound,sex_specific,NO_CHROM)) n++; if (n==0) print("no linked markers in sequence\n"); } on_exit { unarray(loci,int); relay_messages; } } command near_locus() { int a, b, i, j, n, num_loci, *locus=NULL, *trial_locus=NULL, num_trials; real lodbound, thetabound; char *rest; mapm_ready(ANY_DATA,2,UNCRUNCHED_LIST,NULL); if (split_arglist(&rest,':') && !nullstr(rest)) { parse_2pt_criteria(&rest,&lodbound,&thetabound); if (!nullstr(rest)) usage_error(0); } else { lodbound=default_lod; thetabound=default_theta; } if (nullstr(args)) usage_error(0); parse_locus_args(&trial_locus,&num_trials); crunch_locus_list(trial_locus,&num_trials,CRUNCH_WARNINGS,ANY_CHROMS, IN_ARGS); run { alloc_list_of_all_loci(seq,&locus,&num_loci); crunch_locus_list(locus,&num_loci,CRUNCH_WARNINGS,ANY_CHROMS,IN_SEQ); print_2pt_criteria("Linked Markers in Sequence",lodbound,thetabound); nl(); if (!sex_specific) print(NOSEX_BIGLODS); else print(SEX_BIGLODS); hold(more_mode) for (j=0; j=lodbound && theta<=thetabound) { sf(ps," %-10s %-10s %s %s %s",loc2str(i),loc2str(j), rsd(5.2,theta),rsd(5.2,lod),rsd(6.2,cm_dist(theta))); pr(); if (chrom!=NO_CHROM) { sf(ps," (%s)",chrom2str(chrom)); pr(); } nl(); return(TRUE); } else return(FALSE); } else { /* sex */ if (!get_sex_two_pt(i,j,&lod,&thetam,&thetaf)) return(FALSE); if (lod>=lodbound && (thetam<=thetabound || thetaf<=thetabound)) { sf(ps," %-10s %-10s %s %s %s %s %s", loc2str(i),loc2str(j), rsd(5.2,thetam),rsd(5.2,thetaf),rsd(5.2,lod), cm_dist(thetam),cm_dist(thetaf)); pr(); if (chrom!=NO_CHROM) { sf(ps," (%s)",chrom2str(chrom)); pr(); } nl(); return(TRUE); } else return(FALSE); } } void print_lodtable(locus1,locus2,num_loci1,num_loci2,how) int *locus1, *locus2, num_loci1, num_loci2, how; { int across, down, num_across, num_done, across_to_print; char *line1, *line2; print("Bottom number is LOD score, top number is "); if (units==CENTIMORGANS) print("centimorgan distance:\n"); else print("recombination fraction:\n"); num_done=0; do { num_across= (num_loci1>10 ? 10:num_loci1); across_to_print= num_loci1 - (how==PRINT_HALF_LODTABLE ? 1:0); nl(); if (!print_names) { print(" "); for (across=0; across0) locus1+=num_across; } while (num_loci1>0); } void print_lod_line(loc,toploc,topnum,how,topref,downref) int loc, *toploc, topnum, how, topref, downref; { int across, stop_at; real lod, theta, thetam, thetaf; if (how==PRINT_HALF_LODTABLE) { if (topref >= downref) return; stop_at= ((downref-topref)>topnum ? topnum:(downref - topref)); } else stop_at= topnum; if (sex_specific) { if (print_names) print("\n "); else print("\n "); for (across=0; across=3) { num_groups++; /* compute acceptable three-point orders in the group */ sort_loci(linkage_group,group_size); for_all_3pt_seqs (linkage_group,group_size,three_seq) { get_list_of_all_loci(three_seq,three_locus,&foo,3); if (three_linked(three_locus,lodbound3,thetabound3, num_links,TRIPLET_SEX)) num_trips++; } } } while(num_unlinked>0); if (num_groups==0) { sf(ps,THREE_NO_GRPS); pr(); abort_command(); } else if (num_trips==0) { sf(ps,THREE_NO_TRIPS,num_groups,maybe_s(num_groups)); pr(); abort_command(); } /* else */ sf(ps,THREE_DO_THIS,num_trips,maybe_s(num_trips),num_groups, maybe_s(num_groups)); pr(); if (!print_names) { print("\n log-likelihood differences\n"); print( " count markers a-b-c b-a-c a-c-b\n"); /* 12345: 1234 1234 1234 */ } else { print("\n log-likelihood differences\n"); print( " count markers a-b-c b-a-c a-c-b\n"); /* 12345: 123456789 123456789 123456789 */ } /* do it again, for real this time... */ get_list_of_all_loci(seq,loci,&num_unlinked,num_loci); crunch_locus_list(loci,&num_loci,SILENTLY,ANY_CHROMS,IN_SEQ); num_trips=0; do { get_linkage_group(loci,&num_unlinked,linkage_group,&group_size, lodbound2,thetabound2); if (group_size>=3) { /* compute acceptable three-point orders in the group */ inv_isort(linkage_group,group_size); for_all_3pt_seqs (linkage_group,group_size,three_seq) { get_list_of_all_loci(three_seq,three_locus,&foo,3); if (three_linked(three_locus,lodbound3,thetabound3, num_links,TRIPLET_SEX)) { compute_3pt(three_seq,TRIPLET_SEX,triplet_error_rate, three_like,map); if (!print_names) { sf(ps,"%5d: %d %d %d",++num_trips, three_locus[0]+1,three_locus[1]+1, three_locus[2]+1); pad_to_len(ps,23); pr(); } else { sf(ps,"%5d: %s %s %s",++num_trips, raw.locus_name[three_locus[0]], raw.locus_name[three_locus[1]], raw.locus_name[three_locus[2]]); pad_to_len(ps,38); pr(); } sf(ps,"%s %s %s\n",rsd(6.2,three_like[0]), rsd(6.2,three_like[1]),rsd(6.2,three_like[2])); pr(); } } } } while(num_unlinked>0); } on_exit { free_map(map); unarray(loci,int); unarray(linkage_group,int); three_pt_touched= TRUE; relay_messages; } } command forget_three_point() { mapm_ready(ANY_DATA,0,0,NULL); nomore_args(0); bash_all_three_pt(raw.num_markers); print("All three-point data forgotten.\n"); } #define INF_CRITERIA \ "Informativeness: min #Individuals %d%s, min Distance %s\n" command suggest_subset() { real lodbound, thetabound, seed_like; int *loci=NULL, num_loci, *linkage_group=NULL, group_size, groups_done; int *subset=NULL, subset_size, num_unlinked, i, prev; mapm_ready(F2_DATA,3,LIST_SEQ,&num_loci); /* F2 because #indivs */ parse_2pt_criteria(&args,&lodbound,&thetabound); nomore_args(0); print_2pt_criteria("Informative Subgroups",lodbound,thetabound); nl(); sf(ps,INF_CRITERIA,npt_min_indivs,(npt_codominant ? " (codominant)":""), rag(rf2str(npt_min_theta))); pr(); run { alloc_list_of_all_loci(seq,&loci,&num_loci); array(linkage_group,num_loci,int); array(subset,num_loci,int); num_orders=0; /* global - deletes all pre-existing orders */ for (i=0; i=3) { if (groups_done!=0) print(GROUP_DIVIDER); sort_loci(linkage_group,group_size); groups_done++; sf(ps,"Linkage group %d: ",groups_done); pr(); for (i=0; i0); if (groups_done==0) print("No Linkage Groups Found.\n"); num_orders= groups_done; } on_exit { unarray(loci,int); unarray(linkage_group,int); unarray(subset,int); relay_messages; } }