#include #include #include #include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "define_header.h" #include "dp_lib_header.h" float compute_lambda (int **matrix,char *alphabet); /*********************************************************************************************/ /* */ /* FUNCTIONS FOR EVALUATING THE CONSISTENCY BETWEEN ALN AND CL */ /* */ /*********************************************************************************************/ /*Fast: score= extended_res/max_extended_residue_for the whole aln slow: score= extended_res/sum all extended score for that residue non_extended score= non_ext /sum all non extended score for that residue heuristic score= extended /sum of extended score of all pairs in the library (i.e. Not ALL the possible pairs) */ Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode ); int sub_aln2ecl_raw_score (Alignment *A, Constraint_list *CL, int ns, int *ls) { int **pos; int p1,r1, r2, s1, s2; int score=0; if ( !A) return 0; A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); pos=aln2pos_simple ( A,A->nseq); CL=index_res_constraint_list (CL, CL->weight_field); for (p1=0; p1len_aln; p1++) { for (s1=0; s10 && r2>0) { score+= residue_pair_extended_list_pc (CL,ls[s1], r1, ls[s2], r2)*SCORE_K; } } } } free_int (pos, -1); return score; return (score/(((ns*ns)-ns)/2))/A->len_aln; } int aln2ecl_raw_score (Alignment *A, Constraint_list *CL) { int **pos; int p1,r1, r2, s1, s2; int score=0; if ( !A) return 0; A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq); pos=aln2pos_simple ( A,A->nseq); CL=index_res_constraint_list (CL, CL->weight_field); for (p1=0; p1len_aln; p1++) { for (s1=0; s1nseq-1; s1++) { for (s2=s1+1; s2nseq; s2++) { r1=pos[s1][p1]; r2=pos[s2][p1]; if (r1>0 && r2>0)score+= residue_pair_extended_list_pc (CL,s1, r1, s2, r2); } } } free_int (pos, -1); return score; return (score/(((A->nseq*A->nseq)-A->nseq)/2))/A->len_aln; } int node2sub_aln_score (Alignment *A,Constraint_list *CL, char *mode, NT_node T) { if ( !T || !T->right ||!T->left)return 0; else { int *ns; int **ls; ns=vcalloc (2, sizeof (int)); ls=vcalloc (2, sizeof (int*)); ns[0]= (T->left)->nseq; ns[1]=(T->right)->nseq; ls[0]= (T->left)->lseq; ls[1]=(T->right)->lseq; return sub_aln2sub_aln_score (A, CL, mode, ns, ls); } return -1; } int sub_aln2sub_aln_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls) { /*Warning: Does Not Take Gaps into account*/ int **pos; int a; float score=0; /*Make sure evaluation functions update their cache if needed*/ A=update_aln_random_tag (A); pos=aln2pos_simple ( A, -1, ns, ls); for (a=0; a< A->len_aln; a++) score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL); free_int (pos, -1); score=(int)(((float)score)/(A->len_aln*SCORE_K)); score=(int)(CL->L && CL->normalise)?((score*MAXID)/(CL->normalise)):(score); return (int)score; } int sub_aln2sub_aln_raw_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls) { /*Warning: Does Not Take Gaps into account*/ int **pos; int a; float score=0; /*Make sure evaluation functions update their cache if needed*/ A=update_aln_random_tag (A); pos=aln2pos_simple ( A, -1, ns, ls); for (a=0; a< A->len_aln; a++) score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL); free_int (pos, -1); return (int) score; } Alignment* main_coffee_evaluate_output_sub_aln ( Alignment *A,Constraint_list *CL, const char *mode, int *n_s, int **l_s) { Alignment *SUB1, *SUB2, *SUB3; int a, b, c,*list_seq; if (strm ( CL->evaluate_mode, "no"))return NULL; else { list_seq=vcalloc (n_s[0]+n_s[1], sizeof (int)); for (b=0, a=0; a< 2; a++){for (c=0;c< n_s[a]; c++)list_seq[b++]=l_s[a][c];} SUB1=copy_aln (A, NULL); SUB2=extract_sub_aln (SUB1,n_s[0]+n_s[1],list_seq); SUB3=main_coffee_evaluate_output (SUB2,CL,CL->evaluate_mode); free_aln (SUB1); free_aln (SUB2); vfree (list_seq); return SUB3; } } Alignment * overlay_alignment_evaluation ( Alignment *I, Alignment *O) { int a, b, c, r, i; int *buf; if ( !I || !O) return O; if ( I->len_aln!=O->len_aln)printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation"); buf=vcalloc ( MAX(I->len_aln, O->len_aln), sizeof (int)); for (a=0; anseq; a++) { if (!strm (I->name[a], O->name[a]))printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation"); for (b=0; blen_aln; b++) { r=I->seq_al[a][b]; if ( islower(r))O->seq_al[a][b]=0; else if (r<=9 || (r>='0' && r<='9'))O->seq_al[a][b]=I->seq_al[a][b]; } } return O; } Alignment * main_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL, const char *mode ) { Alignment *TopS=NULL, *LastS=NULL, *CurrentS=NULL; if ( IN->A)IN=IN->A; while (IN) { CurrentS= main_coffee_evaluate_output2(IN, CL, mode); if (!TopS)LastS=TopS=CurrentS; else { LastS->A=CurrentS; LastS=CurrentS; } IN=IN->A; } return TopS; } Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode ) { /*Make sure evaluation functions update their cache if needed*/ IN=update_aln_random_tag (IN); if ( CL->evaluate_residue_pair==evaluate_matrix_score || CL->ne==0 ||strm ( mode , "categories") || strm ( mode , "matrix")|| strm(mode, "sar")|| strstr (mode, "boxshade") ) { if ( strm ( mode , "categories")) return categories_evaluate_output (IN, CL); else if ( strm ( mode , "matrix"))return matrix_evaluate_output (IN, CL); else if ( strm ( mode, "sar"))return sar_evaluate_output (IN, CL); else if ( strstr ( mode, "boxshade"))return boxshade_evaluate_output (IN, CL, atoi (strstr(mode, "_")+1)); else if ( CL->evaluate_residue_pair==evaluate_matrix_score) return matrix_evaluate_output (IN, CL); else if ( CL->ne==0) return matrix_evaluate_output (IN, CL); } else if ( strm (mode, "no"))return NULL; else if ( strm4 ( mode, "t_coffee_fast","tcoffee_fast","fast_tcoffee", "fast_t_coffee")) { return fast_coffee_evaluate_output ( IN,CL); } else if ( strm4 ( mode, "t_coffee_slow","tcoffee_slow","slow_tcoffee","slow_t_coffee" )) { return slow_coffee_evaluate_output ( IN,CL); } else if ( strm4 ( mode, "tcoffee_non_extended","t_coffee_non_extended","non_extended_tcoffee","non_extended_t_coffee")) { return non_extended_t_coffee_evaluate_output ( IN,CL); } else if ( strm5 ( mode, "tcoffee_heuristic","t_coffee_heuristic","heuristic_tcoffee","heuristic_t_coffee", "dali")) { return heuristic_coffee_evaluate_output ( IN,CL); } else { fprintf ( stderr, "\nUNKNOWN MODE FOR ALIGNMENT EVALUATION: *%s* [FATAL:%s]",mode, PROGRAM); crash (""); return NULL; } return IN; } Alignment * coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) { fprintf ( stderr, "\n[WARNING:%s]THE FUNCTION coffee_evaluate_output IS NOT ANYMORE SUPPORTED\n", PROGRAM); fprintf ( stderr, "\n[WARNING]fast_coffee_evaluate_output WILL BE USED INSTEAD\n"); return fast_coffee_evaluate_output (IN,CL); } Alignment * matrix_evaluate_output ( Alignment *IN,Constraint_list *CL) { int a,b, c,r, s, r1, r2; Alignment *OUT=NULL; double **tot_res; double **max_res; double **tot_seq; double **max_seq; double **tot_col; double **max_col; double max_aln=0; double tot_aln=0; /* Residue x: sum of observed extended X.. /sum of possible X.. */ if ( !CL->M)CL->M=read_matrice ("blosum62mt"); OUT=copy_aln (IN, OUT); tot_res=declare_double ( IN->nseq, IN->len_aln); max_res=declare_double ( IN->nseq, IN->len_aln); tot_seq=declare_double ( IN->nseq, 1); max_seq=declare_double ( IN->nseq, 1); tot_col=declare_double ( IN->len_aln,1); max_col=declare_double ( IN->len_aln,1); max_aln=tot_aln=0; for (a=0; a< IN->len_aln; a++) { for ( b=0; b< IN->nseq; b++) { r1=tolower(IN->seq_al[b][a]); if ( is_gap(r1))continue; r= CL->M[r1-'A'][r1-'A']; r= 1; for ( c=0; cnseq; c++) { r2=tolower(IN->seq_al[c][a]); if (b==c || is_gap (r2))continue; s=CL->M[r2-'A'][r1-'A']; s=(s<=0)?0:1; tot_res[b][a]+=s; max_res[b][a]+=r; tot_col[a][0]+=s; max_col[a][0]+=r; tot_seq[b][0]+=s; max_seq[b][0]+=r; tot_aln+=s; max_aln+=r; } } } for ( a=0; a< IN->nseq; a++) { if ( !max_seq[a][0])continue; OUT->score_seq[a]=(tot_seq[a][0]*100)/max_seq[a][0]; for (b=0; b< IN->len_aln; b++) { r1=IN->seq_al[a][b]; if ( is_gap(r1) || !max_res[a][b])continue; r1=(tot_res[a][b]*10)/max_res[a][b]; r1=(r1>=10)?9:r1; r1=r1<0?0:r1; (OUT)->seq_al[a][b]=r1+'0'; } } for ( a=0; a< IN->len_aln; a++) { r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]); r1=(r1>=10)?9:r1; (OUT)->seq_al[OUT->nseq][a]=r1+'0'; } sprintf ( OUT->name[IN->nseq], "cons"); if (max_aln)OUT->score_seq[OUT->nseq]=OUT->score_aln=(100*tot_aln)/max_aln; free_double (tot_res,-1); free_double (max_res,-1); free_double (tot_seq,-1); free_double (max_seq,-1); return OUT; } Alignment * sar_evaluate_output ( Alignment *IN,Constraint_list *CL) { int a,b, c,r, s, r1, r2; Alignment *OUT=NULL; double **tot_res; double **max_res; double **tot_seq; double **max_seq; double **tot_col; double **max_col; double max_aln=0; double tot_aln=0; /* Residue x: sum of observed extended X.. /sum of possible X.. */ if ( !CL->M)CL->M=read_matrice ("blosum62mt"); OUT=copy_aln (IN, OUT); tot_res=declare_double ( IN->nseq, IN->len_aln); max_res=declare_double ( IN->nseq, IN->len_aln); tot_seq=declare_double ( IN->nseq, 1); max_seq=declare_double ( IN->nseq, 1); tot_col=declare_double ( IN->len_aln,1); max_col=declare_double ( IN->len_aln,1); max_aln=tot_aln=0; for (a=0; a< IN->len_aln; a++) { for (b=0; b< IN->nseq; b++) { r1=tolower(IN->seq_al[b][a]); for (c=0; cnseq; c++) { r2=tolower(IN->seq_al[c][a]); if (b==c)continue; if ( is_gap(r1) && is_gap(r2))s=0; else s=(r1==r2)?1:0; r=1; tot_res[b][a]+=s; max_res[b][a]+=r; tot_col[a][0]+=s; max_col[a][0]+=r; tot_seq[b][0]+=s; max_seq[b][0]+=r; tot_aln+=s; max_aln+=r; } } } for ( a=0; a< IN->nseq; a++) { if ( !max_seq[a][0])continue; OUT->score_seq[a]=(max_seq[a][0]*100)/max_seq[a][0]; for (b=0; b< IN->len_aln; b++) { r1=IN->seq_al[a][b]; if ( is_gap(r1) || !max_res[a][b])continue; r1=(tot_res[a][b]*10)/max_res[a][b]; r1=(r1>=10)?9:r1; r1=r1<0?0:r1; (OUT)->seq_al[a][b]=r1+'0'; } } for ( a=0; a< IN->len_aln; a++) { r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]); r1=(r1>=10)?9:r1; (OUT)->seq_al[OUT->nseq][a]=r1+'0'; } sprintf ( OUT->name[IN->nseq], "cons"); if (max_aln)OUT->score_aln=(100*tot_aln)/max_aln; free_double (tot_res,-1); free_double (max_res,-1); free_double (tot_seq,-1); free_double (max_seq,-1); return OUT; } Alignment * boxshade_evaluate_output ( Alignment *IN,Constraint_list *CL, int T) { Alignment *OUT=NULL; int **aa; int r,br, bs, a, b; float f; /* Residue x: sum of observed extended X.. /sum of possible X.. */ OUT=copy_aln (IN, OUT); aa=declare_int (26, 2); for ( a=0; a< OUT->len_aln; a++) { for ( b=0; b< 26; b++){aa[b][1]=0;aa[b][0]='a'+b;} for ( b=0; b< OUT->nseq; b++) { r=tolower(OUT->seq_al[b][a]); if ( !is_gap(r))aa[r-'a'][1]++; } sort_int ( aa, 2, 1, 0,25); f=(aa[25][1]*100)/OUT->nseq; if (fnseq; b++) { r=tolower(OUT->seq_al[b][a]); if (r==br && bs>'1')OUT->seq_al[b][a]=bs; } OUT->seq_al[b][a]=bs; } } sprintf ( OUT->name[IN->nseq], "cons"); return OUT; } Alignment * categories_evaluate_output ( Alignment *IN,Constraint_list *CL) { Alignment *OUT=NULL; int a, b, r; int *aa; float score, nseq2, tot_aln; float n; /* Residue x: sum of observed extended X.. /sum of possible X.. */ OUT=copy_aln (IN, OUT); aa=vcalloc ( 26, sizeof (int)); nseq2=IN->nseq*IN->nseq; for (tot_aln=0, a=0; a< IN->len_aln; a++) { for (n=0,b=0; b< IN->nseq; b++) { r=IN->seq_al[b][a]; if ( is_gap(r))n++; else { aa[tolower(r)-'a']++; n++; } } n=n*n; for ( score=0,b=0; b< 26; b++){score+=aa[b]*aa[b];aa[b]=0;} /*score/=nseq2;*/ score=(n==0)?0:score/n; tot_aln+=score; r=score*10; r=(r>=10)?9:r; (OUT)->seq_al[OUT->nseq][a]='0'+r; } OUT->score_aln=(tot_aln/OUT->len_aln)*100; sprintf ( OUT->name[IN->nseq], "cons"); vfree(aa); return OUT; } Alignment * categories_evaluate_output_old ( Alignment *IN,Constraint_list *CL) { Alignment *OUT=NULL; int nc,a, b, r; int *aa, ng; float score, nseq2, tot_aln, min=0; /* Residue x: sum of observed extended X.. /sum of possible X.. */ OUT=copy_aln (IN, OUT); aa=vcalloc ( 26, sizeof (int)); nseq2=IN->nseq*IN->nseq; for (tot_aln=0, a=0; a< IN->len_aln; a++) { for (ng=0,b=0; b< IN->nseq; b++) { r=IN->seq_al[b][a]; if ( is_gap(r))ng++; else { aa[tolower(r)-'a']++; } } for (nc=0, b=0; b<26; b++) { if ( aa[b])nc++; aa[b]=0; } if (nc>9)score=0; else score=9-nc; score=(2*min)/IN->nseq; tot_aln+=score; r=score*10; r=(r>=10)?9:r; (OUT)->seq_al[OUT->nseq][a]='0'+r; } OUT->score_aln=(tot_aln/OUT->len_aln)*100; sprintf ( OUT->name[IN->nseq], "cons"); vfree(aa); return OUT; } Alignment * fast_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) { int a,b, c, m,res, s, s1, s2, r1, r2; Alignment *OUT=NULL; int **pos, **pos2; double score_col=0, score_aln=0, score_res=0; double max_col, max_aln; double *max_seq, *score_seq; int local_m; int local_nseq; /*NORMALIZE: with the highest scoring pair found in the multiple alignment*/ if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } OUT=copy_aln (IN, OUT); pos=aln2pos_simple(IN, IN->nseq); pos2=aln2defined_residues (IN, CL); max_seq=vcalloc ( IN->nseq, sizeof (double)); score_seq=vcalloc ( IN->nseq, sizeof (double)); /*1: Identify the highest scoring pair within the alignment*/ for ( m=0, a=0; a< IN->len_aln; a++) { for ( b=0; b< IN->nseq; b++) { s1=IN->order[b][0]; r1=pos[b][a]; for ( c=0; c< IN->nseq; c++) { s2=IN->order[c][0]; r2=pos[c][a]; if ( s1==s2 && !CL->do_self)continue; if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); s=(s!=UNDEFINED)?s:0; m=MAX(m, s); } } } local_m=m; sprintf ( OUT->name[IN->nseq], "cons"); for ( max_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) { OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; for ( local_nseq=0,b=0; bnseq; b++){local_nseq+=(pos[b][a]>0 && pos2[b][a])?1:0;} local_m=m*(local_nseq-1); for ( max_col=0, score_col=0,b=0; b< IN->nseq; b++) { OUT->seq_al[b][a]=NO_COLOR_RESIDUE; s1=IN->order[b][0]; r1=pos[b][a]; if (r1<=0 || !pos2[b][a]) { continue; } for ( score_res=0,c=0; c< IN->nseq; c++) { s2=IN->order[c][0]; r2=pos[c][a]; if ((s1==s2 && !CL->do_self) || r2<=0 || !pos2[c][a]){continue;} max_col +=m; max_seq[b]+=m; max_aln +=m; if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); s=(s!=UNDEFINED)?s:0; score_res+=s; score_col+=s; score_seq[b]+=s; score_aln+=s; } res=(local_m==0)?NO_COLOR_RESIDUE:((score_res*10)/local_m); (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); } res=(max_col==0)?NO_COLOR_RESIDUE:((score_col*10)/max_col); OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); } IN->score_aln=OUT->score_aln=(max_aln==0)?0:((score_aln*100)/max_aln); for ( a=0; a< OUT->nseq; a++) { OUT->score_seq[a]=(max_seq[a]==0)?0:((score_seq[a]*100)/max_seq[a]); } free_int (pos , -1); free_int (pos2, -1); vfree ( score_seq); vfree ( max_seq); return OUT; } Alignment * slow_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) { int a,b, c,res, s, s1, s2, r1, r2; Alignment *OUT=NULL; int **pos, **pos2; double max_score_r, score_r, max; double score_col=0, score_aln=0; double max_score_col, max_score_aln; double *max_score_seq, *score_seq; int ***res_extended_weight; int n_res_in_col; /* Residue x: sum of observed extended X.. /sum of possible X.. */ if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } OUT=copy_aln (IN, OUT); pos=aln2pos_simple(IN, IN->nseq); pos2=aln2defined_residues (IN, CL); max_score_seq=vcalloc ( IN->nseq, sizeof (double)); score_seq=vcalloc ( IN->nseq, sizeof (double)); res_extended_weight=declare_arrayN(3,sizeof(int), (CL->S)->nseq, (CL->S)->max_len+1, 2); max=(CL->normalise)?(100*CL->normalise)*SCORE_K:100; for (a=0; a< IN->len_aln; a++) { for ( b=0; b< IN->nseq-1; b++) { s1=IN->order[b][0]; r1=pos[b][a]; for ( c=b+1; c< IN->nseq; c++) { s2=IN->order[c][0]; r2=pos[c][a]; if ( s1==s2 && !CL->do_self)continue; else if ( r1<=0 || r2<=0) continue; else { s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); res_extended_weight[s1][r1][0]+=s*100; res_extended_weight[s2][r2][0]+=s*100; res_extended_weight[s1][r1][1]+=max; res_extended_weight[s2][r2][1]+=max; } } } } sprintf ( OUT->name[IN->nseq], "cons"); for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) { OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; for ( n_res_in_col=0,b=0; bnseq; b++){n_res_in_col+=(pos[b][a]>0 && pos2[b][a]>0)?1:0;} for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++) { OUT->seq_al[b][a]=NO_COLOR_RESIDUE; s1=IN->order[b][0]; r1=pos[b][a]; if (r1<=0 || pos2[b][a]<1)continue; else { max_score_r =res_extended_weight[s1][r1][1]; score_r =res_extended_weight[s1][r1][0]; if ( max_score_r==0 && n_res_in_col>1)res=0; else if ( n_res_in_col==1)res=NO_COLOR_RESIDUE; else res=((score_r*10)/max_score_r); (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9)); max_score_col+=max_score_r; score_col+=score_r; max_score_seq[b]+=max_score_r; score_seq[b]+=score_r; max_score_aln+=max_score_r; score_aln+=score_r; } if ( max_score_col==0 && n_res_in_col>1)res=0; else if ( n_res_in_col<2)res=NO_COLOR_RESIDUE; else res=((score_col*10)/max_score_col); OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); } } IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln); for ( a=0; a< OUT->nseq; a++) { OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]); } vfree ( score_seq); vfree ( max_score_seq); free_arrayN((void*)res_extended_weight, 3); free_int (pos, -1); free_int (pos2, -1); return OUT; } Alignment * heuristic_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) { int a,b, c,res, s, s1, s2, r1, r2; Alignment *OUT=NULL; int **pos; int max_score_r, score_r; double score_col=0, score_aln=0; int max_score_col, max_score_aln; double *max_score_seq, *score_seq; int **tot_extended_weight; int **res_extended_weight; int n_res_in_col; /* Residue x: sum of observed extended X.. /sum of possible X.. */ if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } OUT=copy_aln (IN, OUT); pos=aln2pos_simple(IN, IN->nseq); max_score_seq=vcalloc ( IN->nseq, sizeof (double)); score_seq=vcalloc ( IN->nseq, sizeof (double)); tot_extended_weight=list2residue_partial_extended_weight(CL); res_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1); for (a=0; a< IN->len_aln; a++) { for ( b=0; b< IN->nseq-1; b++) { s1=IN->order[b][0]; r1=pos[b][a]; for ( c=b+1; c< IN->nseq; c++) { s2=IN->order[c][0]; r2=pos[c][a]; if ( s1==s2 && !CL->do_self)continue; else if ( r1<=0 || r2<=0) continue; else { if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2); else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1); res_extended_weight[s1][r1]+=s; res_extended_weight[s2][r2]+=s; } } } } sprintf ( OUT->name[IN->nseq], "cons"); for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) { OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; for ( n_res_in_col=0,b=0; bnseq; b++){n_res_in_col+=(pos[b][a]>0)?1:0;} for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++) { OUT->seq_al[b][a]=NO_COLOR_RESIDUE; s1=IN->order[b][0]; r1=pos[b][a]; if (r1<=0)continue; else { max_score_r =tot_extended_weight[s1][r1]; score_r=res_extended_weight[s1][r1]; res=(max_score_r==0 ||n_res_in_col<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r); (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9)); max_score_col+=max_score_r; score_col+=score_r; max_score_seq[b]+=max_score_r; score_seq[b]+=score_r; max_score_aln+=max_score_r; score_aln+=score_r; } res=(max_score_col==0 || n_res_in_col<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col); OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); } } IN->score_aln=OUT->score_aln=MIN(100,((max_score_aln==0)?0:((score_aln*100)/max_score_aln))); for ( a=0; a< OUT->nseq; a++) { OUT->score_seq[a]=MIN(100,((max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]))); } vfree ( score_seq); vfree ( max_score_seq); free_int (tot_extended_weight, -1); free_int (res_extended_weight, -1); free_int (pos, -1); return OUT; } Alignment * non_extended_t_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL) { int a,b, c,res, s1, s2, r1, r2; Alignment *OUT=NULL; int **pos; int max_score_r, score_r; double score_col=0, score_aln=0; int max_score_col, max_score_aln; double *max_score_seq, *score_seq; int local_nseq; int **tot_non_extended_weight; int **res_non_extended_weight; int *l; CLIST_TYPE *entry=NULL; int p; int max_score=0; entry=vcalloc (CL->entry_len, CL->el_size); if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; } OUT=copy_aln (IN, OUT); pos=aln2pos_simple(IN, IN->nseq); max_score_seq=vcalloc ( IN->nseq, sizeof (double)); score_seq=vcalloc ( IN->nseq, sizeof (double)); tot_non_extended_weight=list2residue_total_weight(CL); res_non_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1); for (a=0; a< IN->len_aln; a++) { for ( b=0; b< IN->nseq-1; b++) { s1=IN->order[b][0]; r1=pos[b][a]; for ( c=b+1; c< IN->nseq; c++) { s2=IN->order[c][0]; r2=pos[c][a]; if ( s1==s2 && !CL->do_self)continue; else if ( r1<=0 || r2<=0) continue; else { entry[SEQ1]=s1; entry[SEQ2]=s2; entry[R1]=r1; entry[R2]=r2; if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL) { res_non_extended_weight[s1][r1]+=l[WE]; res_non_extended_weight[s2][r2]+=l[WE]; } entry[SEQ1]=s2; entry[SEQ2]=s1; entry[R1]=r2; entry[R2]=r1; if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL) { res_non_extended_weight[s1][r1]+=l[WE]; res_non_extended_weight[s2][r2]+=l[WE]; } max_score=MAX(max_score,res_non_extended_weight[s1][r1]); max_score=MAX(max_score,res_non_extended_weight[s2][r2]); } } } } sprintf ( OUT->name[IN->nseq], "cons"); for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++) { OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE; for ( local_nseq=0,b=0; bnseq; b++){local_nseq+=(pos[b][a]>0)?1:0;} for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++) { OUT->seq_al[b][a]=NO_COLOR_RESIDUE; s1=IN->order[b][0]; r1=pos[b][a]; if (r1<=0)continue; else { max_score_r =max_score;/*tot_non_extended_weight[s1][r1];*/ score_r=res_non_extended_weight[s1][r1]; res=(max_score_r==0 || local_nseq<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r); (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9)); max_score_col+=max_score_r; score_col+=score_r; max_score_seq[b]+=max_score_r; score_seq[b]+=score_r; max_score_aln+=max_score_r; score_aln+=score_r; } res=(max_score_col==0 || local_nseq<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col); OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9)); } } IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln); for ( a=0; a< OUT->nseq; a++) { OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]); OUT->score_seq[a]=(OUT->score_seq[a]>100)?100:OUT->score_seq[a]; } OUT->score_aln=(OUT->score_aln>100)?100:OUT->score_aln; vfree ( score_seq); vfree ( max_score_seq); free_int (tot_non_extended_weight, -1); free_int (res_non_extended_weight, -1); vfree(entry); free_int (pos, -1); return OUT; } /*********************************************************************************************/ /* */ /* PROFILE/PRofile Functions */ /* */ /*********************************************************************************************/ int channel_profile_profile (int *prf1, int *prf2, Constraint_list *CL); Profile_cost_func get_profile_mode_function (char *name, Profile_cost_func func) { int a; static int nfunc; static Profile_cost_func *flist; static char **nlist; /*The first time: initialize the list of pairwse functions*/ /*If func==NULL:REturns a pointer to the function associated with a name*/ /*If name is empty:Prints the name of the function associated with name*/ if ( nfunc==0) { flist=vcalloc ( 100, sizeof (Pwfunc)); nlist=declare_char (100, 100); flist[nfunc]=cw_profile_profile; sprintf (nlist[nfunc], "cw_profile_profile"); nfunc++; flist[nfunc]=muscle_profile_profile; sprintf (nlist[nfunc], "muscle_profile_profile"); nfunc++; flist[nfunc]=channel_profile_profile; sprintf (nlist[nfunc], "channel_profile_profile"); nfunc++; } for ( a=0; a0 && r2>0) { r1--; r2--; prf1=(Profile1)?(Profile1->P)->count2[r1]:NULL; prf2=(Profile2)?(Profile2->P)->count2[r2]:NULL; if (!prf1) {prf1=dummy; prf1[3]=(CL->S)->seq[s1][r1];} else if (!prf2){prf2=dummy; prf2[3]=(CL->S)->seq[s2][r2];} score=((prf_prf==NULL)?cw_profile_profile:prf_prf) (prf1, prf2, CL); return score; } else return 0; } int cw_profile_profile_count (int *prf1, int *prf2, Constraint_list *CL) { /*see function aln2count2 for prf structure*/ int a, b, n; int res1, res2; double score=0; for ( n=0,a=3; aM[res1-'A'][res2-'A']; n+=prf1[a+1]*prf2[b+1]; } score=(score*SCORE_K)/n; return score; } int muscle_profile_profile (int *prf1, int *prf2, Constraint_list *CL) { /*see function aln2count2 for prf structure*/ int a, b; int res1, res2; double score=0, fg1, fg2, fi, fj, m; static double *exp_lu; if (exp_lu==NULL) { exp_lu=vcalloc ( 10000, sizeof (double)); exp_lu+=2000; for ( a=-1000; a<1000; a++) exp_lu[a]=exp((double)a); } for (a=3; aM[res1-'A'][res2-'A']);*/ m=exp_lu[CL->M[res1-'A'][res2-'A']]; score+=m*fi*fj; } } fg1=(double)prf1[2]/100; fg2=(double)prf2[2]/100; score=(score==0)?0:log(score)*(1-fg1)*(1-fg2); score=(score*SCORE_K); /*if ( score<-100)fprintf ( stderr, "\nSCORE %d %d", (int)score, cw_profile_profile(prf1, prf2, CL));*/ return (int)score; } int cw_profile_profile (int *prf1, int *prf2, Constraint_list *CL) { /*see function aln2count2 for prf structure*/ int a, b, n,p; int res1, res2; double score=0; for ( n=0,a=3; aM[res1-'A'][res2-'A']; } score=(score*SCORE_K)/((double)(n==0)?1:n); return score; } int cw_profile_profile_old (int *prf1, int *prf2, Constraint_list *CL) { /*see function aln2count2 for prf structure*/ int a, b, n,p; int res1, res2; double score=0; for ( n=0,a=3; aM[res1-'A'][res2-'A']; } score=(score*SCORE_K)/((double)(n==0)?1:n); return score; } int channel_profile_profile ( int *prf1, int *prf2, Constraint_list *CL) { int score=0; prf1+=prf1[1]; prf2+=prf2[1]; if (prf1[0]!=prf1[0]){fprintf ( stderr, "\nERROR: Inconsistent number of channels [channel_profile_profile::FATAL%s]", PROGRAM);} else { int a, n; for (a=1, n=0; a<=prf1[0]; a++) { if (prf1[a]>0 && prf2[a]>0) { n++;score+=CL->M[prf1[a]-'A'][prf2[a]-'A']; } } if ( n==0)return 0; score=(n==0)?0:(score*SCORE_K)/n; } return score; } /*********************************************************************************************/ /* */ /* FUNCTIONS FOR GETING THE COST : (Sequences) ->evaluate_residue_pair */ /* */ /*********************************************************************************************/ int evaluate_blast_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2) { Alignment *PRF1; Alignment *PRF2; PRF1=(Alignment*)atop(seq2T_value (CL->S, s1, "A", "_RB_")); PRF2=(Alignment*)atop(seq2T_value (CL->S, s2, "A", "_RB_")); return generic_evaluate_profile_score (CL,PRF1,s1, r1, PRF2,s2, r2, CL->profile_mode); } int evaluate_aln_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2) { return generic_evaluate_profile_score (CL,seq2R_template_profile((CL->S),s1),s1, r1, seq2R_template_profile(CL->S,s2),s2, r2, CL->profile_mode); } int evaluate_profile_score (Constraint_list *CL,Alignment *Prf1, int s1, int r1, Alignment *Prf2,int s2, int r2) { return generic_evaluate_profile_score (CL, Prf1, s1,r1,Prf2, s2,r2,CL->profile_mode); } int evaluate_cdna_matrix_score (Constraint_list *CL, int s1, int r1, int s2, int r2) { char a1, a2; if (r1>0 && r2>0) { r1--; r2--; a1=translate_dna_codon((CL->S)->seq[s1]+r1,'x'); a2=translate_dna_codon((CL->S)->seq[s2]+r2,'x'); if (a1=='x' || a2=='x')return 0; else return CL->M[a1-'A'][a2-'A']*SCORE_K; } else { return 0; } } int evaluate_physico_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { int a, b, p; double tot; static float **prop_table; static int n_prop; static double max; if (r1<0 || r2<0)return 0; if ( !prop_table) { prop_table= initialise_aa_physico_chemical_property_table(&n_prop); for (p=0; p< n_prop; p++)max+=100; max=sqrt(max); } a=tolower (( CL->S)->seq[s1][r1]); b=tolower (( CL->S)->seq[s2][r2]); for (tot=0,p=0; p< n_prop; p++) { tot+=(double)(prop_table[p][a]-prop_table[p][b])*(prop_table[p][a]-prop_table[p][b]); } tot=(sqrt(tot)/max)*10; return (int) tot*SCORE_K; } int evaluate_diaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { static int ****m; static int *alp; if (m==NULL) { FILE *fp; char k1[2], k2[2]; int v1, v2, c; char *buf=NULL; int a; m=declare_arrayN(4, sizeof (int), 26, 26, 26, 26); fp=vfopen ("diaa_mat.mat", "r"); while ((c=fgetc (fp))!=EOF) { ungetc (c, fp); buf=vfgets(buf, fp); if (c=='#'); else { sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2); m[k1[0]-'a'][k1[1]-'a'][k2[0]-'a'][k2[1]-'a']=v1; m[k2[0]-'a'][k2[1]-'a'][k1[0]-'a'][k1[1]-'a']=v1; } } vfclose (fp); alp=vcalloc (256, sizeof (int)); for (a=0; a<26; a++)alp[a+'a']=1; alp['b']=0; alp['j']=0; alp['o']=0; alp['u']=0; alp['x']=0; alp['z']=0; } if (r1>0 && r2>0) { int s=0, n=0; char aa1, aa2, aa3, aa4, u; r1--; r2--; if (r1>0 && r2>0) { aa1=tolower((CL->S)->seq[s1][r1-1]); aa2=tolower((CL->S)->seq[s1][r1]); aa3=tolower((CL->S)->seq[s2][r2-1]); aa4=tolower((CL->S)->seq[s2][r2]); u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4]; if (u==4) { s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a']; n++; } } aa1=tolower((CL->S)->seq[s1][r1]); aa2=tolower((CL->S)->seq[s1][r1+1]); aa3=tolower((CL->S)->seq[s2][r2]); aa4=tolower((CL->S)->seq[s2][r2+1]); u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4]; if (u==4) { s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a']; n++; } if (n)return (s*SCORE_K)/n; else return 0; } return 0;} int evaluate_monoaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { static int **m; static int *alp; if (m==NULL) { FILE *fp; char k1[2], k2[2]; int v1, v2, c; char *buf=NULL; int a; m=declare_arrayN(2, sizeof (int), 26, 26); fp=vfopen ("monoaa_mat.mat", "r"); while ((c=fgetc (fp))!=EOF) { ungetc (c, fp); buf=vfgets(buf, fp); if (c=='#'); else { sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2); m[k1[0]-'a'][k2[0]-'a']=v1; m[k2[0]-'a'][k1[0]-'a']=v1; } } vfclose (fp); alp=vcalloc (256, sizeof (int)); for (a=0; a<26; a++)alp[a+'a']=1; alp['b']=0; alp['j']=0; alp['o']=0; alp['u']=0; alp['x']=0; alp['z']=0; } if (r1>0 && r2>0) { int s=0, n=0; char aa1, aa3, u; r1--; r2--; if (r1>0 && r2>0) { aa1=tolower((CL->S)->seq[s1][r1]); aa3=tolower((CL->S)->seq[s2][r2]); u=alp[(int)aa1];u+=alp[(int)aa3]; if (u==2) { s+=m[aa1-'a'][aa3-'a']; n++; } } if (n)return (s*SCORE_K)/n; else return 0; } return 0;} int evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) { return evaluate_aln_profile_score ( CL, s1,r1, s2, r2); } if (r1>0 && r2>0) { r1--; r2--; return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; } else return 0; } int *get_curvature ( int s1, Constraint_list *CL); int evaluate_curvature_score( Constraint_list *CL, int s1, int r1, int s2, int r2) { static int **st; int score; CL->gop=0; CL->gep=0; if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*)); if (!st[s1]) { st[s1]=get_curvature (s1, CL); } if (!st[s2]) { st[s2]=get_curvature (s2, CL); } if (r1>0 && r2>0) { char p1, p2; r1--; r2--; p1=st[s1][r1]; p2=st[s2][r2]; score=p1-p2; score=FABS(score); score=20-score; //HERE ("%d", score); //if (score<0)HERE ("%d", score); return score; } else { return 0; } } int *get_curvature ( int s1, Constraint_list *CL) { int *array, n=0, a; char c; FILE *fp; char name [1000], b1[100], b2[100]; float f; sprintf ( name, "%s.curvature", (CL->S)->name[s1]); array=vcalloc (strlen ((CL->S)->seq[s1]), sizeof (int)); fp=vfopen ( name, "r"); while ( fscanf (fp, "%s %d %c %f\n",b1, &a, &c,&f )==4) { if ( c!=(CL->S)->seq[s1][n]){HERE ("ERROR: %c %c", c,(CL->S)->seq[s1][n] );exit (0);} else array[n++]=(int)(float)100*(float)f; } vfclose (fp); return array; } int evaluate_tm_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { static char **st; static int **tmat; if (!tmat) { tmat=read_matrice ("blosum62mt"); } if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*)); if (!st[s1])st[s1]=seq2T_template_string((CL->S),s1); if (!st[s2])st[s2]=seq2T_template_string((CL->S),s2); if (r1>0 && r2>0) { int p1, p2; r1--; r2--; p1=p2=-1; if (st[s1])p1=tolower (st[s1][r1]); if (st[s2])p2=tolower (st[s2][r2]); if ( p1=='h' && p2=='h')return tmat[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; //else if (p1=='h' || p2=='h' ) return -100*SCORE_K; else return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; } else { return 0; } } int evaluate_ssp_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { static char **st; static int **alpha, **beta, **coil; if (!alpha) { beta=read_matrice ("beta_mat"); alpha=read_matrice ("alpha_mat"); coil=read_matrice ("coil_mat"); } if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*)); if (!st[s1])st[s1]=seq2E_template_string((CL->S),s1); if (!st[s2])st[s2]=seq2E_template_string((CL->S),s2); if ( !st[s1])HERE ("1******"); if ( !st[s2])HERE ("2******"); if (r1>0 && r2>0) { char p1, p2; float F; int score=0; p1=p2=-1; r1--; r2--; if (st[s1])p1=tolower (st[s1][r1]); if (st[s2])p2=tolower (st[s2][r2]); if (p1!=-1 && p1==p2)F=1.3; else F=1; score= CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*F*SCORE_K; return score; } else { return 0; } } int evaluate_combined_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2) { /* function documentation: start int evaluate_matrix_score ( Constraint_list *CL, int s1, int s2, int r1, int r2) this function evaluates the score for matching residue S1(r1) wit residue S2(r2) using Matrix CL->M; function documentation: end */ if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) { return evaluate_aln_profile_score ( CL, s1,r1, s2, r2); } if (r1>0 && r2>0) { r1--; r2--; if (r1==0 || r2==0)return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K; else { int A1, A2, B1, B2, a2, b2; int score; A1=toupper((CL->S)->seq[s1][r1-1]); A2=toupper((CL->S)->seq[s1][r1]); B1=toupper((CL->S)->seq[s2][r2-1]); B2=toupper((CL->S)->seq[s2][r2]); a2=tolower(A2); b2=tolower(B2); A1-='A';A2-='A';B1-='A'; B2-='A';a2-='A';b2-='A'; score=CL->M[a2][b2]-FABS((CL->M[A1][A2])-(CL->M[B1][B2])); score*=SCORE_K; return score; } } else return 0; } int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { /* This is the generic Function->works with everything int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field ); Computes the non extended score for aligning residue seq1(r1) Vs seq2(r2) This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ int p; static int *entry; int *r; int field; field = CL->weight_field; if ( r1<=0 || r2<=0)return 0; else if ( !CL->extend_jit) { if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int)); entry[SEQ1]=s1; entry[SEQ2]=s2; entry[R1]=r1; entry[R2]=r2; if ( r1==r2 && s1==s2) return UNDEFINED; r=main_search_in_list_constraint( entry,&p,4,CL); if (r==NULL)return 0; else return r[field]*SCORE_K; } else return UNDEFINED;/*ERROR*/ } int residue_pair_extended_list_mixt (Constraint_list *CL, int s1, int r1, int s2, int r2 ) { int score=0; score+= residue_pair_extended_list_quadruplet(CL, s1, r1, s2, r2); score+= residue_pair_extended_list (CL, s1, r1, s2, r2); return score*SCORE_K; } int residue_pair_extended_list_quadruplet (Constraint_list *CL, int s1, int r1, int s2, int r2 ) { double score=0; int t_s, t_r, t_w, q_s, q_r, q_w; int a, b; static int **hasch; int field; /* This measure the quadruplets cost on a pair of residues*/ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch) { hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=100000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; t_w=CL->residue_index[s1][r1][a+2]; if ( CL->seq_for_quadruplet[t_s]) { for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) { q_s=CL->residue_index[t_s][t_r][b]; q_r=CL->residue_index[t_s][t_r][b+1]; q_w=CL->residue_index[t_s][t_r][b+2]; if (CL-> seq_for_quadruplet[q_s]) hasch[q_s][q_r]=MIN(q_w,t_w); } } } for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; t_w=CL->residue_index[s2][r2][a+2]; if ( CL->seq_for_quadruplet[t_s]) { for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) { q_s=CL->residue_index[t_s][t_r][b]; q_r=CL->residue_index[t_s][t_r][b+1]; q_w=CL->residue_index[t_s][t_r][b+2]; if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s]) score+=MIN(hasch[q_s][q_r],MIN(q_w,t_w)); } } } score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; t_w=CL->residue_index[s1][r1][a+2]; if ( CL->seq_for_quadruplet[t_s]) { for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) { q_s=CL->residue_index[t_s][t_r][b]; q_r=CL->residue_index[t_s][t_r][b+1]; hasch[q_s][q_r]=0; } } } return (int)(score*SCORE_K); } Constraint_list * R_extension ( Constraint_list *CL, Constraint_list *R); int residue_pair_extended_list4rna4 (Constraint_list *CL,int s1, int r1, int s2, int r2 ) { static int rna_lib; if (!rna_lib) { sprintf ( CL->rna_lib, "%s", seq2rna_lib (CL->S, NULL)); rna_lib=1; } return residue_pair_extended_list4rna2 (CL, s1, r1, s2,r2); } int residue_pair_extended_list4rna1 (Constraint_list *CL, int s1, int r1, int s2, int r2 ) { static Constraint_list *R; if (!R)R=read_rna_lib (CL->S, CL->rna_lib); return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2); } int residue_pair_extended_list4rna3 (Constraint_list *CL,int s1, int r1, int s2, int r2 ) { static Constraint_list *R; if (!R) { R=read_rna_lib (CL->S, CL->rna_lib); rna_lib_extension (CL,R); } return residue_pair_extended_list (CL, s1,r1, s2,r2); } int residue_pair_extended_list4rna2 (Constraint_list *CL,int s1, int r1, int s2, int r2 ) { static Constraint_list *R; if (!R) { R=read_rna_lib (CL->S, CL->rna_lib); rna_lib_extension (CL,R); } return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2); } int residue_pair_extended_list4rna ( Constraint_list *CL,Constraint_list *R, int s1, int r1, int s2, int r2 ) { int a, b, n1, n2; int list1[100]; int list2[100]; int score=0, score2; if ( r1<0 || r2<0)return 0; n1=n2=0; list1[n1++]=r1; for (a=1; aresidue_index[s1][r1][0]; a+=3) { list1[n1++]=R->residue_index[s1][r1][a+1]; } list2[n2++]=r2; for (a=1; aresidue_index[s2][r2][0]; a+=3) { list2[n2++]=R->residue_index[s2][r2][a+1]; } score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]); for (score2=0,a=1; arna_lib, &n); R=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL); for (a=0; a< n; a++) { R=read_constraint_list_file (R, list[a]); } R=index_res_constraint_list (R, CL->weight_field); } if ( r1<0 || r2<0)return 0; n1=n2=0; list1[n1++]=r1; for (a=1; aresidue_index[s1][r1][0]; a+=3) { list1[n1++]=R->residue_index[s1][r1][a+1]; } list2[n2++]=r2; for (a=1; aresidue_index[s2][r2][0]; a+=3) { list2[n2++]=R->residue_index[s2][r2][a+1]; } score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]); for (score2=0,a=1; aweight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch || max_len!=(CL->S)->max_len) { max_len=(CL->S)->max_len; if ( hasch) free_int ( hasch, -1); hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); } CL=index_res_constraint_list ( CL, field); /* Check matches for R1 in the indexed lib*/ hasch[s1][r1]=FORBIDEN; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; } /*Check Matches for r1 <-> r2 in the indexed lib */ for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { if (hasch[t_s][t_r]==FORBIDEN) { score+=CL->residue_index[s2][r2][a+2]; } else { delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); score+=delta; } } } clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL); return score; } int residue_pair_extended_list_pc ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { double score=0; int a, t_s, t_r; static int **hasch; static int max_len; int field; double delta; /* function documentation: start int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch || max_len!=(CL->S)->max_len) { max_len=(CL->S)->max_len; if ( hasch) free_int ( hasch, -1); hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); } CL=index_res_constraint_list ( CL, field); /* Check matches for R1 in the indexed lib*/ hasch[s1][r1]=FORBIDEN; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; } /*Check Matches for r1 <-> r2 in the indexed lib */ for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { if (hasch[t_s][t_r]==FORBIDEN) { score+=((float)CL->residue_index[s2][r2][a+2]/NORM_F); } else { //delta=((float)hasch[t_s][t_r]/NORM_F)*((float)CL->residue_index[s2][r2][a+2]/NORM_F); delta=MIN((((float)hasch[t_s][t_r]/NORM_F)),(((float)CL->residue_index[s2][r2][a+2]/NORM_F))); score+=delta; } } } clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL); score/=(CL->S)->nseq; return score*NORM_F; } int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { double score=0; double max_score=0; double max_val=0; int a, t_s, t_r; static int **hasch; static int max_len; int field; /* new function: self normalized function documentation: start int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch || max_len!=(CL->S)->max_len) { max_len=(CL->S)->max_len; if ( hasch) free_int ( hasch, -1); hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); } CL=index_res_constraint_list ( CL, field); /* Check matches for R1 in the indexed lib*/ hasch[s1][r1]=FORBIDEN; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; max_score+=CL->residue_index[s1][r1][a+2]; } /*Check Matches for r1 <-> r2 in the indexed lib */ for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { if (hasch[t_s][t_r]==FORBIDEN) { score+=CL->residue_index[s2][r2][a+2]; max_score+=CL->residue_index[s2][r2][a+2]; } else { double delta; delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); score+=delta; max_score-=hasch[t_s][t_r]; max_score+=delta; max_val=MAX(max_val,delta); } } else { max_score+=CL->residue_index[s2][r2][a+2]; } } max_score-=hasch[s2][r2]; clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL); if ( max_score==0)score=0; else if ( CL->normalise) { score=((score*CL->normalise)/max_score)*SCORE_K; if (max_val> CL->normalise) { score*=max_val/(double)CL->normalise; } } return (int) score; } int ** clean_residue_pair_hasch (int s1, int r1, int s2, int r2,int **hasch, Constraint_list *CL) { int a, t_s, t_r; if ( !hasch) return hasch; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=0; } hasch[s1][r1]=hasch[s2][r2]=0; return hasch; } int residue_pair_extended_list_old ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { double score=0; int a, t_s, t_r; static int **hasch; static int max_len; int field; /* function documentation: start int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch || max_len!=(CL->S)->max_len) { max_len=(CL->S)->max_len; if ( hasch) free_int ( hasch, -1); hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=100000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; } for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { if (field==WE) { score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); } } } score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=0; } hasch[s1][r1]=hasch[s2][r2]=0; return (int)(score*SCORE_K); } int residue_pair_test_function ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { double score=0; int a, t_s, t_r; static int **hasch; static int max_len; int cons1; int cons2; int field; /* function documentation: start int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ CL->weight_field=WE; field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch || max_len!=(CL->S)->max_len) { max_len=(CL->S)->max_len; if ( hasch) free_int ( hasch, -1); hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=1000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; } for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { cons1=hasch[t_s][t_r]; cons2=CL->residue_index[s2][r2][a+2]; score +=MIN(cons1,cons2); } } score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=0; } hasch[s1][r1]=hasch[s2][r2]=0; return (int)(score*SCORE_K); } int residue_pair_relative_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { int a, t_s, t_r; static int **hasch; static int max_len; int score=0; int total_score=0; int field; /* function documentation: start int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch || max_len!=(CL->S)->max_len) { max_len=(CL->S)->max_len; if ( hasch) free_int ( hasch, -1); hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=100000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; total_score+=CL->residue_index[s1][r1][a+2]; } for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; total_score+=CL->residue_index[s1][r1][a+2]; if (hasch[t_s][t_r]) { if (field==WE){score+=2*MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);} } } score=((CL->normalise*score)/total_score); for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=0; } hasch[s1][r1]=hasch[s2][r2]=0; return score*SCORE_K; } int residue_pair_extended_list_g_coffee_quadruplet ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { int t_s, t_r, t_w, q_s, q_r, q_w; int a, b; static int **hasch; int score=0, s=0; int field; /* This measure the quadruplets cost on a pair of residues*/ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch) { hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=100000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; t_w=CL->residue_index[s1][r1][a+2]; if ( CL->seq_for_quadruplet[t_s]) { for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) { q_s=CL->residue_index[t_s][t_r][b]; q_r=CL->residue_index[t_s][t_r][b+1]; if (CL-> seq_for_quadruplet[q_s]) { hasch[q_s][q_r]=MIN(CL->residue_index[t_s][t_r][b+2],t_w); } } } } for (s=0,score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; t_w=CL->residue_index[s2][r2][a+2]; if ( CL->seq_for_quadruplet[t_s]) { for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) { q_s=CL->residue_index[t_s][t_r][b]; q_r=CL->residue_index[t_s][t_r][b+1]; q_w=CL->residue_index[t_s][t_r][b+2]; if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s]) s=MIN(hasch[q_s][q_r],MIN(CL->residue_index[t_s][t_r][b+2],q_w)); score=MAX(score, s); } } } for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; t_w=CL->residue_index[s1][r1][a+2]; if ( CL->seq_for_quadruplet[t_s]) { for ( b=1; bresidue_index[t_s][t_r][0]; b+=3) { q_s=CL->residue_index[t_s][t_r][b]; q_r=CL->residue_index[t_s][t_r][b+1]; hasch[q_s][q_r]=0; } } } return score*SCORE_K; } int residue_pair_extended_list_g_coffee ( Constraint_list *CL, int s1, int r1, int s2, int r2 ) { int a, t_s, t_r; static int **hasch; int score=0,s; int field; /* function documentation: start int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2); Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; if ( !hasch) { hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=100000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; } for (s=0, score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { if (field==WE) {s=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]); score=MAX(s,score); } } } for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=0; } hasch[s1][r1]=hasch[s2][r2]=0; return score*SCORE_K; } int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2) { double score=0; int a, t_s, t_r, p; static int **hasch; static int *entry; int *r; int field; /* This is the generic Function->works with everything should be gradually phased out int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field ) Computes the extended score for aligning residue seq1(r1) Vs seq2(r2) Computes: matrix_score non extended score extended score The extended score depends on the function index_res_constraint_list. This function can compare a sequence with itself. Associated functions: See util constraint list, list extention functions. function documentation: end */ field=CL->weight_field; if ( r1<=0 || r2<=0)return 0; else if ( !CL->L && CL->M) { return evaluate_matrix_score (CL, s1,r1, s2, r2); } else if ( !CL->extend_jit) { if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int)); entry[SEQ1]=s1; entry[SEQ2]=s2; entry[R1]=r1; entry[R2]=r2; r=main_search_in_list_constraint( entry,&p,4,CL); if (r==NULL)return 0; else return r[field]; } else { if ( !hasch) { hasch=vcalloc ( (CL->S)->nseq, sizeof (int*)); for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int)); } CL=index_res_constraint_list ( CL, field); hasch[s1][r1]=100000; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2]; } for (a=1; a< CL->residue_index[s2][r2][0]; a+=3) { t_s=CL->residue_index[s2][r2][a]; t_r=CL->residue_index[s2][r2][a+1]; if (hasch[t_s][t_r]) { if (field==WE)score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2] ); } } score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; for (a=1; a< CL->residue_index[s1][r1][0]; a+=3) { t_s=CL->residue_index[s1][r1][a]; t_r=CL->residue_index[s1][r1][a+1]; hasch[t_s][t_r]=0; } hasch[s1][r1]=hasch[s2][r2]=0; return (int)(score*SCORE_K); } } /*********************************************************************************************/ /* */ /* FUNCTIONS FOR GETTING THE PW COST : CL->get_dp_cost */ /* */ /*********************************************************************************************/ int get_dp_cost_blosum_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int s1, r1, s2, r2; static int **matrix; if (!matrix) matrix=read_matrice ("blosum62mt"); s1=A->order[list1[0]][0]; s2=A->order[list2[0]][0]; r1=pos1[list1[0]][col1]; r2=pos2[list2[0]][col2]; /*dp cost function: works only with two sequences*/ if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K; else if (r1>0 && r2>0) { r1--; r2--; return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K; } else return -CL->nomatch*SCORE_K ; } int get_dp_cost_pam_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int s1, r1, s2, r2; static int **matrix; if (!matrix) matrix=read_matrice ("pam250mt"); s1=A->order[list1[0]][0]; s2=A->order[list2[0]][0]; r1=pos1[list1[0]][col1]; r2=pos2[list2[0]][col2]; /*dp cost function: works only with two sequences*/ if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K; else if (r1>0 && r2>0) { r1--; r2--; return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K; } else return -CL->nomatch*SCORE_K ; } int get_dp_cost_pw_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int s1, r1, s2, r2; s1=A->order[list1[0]][0]; s2=A->order[list2[0]][0]; r1=pos1[list1[0]][col1]; r2=pos2[list2[0]][col2]; /*dp cost function: works only with two sequences*/ if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2)) return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K; else if (r1>0 && r2>0) { r1--; r2--; return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K; } else return -CL->nomatch*SCORE_K ; } /*********************************************************************************************/ /* */ /* FUNCTIONS FOR GETTING THE COST : CL->get_dp_cost */ /* */ /*********************************************************************************************/ int get_cdna_best_frame_dp_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int a, b; int n=4; int s; char a1, a2; static int l1, l2; static Alignment *B; static int **score; if ( !score)score=declare_int(3, 2); if (!A) { free_aln(B); B=NULL; return UNDEFINED; } if (!B) { if (ns1+ns2>2){fprintf ( stderr, "\nERROR: get_cdna_dp_cost mode is only for pair-wise ALN [FATAL]\n");crash("");} free_aln (B); B=copy_aln (A, NULL); l1=(int)strlen ( A->seq_al[list1[0]]); for ( b=0; bseq_al[list1[0]][b]=translate_dna_codon (A->seq_al[list1[0]]+b, 'x'); l2=(int)strlen ( A->seq_al[list2[0]]); for ( b=0; bseq_al[list2[0]][b]=translate_dna_codon (A->seq_al[list2[0]]+b, 'x'); } /*Set the frame*/ for ( a=0; a< 3; a++)score[a][0]=score[a][1]=0; for ( a=col1-(n*3),b=col2-(n*3); a=l1 || b>=l2)continue; a1=tolower(B->seq_al[list1[0]][a]); a2=tolower(B->seq_al[list2[0]][b]); score[a%3][0]+=(a1=='x' || a2=='x')?0:CL->M[a1-'A'][a2-'A']; score[a%3][1]++; } for ( a=0; a< 3; a++)score[a][0]=(score[a][1]>0)?(score[a][0]/score[a][1]):0; if ( score[0][0]>score[1][0] && score[0][0]>score[2][0]) s=score[0][0]; else if ( score[1][0]>score[0][0] && score[1][0]>score[2][0]) s=score[1][0]; else s=score[2][0]; return s*SCORE_K; } int get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int score; if ( ns1==1 || ns2==1) score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); else score=fast_get_dp_cost_quadruplet ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch); } int get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int MODE=0; int score; if (A==NULL)return 0; if (MODE!=2 || MODE==0 || (!CL->L && CL->M) || (!CL->L && CL->T)|| ns1==1 || ns2==1) score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); else if (MODE==1 || MODE==2) score=fast_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL); else score=0; return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch); } int ***make_cw_lu (int **cons, int l, Constraint_list *CL); int ***make_cw_lu (int **cons, int l, Constraint_list *CL) { int ***lu; int p, a,r; lu=declare_arrayN(3, sizeof (int),l,26, 2); for ( p=0; pM[r][cons[p][a]-'A']; lu[p][r][1]+=cons[p][a+1]; } } } return lu; } int cw_profile_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { static int last_tag; static int *pr, ***lu; int score; static int *list[2], ns[2], **cons[2], ref; int eva_col,ref_col, a, p, r; float t1, t2; if (last_tag!=A->random_tag) { int n1, n2; last_tag=A->random_tag; list[0]=list1;ns[0]=ns1; list[1]=list2;ns[1]=ns2; free_int (cons[0],-1);free_int (cons[1],-1);free_arrayN((void*)lu,3); cons[0]=NULL;cons[1]=NULL;lu=NULL; n1=sub_aln2nseq_prf (A, ns[0], list[0]); n2=sub_aln2nseq_prf (A, ns[1], list[1]); if ( n1>1 || n2>1) { cons[0]=sub_aln2count_mat2 (A, ns[0], list[0]); cons[1]=sub_aln2count_mat2 (A, ns[1], list[1]); ref=(ns[0]>ns[1])?0:1; lu=make_cw_lu(cons[ref],(int)strlen(A->seq_al[list[ref][0]]), CL); } } if (!lu) { char r1, r2; r1=A->seq_al[list1[0]][col1]; r2=A->seq_al[list2[0]][col2]; if ( r1!='-' && r2!='-') return CL->M[r1-'A'][r2-'A']*SCORE_K -SCORE_K*CL->nomatch; else return -SCORE_K*CL->nomatch; } else { eva_col= (ref==0)?col2:col1; ref_col= (ref==0)?col1:col2; pr=cons[1-ref][eva_col]; t1=t2=0; for (a=3; a< pr[1]; a+=3) { r=tolower(pr[a]); p= pr[a+1]; t1+=lu[ref_col][r-'a'][0]*p; t2+=lu[ref_col][r-'a'][1]*p; } score=(t2==0)?0:(t1*SCORE_K)/t2; score -=SCORE_K*CL->nomatch; return score; } } int cw_profile_get_dp_cost_old ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { static int last_tag; static int **cons1, **cons2; int score; if (last_tag!=A->random_tag) { last_tag=A->random_tag; free_int (cons1,-1);free_int (cons2,-1); cons1=sub_aln2count_mat2 (A, ns1, list1); cons2=sub_aln2count_mat2 (A, ns2, list2); } score=cw_profile_profile (cons1[col1], cons2[col2], CL)-SCORE_K*CL->nomatch; return score; } int cw_profile_get_dp_cost_window ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { static int last_tag; static int **cons1, **cons2; int a, score, window_size=5, n, p1, p2; if (last_tag!=A->random_tag) { last_tag=A->random_tag; free_int (cons1,-1);free_int (cons2,-1); cons1=sub_aln2count_mat2 (A, ns1, list1); cons2=sub_aln2count_mat2 (A, ns2, list2); } for (n=0,score=0,a=0; anomatch; n++; } if (n>0)score/=n; return score; } int consensus_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { static int last_tag; static char *seq1, *seq2; /*Works only with matrix*/ if (last_tag !=A->random_tag) { int ls1, ls2, lenls1, lenls2; last_tag=A->random_tag; vfree (seq1);vfree (seq2); seq1=sub_aln2cons_seq_mat (A, ns1, list1, "blosum62mt"); seq2=sub_aln2cons_seq_mat (A, ns2, list2, "blosum62mt"); ls1=list1[ns1-1];ls2=list2[ns2-1]; lenls1=(int)strlen (A->seq_al[ls1]); lenls2=(int)strlen (A->seq_al[ls2]); } return (CL->M[seq1[col1]-'A'][seq2[col2]-'A']*SCORE_K)-SCORE_K*CL->nomatch; } int fast_get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { /*WARNING: WORKS ONLY WITH List to Extend*/ /*That function does a quruple extension beween two columns by pooling the residues together*/ double score=0; int a,b, c; int n_gap1=0; int n_gap2=0; int s1, rs1, r1, t_r, t_s,t_w, q_r, q_s, q_w, s2, rs2, r2; int **buf_pos, buf_ns, *buf_list, buf_col; static int **hasch1; static int **hasch2; static int **n_hasch1; static int **n_hasch2; static int **is_in_col1; static int **is_in_col2; if (ns2>ns1) { buf_pos=pos1; buf_ns=ns1; buf_list=list1; buf_col=col1; pos1=pos2; ns1=ns2; list1=list2; col1=col2; pos2=buf_pos; ns2=buf_ns; list2=buf_list; col2=buf_col; } CL=index_res_constraint_list ( CL, WE); if ( !hasch1) { hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); } for ( a=0; a< ns1; a++) { rs1= list1[a]; s1=A->order[rs1][0]; r1=pos1[rs1][col1]; if (r1<0)n_gap1++; else { is_in_col1[s1][r1]=1; for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) { t_s=CL->residue_index[s1][r1][b]; t_r=CL->residue_index[s1][r1][b+1]; t_w=CL->residue_index[s1][r1][b+2]; for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) { q_s=CL->residue_index[t_s][t_r][c]; q_r=CL->residue_index[t_s][t_r][c+1]; q_w=CL->residue_index[t_s][t_r][c+2]; hasch1[q_s][q_r]+=MIN(q_w, t_w); n_hasch1[q_s][q_r]++; } } } } for ( a=0; a< ns2; a++) { rs2=list2[a]; s2=A->order[rs2][0]; r2=pos2[rs2][col2]; if (r2<0)n_gap2++; else { is_in_col2[s2][r2]=1; for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) { t_s=CL->residue_index[s2][r2][b]; t_r=CL->residue_index[s2][r2][b+1]; t_w=CL->residue_index[s2][r2][b+2]; for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) { q_s=CL->residue_index[t_s][t_r][c]; q_r=CL->residue_index[t_s][t_r][c+1]; q_w=CL->residue_index[t_s][t_r][c+2]; hasch2[q_s][q_r]+=MIN(t_w, q_w); n_hasch2[q_s][q_r]++; } } } } for ( a=0; a< ns2; a++) { rs2=list2[a]; s2=A->order[rs2][0]; r2=pos1[rs2][col2]; if (r2<0); else { for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) { t_s=CL->residue_index[s2][r2][b]; t_r=CL->residue_index[s2][r2][b+1]; for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) { q_s=CL->residue_index[t_s][t_r][c]; q_r=CL->residue_index[t_s][t_r][c+1]; if ( hasch2[q_s][q_r] && hasch1[q_s][q_r]&& !(is_in_col1[q_s][q_r] || is_in_col2[q_s][q_r])) { score+=MIN(hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]),hasch1[q_s][q_r]*(n_hasch2[q_s][q_r])); } else if ( hasch2[q_s][q_r] && is_in_col1[q_s][q_r]) { score+=hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]+1); } else if (hasch1[q_s][q_r] && is_in_col2[q_s][q_r]) { score+=hasch1[q_s][q_r]*(n_hasch2[q_s][q_r]+1); } hasch2[q_s][q_r]=0; n_hasch2[q_s][q_r]=0; } } hasch2[s2][r2]=0; is_in_col2[s2][r2]=0; } } for ( a=0; a< ns1; a++) { rs1= list1[a]; s1=A->order[rs1][0]; r1=pos1[rs1][col1]; if (r1<0); else { is_in_col1[s1][r1]=0; hasch1[s1][r1]=0; for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) { t_s=CL->residue_index[s1][r1][b]; t_r=CL->residue_index[s1][r1][b+1]; for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3) { q_s=CL->residue_index[t_s][t_r][c]; q_r=CL->residue_index[t_s][t_r][c+1]; hasch1[q_s][q_r]=0; n_hasch1[q_s][q_r]=0; } } } } score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2)); score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; return (int)(score-SCORE_K*CL->nomatch); } int fast_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { /*WARNING: WORKS ONLY WITH List to Extend*/ double score=0; int a,b; int n_gap1=0; int n_gap2=0; int s1, rs1, r1, t_r, t_s, s2, rs2, r2; static int **hasch1; static int **hasch2; static int **n_hasch1; static int **n_hasch2; static int **is_in_col1; static int **is_in_col2; CL=index_res_constraint_list ( CL, WE); if ( !hasch1) { hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1); n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1); } for ( a=0; a< ns1; a++) { rs1= list1[a]; s1=A->order[rs1][0]; r1=pos1[rs1][col1]; if (r1<0)n_gap1++; else { is_in_col1[s1][r1]=1; for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) { t_s=CL->residue_index[s1][r1][b]; t_r=CL->residue_index[s1][r1][b+1]; hasch1[t_s][t_r]+=CL->residue_index[s1][r1][b+2]; n_hasch1[t_s][t_r]++; } } } for ( a=0; a< ns2; a++) { rs2=list2[a]; s2=A->order[rs2][0]; r2=pos2[rs2][col2]; if (r2<0)n_gap2++; else { is_in_col2[s2][r2]=1; for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) { t_s=CL->residue_index[s2][r2][b]; t_r=CL->residue_index[s2][r2][b+1]; hasch2[t_s][t_r]+=CL->residue_index[s2][r2][b+2]; n_hasch2[t_s][t_r]++; } } } /*return 2;*/ if ( ns2order[rs2][0]; r2=pos1[rs2][col2]; if (r2<0); else { for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) { t_s=CL->residue_index[s2][r2][b]; t_r=CL->residue_index[s2][r2][b+1]; if ( hasch2[t_s][t_r] && hasch1[t_s][t_r]&& !(is_in_col1[t_s][t_r] || is_in_col2[t_s][t_r])) { score+=MIN(hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]),hasch1[t_s][t_r]*(n_hasch2[t_s][t_r])); } else if ( hasch2[t_s][t_r] && is_in_col1[t_s][t_r]) { score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1); } else if (hasch1[t_s][t_r] && is_in_col2[t_s][t_r]) { score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1); } hasch2[t_s][t_r]=0; n_hasch2[t_s][t_r]=0; } hasch2[s2][r2]=0; is_in_col2[s2][r2]=0; } } for ( a=0; a< ns1; a++) { rs1= list1[a]; s1=A->order[rs1][0]; r1=pos1[rs1][col1]; if (r1<0); else { is_in_col1[s1][r1]=0; hasch1[s1][r1]=0; for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) { t_s=CL->residue_index[s1][r1][b]; t_r=CL->residue_index[s1][r1][b+1]; hasch1[t_s][t_r]=0; n_hasch1[t_s][t_r]=0; } } } } else { for ( a=0; a< ns1; a++) { rs1=list1[a]; s1=A->order[rs1][0]; r1=pos1[rs1][col1]; if (r1<0); else { for (b=1; b< CL->residue_index[s1][r1][0]; b+=3) { t_s=CL->residue_index[s1][r1][b]; t_r=CL->residue_index[s1][r1][b+1]; if ( hasch1[t_s][t_r] && hasch2[t_s][t_r]&& !(is_in_col2[t_s][t_r] || is_in_col1[t_s][t_r])) { score+=MIN(hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]),hasch2[t_s][t_r]*(n_hasch1[t_s][t_r])); } else if ( hasch1[t_s][t_r] && is_in_col2[t_s][t_r]) { score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1); } else if (hasch2[t_s][t_r] && is_in_col1[t_s][t_r]) { score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1); } hasch1[t_s][t_r]=0; n_hasch1[t_s][t_r]=0; } hasch1[s1][r1]=0; is_in_col1[s1][r1]=0; } } for ( a=0; a< ns2; a++) { rs2= list2[a]; s2=A->order[rs2][0]; r2=pos1[rs2][col2]; if (r2<0); else { is_in_col2[s2][r2]=0; hasch1[s2][r2]=0; for (b=1; b< CL->residue_index[s2][r2][0]; b+=3) { t_s=CL->residue_index[s2][r2][b]; t_r=CL->residue_index[s2][r2][b+1]; hasch2[t_s][t_r]=0; n_hasch2[t_s][t_r]=0; } } } } score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2)); score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; return (int)(score-SCORE_K*CL->nomatch); } int fast_get_dp_cost_2 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { double score=0; int a, b, s1, s2,r1, r2; static int n_pair; int s; int res_res=0; int rs1, rs2; static int last_ns1; static int last_ns2; static int last_top1; static int last_top2; static int **check_list; /*New heuristic get dp_cost on a limited number of pairs*/ /*This is the current default*/ if ( last_ns1==ns1 && last_top1==list1[0] && last_ns2==ns2 && last_top2==list2[0]); else { last_ns1=ns1; last_ns2=ns2; last_top1=list1[0]; last_top2=list2[0]; if ( check_list) free_int (check_list, -1); check_list=declare_int ( (CL->S)->nseq*(CL->S)->nseq, 3); for ( n_pair=0,a=0; a< ns1; a++) { s1 =list1[a]; rs1=A->order[s1][0]; for ( b=0; b< ns2; b++, n_pair++) { s2 =list2[b]; rs2=A->order[s2][0]; check_list[n_pair][0]=s1; check_list[n_pair][1]=s2; check_list[n_pair][2]=(!CL->DM)?0:(CL->DM)->similarity_matrix[rs1][rs2]; } sort_int ( check_list, 3, 2, 0, n_pair-1); } } if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;} for ( a=n_pair-1; a>=0; a--) { s1= check_list[a][0]; rs1=A->order[s1][0]; r1 =pos1[s1][col1]; s2= check_list[a][1]; rs2=A->order[s2][0]; r2 =pos2[s2][col2]; if ( r1>0 && r2 >0) {res_res++;} if ( rs1>rs2) { SWAP (rs1, rs2); SWAP (r1, r2); } if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s; else { return UNDEFINED; } if ( res_res>=CL->max_n_pair && CL->max_n_pair!=0)a=0; } score=(res_res==0)?0:( (score)/res_res); score=score-SCORE_K*CL->nomatch; return (int)score; } int fast_get_dp_cost_3 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { static int last_tag; static Constraint_list *NCL; int score; if ( ns1==1 && ns2==1) { return slow_get_dp_cost( A,pos1, ns1,list1, col1, pos2, ns2, list2, col2,CL); } if ( last_tag !=A->random_tag) { int *ns, **ls; last_tag=A->random_tag; ns=vcalloc (2, sizeof (int));ns[0]=ns1; ns[1]=ns2; ls=vcalloc (2, sizeof (int*));ls[0]=list1; ls[1]=list2; NCL=progressive_index_res_constraint_list ( A, ns, ls, CL); vfree (ls); vfree (ns); } score=residue_pair_extended_list ( NCL,list1[0],col1, list2[0], col2); score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score; score=(score-SCORE_K*CL->nomatch); return score; } int slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { double score=0; int a, b, s1, s2,r1, r2; int s; int gap_gap=0; int gap_res=0; int res_res=0; int rs1, rs2; static int last_tag; static int *dummy; if (col1<0 || col2<0) return 0; if ( last_tag !=A->random_tag) { last_tag=A->random_tag; if (!dummy) { dummy=vcalloc (10, sizeof(int)); dummy[0]=1;/*Number of Amino acid types on colum*/ dummy[1]=5;/*Length of Dummy*/ dummy[3]='\0';/*Amino acid*/ dummy[4]=1; /*Number of occurences*/ dummy[5]=100; /*Frequency in the MSA column*/ } } if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;} for ( a=0; a< ns1; a++) { for ( b=0; border[s1][0]; r1 =pos1[s1][col1]; s2 =list2[b]; rs2=A->order[s2][0]; r2 =pos2[s2][col2]; if ( rs1>rs2) { SWAP (rs1, rs2); SWAP (r1, r2); } if (r1==0 && r2==0)gap_gap++; else if ( r1<0 || r2<0) gap_res++; else { res_res++; if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s; else { return UNDEFINED; } } } } score=(res_res==0)?0:( (score)/res_res); return score-SCORE_K*CL->nomatch; } int slow_get_dp_cost_pc ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { double score=0; int a, b, s1, s2,r1, r2; int s; int gap_gap=0; int gap_res=0; int res_res=0; int rs1, rs2; static int last_tag; static int *dummy; if (col1<0 || col2<0) return 0; if ( last_tag !=A->random_tag) { last_tag=A->random_tag; if (!dummy) { dummy=vcalloc (10, sizeof(int)); dummy[0]=1;/*Number of Amino acid types on colum*/ dummy[1]=5;/*Length of Dummy*/ dummy[3]='\0';/*Amino acid*/ dummy[4]=1; /*Number of occurences*/ dummy[5]=100; /*Frequency in the MSA column*/ } } if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;} for ( a=0; a< ns1; a++) { for ( b=0; border[s1][0]; r1 =pos1[s1][col1]; s2 =list2[b]; rs2=A->order[s2][0]; r2 =pos2[s2][col2]; if ( rs1>rs2) { SWAP (rs1, rs2); SWAP (r1, r2); } if (r1==0 && r2==0)gap_gap++; else if ( r1<0 || r2<0) gap_res++; else { res_res++; if ((s=residue_pair_extended_list_pc(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s; else { return UNDEFINED; } } } } score=(res_res==0)?0:( (score)/res_res); return score; } int slow_get_dp_cost_test ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { double score=0; int a, b, s1, s2,r1, r2; int gap_gap=0, gap_res=0, res_res=0, rs1, rs2; for ( a=0; a< ns1; a++) { for ( b=0; border[s1][0]; r1 =pos1[s1][col1]; s2 =list2[b]; rs2=A->order[s2][0]; r2 =pos2[s2][col2]; if ( rs1>rs2) { SWAP (rs1, rs2); SWAP (r1, r2); } if (r1==0 && r2==0)gap_gap++; else if ( r1<0 || r2<0) gap_res++; else { res_res++; score+=residue_pair_extended_list_raw (CL, rs1, r1, rs2, r2); } } } return (int)(score*10)/(ns1*ns2); } int sw_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL) { int a, b; int s1,r1,rs1; int s2,r2,rs2; for ( a=0; a< ns1; a++) { s1 =list1[a]; rs1=A->order[s1][0]; r1 =pos1[s1][col1]; if ( r1<=0)continue; for ( b=0; b< ns2; b++) { s2 =list2[b]; rs2=A->order[s2][0]; r2 =pos2[s2][col2]; if (r2<=0)continue; if (sw_pair_is_defined (CL, rs1, r1, rs2, r2)==UNDEFINED)return UNDEFINED; } } return slow_get_dp_cost ( A, pos1, ns1, list1, col1, pos2, ns2, list2,col2, CL); } int get_domain_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2,Constraint_list *CL , int scale , int gop, int gep) { int a, b, s1, s2,r1, r2; static int *entry; int *r; int score=0; int gap_gap=0; int gap_res=0; int res_res=0; int rs1, rs2; int flag_list_is_aa_sub_mat=0; int p; /*Needs to be cleanned After Usage*/ if ( entry==NULL) entry=vcalloc (LIST_N_FIELDS , sizeof (int)); for (a=0; a< ns1; a++) { s1=list1[a]; rs1=A->order[s1][0]; for ( b=0; border[s2][0]; entry[SEQ1]=rs1; entry[SEQ2]=rs2; r1=entry[R1]=pos1[s1][col1]; r2=entry[R2]=pos2[s2][col2]; if ( !flag_list_is_aa_sub_mat) { if ( r1==r2 && rs1==rs2) { return UNDEFINED; } else if (r1==0 && r2==0) { gap_gap++; } else if ( r1<=0 || r2<=0) { gap_res++; } else if ((r=main_search_in_list_constraint ( entry,&p,4,CL))!=NULL) { res_res++; if (r[WE]!=UNDEFINED) { score+=(r[WE]*SCORE_K)+scale; } else { fprintf ( stderr, "**"); return UNDEFINED; } } } } } return score; score=((res_res+gap_res)==0)?0:score/(res_res+gap_res); return score; } /*********************************************************************************************/ /* */ /* FUNCTIONS FOR ANALYSING AL OR MATRIX */ /* */ /*********************************************************************************************/ int aln2n_res ( Alignment *A, int start, int end) { int a, b; int score=0; for ( a=start; anseq; b++)score+=!is_gap(A->seq_al[b][a]); return score; } float get_gop_scaling_factor ( int **matrix,float id, int l1, int l2) { return id* get_avg_matrix_mm(matrix, AA_ALPHABET); } float get_avg_matrix_mm ( int **matrix, char *alphabet) { int a, b; float naa; float gop; int l; l=MIN(20,(int)strlen (alphabet)); for (naa=0, gop=0,a=0; a=0){naa++;tot+=matrix[alphabet[a]-'A'][alphabet[b]-'A'];} } return tot/naa; } float measure_matrix_enthropy (int **matrix,char *alphabet) { int a, b; double s, p, q, h=0, tq=0; float lambda; float *frequency; /*frequencies tqken from psw*/ frequency=set_aa_frequencies (); lambda=compute_lambda(matrix,alphabet); fprintf ( stderr, "\nLambda=%f", (float)lambda); for ( a=0; a< 20; a++) for ( b=0; b<=a; b++) { s=matrix[alphabet[a]-'A'][alphabet[b]-'A']; p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A']; if ( p==0)continue; q=exp(lambda*s+log(p)); tq+=q; h+=q*log(q/p)*log(2); } fprintf ( stderr,"\ntq=%f\n", (float)tq); return (float) h; } float compute_lambda (int **matrix,char *alphabet) { int a, b; double lambda, best_lambda=0, delta, best_delta=0, p, tq,s; static float *frequency; if ( !frequency)frequency=set_aa_frequencies (); for ( lambda=0.001; lambda<1; lambda+=0.005) { tq=0; for ( a=0; a< 20; a++) for ( b=0; b<20; b++) { p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A']; s=matrix[alphabet[a]-'A'][alphabet[b]-'A']; tq+=exp(lambda*s+log(p)); } delta=fabs(1-tq); if (lambda==0.001) { best_delta=delta; best_lambda=lambda; } else { if (delta1)break; } fprintf ( stderr, "\nRESULT: %f %f ", best_lambda, best_delta); return (float) best_lambda; } float evaluate_random_match (char *mat, int n, int len,char *alp) { int **matrix; matrix=read_matrice ( mat); fprintf ( stderr, "Matrix=%15s ", mat); return evaluate_random_match2 (matrix, n,len,alp); } float evaluate_random_match2 (int **matrix, int n, int len,char *alp) { int a, b, c, d, c1, c2, tot; static int *list; static float *freq; float score_random=0; float score_id=0; float score_good=0; float tot_len=0; float tot_try=0; if ( !list) { vsrand(0); freq=set_aa_frequencies (); list=vcalloc ( 10000, sizeof (char)); } for (tot=0,c=0,a=0;a<20; a++) { b=freq[alp[a]-'A']*1000; tot+=b; for (d=0; d=0){score_good+=matrix[list[c1]-'A'][list[c2]-'A']; tot_len++;} } score_random=score_random/tot_len; score_id=score_id/tot_len; score_good=score_good/tot_len; fprintf ( stderr, "Random=%8.3f Id=%8.3f Good=%8.3f [%7.2f]\n",score_random, score_id, score_good, tot_len/tot_try); return score_random; } float compare_two_mat (char *mat1,char*mat2, int n, int len,char *alp) { int **matrix1, **matrix2; evaluate_random_match (mat1, n, len,alp); evaluate_random_match (mat2, n, len,alp); matrix1=read_matrice ( mat1); matrix2=read_matrice ( mat2); matrix1=rescale_matrix(matrix1, 10, alp); matrix2=rescale_matrix(matrix2, 10, alp); compare_two_mat_array(matrix1,matrix2, n, len,alp); return 0; } int ** rescale_two_mat (char *mat1,char*mat2, int n, int len,char *alp) { float lambda; int **matrix1, **matrix2; lambda=measure_lambda2 (mat1, mat2, n, len, alp)*10; fprintf ( stderr, "\nLambda=%.2f", lambda); matrix2=read_matrice(mat2); matrix2=neg_matrix2pos_matrix(matrix2); matrix2=rescale_matrix( matrix2, lambda,"abcdefghiklmnpqrstvwxyz"); matrix1=read_matrice(mat1); matrix1=neg_matrix2pos_matrix(matrix1); matrix1=rescale_matrix( matrix1,10,"abcdefghiklmnpqrstvwxyz"); output_matrix_header ( "stdout", matrix2, alp); evaluate_random_match2(matrix1, 1000, 100, alp); evaluate_random_match2(matrix2, 1000, 100, alp); compare_two_mat_array(matrix1,matrix2, n, len,alp); return matrix2; } float measure_lambda2(char *mat1,char*mat2, int n, int len,char *alp) { int **m1, **m2; float f1, f2; m1=read_matrice (mat1); m2=read_matrice (mat2); m1=neg_matrix2pos_matrix(m1); m2=neg_matrix2pos_matrix(m2); f1=measure_matrix_pos_avg( m1, alp); f2=measure_matrix_pos_avg( m2, alp); return f1/f2; } float measure_lambda (char *mat1,char*mat2, int n, int len,char *alp) { int c; int **matrix1, **matrix2, **mat; float a; float best_quality=0, quality=0, best_lambda=0; matrix1=read_matrice ( mat1); matrix2=read_matrice ( mat2); matrix1=rescale_matrix(matrix1, 10, alp); matrix2=rescale_matrix(matrix2, 10, alp); for (c=0, a=0.1; a< 2; a+=0.05) { fprintf ( stderr, "Lambda=%.2f\n", a); mat=duplicate_int (matrix2,-1,-1); mat=rescale_matrix(mat, a, alp); quality=compare_two_mat_array(matrix1,mat, n, len,alp); quality=MAX((-quality),quality); if (c==0 || (best_quality>quality)) { c=1; fprintf ( stderr, "*"); best_quality=quality; best_lambda=a; } evaluate_random_match2(mat, 1000, 100, alp); evaluate_random_match2(matrix1, 1000, 100, alp); free_int (mat, -1); } return best_lambda; } float compare_two_mat_array (int **matrix1,int **matrix2, int n, int len,char *alp) { int a, b, c, d, c1, c2, tot; static int *list; static float *freq; float delta_random=0; float delta2_random=0; float delta_id=0; float delta2_id=0; float delta_good=0; float delta2_good=0; float delta; float tot_len=0; float tot_try=0; if ( !list) { vsrand(0); freq=set_aa_frequencies (); list=vcalloc ( 10000, sizeof (char)); } for (tot=0,c=0,a=0;a<20; a++) { b=freq[alp[a]-'A']*1000; tot+=b; for (d=0; d=0 || matrix2[list[c1]-'A'][list[c2]-'A'] ) { delta=matrix1[list[c1]-'A'][list[c2]-'A']-matrix2[list[c1]-'A'][list[c2]-'A']; delta_good+=delta; delta2_good+=MAX(delta,(-delta)); tot_len++; } } delta_random=delta_random/tot_len; delta2_random=delta2_random/tot_len; delta_id=delta_id/tot_len; delta2_id=delta2_id/tot_len; delta_good=delta_good/tot_len; delta2_good=delta2_good/tot_len; fprintf ( stderr, "\tRand=%8.3f %8.3f\n\tId =%8.3f %8.3f\n\tGood=%8.3f %8.3f\n",delta_random, delta2_random, delta_id,delta2_id, delta_good,delta2_good); return delta_good; } int ** rescale_matrix ( int **matrix, float lambda, char *alp) { int a, b; for ( a=0; a< 20; a++) for ( b=0; b< 20; b++) { matrix[alp[a]-'A'][alp[b]-'A']= matrix[alp[a]-'A'][alp[b]-'A']*lambda; } return matrix; } int **mat2inverted_mat (int **matrix, char *alp) { int a, b, min, max, v,l; int c1,c2, C1, C2; l=(int)strlen (alp); min=max=matrix[alp[0]-'A'][alp[0]-'A']; for ( a=0; amax)?p[a][b]:max; } for (b='a'; b<='z'; b++) { p[a][b]=((p[a][b]-min)/(max-min))*10; } } return p; } Constraint_list * choose_extension_mode ( char *extend_mode, Constraint_list *CL) { if ( !CL) { fprintf ( stderr, "\nWarning: CL was not set"); return CL; } else if ( strm ( extend_mode, "rna0")) { CL->evaluate_residue_pair=residue_pair_extended_list; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "rna1") || strm (extend_mode, "rna")) { CL->evaluate_residue_pair=residue_pair_extended_list4rna1; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "rna2")) { CL->evaluate_residue_pair=residue_pair_extended_list4rna2; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "rna3")) { CL->evaluate_residue_pair=residue_pair_extended_list4rna3; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "rna4")) { CL->evaluate_residue_pair=residue_pair_extended_list4rna4; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "pc") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list_pc; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "triplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list; CL->get_dp_cost =get_dp_cost; } else if ( strm ( extend_mode, "relative_triplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_relative_extended_list; CL->get_dp_cost =fast_get_dp_cost_2; } else if ( strm ( extend_mode, "g_coffee") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "g_coffee_quadruplets") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee_quadruplet; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "fast_triplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list; CL->get_dp_cost =fast_get_dp_cost; } else if ( strm ( extend_mode, "test_triplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list; CL->get_dp_cost =fast_get_dp_cost_3; } else if ( strm ( extend_mode, "very_fast_triplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list; CL->get_dp_cost =fast_get_dp_cost_2; } else if ( strm ( extend_mode, "slow_triplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list; CL->get_dp_cost =slow_get_dp_cost; } else if ( strm ( extend_mode, "mixt") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list_mixt; CL->get_dp_cost=slow_get_dp_cost; } else if ( strm ( extend_mode, "quadruplet") && !CL->M) { CL->evaluate_residue_pair=residue_pair_extended_list_quadruplet; CL->get_dp_cost =get_dp_cost_quadruplet; } else if ( strm ( extend_mode, "test") && !CL->M) { CL->evaluate_residue_pair=residue_pair_test_function; CL->get_dp_cost =slow_get_dp_cost_test; } else if ( strm ( extend_mode, "ssp")) { CL->evaluate_residue_pair=evaluate_ssp_matrix_score; CL->get_dp_cost=slow_get_dp_cost; CL->normalise=1; } else if ( strm ( extend_mode, "tm")) { CL->evaluate_residue_pair=evaluate_tm_matrix_score; CL->get_dp_cost=slow_get_dp_cost; CL->normalise=1; } else if ( strm ( extend_mode, "matrix")) { CL->evaluate_residue_pair=evaluate_matrix_score; CL->get_dp_cost=cw_profile_get_dp_cost; CL->normalise=1; } else if ( strm ( extend_mode, "curvature")) { CL->evaluate_residue_pair=evaluate_curvature_score; CL->get_dp_cost=slow_get_dp_cost; CL->normalise=1; } else if ( CL->M) { CL->evaluate_residue_pair=evaluate_matrix_score; CL->get_dp_cost=cw_profile_get_dp_cost; CL->normalise=1; } else { fprintf ( stderr, "\nERROR: %s is an unknown extend_mode[FATAL:%s]\n", extend_mode, PROGRAM); myexit (EXIT_FAILURE); } return CL; } int ** combine_two_matrices ( int **mat1, int **mat2) { int naa, re1, re2, Re1, Re2, a, b, u, l; naa=(int)strlen (BLAST_AA_ALPHABET); for ( a=0; a< naa; a++) for ( b=0; b< naa; b++) { re1=BLAST_AA_ALPHABET[a]; re2=BLAST_AA_ALPHABET[b]; if (re1=='*' || re2=='*'); else { Re1=toupper(re1);Re2=toupper(re2); re1-='A';re2-='A';Re1-='A';Re2-='A'; l=mat1[re1][re2]; u=mat2[re1][re2]; mat1[re1][re2]=mat2[re1][re2]=l; mat2[Re1][Re2]=mat2[Re1][Re2]=u; } } return mat1; } /* Off the shelves evaluations */ /*********************************************************************************************/ /* */ /* OFF THE SHELVES EVALUATION */ /* */ /*********************************************************************************************/ int lat_sum_pair (Alignment *A, char *mat) { int a,b,c, tot=0, v1, v2, score; int **matrix; matrix=read_matrice (mat); for (a=0; anseq; a++) for ( b=0; bnseq; b++) { for (c=1; clen_aln; c++) { char r11, r12; r11=A->seq_al[a][c-1]; r12=A->seq_al[a][c]; if (is_gap(r11) || is_gap(r12))continue; else v1=matrix[r11-'A'][r12-'A']; r11=A->seq_al[b][c-1]; r12=A->seq_al[b][c]; if (is_gap(r11) || is_gap(r12))continue; else v2=matrix[r11-'A'][r12-'A']; score+=(v1-v2)*(v1-v2); tot++; } } score=(100*score)/tot; return (float)score; } /* Off the shelves evaluations */ /*********************************************************************************************/ /* */ /* OFF THE SHELVES EVALUATION */ /* */ /*********************************************************************************************/ int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE); int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE); void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE); int gap_type ( char a, char b); float sum_pair ( Alignment*A,char *mat_name, int gap_op, int gap_ex) { int a,b; float pscore=0; int start, end; static int **tgp; double score=0; int MODE=1; int **matrix; matrix=read_matrice (mat_name); matrix=mat2inverted_mat (matrix, "acdefghiklmnpqrstvwy"); start=0; end=A->len_aln-1; if ( tgp==NULL) tgp= declare_int (A->nseq,2); evaluate_tgp_decoded_chromosome ( A,tgp,start, end,MODE); for ( a=0; a< A->nseq-1; a++) for (b=a+1; bnseq; b++) { pscore= comp_pair (A->len_aln,A->seq_al[a], A->seq_al[b],a, b,tgp[a], tgp[b],gap_op,gap_ex, start, end,matrix, MODE); score+=pscore*100; /*score+=(double)pscore*(int)(PARAM->OFP)->weight[A->order[a][0]][A->order[b][0]];*//*NO WEIGHTS*/ } score=score/(A->nseq*A->nseq); return (float)score; } int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE) { int score=0, a, ex; if ( end-start>=0) score+= score_gap (len, sa,sb, seqA, seqB,tgp_a, tgp_b, gap_op,gap_ex, start, end,MODE); ex=gap_ex; for (a=start; a<=end; a++) { if ( is_gap(sa[a]) || is_gap(sb[a])) { if (is_gap(sa[a]) && is_gap(sb[a])); else { score +=ex; } } else { score += matrix [sa[a]-'A'][sb[a]-'A']; } } return score; } int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE) { int a,b; int ga=0,gb=0; int score=0; int right_gap, left_gap; int type; int flag1=0; int flag2=0; int continue_loop; int sequence_pattern[2][3]; int null_gap; int natural_gap=1; /*op= gor_gap_op ( 0,seqA, seqB, PARAM); ex= gor_gap_ext ( 0, seqA, seqB, PARAM);*/ for (a=start; a<=end; ++a) { type= gap_type ( sa[a], sb[a]); if ( type==2 && ga<=gb) {++ga; gb=0; score += op; } else if (type==1 && ga >=gb) { ++gb; ga=0; score +=op; } else if (type==0) { ga++; gb++; } else if (type== -1) ga=gb=0; if (natural_gap==0) { if ( type== -1) flag1=flag2=0; else if ( type==0) flag2=1; else if ( (type==flag1) && flag2==1) { score+=op; flag2=0; } else if ( (type!=flag1) && flag2==1) { flag1=type; flag2=0; } else if ( flag2==0) flag1=type; } } /*gap_type -/-:0, X/X:-1 X/-:1, -/X:2*/ /*evaluate the pattern of gaps*/ continue_loop=1; sequence_pattern[0][0]=sequence_pattern[1][0]=0; for ( a=start; a<=end && continue_loop==1; a++) { left_gap= gap_type ( sa[a], sb[a]); if ( left_gap!= 0) { if ( left_gap==-1) { sequence_pattern[0][0]=sequence_pattern[1][0]=0; continue_loop=0; } else { null_gap=0; for (b=a; b<=end && continue_loop==1; b++) {type=gap_type( sa[b], sb[b]); if (type==0) null_gap++; if ( type!=left_gap && type !=0) { continue_loop=0; sequence_pattern[2-left_gap][0]= b-a-null_gap; sequence_pattern [1-(2-left_gap)][0]=0; } } if ( continue_loop==1) { continue_loop=0; sequence_pattern[2-left_gap][0]= b-a-null_gap; sequence_pattern [1-(2-left_gap)][0]=0; } } } } sequence_pattern[0][2]=sequence_pattern[1][2]=1; for ( a=start; a<=end; a++) { if ( !is_gap(sa[a])) sequence_pattern[0][2]=0; if ( !is_gap(sb[a])) sequence_pattern[1][2]=0; } continue_loop=1; sequence_pattern[0][1]=sequence_pattern[1][1]=0; for ( a=end; a>=start && continue_loop==1; a--) { right_gap= gap_type ( sa[a], sb[a]); if ( right_gap!= 0) { if ( right_gap==-1) { sequence_pattern[0][1]=sequence_pattern[1][1]=0; continue_loop=0; } else { null_gap=0; for (b=a; b>=start && continue_loop==1; b--) {type=gap_type( sa[b], sb[b]); if ( type==0) null_gap++; if ( type!=right_gap && type !=0) { continue_loop=0; sequence_pattern[2-right_gap][1]= a-b-null_gap; sequence_pattern [1-(2-right_gap)][1]=0; } } if ( continue_loop==1) { continue_loop=0; sequence_pattern[2-right_gap][1]= a-b-null_gap; sequence_pattern [1-(2-right_gap)][1]=0; } } } } /* printf ( "\n*****************************************************"); printf ( "\n%c\n%c", sa[start],sb[start]); printf ( "\n%d %d %d",sequence_pattern[0][0] ,sequence_pattern[0][1], sequence_pattern[0][2]); printf ( "\n%d %d %d",sequence_pattern[1][0] ,sequence_pattern[1][1], sequence_pattern[1][2]); printf ( "\n*****************************************************"); */ /*correct the scoring*/ if ( MODE==0) { if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0]))) score-= (sequence_pattern[0][0]>0)?op:0; if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0]))) score-= (sequence_pattern[1][0]>0)?op:0; } else if ( MODE ==1 || MODE ==2) { if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0])) && (tgp_a[1]!=1 || sequence_pattern[0][2]==0)) score-= (sequence_pattern[0][0]>0)?op:0; if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0])) && (tgp_b[1]!=1 || sequence_pattern[1][2]==0)) score-= (sequence_pattern[1][0]>0)?op:0; if ( tgp_a[0]>=1 && tgp_a[0]==tgp_b[0]) score -=(sequence_pattern[0][0]>0)?op:0; if ( tgp_b[0]>=1 && tgp_a[0]==tgp_b[0]) score-= (sequence_pattern[1][0]>0)?op:0; if ( tgp_a[1]==1 && sequence_pattern[0][2]==0) score -= ( sequence_pattern[0][1]>0)?op:0; else if ( tgp_a[1]==1 && sequence_pattern[0][2]==1 && tgp_a[0]<=0) score -= ( sequence_pattern[0][1]>0)?op:0; if ( tgp_b[1]==1 && sequence_pattern[1][2]==0) score -= ( sequence_pattern[1][1]>0)?op:0; else if ( tgp_b[1]==1 && sequence_pattern[1][2]==1 && tgp_b[0]<=0) score -= ( sequence_pattern[1][1]>0)?op:0; if ( MODE==2) { if ( tgp_a[0]>0) score -=sequence_pattern[0][0]*ex; if ( tgp_b[0]>0) score -= sequence_pattern[1][0]*ex; if ( tgp_a[1]>0) score-=sequence_pattern[0][1]*ex; if ( tgp_b[1]>0) score-=sequence_pattern[1][1]*ex; } } return score; } void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE) { int a,b; int continue_loop; if (MODE==11 || MODE==13|| MODE==14) { if ( start==0)for ( a=0; anseq; a++)TGP[a][0]=-1; else for ( a=0; anseq; a++)TGP[a][0]=(is_gap(A->seq_al[a][start-1])==1)?0:1; if ( end==A->len_aln-1)for ( a=0; anseq; a++)TGP[a][1]=-1; else for ( a=0; anseq; a++)TGP[a][1]=(is_gap(A->seq_al[a][start-1])==1)?0:1; } else { /* 0: in the middle of the alignement 1: natural end 2: q left gap is the continuation of another gap that was open outside the bloc ( don't open it) */ for ( a=0; a< A->nseq; a++) { TGP[a][0]=1; TGP[a][1]=1; for ( b=0; b< start; b++) if ( !is_gap(A->seq_al[a][b])) TGP[a][0]=0; if ( start>0 ) { if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]!=1) {TGP[a][0]=-1; continue_loop=1; for ( b=(start-1); b>=0 && continue_loop==1; b--) {TGP[a][0]-= ( is_gap(A->seq_al[a][b])==1)?1:0; continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0; } } } else if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]==1) { TGP[a][0]=1; continue_loop=1; for ( b=(start-1); b>=0 && continue_loop==1; b--) {TGP[a][0]+= ( is_gap(A->seq_al[a][b])==1)?1:0; continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0; } } for ( b=(A->len_aln-1); b>end; b--) if ( !is_gap(A->seq_al[a][b])) TGP[a][1]=0; } } } int gap_type ( char a, char b) { /*gap_type -/-:0, X/X:-1 X/-:1, -/STAR:2*/ if ( is_gap(a) && is_gap(b)) return 0; else if ( !is_gap(a) && !is_gap(b)) return -1; else if ( !is_gap(a)) return 1; else if ( !is_gap(b)) return 2; else return -1; } /*********************************COPYRIGHT NOTICE**********************************/ /*© Centro de Regulacio Genomica */ /*and */ /*Cedric Notredame */ /*Tue Oct 27 10:12:26 WEST 2009. */ /*All rights reserved.*/ /*This file is part of T-COFFEE.*/ /**/ /* T-COFFEE is free software; you can redistribute it and/or modify*/ /* it under the terms of the GNU General Public License as published by*/ /* the Free Software Foundation; either version 2 of the License, or*/ /* (at your option) any later version.*/ /**/ /* T-COFFEE is distributed in the hope that it will be useful,*/ /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ /* GNU General Public License for more details.*/ /**/ /* You should have received a copy of the GNU General Public License*/ /* along with Foobar; if not, write to the Free Software*/ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ /*............................................... |*/ /* If you need some more information*/ /* cedric.notredame@europe.com*/ /*............................................... |*/ /**/ /**/ /* */ /*********************************COPYRIGHT NOTICE**********************************/