/* $Id: $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * ===========================================================================*/ /***************************************************************************** File name: sls_fsa1_pvalues.cpp Author: Sergey Sheetlin Contents: Calculation of P-values using precalculated Gumbel parameters ******************************************************************************/ #include "sls_fsa1_pvalues.hpp" #include "sls_fsa1_utils.hpp" #include "sls_normal_distr_array.hpp" using namespace Sls; using namespace std; const double nat_cut_off_in_max=2.0;//nat cut-off in max used in FSC void FALP_pvalues::compute_intercepts( FALP_set_of_parameters &par_) { if(!par_.d_params_flag) { throw error("Unexpected error: FALP_pvalues::compute_intercepts is called for undefined parameters\n",1); }; par_.b_I=2.0*par_.G*(par_.gapless_a_I-par_.a_I); par_.b_I_error=2.0*par_.G*FSA_utils::error_of_the_sum(par_.gapless_a_I_error,par_.a_I_error); par_.beta_I=2.0*par_.G*(par_.gapless_alpha_I-par_.alpha_I); par_.beta_I_error=2.0*par_.G*FSA_utils::error_of_the_sum(par_.gapless_alpha_I_error,par_.alpha_I_error); par_.b_J=2.0*par_.G*(par_.gapless_a_J-par_.a_J); par_.b_J_error=2.0*par_.G*FSA_utils::error_of_the_sum(par_.gapless_a_J_error,par_.a_J_error); par_.beta_J=2.0*par_.G*(par_.gapless_alpha_J-par_.alpha_J); par_.beta_J_error=2.0*par_.G*FSA_utils::error_of_the_sum(par_.gapless_alpha_J_error,par_.alpha_J_error); par_.tau=2.0*par_.G*(par_.gapless_sigma-par_.sigma); par_.tau_error=2.0*par_.G*FSA_utils::error_of_the_sum(par_.gapless_sigma_error,par_.sigma_error); long int vector_size=(long int)par_.m_LambdaSbs.size(); par_.m_BISbs.resize(vector_size); par_.m_BJSbs.resize(vector_size); par_.m_BetaISbs.resize(vector_size); par_.m_BetaJSbs.resize(vector_size); par_.m_TauSbs.resize(vector_size); long int i; for(i=0;i0) { par_.vi_y_thr=FSA_utils::Tmax(nat_cut_off_in_max*par_.alpha_I/par_.lambda,0.0); par_.vj_y_thr=FSA_utils::Tmax(nat_cut_off_in_max*par_.alpha_J/par_.lambda,0.0); par_.c_y_thr=FSA_utils::Tmax(nat_cut_off_in_max*par_.sigma/par_.lambda,0.0); } else { par_.vi_y_thr=0; par_.vj_y_thr=0; par_.c_y_thr=0; par_.d_params_flag=false; }; } void FALP_pvalues::get_appr_tail_prob_with_cov_without_errors( const FALP_set_of_parameters &par_, bool blast_, double y_, double m_, double n_, double &P_, double &E_, double &area_, bool &area_is_1_flag_, bool compute_only_area_) { //to optimize performance blast_=false; double lambda_=par_.lambda; double k_=par_.K; double ai_hat_=par_.a_I; double bi_hat_; double alphai_hat_=par_.alpha_I; double betai_hat_; double aj_hat_=par_.a_J; double bj_hat_; double alphaj_hat_=par_.alpha_J; double betaj_hat_; double sigma_hat_=par_.sigma; double tau_hat_; { bi_hat_=par_.b_I; betai_hat_=par_.beta_I; bj_hat_=par_.b_J; betaj_hat_=par_.beta_J; tau_hat_=par_.tau; }; if(blast_) { alphai_hat_=0; betai_hat_=0; alphaj_hat_=0; betaj_hat_=0; sigma_hat_=0; tau_hat_=0; }; double m_li_y=0; double tmp=ai_hat_*y_+bi_hat_; m_li_y=m_-tmp; double vi_y=0; vi_y=FSA_utils::Tmax(par_.vi_y_thr,alphai_hat_*y_+betai_hat_); double sqrt_vi_y=sqrt(vi_y); double m_F; if(sqrt_vi_y==0.0||blast_) { m_F=1e100; } else { m_F=m_li_y/sqrt_vi_y; }; double P_m_F=sls_basic::normal_probability(m_F); double E_m_F=-const_val*exp(-0.5*m_F*m_F); double m_li_y_P_m_F=m_li_y*P_m_F; double sqrt_vi_y_E_m_F=sqrt_vi_y*E_m_F; double p1=m_li_y_P_m_F-sqrt_vi_y_E_m_F; double n_lj_y=0; tmp=aj_hat_*y_+bj_hat_; n_lj_y=n_-tmp; double vj_y=0; vj_y=FSA_utils::Tmax(par_.vj_y_thr,alphaj_hat_*y_+betaj_hat_); double sqrt_vj_y=sqrt(vj_y); double n_F; if(sqrt_vj_y==0.0||blast_) { n_F=1e100; } else { n_F=n_lj_y/sqrt_vj_y; }; double P_n_F=sls_basic::normal_probability(n_F); double E_n_F=-const_val*exp(-0.5*n_F*n_F); double n_lj_y_P_n_F=n_lj_y*P_n_F; double sqrt_vj_y_E_n_F=sqrt_vj_y*E_n_F; double p2=n_lj_y_P_n_F-sqrt_vj_y_E_n_F; double c_y=0; c_y=FSA_utils::Tmax(par_.c_y_thr,sigma_hat_*y_+tau_hat_); double P_m_F_P_n_F=P_m_F*P_n_F; double c_y_P_m_F_P_n_F=c_y*P_m_F_P_n_F; double p1_p2=p1*p2; double area=p1_p2+c_y_P_m_F_P_n_F; if(!blast_) { //area=FSA_utils::Tmax(area,1.0); } else { if(area<=1.0) { area_is_1_flag_=true; }; if(area_is_1_flag_) { area=1.0; }; }; area_=area; if(compute_only_area_) { return; }; double exp_lambda_y=exp(-lambda_*y_); double k_exp_lambda_y=k_*exp_lambda_y; double area_k_exp_lambda_y=-area*k_exp_lambda_y; E_=-area_k_exp_lambda_y; P_=sls_basic::one_minus_exp_function(area_k_exp_lambda_y); // P_=1-exp(-k_*area*exp(-lambda_*y_)); } void FALP_pvalues::get_P_error_using_splitting_method( const FALP_set_of_parameters &par_, bool blast_, double y_, double m_, double n_, double &P_, double &P_error_, double &E_, double &E_error_, bool &area_is_1_flag_) { long int dim=par_.m_LambdaSbs.size(); if(dim==0) { throw error("Unexpected error in get_P_error_using_splitting_method\n",1); }; P_=0; P_error_=0; E_=0; E_error_=0; double exp_E_values_aver=0; double exp_E_values_error=0; vector P_values(dim); vector E_values(dim); vector exp_E_values(dim); long int i; for(i=0;i &P_values, vector &P_values_errors, vector &E_values, vector &E_values_errors) { if(Score20) { get_P_error_using_splitting_method( ParametersSet, blast, Score, Seq1Len, Seq2Len, P_tmp, P_error, E_tmp, E_error, area_is_1_flag); if(P_tmp>0) { P_error=P_error/P_tmp*P; }; P_value_error=P_error; if(E_tmp>0) { E_error=E_error/E_tmp*E; }; E_value_error=E_error; } else { P_value_error=-DBL_MAX; E_value_error=-DBL_MAX; }; } else { get_appr_tail_prob_with_cov( ParametersSet, blast, Score, Seq1Len, Seq2Len, P, P_error, E, E_error, area, area_is_1_flag); P_value_error=P_error; E_value_error=E_error; }; P_value=P; E_value=E; } //input/output Gumbel parameters namespace Sls { std::ostream &operator<<(std::ostream &s_, const FALP_set_of_parameters &gumbel_params_) { s_<<"Lambda\tLambda error\tK\tK error\tC\tC error\ta_1\ta_1 error\ta_2\ta_2 error\tsigma\tsigma error\talpha_1\talpha_1 error\talpha_2\talpha_2 error\tGapless a_1\tGapless a_1 error\tGapless a_2\tGapless a_2 error\tGapless sigma\tGapless sigma error\tGapless alpha_1\tGapless alpha_1 error\tGapless alpha_2\tGapless alpha_2 error\tG\tCalculation time\tArrays for error calculation\n"; s_.precision(20); s_<< gumbel_params_.lambda<<"\t"< &tmp=gumbel_params_.m_LambdaSbs; s_< &tmp=gumbel_params_.m_KSbs; s_< &tmp=gumbel_params_.m_CSbs; s_< &tmp=gumbel_params_.m_AISbs; s_< &tmp=gumbel_params_.m_AJSbs; s_< &tmp=gumbel_params_.m_SigmaSbs; s_< &tmp=gumbel_params_.m_AlphaISbs; s_< &tmp=gumbel_params_.m_AlphaJSbs; s_<>(std::istream &s_, FALP_set_of_parameters &gumbel_params_) { string st; getline(s_,st); s_>> gumbel_params_.lambda>>gumbel_params_.lambda_error>> gumbel_params_.K>>gumbel_params_.K_error>> gumbel_params_.C>>gumbel_params_.C_error>> gumbel_params_.a_I>>gumbel_params_.a_I_error>> gumbel_params_.a_J>>gumbel_params_.a_J_error>> gumbel_params_.sigma>>gumbel_params_.sigma_error>> gumbel_params_.alpha_I>>gumbel_params_.alpha_I_error>> gumbel_params_.alpha_J>>gumbel_params_.alpha_J_error>> gumbel_params_.gapless_a_I>>gumbel_params_.gapless_a_I_error>> gumbel_params_.gapless_a_J>>gumbel_params_.gapless_a_J_error>> gumbel_params_.gapless_sigma>>gumbel_params_.gapless_sigma_error>> gumbel_params_.gapless_alpha_I>>gumbel_params_.gapless_alpha_I_error>> gumbel_params_.gapless_alpha_J>>gumbel_params_.gapless_alpha_J_error>> gumbel_params_.G>> gumbel_params_.m_CalcTime; long int i; { vector &tmp=gumbel_params_.m_LambdaSbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_KSbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_CSbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_AISbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_AJSbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_SigmaSbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_AlphaISbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; { vector &tmp=gumbel_params_.m_AlphaJSbs; long int tmp_size; s_>>tmp_size; if(tmp_size<=0) { throw error("Error in the input parameters\n",4); }; tmp.resize(tmp_size); for(i=0;i>tmp[i]; }; }; return s_; } //returns "true" if the Gumbel parameters are properly defined and "false" otherwise bool FALP_pvalues::assert_Gumbel_parameters( const FALP_set_of_parameters &par_)//a set of Gumbel parameters { if(par_.lambda<=0|| par_.lambda_error<0|| //the parameters C and K_C are not necessary for the P-value calculation //par_.C<0|| //par_.C_error<0|| //par_.K_C<0|| //par_.K_C_error<0|| par_.K<=0|| par_.K_error<0|| par_.a_I<0|| par_.a_I_error<0|| par_.a_J<0|| par_.a_J_error<0|| par_.sigma<0|| par_.sigma_error<0|| par_.alpha_I<0|| par_.alpha_I_error<0|| par_.alpha_J<0|| par_.alpha_J_error<0|| par_.gapless_a_I<0|| par_.gapless_a_I_error<0|| par_.gapless_a_J<0|| par_.gapless_a_J_error<0|| par_.gapless_alpha_I<0|| par_.gapless_alpha_I_error<0|| par_.gapless_alpha_J<0|| par_.gapless_alpha_J_error<0|| par_.gapless_sigma<0|| par_.gapless_sigma_error<0|| par_.G<0|| par_.G1<0|| par_.G2<0|| //intercepts par_.b_I_error<0|| par_.b_J_error<0|| par_.beta_I_error<0|| par_.beta_J_error<0|| par_.tau_error<0 ) { return false; }; size_t size_tmp=par_.m_LambdaSbs.size(); if( par_.m_KSbs.size()!=size_tmp|| //par_.m_K_CSbs.size()!=size_tmp|| //par_.m_CSbs.size()!=size_tmp|| par_.m_SigmaSbs.size()!=size_tmp|| par_.m_AlphaISbs.size()!=size_tmp|| par_.m_AlphaJSbs.size()!=size_tmp|| par_.m_AISbs.size()!=size_tmp|| par_.m_AJSbs.size()!=size_tmp|| par_.m_BISbs.size()!=size_tmp|| par_.m_BJSbs.size()!=size_tmp|| par_.m_BetaISbs.size()!=size_tmp|| par_.m_BetaJSbs.size()!=size_tmp|| par_.m_TauSbs.size()!=size_tmp) { return false; }; return true; } }