/* $Id: $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's offical duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * ===========================================================================*/ /***************************************************************************** File name: sls_fsa1_utils.cpp Author: Sergey Sheetlin, Martin Frith Contents: Frameshift alignment algorithms ******************************************************************************/ #include "sls_fsa1_utils.hpp" using namespace Sls; using namespace std; static long int length_max=1000; void FSA_utils::read_RR( string RR_file_name_, double *&RR_, double *&RR_sum_, long int *&RR_sum_elements_, long int &number_of_AA_RR_, long int number_of_AA_RR_default_) { read_RR( RR_file_name_, RR_, number_of_AA_RR_, number_of_AA_RR_default_); calculate_RR_sum( RR_, number_of_AA_RR_, RR_sum_, RR_sum_elements_); } void FSA_utils::check_RR_sum( double sum_tmp_, long int number_of_AA_RR_, string RR_file_name_) { if(number_of_AA_RR_<=0) { throw error("Error - number of letters in the probabilities file must be greater than 0\n",3); }; double diff_tmp=fabs(sum_tmp_-1.0); if(diff_tmp>0) { double lg_diff=-(log(diff_tmp)-log((double)number_of_AA_RR_))/log(10.0); double lg_eps=-log(DBL_EPSILON)/log(10.0)-1; if(lg_diff flag_RR; if(!flag_RR[RR_file_name_]) { cout<<"\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"; cout<<"Warning: the sum of the probabilities from the file "<0) { long int i; for(i=0;i>number_of_AA_RR_; if(number_of_AA_RR_<=0) { throw error("Error - number of letters in the probabilities file must be greater than 0\n",3); }; RR_=new double[number_of_AA_RR_]; assert_mem(RR_); double sum_tmp=0; for(i=0;i>RR_[i]; if(RR_[i]<0) { throw error("Error - the frequencies defined in the file "+RR_file_name_+" must be non-negative\n",3); }; sum_tmp+=RR_[i]; }; check_RR_sum( sum_tmp, number_of_AA_RR_, RR_file_name_); f.close(); } catch (...) { if(f.is_open()) { f.close(); }; delete[]RR_;RR_=NULL; throw; }; } void FSA_utils::read_smatr( string smatr_file_name_, long int **&smatr_, long int &number_of_AA_smatr_, long int &smatr_min_) { ifstream f; try { if(smatr_file_name_=="") { //default for the scoring matrix (BLOSUM80) number_of_AA_smatr_=25; FSA_utils::get_memory_for_matrix(number_of_AA_smatr_,number_of_AA_smatr_,smatr_); smatr_min_=-6; smatr_[0][0]=5; smatr_[0][1]=-2; smatr_[0][2]=-2; smatr_[0][3]=-2; smatr_[0][4]=-1; smatr_[0][5]=-1; smatr_[0][6]=-1; smatr_[0][7]=0; smatr_[0][8]=-2; smatr_[0][9]=-2; smatr_[0][10]=-2; smatr_[0][11]=-1; smatr_[0][12]=-1; smatr_[0][13]=-3; smatr_[0][14]=-1; smatr_[0][15]=1; smatr_[0][16]=0; smatr_[0][17]=-3; smatr_[0][18]=-2; smatr_[0][19]=0; smatr_[0][20]=-2; smatr_[0][21]=-2; smatr_[0][22]=-1; smatr_[0][23]=-1; smatr_[0][24]=-6; smatr_[1][0]=-2; smatr_[1][1]=6; smatr_[1][2]=-1; smatr_[1][3]=-2; smatr_[1][4]=-4; smatr_[1][5]=1; smatr_[1][6]=-1; smatr_[1][7]=-3; smatr_[1][8]=0; smatr_[1][9]=-3; smatr_[1][10]=-3; smatr_[1][11]=2; smatr_[1][12]=-2; smatr_[1][13]=-4; smatr_[1][14]=-2; smatr_[1][15]=-1; smatr_[1][16]=-1; smatr_[1][17]=-4; smatr_[1][18]=-3; smatr_[1][19]=-3; smatr_[1][20]=-1; smatr_[1][21]=-3; smatr_[1][22]=0; smatr_[1][23]=-1; smatr_[1][24]=-6; smatr_[2][0]=-2; smatr_[2][1]=-1; smatr_[2][2]=6; smatr_[2][3]=1; smatr_[2][4]=-3; smatr_[2][5]=0; smatr_[2][6]=-1; smatr_[2][7]=-1; smatr_[2][8]=0; smatr_[2][9]=-4; smatr_[2][10]=-4; smatr_[2][11]=0; smatr_[2][12]=-3; smatr_[2][13]=-4; smatr_[2][14]=-3; smatr_[2][15]=0; smatr_[2][16]=0; smatr_[2][17]=-4; smatr_[2][18]=-3; smatr_[2][19]=-4; smatr_[2][20]=5; smatr_[2][21]=-4; smatr_[2][22]=0; smatr_[2][23]=-1; smatr_[2][24]=-6; smatr_[3][0]=-2; smatr_[3][1]=-2; smatr_[3][2]=1; smatr_[3][3]=6; smatr_[3][4]=-4; smatr_[3][5]=-1; smatr_[3][6]=1; smatr_[3][7]=-2; smatr_[3][8]=-2; smatr_[3][9]=-4; smatr_[3][10]=-5; smatr_[3][11]=-1; smatr_[3][12]=-4; smatr_[3][13]=-4; smatr_[3][14]=-2; smatr_[3][15]=-1; smatr_[3][16]=-1; smatr_[3][17]=-6; smatr_[3][18]=-4; smatr_[3][19]=-4; smatr_[3][20]=5; smatr_[3][21]=-5; smatr_[3][22]=1; smatr_[3][23]=-1; smatr_[3][24]=-6; smatr_[4][0]=-1; smatr_[4][1]=-4; smatr_[4][2]=-3; smatr_[4][3]=-4; smatr_[4][4]=9; smatr_[4][5]=-4; smatr_[4][6]=-5; smatr_[4][7]=-4; smatr_[4][8]=-4; smatr_[4][9]=-2; smatr_[4][10]=-2; smatr_[4][11]=-4; smatr_[4][12]=-2; smatr_[4][13]=-3; smatr_[4][14]=-4; smatr_[4][15]=-2; smatr_[4][16]=-1; smatr_[4][17]=-3; smatr_[4][18]=-3; smatr_[4][19]=-1; smatr_[4][20]=-4; smatr_[4][21]=-2; smatr_[4][22]=-4; smatr_[4][23]=-1; smatr_[4][24]=-6; smatr_[5][0]=-1; smatr_[5][1]=1; smatr_[5][2]=0; smatr_[5][3]=-1; smatr_[5][4]=-4; smatr_[5][5]=6; smatr_[5][6]=2; smatr_[5][7]=-2; smatr_[5][8]=1; smatr_[5][9]=-3; smatr_[5][10]=-3; smatr_[5][11]=1; smatr_[5][12]=0; smatr_[5][13]=-4; smatr_[5][14]=-2; smatr_[5][15]=0; smatr_[5][16]=-1; smatr_[5][17]=-3; smatr_[5][18]=-2; smatr_[5][19]=-3; smatr_[5][20]=0; smatr_[5][21]=-3; smatr_[5][22]=4; smatr_[5][23]=-1; smatr_[5][24]=-6; smatr_[6][0]=-1; smatr_[6][1]=-1; smatr_[6][2]=-1; smatr_[6][3]=1; smatr_[6][4]=-5; smatr_[6][5]=2; smatr_[6][6]=6; smatr_[6][7]=-3; smatr_[6][8]=0; smatr_[6][9]=-4; smatr_[6][10]=-4; smatr_[6][11]=1; smatr_[6][12]=-2; smatr_[6][13]=-4; smatr_[6][14]=-2; smatr_[6][15]=0; smatr_[6][16]=-1; smatr_[6][17]=-4; smatr_[6][18]=-3; smatr_[6][19]=-3; smatr_[6][20]=1; smatr_[6][21]=-4; smatr_[6][22]=5; smatr_[6][23]=-1; smatr_[6][24]=-6; smatr_[7][0]=0; smatr_[7][1]=-3; smatr_[7][2]=-1; smatr_[7][3]=-2; smatr_[7][4]=-4; smatr_[7][5]=-2; smatr_[7][6]=-3; smatr_[7][7]=6; smatr_[7][8]=-3; smatr_[7][9]=-5; smatr_[7][10]=-4; smatr_[7][11]=-2; smatr_[7][12]=-4; smatr_[7][13]=-4; smatr_[7][14]=-3; smatr_[7][15]=-1; smatr_[7][16]=-2; smatr_[7][17]=-4; smatr_[7][18]=-4; smatr_[7][19]=-4; smatr_[7][20]=-1; smatr_[7][21]=-5; smatr_[7][22]=-3; smatr_[7][23]=-1; smatr_[7][24]=-6; smatr_[8][0]=-2; smatr_[8][1]=0; smatr_[8][2]=0; smatr_[8][3]=-2; smatr_[8][4]=-4; smatr_[8][5]=1; smatr_[8][6]=0; smatr_[8][7]=-3; smatr_[8][8]=8; smatr_[8][9]=-4; smatr_[8][10]=-3; smatr_[8][11]=-1; smatr_[8][12]=-2; smatr_[8][13]=-2; smatr_[8][14]=-3; smatr_[8][15]=-1; smatr_[8][16]=-2; smatr_[8][17]=-3; smatr_[8][18]=2; smatr_[8][19]=-4; smatr_[8][20]=-1; smatr_[8][21]=-4; smatr_[8][22]=0; smatr_[8][23]=-1; smatr_[8][24]=-6; smatr_[9][0]=-2; smatr_[9][1]=-3; smatr_[9][2]=-4; smatr_[9][3]=-4; smatr_[9][4]=-2; smatr_[9][5]=-3; smatr_[9][6]=-4; smatr_[9][7]=-5; smatr_[9][8]=-4; smatr_[9][9]=5; smatr_[9][10]=1; smatr_[9][11]=-3; smatr_[9][12]=1; smatr_[9][13]=-1; smatr_[9][14]=-4; smatr_[9][15]=-3; smatr_[9][16]=-1; smatr_[9][17]=-3; smatr_[9][18]=-2; smatr_[9][19]=3; smatr_[9][20]=-4; smatr_[9][21]=3; smatr_[9][22]=-4; smatr_[9][23]=-1; smatr_[9][24]=-6; smatr_[10][0]=-2; smatr_[10][1]=-3; smatr_[10][2]=-4; smatr_[10][3]=-5; smatr_[10][4]=-2; smatr_[10][5]=-3; smatr_[10][6]=-4; smatr_[10][7]=-4; smatr_[10][8]=-3; smatr_[10][9]=1; smatr_[10][10]=4; smatr_[10][11]=-3; smatr_[10][12]=2; smatr_[10][13]=0; smatr_[10][14]=-3; smatr_[10][15]=-3; smatr_[10][16]=-2; smatr_[10][17]=-2; smatr_[10][18]=-2; smatr_[10][19]=1; smatr_[10][20]=-4; smatr_[10][21]=3; smatr_[10][22]=-3; smatr_[10][23]=-1; smatr_[10][24]=-6; smatr_[11][0]=-1; smatr_[11][1]=2; smatr_[11][2]=0; smatr_[11][3]=-1; smatr_[11][4]=-4; smatr_[11][5]=1; smatr_[11][6]=1; smatr_[11][7]=-2; smatr_[11][8]=-1; smatr_[11][9]=-3; smatr_[11][10]=-3; smatr_[11][11]=5; smatr_[11][12]=-2; smatr_[11][13]=-4; smatr_[11][14]=-1; smatr_[11][15]=-1; smatr_[11][16]=-1; smatr_[11][17]=-4; smatr_[11][18]=-3; smatr_[11][19]=-3; smatr_[11][20]=-1; smatr_[11][21]=-3; smatr_[11][22]=1; smatr_[11][23]=-1; smatr_[11][24]=-6; smatr_[12][0]=-1; smatr_[12][1]=-2; smatr_[12][2]=-3; smatr_[12][3]=-4; smatr_[12][4]=-2; smatr_[12][5]=0; smatr_[12][6]=-2; smatr_[12][7]=-4; smatr_[12][8]=-2; smatr_[12][9]=1; smatr_[12][10]=2; smatr_[12][11]=-2; smatr_[12][12]=6; smatr_[12][13]=0; smatr_[12][14]=-3; smatr_[12][15]=-2; smatr_[12][16]=-1; smatr_[12][17]=-2; smatr_[12][18]=-2; smatr_[12][19]=1; smatr_[12][20]=-3; smatr_[12][21]=2; smatr_[12][22]=-1; smatr_[12][23]=-1; smatr_[12][24]=-6; smatr_[13][0]=-3; smatr_[13][1]=-4; smatr_[13][2]=-4; smatr_[13][3]=-4; smatr_[13][4]=-3; smatr_[13][5]=-4; smatr_[13][6]=-4; smatr_[13][7]=-4; smatr_[13][8]=-2; smatr_[13][9]=-1; smatr_[13][10]=0; smatr_[13][11]=-4; smatr_[13][12]=0; smatr_[13][13]=6; smatr_[13][14]=-4; smatr_[13][15]=-3; smatr_[13][16]=-2; smatr_[13][17]=0; smatr_[13][18]=3; smatr_[13][19]=-1; smatr_[13][20]=-4; smatr_[13][21]=0; smatr_[13][22]=-4; smatr_[13][23]=-1; smatr_[13][24]=-6; smatr_[14][0]=-1; smatr_[14][1]=-2; smatr_[14][2]=-3; smatr_[14][3]=-2; smatr_[14][4]=-4; smatr_[14][5]=-2; smatr_[14][6]=-2; smatr_[14][7]=-3; smatr_[14][8]=-3; smatr_[14][9]=-4; smatr_[14][10]=-3; smatr_[14][11]=-1; smatr_[14][12]=-3; smatr_[14][13]=-4; smatr_[14][14]=8; smatr_[14][15]=-1; smatr_[14][16]=-2; smatr_[14][17]=-5; smatr_[14][18]=-4; smatr_[14][19]=-3; smatr_[14][20]=-2; smatr_[14][21]=-4; smatr_[14][22]=-2; smatr_[14][23]=-1; smatr_[14][24]=-6; smatr_[15][0]=1; smatr_[15][1]=-1; smatr_[15][2]=0; smatr_[15][3]=-1; smatr_[15][4]=-2; smatr_[15][5]=0; smatr_[15][6]=0; smatr_[15][7]=-1; smatr_[15][8]=-1; smatr_[15][9]=-3; smatr_[15][10]=-3; smatr_[15][11]=-1; smatr_[15][12]=-2; smatr_[15][13]=-3; smatr_[15][14]=-1; smatr_[15][15]=5; smatr_[15][16]=1; smatr_[15][17]=-4; smatr_[15][18]=-2; smatr_[15][19]=-2; smatr_[15][20]=0; smatr_[15][21]=-3; smatr_[15][22]=0; smatr_[15][23]=-1; smatr_[15][24]=-6; smatr_[16][0]=0; smatr_[16][1]=-1; smatr_[16][2]=0; smatr_[16][3]=-1; smatr_[16][4]=-1; smatr_[16][5]=-1; smatr_[16][6]=-1; smatr_[16][7]=-2; smatr_[16][8]=-2; smatr_[16][9]=-1; smatr_[16][10]=-2; smatr_[16][11]=-1; smatr_[16][12]=-1; smatr_[16][13]=-2; smatr_[16][14]=-2; smatr_[16][15]=1; smatr_[16][16]=5; smatr_[16][17]=-4; smatr_[16][18]=-2; smatr_[16][19]=0; smatr_[16][20]=-1; smatr_[16][21]=-1; smatr_[16][22]=-1; smatr_[16][23]=-1; smatr_[16][24]=-6; smatr_[17][0]=-3; smatr_[17][1]=-4; smatr_[17][2]=-4; smatr_[17][3]=-6; smatr_[17][4]=-3; smatr_[17][5]=-3; smatr_[17][6]=-4; smatr_[17][7]=-4; smatr_[17][8]=-3; smatr_[17][9]=-3; smatr_[17][10]=-2; smatr_[17][11]=-4; smatr_[17][12]=-2; smatr_[17][13]=0; smatr_[17][14]=-5; smatr_[17][15]=-4; smatr_[17][16]=-4; smatr_[17][17]=11; smatr_[17][18]=2; smatr_[17][19]=-3; smatr_[17][20]=-5; smatr_[17][21]=-3; smatr_[17][22]=-3; smatr_[17][23]=-1; smatr_[17][24]=-6; smatr_[18][0]=-2; smatr_[18][1]=-3; smatr_[18][2]=-3; smatr_[18][3]=-4; smatr_[18][4]=-3; smatr_[18][5]=-2; smatr_[18][6]=-3; smatr_[18][7]=-4; smatr_[18][8]=2; smatr_[18][9]=-2; smatr_[18][10]=-2; smatr_[18][11]=-3; smatr_[18][12]=-2; smatr_[18][13]=3; smatr_[18][14]=-4; smatr_[18][15]=-2; smatr_[18][16]=-2; smatr_[18][17]=2; smatr_[18][18]=7; smatr_[18][19]=-2; smatr_[18][20]=-3; smatr_[18][21]=-2; smatr_[18][22]=-3; smatr_[18][23]=-1; smatr_[18][24]=-6; smatr_[19][0]=0; smatr_[19][1]=-3; smatr_[19][2]=-4; smatr_[19][3]=-4; smatr_[19][4]=-1; smatr_[19][5]=-3; smatr_[19][6]=-3; smatr_[19][7]=-4; smatr_[19][8]=-4; smatr_[19][9]=3; smatr_[19][10]=1; smatr_[19][11]=-3; smatr_[19][12]=1; smatr_[19][13]=-1; smatr_[19][14]=-3; smatr_[19][15]=-2; smatr_[19][16]=0; smatr_[19][17]=-3; smatr_[19][18]=-2; smatr_[19][19]=4; smatr_[19][20]=-4; smatr_[19][21]=2; smatr_[19][22]=-3; smatr_[19][23]=-1; smatr_[19][24]=-6; smatr_[20][0]=-2; smatr_[20][1]=-1; smatr_[20][2]=5; smatr_[20][3]=5; smatr_[20][4]=-4; smatr_[20][5]=0; smatr_[20][6]=1; smatr_[20][7]=-1; smatr_[20][8]=-1; smatr_[20][9]=-4; smatr_[20][10]=-4; smatr_[20][11]=-1; smatr_[20][12]=-3; smatr_[20][13]=-4; smatr_[20][14]=-2; smatr_[20][15]=0; smatr_[20][16]=-1; smatr_[20][17]=-5; smatr_[20][18]=-3; smatr_[20][19]=-4; smatr_[20][20]=5; smatr_[20][21]=-4; smatr_[20][22]=0; smatr_[20][23]=-1; smatr_[20][24]=-6; smatr_[21][0]=-2; smatr_[21][1]=-3; smatr_[21][2]=-4; smatr_[21][3]=-5; smatr_[21][4]=-2; smatr_[21][5]=-3; smatr_[21][6]=-4; smatr_[21][7]=-5; smatr_[21][8]=-4; smatr_[21][9]=3; smatr_[21][10]=3; smatr_[21][11]=-3; smatr_[21][12]=2; smatr_[21][13]=0; smatr_[21][14]=-4; smatr_[21][15]=-3; smatr_[21][16]=-1; smatr_[21][17]=-3; smatr_[21][18]=-2; smatr_[21][19]=2; smatr_[21][20]=-4; smatr_[21][21]=3; smatr_[21][22]=-3; smatr_[21][23]=-1; smatr_[21][24]=-6; smatr_[22][0]=-1; smatr_[22][1]=0; smatr_[22][2]=0; smatr_[22][3]=1; smatr_[22][4]=-4; smatr_[22][5]=4; smatr_[22][6]=5; smatr_[22][7]=-3; smatr_[22][8]=0; smatr_[22][9]=-4; smatr_[22][10]=-3; smatr_[22][11]=1; smatr_[22][12]=-1; smatr_[22][13]=-4; smatr_[22][14]=-2; smatr_[22][15]=0; smatr_[22][16]=-1; smatr_[22][17]=-3; smatr_[22][18]=-3; smatr_[22][19]=-3; smatr_[22][20]=0; smatr_[22][21]=-3; smatr_[22][22]=5; smatr_[22][23]=-1; smatr_[22][24]=-6; smatr_[23][0]=-1; smatr_[23][1]=-1; smatr_[23][2]=-1; smatr_[23][3]=-1; smatr_[23][4]=-1; smatr_[23][5]=-1; smatr_[23][6]=-1; smatr_[23][7]=-1; smatr_[23][8]=-1; smatr_[23][9]=-1; smatr_[23][10]=-1; smatr_[23][11]=-1; smatr_[23][12]=-1; smatr_[23][13]=-1; smatr_[23][14]=-1; smatr_[23][15]=-1; smatr_[23][16]=-1; smatr_[23][17]=-1; smatr_[23][18]=-1; smatr_[23][19]=-1; smatr_[23][20]=-1; smatr_[23][21]=-1; smatr_[23][22]=-1; smatr_[23][23]=-1; smatr_[23][24]=-6; smatr_[24][0]=-6; smatr_[24][1]=-6; smatr_[24][2]=-6; smatr_[24][3]=-6; smatr_[24][4]=-6; smatr_[24][5]=-6; smatr_[24][6]=-6; smatr_[24][7]=-6; smatr_[24][8]=-6; smatr_[24][9]=-6; smatr_[24][10]=-6; smatr_[24][11]=-6; smatr_[24][12]=-6; smatr_[24][13]=-6; smatr_[24][14]=-6; smatr_[24][15]=-6; smatr_[24][16]=-6; smatr_[24][17]=-6; smatr_[24][18]=-6; smatr_[24][19]=-6; smatr_[24][20]=-6; smatr_[24][21]=-6; smatr_[24][22]=-6; smatr_[24][23]=-6; smatr_[24][24]=1; return; }; long int i,j; f.open(smatr_file_name_.data(),ios::in); if(!f) { throw error("Error - file "+smatr_file_name_+" is not found\n",3); }; f>>number_of_AA_smatr_; if(number_of_AA_smatr_<=0) { throw error("Error - number of letters in the scoring matrix file must be greater than 0\n",3); }; get_memory_for_matrix(number_of_AA_smatr_,number_of_AA_smatr_,smatr_); for(i=0;i>smatr_[i][j]; }; }; f.close(); smatr_min( smatr_, number_of_AA_smatr_, smatr_min_); } catch (...) { if(f.is_open()) { f.close(); }; throw; }; } void FSA_utils::smatr_min( long int **smatr_, long int number_of_AA_smatr_, long int &smatr_min_) { long int small_number=numeric_limits::min()/10000; long int i,j; for(i=0;ismatr_[i][j]) { smatr_min_=smatr_[i][j]; }; }; }; }; } void FSA_utils::remove_zero_probabilities( double *&RR1_, double *&RR1_sum_, long int *&RR1_sum_elements_, long int &alphabet_letters_number1_, double *&RR2_, double *&RR2_sum_, long int *&RR2_sum_elements_, long int &alphabet_letters_number2_, long int **&smatr_, long int &number_of_AA_smatr_, long int &smatr_min_, long int &number_of_letters1_,//number of letters for the sequence 1 long int &number_of_letters2_,//number of letters for the sequence 2 char *&alphabet1_,//alphabet letters for the sequence #1 char *&alphabet2_,//alphabet letters for the sequence #2 long int *&alphabet1_to_long_,//d_alphabet1_to_long[c] returns order number of the letter c (for the sequence #1) long int *&alphabet2_to_long_,//d_alphabet2_to_long[c] returns order number of the letter c (for the sequence #2) long int &codon_length_,//codon length long int *&codon_AA_)// { vector codon_flag_RR2; { long int number_of_codons_old=FSA_utils::power_long(alphabet_letters_number1_,codon_length_); codon_flag_RR2.resize(number_of_codons_old,false); long int i; for(i=0;i zero_flag_RR1; map zero_flag_RR2; map old1_numbering_to_the_new1; map new1_numbering_to_the_old1; long int count1=-1; long int i; for(i=0;i old2_numbering_to_the_new2; map new2_numbering_to_the_old2; long int count2=-1; for(i=0;i long int *codon=new long int [codon_length_]; FSA_utils::assert_mem(codon); for(c1=0;c1; original codons long int alphabet_letters_number1_,//number of letters for the sequence #1 long int codon_length_,//codon length long int *&codon_AA_reversed_)//; reversed codons { long int number_of_codons=FSA_utils::power_long(alphabet_letters_number1_,codon_length_); codon_AA_reversed_ = new long int[number_of_codons];// FSA_utils::assert_mem(codon_AA_reversed_); long int *codon=new long int [codon_length_]; FSA_utils::assert_mem(codon); long int *codon2=new long int [codon_length_]; FSA_utils::assert_mem(codon2); long int c1; for(c1=0;c1 long int number_of_letters1_,//number of letters for the sequence 1 long int *codon_) { long int codon_code=codon_[0]; long int i; for(i=1;i=0;i--) { codon_[i]=code_%number_of_letters1_; code_-=codon_[i]; code_/=number_of_letters1_; }; } void FSA_utils::read_codon_AA_file( string file_name_, long int &number_of_letters1_,//number of letters for the sequence 1 long int &number_of_letters2_,//number of letters for the sequence 2 char *&alphabet1_,//alphabet letters for the sequence #1 char *&alphabet2_,//alphabet letters for the sequence #2 long int *&alphabet1_to_long_,//d_alphabet1_to_long[c] returns order number of the letter c (for the sequence #1) long int *&alphabet2_to_long_,//d_alphabet2_to_long[c] returns order number of the letter c (for the sequence #2) long int &codon_length_,//codon length long int *&codon_AA_,// bool reverse_codons_flag_)//if true, then the codons are reversed { ifstream f; alphabet1_=NULL; alphabet2_=NULL; alphabet1_to_long_=NULL; alphabet2_to_long_=NULL; codon_AA_=NULL; try { if(file_name_=="") { number_of_letters1_=4; alphabet1_=new char[4]; FSA_utils::assert_mem(alphabet1_); alphabet1_[0]='A'; alphabet1_[1]='C'; alphabet1_[2]='G'; alphabet1_[3]='T'; number_of_letters2_=25; alphabet2_=new char[25]; FSA_utils::assert_mem(alphabet2_); alphabet2_[0]='A'; alphabet2_[1]='R'; alphabet2_[2]='N'; alphabet2_[3]='D'; alphabet2_[4]='C'; alphabet2_[5]='Q'; alphabet2_[6]='E'; alphabet2_[7]='G'; alphabet2_[8]='H'; alphabet2_[9]='I'; alphabet2_[10]='L'; alphabet2_[11]='K'; alphabet2_[12]='M'; alphabet2_[13]='F'; alphabet2_[14]='P'; alphabet2_[15]='S'; alphabet2_[16]='T'; alphabet2_[17]='W'; alphabet2_[18]='Y'; alphabet2_[19]='V'; alphabet2_[20]='B'; alphabet2_[21]='J'; alphabet2_[22]='Z'; alphabet2_[23]='X'; alphabet2_[24]='*'; alphabet1_to_long_=new long int [length_max]; FSA_utils::assert_mem(alphabet1_to_long_); alphabet2_to_long_=new long int [length_max]; FSA_utils::assert_mem(alphabet2_to_long_); long int k; for(k=0;k>number_of_letters1_; if(number_of_letters1_<=0) { throw error("Error - the file "+file_name_+" is not correct: the number of letters must be positive\n",3); }; alphabet1_=new char[number_of_letters1_]; FSA_utils::assert_mem(alphabet1_); string line_tmp=""; f>>line_tmp; if((long int)line_tmp.length()>number_of_letters2_; if(number_of_letters2_<=0) { throw error("Error - the file "+file_name_+" is not correct\n",3); }; alphabet2_=new char[number_of_letters2_]; FSA_utils::assert_mem(alphabet2_); line_tmp=""; f>>line_tmp; if((long int)line_tmp.length()>codon_length_>>AA_length; codon_length_=3; AA_length=1; if(AA_length!=1||codon_length_<=0) { throw error("Error - the file "+file_name_+" is not correct\n",3); }; //read the table long int number_of_codons=1; for(k=0;k>line_tmp_codon; if(reverse_codons_flag_) { line_tmp_codon=string(line_tmp_codon.rbegin(),line_tmp_codon.rend()); }; f>>line_tmp_AA; if((codon_length_!=(long int)line_tmp_codon.length())||(AA_length!=(long int)line_tmp_AA.length())) { throw error("Error - the file "+file_name_+" is not correct\n",3); }; long int codon_code=alphabet1_to_long_[(size_t)line_tmp_codon[0]]; if(alphabet1_to_long_[(size_t)line_tmp_codon[0]]<0) { throw error("Error - the file "+file_name_+" is not correct\n",3); }; long int i; for(i=1;i<(long int)line_tmp_codon.length();i++) { if(alphabet1_to_long_[(size_t)line_tmp_codon[i]]<0) { throw error("Error - the file "+file_name_+" is not correct\n",3); }; codon_code=codon_code*number_of_letters1_+alphabet1_to_long_[(size_t)line_tmp_codon[i]]; }; if(alphabet2_to_long_[(size_t)line_tmp_AA[0]]<0) { throw error("Error - the file "+file_name_+" is not correct\n",3); }; codon_AA_[codon_code]=alphabet2_to_long_[(size_t)line_tmp_AA[0]]; }; f.close(); } catch (...) { delete[]alphabet1_;alphabet1_=NULL; delete[]alphabet2_;alphabet2_=NULL; delete[]alphabet1_to_long_;alphabet1_to_long_=NULL; delete[]alphabet2_to_long_;alphabet2_to_long_=NULL; delete[]codon_AA_;codon_AA_=NULL; if(f.is_open()) { f.close(); }; throw; }; } string FSA_utils::long_to_string(//convert interer ot string long int number_) { string res_=""; string tmp_string; if(number_>0) { tmp_string=""; } else { if(number_==0) { tmp_string=""; } else { tmp_string="-"; }; }; number_=labs(number_); for( ; ; ) { long int reminder=number_%10; number_=(number_-reminder)/10; res_=digit_to_string(reminder)+res_; if (number_==0) { break; }; }; return tmp_string+res_; } char FSA_utils::digit_to_string(//convert interer ot string long int digit_) { switch(digit_) { case 0:return '0'; case 1:return '1'; case 2:return '2'; case 3:return '3'; case 4:return '4'; case 5:return '5'; case 6:return '6'; case 7:return '7'; case 8:return '8'; case 9:return '9'; default:return '?'; }; } bool FSA_utils::the_value_is_double( string str_, double &val_) { if(str_=="") { return false; }; bool res=false; errno=0; char *p; val_=strtod(str_.c_str(),&p); if(errno!=0) { res=false; } else { res=(*p==0); }; return res; } bool FSA_utils::the_value_is_long( string str_, long int &val_) { if(str_.length()==0) { return false; }; if(!(str_[0]=='+'||str_[0]=='-'||isdigit(str_[0]))) { return false; }; long int start_digit=0; if(str_[0]=='+'||str_[0]=='-') { start_digit=1; }; long int i; for(i=start_digit;i<(long int)str_.size();i++) { if(!isdigit(str_[i])) { return false; }; }; if(((long int)str_.size()-start_digit)<=0) { return false; }; if(((long int)str_.size()-start_digit)>1) { while(str_[start_digit]=='0') { string::iterator it=str_.begin()+start_digit; str_.erase(it); if((long int)str_.size()<=start_digit+1) { break; }; }; }; if(((long int)str_.size()-start_digit>10)||((long int)str_.size()-start_digit)<=0) { return false; }; if((long int)str_.size()-start_digit==10) { if(!(str_[start_digit]=='1'||str_[start_digit]=='2')) { return false; }; if(str_[start_digit]=='2') { long int val2; string str2=str_.substr(start_digit+1,9); int flag=sscanf(str2.c_str(),"%ld",&val2); if(flag!=1) { return false; }; bool positive=true; if(start_digit>0) { if(str_[0]=='-') { positive=false; }; }; if(positive) { if(val2>147483647) { return false; }; } else { if(val2>147483648) { return false; }; }; }; }; int flag=sscanf(str_.c_str(),"%ld",&val_); if(flag!=1) { return false; }; return true; } double FSA_utils::sqrt_plus( double x_) { if(x_>=0) { return sqrt(x_); } else { return 0; }; } double FSA_utils::error_of_the_sum(//v1_+v2_ double v1_error_, double v2_error_) { if(v1_error_>=1e100||v2_error_>=1e100) { return 1e100; }; return sqrt(v1_error_*v1_error_+v2_error_*v2_error_); } double FSA_utils::error_of_the_product(//v1_*v2_ double v1_, double v1_error_, double v2_, double v2_error_) { if(v1_error_>=1e100||v2_error_>=1e100) { return 1e100; }; double a1=(v1_+v1_error_)*(v2_+v2_error_); double a2=(v1_-v1_error_)*(v2_+v2_error_); double a3=(v1_+v1_error_)*(v2_-v2_error_); double a4=(v1_-v1_error_)*(v2_-v2_error_); double a=v1_*v2_; return FSA_utils::Tmax(fabs(a1-a),fabs(a2-a),fabs(a3-a),fabs(a4-a)); } double FSA_utils::error_of_the_lg(//lg(v1_) double v1_, double v1_error_) { if(v1_error_>=1e100||v1_<=0) { return 1e100; }; return FSA_utils::Tmin(fabs(log(v1_)/log(10.0)),v1_error_/v1_/log(10.0)); } double FSA_utils::error_of_the_sqrt(//sqrt(v1_) double v1_, double v1_error_) { if(v1_error_>=1e100||v1_<0) { return 1e100; }; double s=sqrt(v1_); double s1=sqrt(FSA_utils::Tmax(0.0,v1_-v1_error_)); double s2=sqrt(FSA_utils::Tmax(0.0,v1_+v1_error_)); return FSA_utils::Tmax(fabs(s-s1),fabs(s-s2)); } double FSA_utils::error_of_the_ratio(//v1_/v2_ double v1_, double v1_error_, double v2_, double v2_error_) { if(v1_error_>=1e100||v2_error_>=1e100) { return 1e100; }; if(v2_==0) { return 1e100; }; if(v1_==0&&v1_error_==0) { return 0.0; }; double a=v1_/v2_; if(((v2_+v2_error_)*v2_<=0)) { double a3=(v1_+v1_error_)/(v2_-v2_error_); double a4=(v1_-v1_error_)/(v2_-v2_error_); return FSA_utils::Tmax(fabs(a-a3),fabs(a-a4)); }; if(((v2_-v2_error_)*v2_<=0)) { double a1=(v1_+v1_error_)/(v2_+v2_error_); double a2=(v1_-v1_error_)/(v2_+v2_error_); return FSA_utils::Tmax(fabs(a-a1),fabs(a-a2)); }; double a1=(v1_+v1_error_)/(v2_+v2_error_); double a2=(v1_-v1_error_)/(v2_+v2_error_); double a3=(v1_+v1_error_)/(v2_-v2_error_); double a4=(v1_-v1_error_)/(v2_-v2_error_); return FSA_utils::Tmax(fabs(a-a1),fabs(a-a2),fabs(a-a3),fabs(a-a4)); } double FSA_utils::error_of_the_sum_with_coeff(//c1_*v1_+c2_*v2_ double c1_, double v1_error_, double c2_, double v2_error_) { if(v1_error_>=1e100||v2_error_>=1e100) { return 1e100; }; return sqrt(c1_*c1_*v1_error_*v1_error_+c2_*c2_*v2_error_*v2_error_); } void FSA_utils::read_alphabet( string alphabet_file_name_, long int &number_of_AA_alphabet_, char* &alphabet_) { ifstream a(alphabet_file_name_.data()); if(!a) { throw error("Error - the file "+alphabet_file_name_+" is not found\n",1); }; a>>number_of_AA_alphabet_; if(number_of_AA_alphabet_<=0) { throw error("Error - the file "+alphabet_file_name_+" is wrong\n",1); }; alphabet_=new char[number_of_AA_alphabet_]; FSA_utils::assert_mem(alphabet_); long int i; for(i=0;i>alphabet_[i]; }; a.close(); } void FSA_utils::calculate_composition_frequencies( double number_of_AA_alphabet_, char* alphabet_, string fasta_file_name_, double *comp_frequencies_) { long int max_c=1000; long int *letter_to_int=new long int[max_c]; FSA_utils::assert_mem(letter_to_int); long int i; for(i=0;i=number_of_AA_alphabet_) { continue; }; comp_frequencies_[letter_ind]++; }; if(f.eof()) { break; }; getline(f,st); if(st.size()==0) { break; }; }; f.close(); double sum=0; for(i=0;i long int &codon_length_,//codon length long int number_of_letters1_,//number of letters for the sequence 1 long int number_of_letters2_,//number of letters for the sequence 2 const double *RR1_,//nucleotide probabilities double *&RR1_AA_)//the resulted frequencies { long int *codon_tmp=new long int [codon_length_]; FSA_utils::assert_mem(codon_tmp); RR1_AA_=new double[number_of_letters2_]; FSA_utils::assert_mem(RR1_AA_); long int k; for(k=0;k=number_of_letters2_) { throw error("Unexpected errro in FSA_utils::extract_AA_frequencies_for_DNA_sequence\n",1); }; double prob_tmp=1.0; long int i; for(i=0;i > A_,//matrix n*(n+1) std::vector &x_,//solution double inside_eps_, std::vector > *inv_A_) { long int i,j,jj; long int matr_size=(long int)A_.size(); if(matr_size==0) { //throw error("Error in FSA_utils::Gauss - sizes of matrix are wrong\n",1); return false; }; std::vector > E; if(inv_A_) { vector zero(matr_size,0); (*inv_A_).resize(matr_size, zero); E.resize(matr_size, zero); long int i; for(i=0;i=0;i--) { x_[i]=A_[i][matr_size]; for(j=i+1;j=0;i--) { (*inv_A_)[i][k]=E[i][k]; long int j; for(j=i+1;j > &A_, const std::vector > &B_, std::vector > &res_) { long int size1=(long int)A_.size(); if(size1==0) { throw error("Error in FSA_utils::multiply_matrices\n",1); }; long int size2=(long int)A_[0].size(); if(size2==0) { throw error("Error in FSA_utils::multiply_matrices\n",1); }; long int i,j,k; for(i=1;i > &A_, const std::vector &y_, std::vector &res_) { long int size1=(long int)A_.size(); if(size1==0) { throw error("Error in FSA_utils::multiply_matrix_and_vector\n",1); }; long int size2=(long int)A_[0].size(); if(size2==0) { throw error("Error in FSA_utils::multiply_matrix_and_vector\n",1); }; long int i,k; for(i=1;i > &A_, std::vector > &res_) { long int size1=(long int)A_.size(); if(size1==0) { res_.clear(); return; }; long int size2=(long int)A_[0].size(); long int i,j; for(i=1;i > A_) { long int i,j; for(i=0;i<(long int)A_.size();i++) { for(j=0;j<(long int)A_[i].size();j++) { if(j<(long int)A_[i].size()-1) { cout<0) { bool flag=true; long int i; for(i=0;i<(long int)st_.size();i++) { if(!(st_[i]==' '||(long int)st_[i]==13)) { flag=false; break; }; }; if(flag) { continue; }; }; if(st_.size()>0) { if((long int)st_[st_.size()-1]==13) { st_.resize(st_.size()-1); }; break; }; if(f_.eof()) { end_of_file_flag_=true; }; }; } void FSA_utils::read_sequences_for_alingment( string input_file_name_, long int &number_of_letters1_,//number of letters for the sequence 1 long int &number_of_letters2_,//number of letters for the sequence 2 char *&alphabet1_,//alphabet letters for the sequence #1 char *&alphabet2_,//alphabet letters for the sequence #2 long int& number_of_sequences_, string *&headers_, long int *&lengths1_,//lengths of the sequences #1 long int *&lengths2_,//lengths of the sequences #2 long int **&sequences1_,//the first index numerates sequences; the second - sequence letters long int **&sequences2_) { long int max_c=1000; long int *letter_to_int1=new long int[max_c]; long int *letter_to_int2=new long int[max_c]; long int i; for(i=0;i sequences1_vect; vector sequences2_vect; vector lengths1_vect; vector lengths2_vect; vector headers_vect; for( ; ; ) { bool end_of_file_flag; string header; read_string( f, header, end_of_file_flag); if(end_of_file_flag) { if(header!="") { throw error("Error - the file "+input_file_name_+" is incorrect\n",1); }; if(count==0) { throw error("Error - the file "+input_file_name_+" is incorrect\n",1); }; break; }; if(header[0]!='>') { throw error("Error - the file "+input_file_name_+" is incorrect\n",1); }; string st1=""; string st2=""; if(f.eof()) { throw error("Error - the file "+input_file_name_+" is incorrect\n",1); }; read_string( f, st1, end_of_file_flag); if(st1.size()==0||end_of_file_flag) { throw error("Error - the file "+input_file_name_+" is incorrect\n",1); }; read_string( f, st2, end_of_file_flag); if(st2.size()==0||end_of_file_flag) { throw error("Error - the file "+input_file_name_+" is incorrect\n",1); }; count++; //checking the sequences long int *sequences1_tmp=new long int [st1.size()]; FSA_utils::assert_mem(sequences1_tmp); long int *sequences2_tmp=new long int [st2.size()]; FSA_utils::assert_mem(sequences2_tmp); headers_vect.push_back(header); sequences1_vect.push_back(sequences1_tmp); sequences2_vect.push_back(sequences2_tmp); lengths1_vect.push_back((long int)st1.size()); lengths2_vect.push_back((long int)st2.size()); for(k=0;k<(long int)st1.size();k++) { long int long_tmp=letter_to_int1[(size_t)st1[k]]; if(long_tmp>=0) { sequences1_tmp[k]=long_tmp; } else { throw error("Error - the file "+input_file_name_+" is incorrect: a non-alphabet letter with the code "+FSA_utils::long_to_string((long int)st1[k])+"\n",1); }; }; for(k=0;k<(long int)st2.size();k++) { long int long_tmp=letter_to_int2[(size_t)st2[k]]; if(long_tmp>=0) { sequences2_tmp[k]=long_tmp; } else { throw error("Error - the file "+input_file_name_+" is incorrect: a non-alphabet letter with the code "+FSA_utils::long_to_string((long int)st2[k])+"\n",1); }; }; }; f.close(); delete[]letter_to_int1; delete[]letter_to_int2; //allocate memory for the arrays number_of_sequences_=count; sequences1_=new long int*[number_of_sequences_]; FSA_utils::assert_mem(sequences1_); sequences2_=new long int*[number_of_sequences_]; FSA_utils::assert_mem(sequences2_); lengths1_=new long int[number_of_sequences_]; FSA_utils::assert_mem(lengths1_); lengths2_=new long int[number_of_sequences_]; FSA_utils::assert_mem(lengths2_); headers_=new string[number_of_sequences_]; FSA_utils::assert_mem(headers_); for(k=0;k