/*****************************************************************************/ // Copyright (c) Illumina 2008 // Author: Richard Shaw // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). /*****************************************************************************/ #include #include "common/MathCompatibility.hh" #include "statistics/Non_Blank_Filter.h" // for the_blank_ch #include "statistics/Quality_Value_Stats_Set.h" /*****************************************************************************/ Quality_Value_Stats_Set::Quality_Value_Stats_Set() : my_num_bases(0) { ; } /*****************************************************************************/ Quality_Value_Stats_Set::~Quality_Value_Stats_Set() { ; } /*****************************************************************************/ void Quality_Value_Stats_Set::add_read_align_info(const Read_Align_Info& read_align_info) { if (!read_align_info.is_aligned()) { // Nothing to do. return; } const std::string& read_str(read_align_info.my_read_str); const std::string& ref_str(read_align_info.my_ref_str); const std::string& qual_str(read_align_info.my_qual_str); std::string::const_iterator read_ch_citer(read_str.begin()); std::string::const_iterator ref_ch_citer(ref_str.begin()); std::string::const_iterator qual_ch_citer(qual_str.begin()); const char del_ch(Read_Align_Info::our_del_ch); for ( ; read_ch_citer != read_str.end(); ++read_ch_citer, ++ref_ch_citer) { const char read_ch(*read_ch_citer); const char ref_ch(*ref_ch_citer); // Skip (do not move along quality string) any bases // that are X in read (deletions). if (read_ch == del_ch) { continue; } // Ignore any bases that are N in read or ref or X in ref (insertions). if ((read_ch == the_blank_ch) || (ref_ch == the_blank_ch) || (ref_ch == del_ch)) { ++qual_ch_citer; continue; } ++my_num_bases; const int qual_val(int(*qual_ch_citer++) - 64); ++my_qual_val_freq_map[qual_val]; if (read_ch != ref_ch) { ++my_errs_by_qual_val_freq_map[qual_val]; } } } /*****************************************************************************/ std::ostream& operator<<(std::ostream& ostrm, const Quality_Value_Stats_Set& quality_value_stats_set) { ostrm << "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-" << std::endl << "QV\tthis QV or higher\t\t|\tthis QV only\t\t\t|\texpected" << std::endl << "\t# error\t# bases\t% error\t% total\t|" << "\t# error\t# bases\t% error\t% total\t|\t% error" << std::endl << "-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-" << std::endl; const Quality_Value_Stats_Set::Qual_Val_Freq_Map& qual_val_freq_map(quality_value_stats_set.my_qual_val_freq_map); const Quality_Value_Stats_Set::Qual_Val_Freq_Map& errs_by_qual_val_freq_map(quality_value_stats_set. my_errs_by_qual_val_freq_map); const uint64_t& num_bases(quality_value_stats_set.my_num_bases); uint64_t cum_freq(0); uint64_t cum_err_count(0); for (Quality_Value_Stats_Set::Qual_Val_Freq_Map_CRIter pair_citer(qual_val_freq_map.rbegin()); pair_citer != qual_val_freq_map.rend(); ++pair_citer) { const int qual_val(pair_citer->first); const double expected_err(100.0 / (1.0 + (pow(10.0, (qual_val / 10.0))))); const uint64_t curr_freq(pair_citer->second); // Cannot map[key] on a const map (especially if want default vals). Quality_Value_Stats_Set::Qual_Val_Freq_Map_CIter err_pair_citer(errs_by_qual_val_freq_map.find(qual_val)); const uint64_t curr_err_count((err_pair_citer != errs_by_qual_val_freq_map.end()) ? err_pair_citer->second : 0); cum_freq += curr_freq; cum_err_count += curr_err_count; ostrm << qual_val << (boost::format("\t%d\t%d\t%5.2f\t%5.2f\t|") % cum_err_count % cum_freq % (100.0 * cum_err_count / cum_freq) % (100.0 * cum_freq / num_bases)) << (boost::format("\t%d\t%d\t%5.2f\t%5.2f\t|") % curr_err_count % curr_freq % (100.0 * curr_err_count / curr_freq) % (100.0 * curr_freq / num_bases)) << (boost::format("\t%5.3f") % expected_err) << std::endl; } return ostrm; } /*****************************************************************************/