// -*- c++ -*-
/*****************************************************************************/
// Copyright (c) Illumina 2008
// Author: Richard Shaw
//
// This software is covered by the "Illumina Genome Analyzer Software
// License Agreement" and the "Illumina Source Code License Agreement",
// and certain third party copyright/licenses, and any user of this
// source file is bound by the terms therein (see accompanying files
// Illumina_Genome_Analyzer_Software_License_Agreement.pdf and
// Illumina_Source_Code_License_Agreement.pdf and third party
// copyright/license notices).
/*****************************************************************************/

#ifndef STATS_SET_H
#define STATS_SET_H

/*****************************************************************************/

// NOTE(review): the header name of the first #include below is missing in
// this copy of the file. std::valarray, std::string and size_t are all used
// further down with no other visible include providing them, so it was
// presumably <valarray> and/or <string> -- restore from the upstream source.
#include
#include "statistics/Base_Calls.h"
#include "statistics/Cum_Errs.h"
#include "common/File_Buffer.h"
#include "statistics/Basic_Stats.h"
#include "statistics/Info_Contents.h"
#include "statistics/Read_Patterns.h"

/*****************************************************************************/

/// Declaration of Stats_Set: a bundle of running totals, Base_Calls tables,
/// Read_Patterns stores and a Cum_Errs object, together with methods to feed
/// data in, derive further statistics and write the result to a File_Buffer.
///
/// Only the inline accessors are defined here; every other member function
/// is implemented outside this header, so the notes on those functions
/// describe the declared interface only.
class Stats_Set
{
public:
    /// Constructs a set; max_patterns_to_store presumably bounds the two
    /// Read_Patterns members (confirm against the implementation).
    /// Not declared explicit, so a size_t converts implicitly to Stats_Set.
    Stats_Set(size_t max_patterns_to_store);
    ~Stats_Set();

    /// This is intended only for initialisation.
    void set_num_cycles(const unsigned int num_cycles);

    /// Returns the number of cycles (defined outside this header).
    unsigned int num_cycles() const;

    /// Adds statistics for one read given its base-call string and its
    /// quality string. NOTE(review): the encoding of `qual` is not visible
    /// here -- check the implementation before relying on it.
    void add_basecalls_stats(const std::string &read, const std::string &qual);

    /// Adds alignment statistics from individual values: the alignment
    /// score plus counts of insertions, inserted bases, deletions and
    /// deleted bases.
    void add_align_stats(unsigned int align_score, unsigned int num_insertions, unsigned int num_insertion_bases, unsigned int num_deletions, unsigned int num_deletion_bases);

    /// Overload taking another Stats_Set; presumably folds that set's
    /// alignment statistics into this one (confirm in the implementation).
    void add_align_stats(const Stats_Set& stats_set);

    /// Applies a per-element mask of non-blank positions.
    /// NOTE(review): the valarray element type is missing in this copy of
    /// the file -- restore the template argument from the upstream source.
    void apply_non_blank_mask(const std::valarray& non_blank_mask);

    /// Computes the derived statistics (defined outside this header).
    void derive_stats();

    /// Writes this set to file_buffer; const, so it does not modify the set.
    void write(File_Buffer& file_buffer) const;

    // NOTE(review): Uint_Base_Calls, Int_Base_Calls and Float_Base_Calls are
    // all typedefs of a bare `Base_Calls` in this copy; the differing
    // element-type template arguments appear to have been lost -- restore
    // from the upstream source.
    typedef Base_Calls Uint_Base_Calls;

    // The four accessors below return non-const references, so callers can
    // modify the underlying members directly.

    /// Mutable access to my_base_calls.
    Uint_Base_Calls& base_calls() { return my_base_calls; }

    /// Mutable access to my_cum_errs.
    Cum_Errs& cum_errs() { return my_cum_errs; }

    /// Mutable access to my_most_common_words.
    Read_Patterns& most_common_words() { return my_most_common_words; }

    /// Mutable access to my_most_common_blank_patterns.
    Read_Patterns& most_common_blank_patterns() { return my_most_common_blank_patterns; }

    /// Sets the all-reads flag; nothing in this header clears it again.
    void mark_as_all_reads() { my_all_reads_flag = true; }

    /// Returns the all-reads flag.
    bool is_all_reads() const { return my_all_reads_flag; }

    /// Class-scope constant with value 2; by its name, the largest number of
    /// blanks allowed in a "common word" (usage is not visible here).
    enum { MAX_NUM_BLANKS_IN_COMMON_WORDS = 2 };

    // Read-only accessors for the running totals. Each returns the
    // like-named private member, except alignMismatches(), which returns
    // my_num_errs.
    unsigned long int clustersTotal() const {return my_num_clusters_total;}
    unsigned long int basesTotal() const {return my_num_bases_total;}
    unsigned long int q20BasesTotal() const {return my_q20_bases_total;}
    unsigned long int q30BasesTotal() const {return my_q30_bases_total;}
    unsigned long int qualityScoreSum() const {return my_quality_score_sum;}
    unsigned long int basesAligned() const {return my_num_bases_aligned;}
    unsigned long int uniqueAligns() const {return my_num_unique_aligns;}
    unsigned long int alignScoreSum() const {return my_align_score_sum;}
    unsigned long int alignMismatches() const {return my_num_errs;}

private:
    // See the NOTE(review) on Uint_Base_Calls: template arguments missing.
    typedef Base_Calls Int_Base_Calls;
    typedef Base_Calls Float_Base_Calls;

    /// Derives a floating-point table (pc_base_calls) from a count table
    /// (base_calls) along sum_dimension. By default the result is expressed
    /// as percentages and neither read blanks nor reference blanks are
    /// excluded.
    void derive_proportions(const Uint_Base_Calls& base_calls, Float_Base_Calls& pc_base_calls, Uint_Base_Calls::Dimension sum_dimension, bool as_percents = true, bool exclude_read_blanks = false, bool exclude_ref_blanks = false);

    /// Presumably fills my_likelihoods (confirm in the implementation).
    void derive_likelihoods();

    /// Presumably configures output formatting (confirm in the
    /// implementation).
    void set_formats();

    // Set by mark_as_all_reads(), read by is_all_reads().
    bool my_all_reads_flag;

    // Running totals; those with a public accessor are exposed above.
    unsigned long int my_q20_bases_total;
    unsigned long int my_q30_bases_total;
    unsigned long int my_quality_score_sum;
    unsigned long int my_num_clusters_total;
    unsigned long int my_num_bases_total;
    unsigned long int my_num_bases_aligned;
    unsigned long int my_num_errs; // exposed as alignMismatches()
    unsigned long int my_num_blanks;
    unsigned long int my_num_unique_aligns;
    unsigned long int my_align_score_sum;
    unsigned long int my_num_insertions;
    unsigned long int my_num_insertion_bases;
    unsigned long int my_num_deletions;
    unsigned long int my_num_deletion_bases;

    // NOTE(review): valarray element type missing in this copy (see the
    // note on apply_non_blank_mask).
    std::valarray my_cycle_non_blank_freqs;

    // Count tables and the statistics objects built around them.
    Uint_Base_Calls my_base_calls; // exposed via base_calls()
    Uint_Base_Calls my_sum_over_cycles_base_calls;
    Basic_Stats my_basic_stats;
    Float_Base_Calls my_pc_reads_by_ref;
    Float_Base_Calls my_pc_reads_excl_blanks_by_ref;
    Float_Base_Calls my_pc_refs_by_read;
    Float_Base_Calls my_prop_reads_by_ref_cycle;
    Info_Contents my_info_contents;
    Read_Patterns my_most_common_words; // exposed via most_common_words()
    Read_Patterns my_most_common_blank_patterns; // exposed via most_common_blank_patterns()
    Int_Base_Calls my_likelihoods;
    Cum_Errs my_cum_errs; // exposed via cum_errs()
};

/*****************************************************************************/

#endif // ! STATS_SET_H

/*****************************************************************************/