// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// /// /// note coding convention for all ranges '_pos fields' is: /// XXX_begin_pos is zero-indexed position at the begining of the range /// XXX_end_pos is zero-index position 1 step after the end of the range /// #ifndef __STARLING_POS_PROCESSOR_H #define __STARLING_POS_PROCESSOR_H #include "depth_buffer.hh" #include "indel_buffer.hh" #include "indel_set.hh" #include "pos_basecall_buffer.hh" #include "starling_read_buffer.hh" #include "blt_util/multi_stage_circular_manager.hh" #include "blt_util/pos_processor_base.hh" #include "blt_util/depth_stream_stat.hh" #include "starling/starling_shared.hh" #include "starling/starling_streams.hh" #include "boost/utility.hpp" #include #include struct diploid_genotype; struct nploid_info; int get_influence_zone_size(const unsigned max_indel_size); /// \brief accumulate sequential position specific information and /// send to a snp-calling/indel-calling routine after all position /// information is found /// /// pos_processor assumes that information related to each position /// will be available in an approximately sequential fashion, where all /// position values submitted after position X will be greater than /// X-POS_BUFFER_SIZE+1. A violation of this assumption will trigger a /// runtime error. /// /// The implementation should be split into a generic semi-sequential /// position-processor and an object with the application-specific /// position code (here, snp-calling). Maybe once a second application /// comes up... /// /// /// range policy: /// /// if begin_pos is not specified, then event processiing and /// reporting start at the first pos >= 0 with snp/indel information /// submitted, else at begin_pos /// /// if end_pos is not specified, then event processing and reporting /// end after last_pos with snp/indel information submitted, else at /// end_pos. If the reference sequence is set then end_pos must be < /// ref_size and will be adjusted accordingly. /// /// Submission of snps < first_pos will be ignored. Submission of indels /// between first_pos and first_pos-MAX_READ_SIZE will be processed but /// not reported. /// /// ... /// struct starling_pos_processor : public pos_processor_base, private boost::noncopyable { typedef pos_processor_base base_t; starling_pos_processor(const starling_options& client_opt, const starling_deriv_options& client_dopt, const std::string& ref_seq, const starling_streams& client_io); virtual ~starling_pos_processor(); // finish position report and reset structure to ground state: // void reset(); // note that indel position should be normalized before calling: // // returns true if this indel is novel to the buffer // bool insert_indel(const indel& in); // first return value is true if the alignment is accepted into // the buffer (alignments can fail a number of quality checks -- // such as being located too far away from other alignments of the // same read or having an indel that is too large // // second return value is read_id // std::pair insert_read(const bam_record& br, const alignment& al, const READ_ALIGN::index_t rat, const char* chrom_name, const bool is_usable_mapping, const align_id_t contig_id = 0, const indel_set_t* contig_indels_ptr = 0); starling_read* get_read(const align_id_t read_id) { return _read_buff.get_read(read_id); } void set_head_pos(const pos_t pos) { _msm.handle_new_pos_value(pos); } // Is a read potentially containing an indel or an indel itself // far enough from the report start and stop positions to be // excluded? // bool is_range_outside_report_influence_zone(const pos_range& pr) const { return (not _report_influence_range.is_range_intersect(pr)); } // Does a range fall outside of the report start and stop positions? // bool is_range_outside_report_zone(const pos_range& pr) const { return (not _client_dopt.report_range_limit.is_range_intersect(pr)); } private: bool is_pos_reportable(const pos_t pos){ return _client_dopt.report_range_limit.is_pos_intersect(pos); } void insert_pos_basecall(const pos_t pos, const base_call& bc); void process_pos(const int stage_no, const pos_t pos); void process_pos_indel(const pos_t pos); // void // clean_pos(const pos_t pos); void init_read_segment(const read_segment& rseg); void init_read_segment_pos(const pos_t pos); void align_pos(const pos_t pos); void pileup_pos_reads(const pos_t pos); void pileup_read_segment(const read_segment& rseg); void rebuffer_pos_reads(const pos_t pos); void write_reads(const pos_t pos); void process_pos_snp(const pos_t pos); std::ostream& get_report_os() const { return _client_io.report_os(); } const diploid_genotype& get_empty_dgt(const char ref) const; const starling_options& _client_opt; const starling_deriv_options& _client_dopt; const std::string& _ref_seq; const starling_streams& _client_io; const std::vector _stage_size; multi_stage_circular_manager _msm; pos_range _report_influence_range; std::string _chrom_name; indel_buffer _indel_buff; pos_basecall_buffer _bc_buff; starling_read_buffer _read_buff; depth_buffer _estdepth_buff; // provide an early estimate of read depth before realignment. depth_stream_stat _ss; depth_stream_stat _used_ss; depth_stream_stat _ssn; depth_stream_stat _used_ssn; bool _is_dependent_eprob; std::auto_ptr _empty_dgt[N_BASE]; std::auto_ptr _ninfo; double* _ws; }; #endif