// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// #ifndef __STARLING_SHARED_HH #define __STARLING_SHARED_HH #include "blt_common/blt_shared.hh" #include "starling/diploid_indel.hh" enum { MAX_CONTIG_SIZE = 10000 }; struct starling_options : blt_options { starling_options() : bindel_diploid_theta(0), is_bindel_diploid(false), is_test_indels(false), is_bindel_diploid_file(false), genome_size(0), is_genome_size(false), is_simple_indel_error(false), simple_indel_error(0), min_read_bp_flank(6), is_realign_submapped_reads(false), max_indel_size(150), min_candidate_indel_reads(3), min_candidate_indel_read_frac(0.02), max_small_candidate_indel_size(4), min_small_candidate_indel_read_frac(0.1), max_read_indel_toggle(5), max_candidate_indel_density(0.15), is_clip_ambiguous_path(true), is_realigned_read_file(false), is_call_indels(false), is_smoothed_alignments(true), smoothed_lnp_range(std::log(10.)), is_filter_unanchored(false) {} // parameters inherited from varling caller: // double bindel_diploid_theta; bool is_bindel_diploid; bool is_test_indels; bool is_bindel_diploid_file; uint32_t genome_size; bool is_genome_size; bool is_simple_indel_error; double simple_indel_error; /// to contibute to a breakpoint likelihood, a read must have at least /// this many bases on each side of the breakpoint: int min_read_bp_flank; std::string indel_contig_filename; std::string indel_contig_read_filename; std::string bindel_diploid_filename; // starling parameters: // bool is_realign_submapped_reads; // should reads falling below the snp-caller's mapping criteria be realigned? (note this only makes sense if writing out realigned reads unsigned max_indel_size; // maximum indel size which can be represented by starling (formerly a static value) int min_candidate_indel_reads; // indel cannot become candidate unless at least min reads which meet mapping threshold support it double min_candidate_indel_read_frac; // indel cannot become candidate unless at least frac of reads which meet mapping thresholds support it (num is reads supporting indel/den is ELAND reads aligning to adjacent position). int max_small_candidate_indel_size; // indels this size or lower have additional 'small indel' candidacy criteria double min_small_candidate_indel_read_frac; // same as for min_candidate_indel_read_frac, but for small indels int max_read_indel_toggle; // if a read samples more than max indel changes, we skip realignment double max_candidate_indel_density; // max number of candidate indels per read base, if exceeded search is curtailed to toggle depth=1 bool is_clip_ambiguous_path; // clip the section of a read which aligns equally well to two or more paths before pileup or realigned read output bool is_realigned_read_file; bool is_call_indels; // this option imposes a consistency criteria on alignments with // nearly equal score to favor certain alignments even if they do // not have the optimal score. // // when using smoothed alignments, we realign to the prefered // alignment that is not more than smoothed_lnp_range from the // highest scoring alignment. // bool is_smoothed_alignments; double smoothed_lnp_range; // filter reads where both reads of pair have SE score 0, temp fix for internal analysis: bool is_filter_unanchored; std::string realigned_read_filename; std::string bam_filename; std::string bam_seq_name; }; // data deterministically derived from the input options: // struct starling_deriv_options : blt_deriv_options { double site_lnprior; double nonsite_lnprior; double diploid_indel_state_lnprior[DIINDEL::SIZE]; }; void get_starling_deriv_options(const starling_options& client_opt, const pos_t ref_size, starling_deriv_options& client_dopt); struct starling_read_counts : public blt_read_counts { starling_read_counts() : normal_indel_used(0), normal_indel_intersect(0), grouper_indel_used(0), grouper_indel_intersect(0), grouper_unused(0) {} void report(std::ostream& os) const; unsigned normal_indel_used; unsigned normal_indel_intersect; unsigned grouper_indel_used; unsigned grouper_indel_intersect; unsigned grouper_unused; }; #endif