// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// #include "starling/align_path_bam_util.hh" #include "starling_read.hh" #include "starling_read_util.hh" #include "blt_util/log.hh" #include "starling/starling_shared.hh" #include starling_segmented_read:: starling_segmented_read(const seg_id_t size) : _seg_info(size) {} void starling_segmented_read:: set_segment(const seg_id_t seg_no, const read_segment& rseg){ assert(seg_no != 0); _seg_info[seg_no-1] = rseg; } read_segment& starling_segmented_read:: get_segment(const seg_id_t seg_no) { assert(seg_no != 0); return _seg_info[seg_no-1]; } const read_segment& starling_segmented_read:: get_segment(const seg_id_t seg_no) const { assert(seg_no != 0); return _seg_info[seg_no-1]; } namespace READ_ALIGN { const char* label(const index_t i) { switch(i) { case GENOME: return "GENOME"; case CONTIG: return "CONTIG"; } log_os << "ERROR: unknown READ_ALIGN index: " << i << "\n"; exit(EXIT_FAILURE); return NULL; } } static bool death_dump(const starling_read& sr, const alignment& al, const READ_ALIGN::index_t rat, const align_id_t contig_id){ log_os << "\tread: " << sr << "\n" << "\tnew-alignment: " << al << "\n" << "\tnew-alignment-type: " << READ_ALIGN::label(rat) << "\n"; if(READ_ALIGN::CONTIG == rat) log_os << "\tcontig-id: " << contig_id << "\n"; exit(EXIT_FAILURE); } starling_read:: starling_read(const bam_record& br, const bool is_bam_record_genomic) : is_genome_align_usable_mapping(false), _is_bam_record_genomic(is_bam_record_genomic), _id(0), _read_rec(br), _full_read(_read_rec.read_size(),0,this), _segment_ptr(NULL) {} // this needs to be in the cpp file for auto_ptr to work correctly starling_read:: ~starling_read() {} bool starling_read:: is_treated_as_usable_mapping() const { return (get_full_segment().genome_align().empty() or is_genome_align_usable_mapping); } uint8_t starling_read:: map_qual() const { if((not get_full_segment().genome_align().empty()) and _is_bam_record_genomic) { return _read_rec.map_qual(); } else { return 255; } } // // static bool is_alignment_in_range(const pos_t pos, const alignment& al, const unsigned max_indel_size){ return (std::abs(pos-get_alignment_buffer_pos(al)) <= static_cast(max_indel_size)); } // // bool starling_read:: is_compatible_alignment(const alignment& al, const READ_ALIGN::index_t rat, const align_id_t contig_id, const unsigned max_indel_size) const { if(is_fwd_strand() != al.is_fwd_strand) { log_os << "ERROR: multiple suggested alignments for read are not same-strand.\n"; death_dump(*this,al,rat,contig_id); } const read_segment& rseg(get_full_segment()); if(READ_ALIGN::GENOME == rat) { if(not rseg.genome_align().empty()) { log_os << "ERROR: multiple suggested genomic alignments for read.\n"; death_dump(*this,al,rat,contig_id); } } else { typedef contig_align_t cat; const cat& ct(rseg.contig_align()); cat::const_iterator i(ct.begin()),i_end(ct.end()); for(;i!=i_end;++i){ if(i->first == contig_id) { log_os << "ERROR: multiple suggested alignments for read from same contig.\n"; death_dump(*this,al,rat,contig_id); } } } // al_buffer_pos must be compatible with all non-empty alignemnts // stored for this read to be used -- this is treated as a warning // for now: const pos_t new_pos(get_alignment_buffer_pos(al)); if(not rseg.genome_align().empty()) { if(not is_alignment_in_range(new_pos,rseg.genome_align(),max_indel_size)) return false; } typedef contig_align_t cat; const cat& ct(rseg.contig_align()); cat::const_iterator i(ct.begin()),i_end(ct.end()); for(;i!=i_end;++i){ if(not is_alignment_in_range(new_pos,i->second,max_indel_size)) { return false; } } return true; } void starling_read:: set_genome_align(const alignment& al) { assert(get_full_segment().genome_align().empty()); assert(not al.empty()); get_full_segment()._genome_align=al; const seg_id_t n_seg(apath_exon_count(al.path)); if(n_seg<=1) return; // deal with segmented reads now: assert(not is_segmented()); _segment_ptr.reset(new starling_segmented_read(n_seg)); using namespace ALIGNPATH; seg_id_t seg_id(1); pos_t read_pos(0); pos_t ref_pos(al.pos); pos_t seg_start_read_pos(read_pos); pos_t seg_start_ref_pos(ref_pos); path_t seg_path; const unsigned as(al.path.size()); for(unsigned i(0);iseg_start_read_pos); const unsigned size(end_read_pos-seg_start_read_pos); const read_segment rseg(size,seg_start_read_pos,this); _segment_ptr->set_segment(seg_id,rseg); alignment& new_al(get_segment(seg_id)._genome_align); new_al.path=seg_path; new_al.pos=seg_start_ref_pos; new_al.is_fwd_strand=al.is_fwd_strand; seg_id++; seg_start_read_pos=read_pos; seg_start_ref_pos=ref_pos; seg_path.clear(); } } } // returns NULL for non-realigned contig reads: static const alignment* get_best_buffer_pos_align(const read_segment& rseg) { if(rseg.is_realigned) { return &(rseg.realignment); } if(not rseg.genome_align().empty()){ return &(rseg.genome_align()); } return NULL; } // update full segment based on segment realignments: void starling_read:: update_full_segment() { read_segment& fullseg(_full_read); assert(not fullseg.is_realigned); //bool is_realigned(false); bool is_new_realignment(false); // first determine if anything even needs to be done: const unsigned n_seg(segment_count()); for(unsigned i(0);iref_pos); ps.length = ral.pos-ref_pos; fal.path.push_back(ps); } ref_pos=ral.pos; const unsigned rs(ral.path.size()); for(unsigned j(0);j=-1); const uint32_t out_pos(orig_pos+1); bam_aux_append_unsigned(br,optag,out_pos); } ca.pos=al.pos; } // store orig cigar string if not in aux already (just assume // cigar has changed in realignment): // static const char octag[] = {'O','C'}; if((not is_orig_unmapped) and (NULL==bam_aux_get(&br,octag))) { std::string cigar; apath_to_cigar(rseg.genome_align().path,cigar); bam_aux_append(&br,octag,'Z', (cigar.size()+1),(uint8_t*) (cigar.c_str())); } // update cigar field: edit_bam_cigar(al.path,br); bam_update_bin(br); bamd.put_record(&br); } // if(al.pos < 0) return; // uint32_t flag(0); // if(not is_fwd_strand) flag |= SAMFLAG_STRAND; // if(key.is_read1) flag |= SAMFLAG_PAIR_FIRST; // else flag |= SAMFLAG_PAIR_SECOND; // std::string cigar; // apath_to_cigar(al.path,cigar); // std::string samqual; // const unsigned read_size(qual.size()); // for(unsigned i(0);i=0) { // os << tab << "AS:i:" << pe_map_score; // } // } // if(is_realigned and (realignment.pos == buffer_pos)) { // os << tab << "XS:f:" << realign_path_lnp; // } // os << "\n"; // } std::ostream& operator<<(std::ostream& os, const starling_read& sr) { os << "STARLING_READ id: " << sr.id() << " usable_genomic_mapping?: " << sr.is_genome_align_usable_mapping << "\n"; os << "full_segment_info:\n"; os << sr.get_full_segment(); const seg_id_t sc(sr.segment_count()); for(unsigned i(0);i(seg_id) << " :\n"; short_report(os,sr.get_segment(seg_id)); } return os; }