// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// /// \author Chris Saunders /// #include "alignment_util.hh" #include "starling_read_align_shared.hh" #include "starling_read_buffer.hh" #include "starling_read_util.hh" #include "blt_util/log.hh" #include "starling/starling_shared.hh" #include #include const starling_read_buffer::segment_group_t starling_read_buffer::_empty_segment_group; starling_read_buffer:: ~starling_read_buffer() { read_data_t::iterator i(_read_data.begin()); const read_data_t::iterator i_end(_read_data.end()); for(;i!=i_end;++i) delete i->second; } std::pair starling_read_buffer:: add_read_alignment(const unsigned max_indel_size, const bam_record& br, const alignment& al, const bool is_usable_mapping, const READ_ALIGN::index_t rat, const align_id_t contig_id) { assert(not br.is_unmapped()); const bool is_genomic(READ_ALIGN::GENOME == rat); const read_key tmp_key(br); const read_key_lup_t::const_iterator i(_read_key.find(tmp_key)); const bool is_key_found(i!=_read_key.end()); align_id_t this_read_id; if(not is_key_found){ this_read_id=_head_read_id++; _read_data[this_read_id] = new starling_read(br,is_genomic); } else { this_read_id=i->second; } starling_read& sread(*(_read_data[this_read_id])); if(not is_key_found){ _read_key[sread.key()]=this_read_id; sread.id() = this_read_id; } else { assert(sread.key() == tmp_key); { // no GROUPER input accepted for reads crossing splice junctions: bool is_spliced_contig_read(false); if(is_genomic) { if((not sread.contig_align().empty()) and (apath_exon_count(al.path)>1)) is_spliced_contig_read=true; } else { if(sread.is_segmented()) is_spliced_contig_read=true; } if(is_spliced_contig_read) { log_os << "ERROR: assembled contig realignments are not allowed for splice junction reads. Read: " << sread.key() << "\n"; exit(EXIT_FAILURE); } } if(not sread.is_compatible_alignment(al,rat,contig_id,max_indel_size)){ log_os << "WARNING: skipping new alignment: " << al << " which is incompatible with alignments in read: " << sread; return std::make_pair(false,0); } // contig BAM records are incomplete, so we want to fill in // the full record if there's a mapped genomic alignment // available: if(is_genomic) sread.set_genomic_bam_record(br); } if(is_genomic) { sread.set_genome_align(al); sread.is_genome_align_usable_mapping = is_usable_mapping; // deal with segmented reads now: if(sread.is_segmented()){ const uint8_t n_seg(sread.segment_count()); for(unsigned i(0);isecond->buffer_pos)); assert(j != _pos_group.end()); assert(j->second.count(read_id)==1); j->second.erase(read_id); // alter data within read: i->second->buffer_pos=new_buffer_pos; // add to new pos list: (_pos_group[new_buffer_pos]).insert(read_id); } #endif read_segment_iter starling_read_buffer:: get_pos_read_segment_iter(const pos_t pos) { const segment_group_t* g(&(_empty_segment_group)); const pos_group_t::const_iterator j(_pos_group.find(pos)); if(j != _pos_group.end()) g=(&(j->second)); return read_segment_iter(*this,g->begin(),g->end()); } void starling_read_buffer:: clear_pos(const pos_t pos) { const pos_group_t::iterator i(_pos_group.find(pos)); if(i == _pos_group.end()) return; segment_group_t& seg_group(i->second); segment_group_t::const_iterator j(seg_group.begin()),j_end(seg_group.end()); for(;j!=j_end;++j){ const align_id_t read_id(j->first); const seg_id_t seg_id(j->second); const read_data_t::iterator k(_read_data.find(read_id)); if(k == _read_data.end()) continue; const starling_read* srp(k->second); // only remove read from data structure when we find the last // segment: -- note this assumes that two segments will not // occur at the same position: // if(seg_id != srp->segment_count()) continue; // remove from contigs: typedef contig_align_t cat; const cat& ca(srp->contig_align()); cat::const_iterator m(ca.begin()), m_end(ca.end()); for(;m!=m_end;++m){ const align_id_t contig_id(m->first); align_id_group_t::iterator p(_contig_group.find(contig_id)); if(p==_contig_group.end()) continue; p->second.erase(read_id); } // remove from simple lookup structures and delete read itself: _read_data.erase(k); _read_key.erase(srp->key()); delete srp; } _pos_group.erase(i); } void starling_read_buffer:: dump_pos(const pos_t pos, std::ostream& os) const { const pos_group_t::const_iterator i(_pos_group.find(pos)); if(i == _pos_group.end()) return; os << "READ_BUFFER_POSITION: " << pos << " DUMP ON\n"; const segment_group_t& seg_group(i->second); segment_group_t::const_iterator j(seg_group.begin()),j_end(seg_group.end()); for(unsigned r(0);j!=j_end;++j){ const align_id_t read_id(j->first); const seg_id_t seg_id(j->second); const read_data_t::const_iterator k(_read_data.find(read_id)); if(k == _read_data.end()) continue; const starling_read& sr(*(k->second)); os << "READ_BUFFER_POSITION: " << pos << " read_segment_no: " << ++r << " seg_id: " << seg_id << "\n"; os << sr.get_segment(seg_id); } os << "READ_BUFFER_POSITION: " << pos << " DUMP OFF\n"; } read_segment_iter::ret_val read_segment_iter:: get_ptr(){ static const ret_val null_ret(std::make_pair(static_cast(NULL),0)); if(_head==_end) return null_ret; const align_id_t read_id(_head->first); const seg_id_t seg_id(_head->second); const starling_read_buffer::read_data_t::iterator i(_buff._read_data.find(read_id)); if(i==_buff._read_data.end()) return null_ret; return std::make_pair(i->second,seg_id); }