/***************************************************************************** unionBedGraphs.cpp (c) 2010 - Assaf Gordon, CSHL - Aaron Quinlan, UVA Hall Laboratory Department of Biochemistry and Molecular Genetics University of Virginia aaronquinlan@gmail.com Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ #include #include #include #include #include #include "bedGraphFile.h" #include "unionBedGraphs.h" using namespace std; UnionBedGraphs::UnionBedGraphs(std::ostream& _output, const vector& _filenames, const vector& _titles, bool _print_empty_regions, const std::string& _genome_size_filename, const std::string& _no_coverage_value ) : filenames(_filenames), titles(_titles), output(_output), current_non_zero_inputs(0), print_empty_regions(_print_empty_regions), genome_sizes(NULL), no_coverage_value(_no_coverage_value) { if (print_empty_regions) { assert(!_genome_size_filename.empty()); genome_sizes = new GenomeFile(_genome_size_filename); } } UnionBedGraphs::~UnionBedGraphs() { CloseBedgraphFiles(); if (genome_sizes) { delete genome_sizes; genome_sizes = NULL ; } } void UnionBedGraphs::Union() { OpenBedgraphFiles(); // Add the first interval from each file for(size_t i=0;i 0) PrintEmptyCoverage(0,current_start); // Intervals loop - until all intervals (of current chromosome) from all files are used. do { CHRPOS current_end = queue.top().coord; PrintCoverage(current_start, current_end); current_start = ConsumeNextCoordinate(); } while (!queue.empty()); // User wanted empty regions, and the last coordinate is not the last coordinate of the chromosome // print a dummy empty coverage if (print_empty_regions) { CHRPOS chrom_size = genome_sizes->getChromSize(current_chrom); if (current_start < chrom_size) PrintEmptyCoverage(current_start, chrom_size); } } while (!AllFilesDone()); } CHRPOS UnionBedGraphs::ConsumeNextCoordinate() { assert(!queue.empty()); CHRPOS new_position = queue.top().coord; do { PointWithDepth item = queue.top(); UpdateInformation(item); queue.pop(); } while (!queue.empty() && queue.top().coord == new_position); return new_position; } void UnionBedGraphs::UpdateInformation(const PointWithDepth &item) { // Update the depth coverage for this file // Which coordinate is it - start or end? switch (item.coord_type) { case START: current_depth[item.source_index] = item.depth; current_non_zero_inputs++; break; case END: //Read the next interval from this file AddInterval(item.source_index); current_depth[item.source_index] = no_coverage_value; current_non_zero_inputs--; break; default: assert(0); } } void UnionBedGraphs::PrintHeader() { output << "chrom\tstart\tend" ; for (size_t i=0;i(index) < bedgraph_files.size()); current_bedgraph_item[index].chrom=""; BedGraphFile *file = bedgraph_files[index]; BEDGRAPH_STR bg; int lineNum = 0; BedGraphLineStatus status; while ( (status = file->GetNextBedGraph(bg, lineNum)) != BEDGRAPH_INVALID ) { if (status != BEDGRAPH_VALID) continue; current_bedgraph_item[index] = bg; break; } } bool UnionBedGraphs::AllFilesDone() { for (size_t i=0;i(index) < bedgraph_files.size()); //This file has no more intervals if (current_bedgraph_item[index].chrom.empty()) return ; //If the next interval belongs to a different chrom, don't add it if (current_bedgraph_item[index].chrom!=current_chrom) return ; const BEDGRAPH_STR &bg(current_bedgraph_item[index]); PointWithDepth start_item(index, START, bg.start, bg.depth); PointWithDepth end_item(index, END, bg.end, bg.depth); queue.push(start_item); queue.push(end_item); LoadNextBedgraphItem(index); } void UnionBedGraphs::OpenBedgraphFiles() { for (size_t i=0;iOpen(); bedgraph_files.push_back(file); current_depth.push_back(no_coverage_value); } current_bedgraph_item.resize(filenames.size()); } void UnionBedGraphs::CloseBedgraphFiles() { for (size_t i=0;i