/***************************************************************************** chromsweep.cpp (c) 2009 - Aaron Quinlan Hall Laboratory Department of Biochemistry and Molecular Genetics University of Virginia aaronquinlan@gmail.com Licenced under the GNU General Public License 2.0 license. ******************************************************************************/ #include "lineFileUtilities.h" #include "chromsweep.h" #include bool after(const BED &a, const BED &b); /* // constructor using existing BedFile pointers */ ChromSweep::ChromSweep(BedFile *query, BedFile *db, bool sameStrand, bool diffStrand, float overlapFraction, bool reciprocal, bool useMergedIntervals, bool printHeader) : _query(query) , _db(db) , _overlapFraction(overlapFraction) , _sameStrand(sameStrand) , _diffStrand(diffStrand) , _reciprocal(reciprocal) , _useMergedIntervals(useMergedIntervals) { _hits.reserve(100000); _query->Open(); if (printHeader) _query->PrintHeader(); _db->Open(); NextQuery(); NextDatabase(); } /* Constructor with filenames */ ChromSweep::ChromSweep(string &queryFile, string &dbFile) { _hits.reserve(100000); _query = new BedFile(queryFile); _db = new BedFile(dbFile); _query->Open(); _db->Open(); NextQuery(); NextDatabase(); } bool ChromSweep::NextQuery() { if (!_useMergedIntervals) return _query->GetNextBed(_curr_qy, true); else return _query->GetNextMergedBed(_curr_qy); } bool ChromSweep::NextDatabase() { if (!_useMergedIntervals) return _db->GetNextBed(_curr_db, true); else return _db->GetNextMergedBed(_curr_db); } /* Destructor */ ChromSweep::~ChromSweep(void) { } void ChromSweep::ScanCache() { list::iterator c = _cache.begin(); while (c != _cache.end()) { if ((_curr_qy.chrom == c->chrom) && !(after(_curr_qy, *c))) { if (IsValidHit(_curr_qy, *c)) { _hits.push_back(*c); } ++c; } else { c = _cache.erase(c); } } } bool ChromSweep::ChromChange() { // the files are on the same chrom if (_curr_qy.chrom == _curr_db.chrom) { return false; } // the query is ahead of the database. // fast-forward the database to catch-up. else if ((_curr_qy.chrom > _curr_db.chrom) && (!_db->Empty())) { while (NextDatabase() && _curr_db.chrom < _curr_qy.chrom) { } _cache.clear(); return false; } // the database is ahead of the query. else { // 1. scan the cache for remaining hits on the query's current chrom. if (_curr_qy.chrom == _curr_chrom) { ScanCache(); _results.push(make_pair(_curr_qy, _hits)); _hits.clear(); } // 2. fast-forward until we catch up and report 0 hits until we do. else if (_curr_qy.chrom < _curr_db.chrom) { _results.push(make_pair(_curr_qy, _no_hits)); _cache.clear(); } NextQuery(); _curr_chrom = _curr_qy.chrom; return true; } } bool ChromSweep::IsValidHit(const BED &query, const BED &db) { CHRPOS aLength = query.end - query.start; CHRPOS s = max(query.start, db.start); CHRPOS e = min(query.end, db.end); int overlapBases = (e - s); // 1. is there sufficient overlap w.r.t A? if ( (float) overlapBases / (float) aLength >= _overlapFraction) { CHRPOS bLength = (db.end - db.start); float bOverlap = ( (float) overlapBases / (float) bLength ); // Now test for necessary strandedness. bool strands_are_same = (query.strand == db.strand); if ( (_sameStrand == false && _diffStrand == false) || (_sameStrand == true && strands_are_same == true) || (_diffStrand == true && strands_are_same == false) ) { // 3. did the user request reciprocal overlap // (i.e. sufficient overlap w.r.t. both A and B?) if (!_reciprocal) return true; else if (bOverlap >= _overlapFraction) return true; } } return false; } bool ChromSweep::Next(pair > &next) { if (!_query->Empty()) { // have we changed chromosomes? if (ChromChange() == false) { // scan the database cache for hits ScanCache(); // advance the db until we are ahead of the query. // update hits and cache as necessary while (!_db->Empty() && _curr_qy.chrom == _curr_db.chrom && !(after(_curr_db, _curr_qy))) { if (IsValidHit(_curr_qy, _curr_db)) { _hits.push_back(_curr_db); } _cache.push_back(_curr_db); NextDatabase(); } // add the hits for this query to the pump _results.push(make_pair(_curr_qy, _hits)); // reset for the next query _hits.clear(); NextQuery(); _curr_chrom = _curr_qy.chrom; } } // report the next set if hits if there are still overlaps in the pump if (!_results.empty()) { next = _results.front(); _results.pop(); return true; } else { return false; } }