/* This file is part of Jellyfish. Jellyfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Jellyfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Jellyfish. If not, see . */ #include #include #include #include #include #include #include #include #include namespace { using jellyfish::mer_dna; static const size_t nb_inserts = 10000; static const double error_rate = 0.001; template class MerDnaBloomTest : public ::testing::Test { }; struct TestBloomCounter { typedef jellyfish::mer_dna_bloom_counter bloom_type; typedef jellyfish::mer_dna_bloom_counter_file file_type; static const unsigned int threshold_twice = 2; // Bloom counter counts up to 2. }; struct TestBloomFilter { typedef jellyfish::mer_dna_bloom_filter bloom_type; typedef jellyfish::mer_dna_bloom_filter_file file_type; static const unsigned int threshold_twice = 1; // Bloom filter counts up to 1. }; typedef ::testing::Types TestBloomCounterTypes; TYPED_TEST_CASE(MerDnaBloomTest, TestBloomCounterTypes); TYPED_TEST(MerDnaBloomTest, FalsePositive) { mer_dna::k(50); std::set mer_set; typename TypeParam::bloom_type bc(error_rate, nb_inserts); size_t collisions2 = 0; size_t collisions3 = 0; // Insert once nb_inserts. Insert twice the first half { // First insertion size_t nb_collisions = 0; mer_dna m; for(size_t i = 0; i < nb_inserts; ++i) { m.randomize(); mer_set.insert(m); nb_collisions += bc.insert(m) > 0; } EXPECT_GT(error_rate * nb_inserts, nb_collisions); } // Second insertion { size_t nb_collisions = 0; size_t nb_errors = 0; auto it = mer_set.cbegin(); for(size_t i =0; i < nb_inserts / 2; ++i, ++it) { unsigned int oc = bc.insert(*it); nb_collisions += oc > 1; nb_errors += oc < 1; } EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions); EXPECT_EQ((size_t)0, nb_errors); } // Write to file and reload two different ways file_unlink f("bloom_file"); { std::ofstream out(f.path.c_str()); bc.write_bits(out); EXPECT_TRUE(out.good()); EXPECT_EQ(bc.nb_bytes(), out.tellp()); } std::ifstream in(f.path.c_str()); typename TypeParam::bloom_type bc_read(bc.m(), bc.k(), in, bc.hash_functions()); EXPECT_EQ(bc.nb_bytes(), in.tellg()); in.close(); typename TypeParam::file_type bc_map(bc.m(), bc.k(), f.path.c_str(), bc.hash_functions()); EXPECT_EQ(bc.m(), bc_read.m()); EXPECT_EQ(bc.k(), bc_read.k()); EXPECT_EQ(bc.m(), bc_map.m()); EXPECT_EQ(bc.k(), bc_map.k()); // Check known mers { size_t nb_collisions = 0; size_t nb_errors = 0; auto it = mer_set.cbegin(); for(size_t i = 0; i < nb_inserts; ++i, ++it) { unsigned int check = bc.check(*it); EXPECT_EQ(check, bc_read.check(*it)); EXPECT_EQ(check, bc_map.check(*it)); if(i < nb_inserts / 2) { nb_errors += check < TypeParam::threshold_twice; } else { nb_errors += check < 1; nb_collisions += check > 1; } } EXPECT_EQ((size_t)0, nb_errors); EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions); } // Check unknown mers { size_t nb_collisions = 0; mer_dna m; for(size_t i = 0; i < nb_inserts; ++i) { m.randomize(); unsigned int check = bc.check(m); EXPECT_EQ(check, bc_read.check(m)); EXPECT_EQ(check, bc_map.check(m)); nb_collisions += check > 0; } EXPECT_GT(2 * error_rate * nb_inserts, nb_collisions); } } TYPED_TEST(MerDnaBloomTest, Move) { mer_dna::k(100); typename TypeParam::bloom_type bc(error_rate, nb_inserts); const unsigned long k = bc.k(); const size_t m = bc.m(); typename TypeParam::bloom_type bc2(std::move(bc)); EXPECT_EQ(k, bc2.k()); EXPECT_EQ(m, bc.m()); } }