#include #include #include #include #include namespace { using std::string; using std::istringstream; template struct opened_streams { Iterator begin_, end_; opened_streams(Iterator begin, Iterator end) : begin_(begin), end_(end) { } std::unique_ptr next() { std::unique_ptr res; if(begin_ != end_) { res.reset(*begin_); ++begin_; } return res; } }; typedef jellyfish::mer_overlap_sequence_parser > parser_type; TEST(MerOverlapSequenceParser, OneSmallSequence) { static const char* seq = "ATTACCTTGTACCTTCAGAGC"; const char* file_name = "OneSmallSequence.fa"; file_unlink fu(file_name); { std::ofstream sequence(file_name); sequence << ">header\n" << seq; } auto sequence = new std::ifstream(file_name); opened_streams streams(&sequence, &sequence + 1); parser_type parser(10, 1, 10, 100, streams); parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); EXPECT_EQ(strlen(seq), j->end - j->start); EXPECT_STREQ(seq, j->start); j.release(); parser_type::job j2(parser); EXPECT_TRUE(j2.is_empty()); EXPECT_EQ((size_t)1, parser.nb_files()); EXPECT_EQ((size_t)1, parser.nb_reads()); } string generate_sequences(std::ostream& os, int a, int b, int nb, bool fastq = false) { static const char bases[4] = {'A', 'C', 'G', 'T' }; string res; for(int i = 0; i < nb; ++i) { int len = a + random() % b; os << (fastq ? "@" : ">") << "r" << i << " " << len << "\n"; for(int j = 0; j < len; ++j) { char b = bases[random() & 0x3]; os << b; res += b; if(random() % 16 == 0) os << "\n"; } if(i != nb - 1) res += 'N'; os << "\n"; if(fastq) { os << "+\n"; for(int j = 0; j < len; ++j) { if(random() % 16 == 0) os << "\n"; os << "#"; } os << "\n"; } } return res; } TEST(MerOverlapSequenceParser, ManySmallSequences) { const char* file_name = "ManySmallSequences.fa"; file_unlink fu(file_name); static const int nb_reads = 20; static const int mer_len = 10; std::ofstream o_sequence(file_name); string res = generate_sequences(o_sequence, 20, 64, nb_reads); o_sequence.close(); auto sequence = new std::ifstream(file_name); opened_streams streams(&sequence, &sequence + 1); parser_type parser(mer_len, 1, 10, 100, streams); size_t offset = 0; while(true) { parser_type::job j(parser); if(j.is_empty()) break; SCOPED_TRACE(::testing::Message() << offset << ": " << res.substr(offset, j->end - j->start)); EXPECT_EQ(res.substr(offset, j->end - j->start).c_str(), string(j->start, j->end - j->start)); offset += j->end - j->start - (mer_len - 1); } EXPECT_EQ(res.size(), offset + mer_len - 1); EXPECT_EQ((size_t)1, parser.nb_files()); EXPECT_EQ((size_t)nb_reads, parser.nb_reads()); } TEST(MerOverlapSequenceParser, BigSequences) { const char* file_name1 = "BigSequences1.fa"; const char* file_name2 = "BigSequences2.fa"; file_unlink fu1(file_name1); file_unlink fu2(file_name2); std::ofstream o_sequence(file_name1); const string res0 = generate_sequences(o_sequence, 200, 100, 3); o_sequence.close(); o_sequence.open(file_name2); const string res1 = generate_sequences(o_sequence, 200, 100, 3); o_sequence.close(); std::ifstream* tmps[2]; tmps[0] = new std::ifstream(file_name1); tmps[1] = new std::ifstream(file_name2); opened_streams streams(tmps, tmps + 2); static const int mer_len = 10; parser_type parser(mer_len, 1, 10, 100, streams); size_t offset = 0; while(offset < res0.size() - mer_len + 1) { parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); SCOPED_TRACE(::testing::Message() << offset << ": " << res0.substr(offset, j->end - j->start)); EXPECT_EQ(res0.substr(offset, j->end - j->start).c_str(), string(j->start, j->end - j->start)); offset += j->end - j->start - (mer_len - 1); } EXPECT_EQ(res0.size(), offset + mer_len - 1); offset = 0; while(offset < res1.size() - mer_len + 1) { parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); SCOPED_TRACE(::testing::Message() << offset << ": " << res1.substr(offset, j->end - j->start)); EXPECT_EQ(res1.substr(offset, j->end - j->start).c_str(), string(j->start, j->end - j->start)); offset += j->end - j->start - (mer_len - 1); } EXPECT_EQ(res1.size(), offset + mer_len - 1); parser_type::job j2(parser); EXPECT_TRUE(j2.is_empty()); EXPECT_EQ((size_t)2, parser.nb_files()); EXPECT_EQ((size_t)6, parser.nb_reads()); } TEST(MerOverlapSequenceParser, Fastq) { const char* file_name = "Fastq.fa"; file_unlink fu(file_name); static const int nb_reads = 100; static const int mer_len = 20; std::ofstream o_sequence(file_name); string res = generate_sequences(o_sequence, 10, 50, nb_reads, true); o_sequence.close(); auto sequence = new std::ifstream(file_name); opened_streams streams(&sequence, &sequence + 1); parser_type parser(mer_len, 1, 10, 100, streams); size_t offset = 0; while(true) { parser_type::job j(parser); if(j.is_empty()) break; // SCOPED_TRACE(::testing::Message() << offset << ": " << res.substr(offset, j->end - j->start)); EXPECT_EQ(res.substr(offset, j->end - j->start).c_str(), string(j->start, j->end - j->start)); offset += j->end - j->start - (mer_len - 1); } EXPECT_EQ(res.size(), offset + mer_len - 1); EXPECT_EQ((size_t)1, parser.nb_files()); EXPECT_EQ((size_t)nb_reads, parser.nb_reads()); } } // namespace {