#include #include #include #include namespace { using std::string; using std::istringstream; template struct opened_streams { Iterator begin_, end_; opened_streams(Iterator begin, Iterator end) : begin_(begin), end_(end) { } std::unique_ptr next() { std::unique_ptr res; if(begin_ != end_) { res.reset(*begin_); ++begin_; } return res; } }; typedef jellyfish::whole_sequence_parser > parser_type; TEST(SequenceParser, Fasta) { static const char* seq1 = "ATTACCTTGTACCTTCAGAGC"; static const char* seq2 = "TTCGATCCCTTGATAATTAGTCACGTTAGCT"; const char* file_name = "Fasta.fa"; file_unlink fu(file_name); { std::ofstream sequence(file_name); sequence << ">header1\n" << seq1 << "\n\n" << ">header2\n" << seq2 << "\n\n\n" << seq1; } auto sequence = new std::ifstream(file_name); opened_streams streams(&sequence, &sequence + 1); parser_type parser(10, 1, 1, streams); { parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); ASSERT_EQ((size_t)1, j->nb_filled); EXPECT_STREQ("header1", j->data[0].header.c_str()); EXPECT_STREQ(seq1, j->data[0].seq.c_str()); } { parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); ASSERT_EQ((size_t)1, j->nb_filled); EXPECT_STREQ("header2", j->data[0].header.c_str()); EXPECT_EQ(strlen(seq2) + strlen(seq1), j->data[0].seq.size()); EXPECT_STREQ(seq2, j->data[0].seq.substr(0, strlen(seq2)).c_str()); EXPECT_STREQ(seq1, j->data[0].seq.substr(strlen(seq2)).c_str()); } { parser_type::job j(parser); EXPECT_TRUE(j.is_empty()); } EXPECT_EQ((size_t)1, parser.nb_files()); EXPECT_EQ((size_t)2, parser.nb_reads()); } TEST(SequenceParser, Fastq) { static const char* seq1 = "ATTACCTTGTACCTTCAGAGC"; static const char* seq2 = "TTCGATCCCTTGATAATTAGTCACGTTAGCT"; const char* file_name = "Fasta.fq"; file_unlink fu(file_name); { std::ofstream sequence(file_name); sequence << "@header1\n" << seq1 << "\n\n" << "+and some stuff\n" << seq1 << "\n" << "@header2\n" << seq2 << "\n\n\n" << seq1 << "\n" << "+\n" << seq1 << "\n" << seq2 << "\n"; } auto sequence = new std::ifstream(file_name); opened_streams streams(&sequence, &sequence + 1); parser_type parser(10, 1, 1, streams); { parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); ASSERT_EQ((size_t)1, j->nb_filled); EXPECT_STREQ("header1", j->data[0].header.c_str()); EXPECT_STREQ(seq1, j->data[0].seq.c_str()); EXPECT_STREQ(seq1, j->data[0].qual.c_str()); } { parser_type::job j(parser); ASSERT_FALSE(j.is_empty()); ASSERT_EQ((size_t)1, j->nb_filled); EXPECT_STREQ("header2", j->data[0].header.c_str()); EXPECT_EQ(strlen(seq2) + strlen(seq1), j->data[0].seq.size()); EXPECT_EQ(strlen(seq2) + strlen(seq1), j->data[0].qual.size()); EXPECT_STREQ(seq2, j->data[0].seq.substr(0, strlen(seq2)).c_str()); EXPECT_STREQ(seq1, j->data[0].seq.substr(strlen(seq2)).c_str()); EXPECT_STREQ(seq1, j->data[0].qual.substr(0, strlen(seq1)).c_str()); EXPECT_STREQ(seq2, j->data[0].qual.substr(strlen(seq1)).c_str()); } { parser_type::job j(parser); EXPECT_TRUE(j.is_empty()); } EXPECT_EQ((size_t)1, parser.nb_files()); EXPECT_EQ((size_t)2, parser.nb_reads()); } TEST(SequenceParser, FastaMany) { const char* file_name = "FastaMany.fa"; file_unlink fu(file_name); static const int nb_sequences = 1000; { std::ofstream sequence(file_name); ASSERT_TRUE(sequence.good()); for(int i = 0; i < nb_sequences; ++i) { const int seq_len = random_bits(8) + 10; sequence << ">" << i << " " << seq_len << "\n"; for(int j = 0; j < seq_len; ++j) { sequence << (char)('A' + j % 26); if(random_bits(6) == 0) sequence << '\n'; } sequence << '\n'; } } auto sequence = new std::ifstream(file_name); opened_streams streams(&sequence, &sequence + 1); parser_type parser(10, 13, 1, streams); int got_sequences = 0; while(true) { parser_type::job job(parser); if(job.is_empty()) break; for(size_t i = 0; i < job->nb_filled; ++i) { ++got_sequences; const size_t len_pos = job->data[i].header.find_first_of(" "); const int seq_len = atoi(job->data[i].header.c_str() + len_pos + 1); ASSERT_LE(10, seq_len); for(int j = 0; j < seq_len; ++j) EXPECT_EQ('A' + (j % 26), job->data[i].seq[j]); } } EXPECT_EQ(nb_sequences, got_sequences); EXPECT_EQ((size_t)1, parser.nb_files()); EXPECT_EQ((size_t)nb_sequences, parser.nb_reads()); } }