/* This file is part of Jellyfish. Jellyfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Jellyfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Jellyfish. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include namespace err = jellyfish::err; using jellyfish::mer_dna; using jellyfish::mer_dna_bloom_counter; typedef std::vector file_vector; typedef jellyfish::mer_overlap_sequence_parser > sequence_parser; typedef jellyfish::mer_iterator mer_iterator; static query_main_cmdline args; // mer_dna_bloom_counter query_load_bloom_filter(const char* path) { // return res; // } template void query_from_sequence(PathIterator file_begin, PathIterator file_end, const Database& db, std::ostream& out, bool canonical) { jellyfish::stream_manager streams(file_begin, file_end); sequence_parser parser(mer_dna::k(), 1, 3, 4096, streams); for(mer_iterator mers(parser, canonical); mers; ++mers) out << *mers << " " << db.check(*mers) << "\n"; } template void query_from_cmdline(std::vector mers, const Database& db, std::ostream& out, bool canonical) { mer_dna m; for(auto it = mers.cbegin(); it != mers.cend(); ++it) { try { m = *it; if(canonical) m.canonicalize(); out << m << " " << db.check(m) << "\n"; } catch(std::length_error e) { std::cerr << "Invalid mer '" << *it << "'\n"; } } } template void query_from_stdin(const Database& db, std::ostream& out, bool canonical) { std::string buffer; mer_dna m; while(getline(std::cin, buffer)) { try { m = buffer; if(canonical) m.canonicalize(); out << db.check(m) << std::endl; // a flush is need for interactive use } catch(std::length_error e) { std::cerr << "Invalid mer '" << buffer << "'" << std::endl; } } } int query_main(int argc, char *argv[]) { args.parse(argc, argv); ofstream_default out(args.output_given ? args.output_arg : 0, std::cout); if(!out.good()) err::die(err::msg() << "Error opening output file '" << args.output_arg << "'"); std::ifstream in(args.file_arg, std::ios::in|std::ios::binary); jellyfish::file_header header(in); if(!in.good()) err::die(err::msg() << "Failed to parse header of file '" << args.file_arg << "'"); mer_dna::k(header.key_len() / 2); if(header.format() == "bloomcounter") { jellyfish::hash_pair fns(header.matrix(1), header.matrix(2)); mer_dna_bloom_counter filter(header.size(), header.nb_hashes(), in, fns); if(!in.good()) err::die("Bloom filter file is truncated"); in.close(); query_from_sequence(args.sequence_arg.begin(), args.sequence_arg.end(), filter, out, header.canonical()); query_from_cmdline(args.mers_arg, filter, out, header.canonical()); if(args.interactive_flag) query_from_stdin(filter, out, header.canonical()); } else if(header.format() == binary_dumper::format) { jellyfish::mapped_file binary_map(args.file_arg); if(!args.no_load_flag && (args.load_flag || (args.sequence_arg.begin() != args.sequence_arg.end()) || (args.mers_arg.size() > 100))) binary_map.load(); binary_query bq(binary_map.base() + header.offset(), header.key_len(), header.counter_len(), header.matrix(), header.size() - 1, binary_map.length() - header.offset()); query_from_sequence(args.sequence_arg.begin(), args.sequence_arg.end(), bq, out, header.canonical()); query_from_cmdline(args.mers_arg, bq, out, header.canonical()); if(args.interactive_flag) query_from_stdin(bq, out, header.canonical()); } else { err::die(err::msg() << "Unsupported format '" << header.format() << "'. Must be a bloom counter or binary list."); } return 0; }