/* This file is part of Jellyfish. Jellyfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Jellyfish is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Jellyfish. If not, see . */ #include #include #include #include #include #include #include #include namespace err = jellyfish::err; class rDNAg_t { public: rDNAg_t(CRandomMersenne *_rng) : rng(_rng), i(15), buff(0) {} char letter() { i = (i+1) % 16; if(i == 0) buff = rng->BRandom(); char res = letters[buff & 0x3]; buff >>= 2; return res; } char qual_Illumina() { return rng->IRandom(66, 104); } private: CRandomMersenne *rng; int i; uint32_t buff; static const char letters[4]; }; const char rDNAg_t::letters[4] = { 'A', 'C', 'G', 'T' }; // Output matrix // Generate matrices // uint64_t lines[64]; // for(unsigned int j = 0; j < args.mer_arg.size(); j++) { // if(args.mer_arg[j] <= 0 || args.mer_arg[j] > 31) // die << "Mer size (" << args.mer_arg[j] << ") must be between 1 and 31."; // int matrix_size = args.mer_arg[j] << 1; // SquareBinaryMatrix mat(matrix_size), inv(matrix_size); // while(true) { // for(int i = 0; i < matrix_size; i++) // lines[i] = (uint64_t)rng.BRandom() | ((uint64_t)rng.BRandom() << 32); // mat = SquareBinaryMatrix(lines, matrix_size); // try { // inv = mat.inverse(); // break; // } catch(SquareBinaryMatrix::SingularMatrix &e) {} // } // char path[4096]; // int len = snprintf(path, sizeof(path), "%s_matrix_%d", args.output_arg, // args.mer_arg[j]); // if(len < 0) // die << "Error creating the matrix file '" << path << "'" << err::no; // if((unsigned int)len >= sizeof(path)) // die << "Output prefix too long '" << args.output_arg << "'"; // std::ofstream fd(path); // if(!fd.good()) // die << "Can't open matrix file '" << path << "'" << err::no; // if(args.verbose_flag) // std::cout << "Creating matrix file '" << path << "'\n"; // mat.dump(&fd); // if(!fd.good()) // die << "Error while writing matrix '" << path << "'" << err::no; // fd.close(); // } void create_path(char *path, unsigned int path_size, const char *ext, bool many, int i, const char *output_arg) { int len; if(many) len = snprintf(path, path_size, "%s_%d.%s", output_arg, i, ext); else len = snprintf(path, path_size, "%s.%s", output_arg, ext); if(len < 0) die(err::msg() << "Error creating the fasta file '" << path << "': " << err::no); if((unsigned int)len >= path_size) die(err::msg() << "Output prefix too long '" << output_arg << "'"); } generate_sequence_args args; void output_fastq(size_t length, const char* path, CRandomMersenne& rng) { rDNAg_t rDNAg(&rng); std::ofstream fd(path); if(!fd.good()) die(err::msg() << "Can't open fasta file '" << path << "': " << jellyfish::err::no); if(args.verbose_flag) std::cout << "Creating fastq file '" << path << "'\n"; size_t total_len = 0; unsigned long read_id = 0; while(total_len < length) { fd << "@read_" << (read_id++) << "\n"; int base; for(base = 0; base < 70 && total_len < length; base++, total_len++) fd << rDNAg.letter(); fd << "\n+\n"; for(int j = 0; j < base; j++) fd << rDNAg.qual_Illumina(); fd << "\n"; } if(!fd.good()) die(err::msg() << "Error while writing fasta file '" << path << "': " << jellyfish::err::no); fd.close(); } void output_fasta(size_t length, const char* path, CRandomMersenne& rng) { rDNAg_t rDNAg(&rng); std::ofstream fd(path); if(!fd.good()) die(err::msg() << "Can't open fasta file '" << path << "': " << jellyfish::err::no); if(args.verbose_flag) std::cout << "Creating fasta file '" << path << "'\n"; size_t read_length = args.read_length_given ? args.read_length_arg : length; size_t total_len = 0; size_t read = 0; long rid = 0; fd << ">read" << ++rid << "\n"; while(total_len < length) { for(int base = 0; base < 70 && total_len < length && read < read_length; base++) { fd << rDNAg.letter(); total_len++; read++; } fd << "\n"; if(read >= read_length) { fd << ">read" << ++rid << "\n"; read = 0; } } if(!fd.good()) die(err::msg() << "Error while writing fasta file '" << path << "': " << jellyfish::err::no); fd.close(); } int main(int argc, char *argv[]) { args.parse(argc, argv); if(args.verbose_flag) std::cout << "Seed: " << args.seed_arg << "\n"; CRandomMersenne rng(args.seed_arg); // Output sequence char path[4096]; bool many = args.length_arg.size() > 1; for(unsigned int i = 0; i < args.length_arg.size(); ++i) { if(args.fastq_flag) { create_path(path, sizeof(path), "fq", many, i, args.output_arg); output_fastq(args.length_arg[i], path, rng); } else { create_path(path, sizeof(path), "fa", many, i, args.output_arg); output_fasta(args.length_arg[i], path, rng); } } return 0; }