/*
    Pollux
    Copyright (C) 2014  Eric Marinier

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "Utility.h"
#include "ErrorProcessing.h"

/*
 * NOTE(review): the system header names below were chosen from the library
 * functions this file calls (printf, malloc/realloc/free/atoi, strcmp/strcpy/
 * strlen, getcwd, bool) -- confirm against the project's build.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "Globals.h"
#include "Reads.h"

/*
 * Width, in bytes, of one file-name slot inside the flat inputFileNames
 * buffer. The buffer is handed to convertFASTQToFASTK() and
 * processCorrection(); presumably they index it with this same stride, so
 * do not change the value without checking those functions.
 */
enum { FILE_NAME_SLOT_SIZE = 200 };

/**
 * Validates the parsed command-line configuration.
 *
 * Only numInputFiles, paired and KMER_SIZE are inspected; the remaining
 * parameters are accepted for interface compatibility and are not read.
 * The first failed check prints an error message to stdout.
 *
 * @param numInputFiles   Number of input files supplied with -i.
 * @param inputFileNames  Flat buffer of input file names (not read).
 * @param outputFileName  Output location (not read).
 * @param paired          Whether paired input was requested.
 * @param type            Sequencing technology (not read).
 * @param fastk           Whether FASTK conversion was requested (not read).
 * @param KMER_SIZE       Requested k-mer size; must lie in [4, 31].
 *
 * @return true when every check passes, false otherwise.
 */
bool checkInput(int numInputFiles, char* inputFileNames, char* outputFileName,
        bool paired, enum SEQUENCING_TECHNOLOGY type, bool fastk,
        unsigned int KMER_SIZE)
{
    (void) inputFileNames;
    (void) outputFileName;
    (void) type;
    (void) fastk;

    if (numInputFiles < 1)
    {
        printf("ERROR: Need at least one input file.\n");
        return false;
    }

    if (paired && numInputFiles != 2)
    {
        printf("ERROR: Provide only two input files when specifying paired input.\n");
        return false;
    }

    if (KMER_SIZE < 4)
    {
        printf("ERROR: k-mer size is too small.\n");
        return false;
    }

    if (KMER_SIZE > 31)
    {
        printf("ERROR: k-mer size is too large.\n");
        return false;
    }

    return true;
}

/**
 * Prints the usage text (options and examples) to stdout.
 */
void help()
{
    printf("\n");
    printf("USAGE: \n");
    printf("\n");
    printf("ERROR CORRECTION\n");
    printf("Required: \n");
    printf("\n");
    printf("\t-i \t[file] \tSpecify one or many FASTQ input files.\n");
    printf("\n");
    printf("Optional: \n");
    printf("\n");
    printf("\t-o \t \tOutput directory.\n");
    printf("\t-p \t \tSpecify input should be treated as paired.\n");
    printf("\n");
    printf("\t-s \t[bool] \tSubstitution corrections. \"true\" or \"false\".\n");
    printf("\t-n \t[bool] \tInsertion corrections. \"true\" or \"false\".\n");
    printf("\t-d \t[bool] \tDeletion corrections. \"true\" or \"false\".\n");
    printf("\t-h \t[bool] \tHomopolymer corrections. \"true\" or \"false\".\n");
    printf("\t-f \t[bool] \tLow k-mer read filtering. \"true\" or \"false\".\n");
    //printf("\t-q \t[bool] \tQuality score updating. \"true\" or \"false\".\n");
    printf("\n");
    printf("\t-k \t[int] \tSpecify the k-mer size.\n");
    printf("\t-b \t[int] \tSpecify the input batch size.\n");
    printf("\n");
    printf("FASTK CONVERSION\n");
    printf("Required: \n");
    printf("\t-fastk \t\tFASTQ to FASTK file conversion.\n");
    printf("\t-i \t[file] \tSpecify one or many input files.\n");
    printf("\n");
    printf("EXAMPLES: \n");
    printf("\n");
    printf("./error -i file1.fastq\n");
    printf("\tCorrect a single file.\n");
    printf("\n");
    printf("./error -i file1.fastq file2.fastq file3.fastq\n");
    printf("\tCorrect multiple file together.\n");
    printf("\n");
    printf("./error -i frag_1.fastq frag_2.fastq -p\n");
    printf("\tCorrect two paired files.\n");
    printf("\n");
    printf("./error -fastk -i file1.fastq\n");
    printf("\tConvert file1.fastq to FASTK format.\n");
    printf("\n");
}

/* Returns true when `option` is one of the flags that consumes the next argument as its value. */
static bool requiresValue(const char* option)
{
    static const char* const valued[] =
    {
        "-t", "-b", "-s", "-n", "-d", "-h", "-f", "-k"
    };

    for (size_t n = 0; n < sizeof(valued) / sizeof(valued[0]); n++)
    {
        if (strcmp(valued[n], option) == 0)
        {
            return true;
        }
    }

    return false;
}

/* Sets *flag from the text "true"/"false" and reports the outcome; any other text leaves *flag untouched. */
static void parseToggle(const char* value, const char* label, bool* flag)
{
    if (strcmp("true", value) == 0)
    {
        *flag = true;
        printf(": %s enabled\n", label);
    }
    else if (strcmp("false", value) == 0)
    {
        *flag = false;
        printf(": %s disabled\n", label);
    }
    else
    {
        printf(": input not understood!\n");
    }
}

/*
 * Appends `added` names from `args` to the flat buffer *names, which already
 * holds `existing` names. Returns false (leaving *names untouched) when a
 * name does not fit in one slot or when memory cannot be obtained.
 */
static bool appendInputFiles(char** names, int existing, char** args, int added)
{
    if (added == 0)
    {
        return true;
    }

    // Reject over-long names up front so a slot can never be overrun.
    for (int file = 0; file < added; file++)
    {
        if (strlen(args[file]) >= (size_t) FILE_NAME_SLOT_SIZE)
        {
            printf("\nERROR: Input file name is too long: %s\n", args[file]);
            return false;
        }
    }

    size_t total = (size_t) existing + (size_t) added;
    char* grown = (char*) realloc(*names, total * FILE_NAME_SLOT_SIZE);

    if (grown == NULL)
    {
        printf("\nERROR: Out of memory while storing input file names.\n");
        return false;
    }

    for (int file = 0; file < added; file++)
    {
        size_t slot = (size_t) existing + (size_t) file;
        strcpy(&grown[slot * FILE_NAME_SLOT_SIZE], args[file]);
    }

    *names = grown;
    return true;
}

/**
 * Program entry point: parses the command line, validates it with
 * checkInput() and then runs either FASTQ-to-FASTK conversion or error
 * correction.
 *
 * Every option that takes a value is checked for the presence of that value
 * before it is read, and file names / the output directory are length-checked
 * before being copied into their fixed-size buffers.
 *
 * @return 0 on success; 1 when help was shown, an argument was malformed, or
 *         checkInput() rejected the configuration.
 */
int main(int argc, char**argv)
{
    char outputDirectory[1024];
    enum SEQUENCING_TECHNOLOGY type = UNKNOWN;
    int numInputFiles = 0;
    char* inputFileNames = NULL;
    int status = 0;

    bool paired = false;
    bool fastk = false;

    // Corrections:
    bool substitutions = true;
    bool insertions = true;
    bool deletions = true;
    bool homopolymers = true;
    bool qualityUpdating = true;
    bool filtering = true;

    printf("\n");
    printf("Pollux 1.0.2\n");
    printf("Source compiled on %s at %s.\n", __DATE__, __TIME__);
    printf("\n");

    if (argc == 1)
    {
        help();
        return 1;
    }

    // Set default output directory; fall back to "." if the working
    // directory cannot be determined so the buffer is never left unset.
    if (getcwd(outputDirectory, sizeof(outputDirectory)) == NULL)
    {
        strcpy(outputDirectory, ".");
    }

    for (int i = 1; i < argc; i++)
    {
        const char* option = argv[i];
        // NULL when this option is the final argument.
        char* value = (i < (argc - 1)) ? argv[i + 1] : NULL;

        printf("Considering argument: %s ", option);

        // HELP
        if (strcmp("-help", option) == 0 || strcmp("--help", option) == 0)
        {
            help();
            status = 1;
            goto done;
        }
        // INPUT FILES
        else if (strcmp("-i", option) == 0 && value != NULL)
        {
            // Count the names that follow, up to the next flag.
            int newFiles = 0;

            for (int arg = i + 1; arg < argc && argv[arg][0] != '-'; arg++)
            {
                newFiles++;
            }

            // Appending (rather than re-allocating from scratch) keeps a
            // repeated -i from leaking memory or re-reading earlier names.
            if (!appendInputFiles(&inputFileNames, numInputFiles, &argv[i + 1], newFiles))
            {
                status = 1;
                goto done;
            }

            numInputFiles += newFiles;
            i += newFiles;

            printf(": number of input files is %d\n", numInputFiles);
        }
        // OUTPUT FILE(S)
        else if (strcmp("-o", option) == 0 && value != NULL)
        {
            if (strlen(value) >= sizeof(outputDirectory))
            {
                printf("\nERROR: Output directory name is too long.\n");
                status = 1;
                goto done;
            }

            strcpy(outputDirectory, value);
            i++;

            printf(": output directory is %s\n", outputDirectory);
        }
        // VALUE MISSING
        else if (requiresValue(option) && value == NULL)
        {
            printf("\nMissing value for argument: %s\n", option);
            status = 1;
            goto done;
        }
        // TYPE OF DATA
        else if (strcmp("-t", option) == 0)
        {
            if (strcmp("illumina", value) == 0)
            {
                type = ILLUMINA;
                printf(": input type is Illumina\n");
            }
            else if (strcmp("ion", value) == 0)
            {
                type = ION;
                printf(": input type is Ion Torrent\n");
            }
            else if (strcmp("454", value) == 0)
            {
                type = ROCHE454;
                printf(": input type is 454\n");
            }
            else
            {
                type = UNKNOWN;
                printf(": input type is unknown!\n");
            }

            i++;
        }
        // PAIRED INPUT
        else if (strcmp("-p", option) == 0)
        {
            paired = true;
            printf(": paired input\n");
        }
        // BATCH SIZE
        else if (strcmp("-b", option) == 0)
        {
            BATCH_SIZE = atoi(value);
            printf(": batch size is %d\n", BATCH_SIZE);
            i++;
        }
        // FASTK
        else if (strcmp("-fastk", option) == 0)
        {
            fastk = true;
            printf(": FASTK conversion\n");
        }
        // SUBSTITUTIONS
        else if (strcmp("-s", option) == 0)
        {
            parseToggle(value, "substitutions", &substitutions);
            i++;
        }
        // INSERTIONS
        else if (strcmp("-n", option) == 0)
        {
            parseToggle(value, "insertions", &insertions);
            i++;
        }
        // DELETIONS
        else if (strcmp("-d", option) == 0)
        {
            parseToggle(value, "deletions", &deletions);
            i++;
        }
        // HOMOPOLYMERS
        else if (strcmp("-h", option) == 0)
        {
            parseToggle(value, "homopolymers", &homopolymers);
            i++;
        }
        // FILTERING
        else if (strcmp("-f", option) == 0)
        {
            parseToggle(value, "read filtering", &filtering);
            i++;
        }
        // QUALITY UPDATING
        // The -q option is currently disabled, so qualityUpdating always
        // keeps its default value.
        // KMER SIZE
        else if (strcmp("-k", option) == 0)
        {
            KMER_SIZE = atoi(value);
            printf(": k-mer size is %d\n", KMER_SIZE);
            i++;
        }
        // PROBLEM
        else
        {
            printf("\nProblem with argument: %s\n", option);
            status = 1;
            goto done;
        }
    }

    printf("\n");

    for (int file = 0; file < numInputFiles; file++)
    {
        printf("File name: %s\n", &(inputFileNames[file * FILE_NAME_SLOT_SIZE]));
    }

    printf("\n");

    if (checkInput(numInputFiles, inputFileNames, outputDirectory, paired,
            type, fastk, KMER_SIZE))
    {
        // FASTK CONVERSION
        if (fastk)
        {
            convertFASTQToFASTK(numInputFiles, inputFileNames, outputDirectory);
        }
        // ERROR CORRECTION
        else
        {
            processCorrection(numInputFiles, inputFileNames, outputDirectory,
                    paired, substitutions, insertions, deletions,
                    homopolymers, filtering, qualityUpdating);
        }
    }
    else
    {
        // Invalid configuration must not look like success to the caller.
        status = 1;
    }

done:
    free(inputFileNames);
    return status;
}