// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file #include "variance/export.h" #include #include #include using namespace std; /* ----- ----- ----- ----- ----- ----- * * ----- Auxillary funcitons ----- * * ----- ----- ----- ----- ----- ----- */ void Export::stringToUpper(string & seq) { const unsigned ssize(seq.length()); for (unsigned ii = 0; ii < ssize; ii++) { seq[ii] = toupper(seq[ii]); } } string Export::reverse(const string& seq) { string revSeq = seq; int rcIdx = 0; for (int ii = seq.length() - 1; ii >= 0; ii--) { revSeq[rcIdx] = seq[ii]; rcIdx++; } return(revSeq); } string Export::complement(const string& seq) { string compSeq = seq; stringToUpper(compSeq); const unsigned csize(compSeq.length()); for (unsigned ii = 0; ii < csize; ii++) { if (compSeq[ii] == 'A') { compSeq[ii] = 'T'; } else if (compSeq[ii] == 'C') { compSeq[ii] = 'G'; } else if (compSeq[ii] == 'G') { compSeq[ii] = 'C'; } else if (compSeq[ii] == 'T') { compSeq[ii] = 'A'; } } return(compSeq); } void Export::forceOnlyACTGN(string & seq) { stringToUpper(seq); const unsigned ssize(seq.length()); for (unsigned ii = 0; ii < ssize; ii++) { if (seq[ii] == 'A' || seq[ii] == 'C' || seq[ii] == 'G' || seq[ii] == 'T') { // do nothing } else { // convert to N seq[ii] = 'N'; } } } void Export::tsvToArray(const string& line, vector & data, char sep = '\t') { // Ensure data is clean data.clear(); string::size_type prevIndex = 0; string::size_type currIndex = line.find_first_of(sep); int idx = 0; while(currIndex != string::npos) { data.push_back(line.substr(prevIndex, currIndex - prevIndex)); prevIndex = currIndex+1; currIndex = line.find_first_of(sep, prevIndex); idx++; } data.push_back(line.substr(prevIndex, line.length() - prevIndex)); } /* ----- ----- ----- ----- ----- ----- ----- * * ----- Constructors and Destructors ----- * * ----- ----- ----- ----- ----- ----- ----- */ Export::Export(const std::string& line, unsigned int expType) { // Parse line vector data; Export::tsvToArray(line, data); //, '\t'); if (data.size() < 22) { cerr << "[ERROR]: Not enough data entries for export format: " << data.size() << " < 22\nOn line: '" << line << "'" << endl; exit(EXIT_CODE_FAILURE); } // Set Export file type this->EXP_FORMAT_TYPE = expType; if (expType == 0) { this->machineName = data[EXP_MACHINE_NAME_IDX]; this->runNum = atoi(data[EXP_RUN_NUM_IDX].c_str()); this->lane = atoi(data[EXP_LANE_NUM_IDX].c_str()); this->tile = atoi(data[EXP_TILE_IDX].c_str()); this->xCord = atoi(data[EXP_XCORD_IDX].c_str()); this->yCord = atoi(data[EXP_YCORD_IDX].c_str()); this->indexStr = data[EXP_INDEX_STR_IDX]; this->readNum = atoi(data[EXP_READ_NUM_IDX].c_str()); this->read = data[EXP_READ_IDX]; this->qualStr = data[EXP_QUAL_STR_IDX]; this->matchChr = data[EXP_MATCH_CHR_IDX]; this->matchContig = data[EXP_MATCH_CONTIG_IDX]; this->matchStart = atoi(data[EXP_MATCH_POS_IDX].c_str()); this->matchStrand = (char)data[EXP_MATCH_STRAND_IDX].c_str()[0]; this->matchDesc = data[EXP_MATCH_DESC_IDX]; this->singleAlnScore = atoi(data[EXP_SINGLE_ALN_IDX].c_str()); this->pairedAlnScore = atoi(data[EXP_PAIRED_ALN_IDX].c_str()); this->partnerChr = data[EXP_PARTNER_CHR_IDX]; this->partnerContig = data[EXP_PARTNER_CONTIG_IDX]; this->partnerOffset = atoi(data[EXP_PARTNER_OFFSET_IDX].c_str()); this->partnerStrand = (char)data[EXP_PARTNER_STRAND_IDX].c_str()[0]; this->filterStatus = (char)data[EXP_FILTER_IDX].c_str()[0]; if (data.size() > EXP_RANGE_START_IDX) { //cerr << "Setting 1\t" << data.size() << "\t" << EXP_RANGE_START_IDX << endl; this->rangeStart = atoi(data[EXP_RANGE_START_IDX].c_str()); //cerr << "START SET\t" << this->rangeStart << endl; } else { this->rangeStart = 0; } if (data.size() > EXP_RANGE_END_IDX) { //cerr << "Setting 2\t" << data.size() << "\t" << EXP_RANGE_END_IDX << endl; this->rangeEnd = atoi(data[EXP_RANGE_END_IDX].c_str()); //cerr << "END SET\t" << this->rangeEnd << endl; } else { this->rangeEnd = 0; } } else { this->machineName = data[EXP2_MACHINE_NAME_IDX]; this->runNum = atoi(data[EXP2_RUN_NUM_IDX].c_str()); this->lane = atoi(data[EXP2_LANE_NUM_IDX].c_str()); this->tile = atoi(data[EXP2_TILE_IDX].c_str()); this->xCord = atoi(data[EXP2_XCORD_IDX].c_str()); this->yCord = atoi(data[EXP2_YCORD_IDX].c_str()); this->indexStr = data[EXP2_INDEX_STR_IDX]; this->readNum = atoi(data[EXP2_READ_NUM_IDX].c_str()); this->read = data[EXP2_READ_IDX]; this->qualStr = data[EXP2_QUAL_STR_IDX]; this->matchChr = data[EXP2_MATCH_CHR_IDX]; this->matchContig = data[EXP2_MATCH_CONTIG_IDX]; this->matchStart = atoi(data[EXP2_MATCH_POS_IDX].c_str()); this->matchStrand = (char)data[EXP2_MATCH_STRAND_IDX].c_str()[0]; this->matchDesc = data[EXP2_MATCH_DESC_IDX]; this->singleAlnScore = atoi(data[EXP2_SINGLE_ALN_IDX].c_str()); this->pairedAlnScore = atoi(data[EXP2_PAIRED_ALN_IDX].c_str()); this->partnerChr = data[EXP2_PARTNER_CHR_IDX]; this->partnerContig = data[EXP2_PARTNER_CONTIG_IDX]; this->partnerOffset = atoi(data[EXP2_PARTNER_OFFSET_IDX].c_str()); this->partnerStrand = data[EXP2_PARTNER_STRAND_IDX]; // BF457 this->filterStatus = (char)data[EXP2_FILTER_IDX].c_str()[0]; this->rangeStart = atoi(data[EXP2_RANGE_START_IDX].c_str()); this->rangeEnd = atoi(data[EXP2_RANGE_END_IDX].c_str()); } if (this->matchDesc.length() == 0 || this->matchDesc.compare("-") == 0) { this->readType = RT_SHADOW_ALN; } else if (this->matchDesc.find('^', 0)) { this->readType = RT_GAPPED_ALN; } else { this->readType = RT_UNKOWN_ALN; } data.clear(); } /* ----- ----- ----- ----- ----- ----- * * ----- Manipulation Functions ----- * * ----- ----- ----- ----- ----- ----- */ /* ----- ----- ----- ----- ----- * * ----- Access Functions ----- * * ----- ----- ----- ----- ----- */ string Export::toString() { stringstream con; // Silly formating for export 2 case // This is the indel finder case if (this->EXP_FORMAT_TYPE == EXPORT_TYPE_INDELFINDER) { con << this->rangeStart << "\t" << this->rangeEnd << "\t"; } // Standard export format con << this->machineName << "\t" << this->runNum << "\t" << this->lane << "\t" << this->tile << "\t" << this->xCord << "\t" << this->yCord << "\t" << this->indexStr << "\t" << this->readNum << "\t" << this->read << "\t" << this->qualStr << "\t" << this->matchChr << "\t" << this->matchContig << "\t" << this->matchStart << "\t" << this->matchStrand << "\t" << this->matchDesc << "\t" << this->singleAlnScore << "\t" << this->pairedAlnScore << "\t" << this->partnerChr << "\t" << this->partnerContig << "\t" << this->partnerOffset << "\t" << this->partnerStrand << "\t" << this->filterStatus; // Silly formating for export 2 case // This is the normal case if (this->EXP_FORMAT_TYPE == EXPORT_TYPE_NORMAL && this->rangeEnd != 0) { con << "\t" << this->rangeStart << "\t" << this->rangeEnd; } return(con.str()); } // END OF FILE