/** ** Copyright (c) 2007-2010 Illumina, Inc. ** ** This software is covered by the "Illumina Genome Analyzer Software ** License Agreement" and the "Illumina Source Code License Agreement", ** and certain third party copyright/licenses, and any user of this ** source file is bound by the terms therein (see accompanying files ** Illumina_Genome_Analyzer_Software_License_Agreement.pdf and ** Illumina_Source_Code_License_Agreement.pdf and third party ** copyright/license notices). ** ** This file is part of the Consensus Assessment of Sequence And VAriation ** (CASAVA) software package. ** **/ #include #include #include "smallRna/Trimmer.h" /**************************************************************************** Function: Destructor Description: Deallocates and resets all member variables. ****************************************************************************/ CTrimmer::~CTrimmer() { deallocRun(); delete [] m_Read; m_Read = 0; delete [] m_Adaptor; m_Adaptor = 0; } /**************************************************************************** Function: Setup Description: Arguments: Input: Output: Globals: None ****************************************************************************/ int CTrimmer::Setup(const char adaptorString[], const int similarityMatrix[NSYMBOLS][NSYMBOLS]) { int status = OKAY; if ((status = initAdaptor(adaptorString)) != OKAY) return status; initSimilarityMatrix(similarityMatrix); return status; } /**************************************************************************** Function: Run Description: Arguments: Input: Output: Globals: None ****************************************************************************/ int CTrimmer::Run(const char readString[], int & suffixPosition, int & maxScore) { if ((readString == 0) || (readString[0] == 0)) { maxScore = INT_MIN; suffixPosition = -1; return INVALID_INPUT_PARAMETERS; } int readStringLength = (int) strlen(readString); if (m_ReadLength != readStringLength) { deallocRun(); delete [] m_Read; m_Read = 0; m_Read = new int [readStringLength]; if (!m_Read) { maxScore = INT_MIN; suffixPosition = -1; return MEMORY_ALLOCATION_FAILURE; } m_ReadLength = readStringLength; if (initScoreMatrix() != OKAY) { maxScore = INT_MIN; suffixPosition = -1; return MEMORY_ALLOCATION_FAILURE; } } getCode(readString, m_Read); makeScoreMatrix(); int iMax, jMax; maxScore = getMaxScoreAndLocation(iMax, jMax); suffixPosition = getSuffixPosition(iMax, jMax); return OKAY; } void CTrimmer::getCode(const char anyString[], int * anyCode) const { int anyStringLength = (int) strlen(anyString); for (int i = 0; i < anyStringLength; i++) { switch (anyString[i]) { case 'A': anyCode[i] = 1; break; case 'a': anyCode[i] = 1; break; case 'C': anyCode[i] = 2; break; case 'c': anyCode[i] = 2; break; case 'G': anyCode[i] = 3; break; case 'g': anyCode[i] = 3; break; case 'T': anyCode[i] = 4; break; case 't': anyCode[i] = 4; break; case 'N': anyCode[i] = 5; break; case 'n': anyCode[i] = 5; break; default: break; } } } int CTrimmer::initAdaptor(const char adaptorString[]) { if ((adaptorString == 0) || (adaptorString[0] == 0)) { return INVALID_INPUT_PARAMETERS; } int adaptorStringLength = (int) strlen(adaptorString); if (m_AdaptorLength != adaptorStringLength) { delete [] m_Adaptor; m_Adaptor = 0; m_AdaptorLength = 0; m_Adaptor = new int [adaptorStringLength]; if (!m_Adaptor) return MEMORY_ALLOCATION_FAILURE; m_AdaptorLength = adaptorStringLength; } getCode(adaptorString, m_Adaptor); return OKAY; } void CTrimmer::initSimilarityMatrix(const int similarityMatrix[NSYMBOLS][NSYMBOLS]) { int i, j; for (i = 0; i < NSYMBOLS; i++) { for (j = 0; j < NSYMBOLS; j++) { m_SimilarityMatrix[i][j] = similarityMatrix[i][j]; } } } int CTrimmer::initScoreMatrix() // Allocate and initialize matrices and arrays { // m_Read: Arrayed along the vertical axis of the table i.e. rows int i; // m_Adaptor: Arrayed along the horizontal axis of the table i.e. columns // Allocate matrix: m_ScoreMatrix m_ScoreMatrix = new int * [m_ReadLength + 1]; // rows if (!m_ScoreMatrix) return MEMORY_ALLOCATION_FAILURE; memset(m_ScoreMatrix, 0, m_ReadLength * sizeof(int *)); for (i = 0; i <= m_ReadLength; i++) // columns { m_ScoreMatrix[i] = new int [m_AdaptorLength + 1]; if (!m_ScoreMatrix[i]) return MEMORY_ALLOCATION_FAILURE; } // Allocate matrix: m_PathMatrix m_PathMatrix = new unsigned char * [m_ReadLength + 1]; // rows if (!m_PathMatrix) return MEMORY_ALLOCATION_FAILURE; memset(m_PathMatrix, 0, m_ReadLength * sizeof(unsigned char *)); for (i = 0; i <= m_ReadLength; i++) // columns { m_PathMatrix[i] = new unsigned char [m_AdaptorLength + 1]; if (!m_PathMatrix[i]) return MEMORY_ALLOCATION_FAILURE; } // Initialize matrices: m_ScoreMatrix & m_PathMatrix m_ScoreMatrix[0][0] = 0; m_PathMatrix[0][0] = 0; for (i = 1; i <= m_ReadLength; i++) { m_ScoreMatrix[i][0] = 0; m_PathMatrix[i][0] = 2; } for (i = 1; i <= m_AdaptorLength; i++) { m_ScoreMatrix[0][i] = m_ScoreMatrix[0][i - 1] + m_SimilarityMatrix[0][m_Adaptor[i - 1]]; m_PathMatrix[0][i] = 3; } // Allocate m_Reverser: There is an extra space added at the end to null terminate m_Reverser = new char[m_ReadLength + m_AdaptorLength + 3]; // (m_ReadLength + 1) + (m_AdaptorLength + 1) + 1 (for the extra space) // is the total size of m_Reverser if (!m_Reverser) return MEMORY_ALLOCATION_FAILURE; return OKAY; } void CTrimmer::makeScoreMatrix() // m_Read: Arrayed along the vertical axis of the table i.e. rows { // m_Adaptor: Arrayed along the horizontal axis of the table i.e. columns int i, j; // [i, j] is element at row i and column j int score; // "score" temporary variable // Dynamic programming loops for (i = 1; i <= m_ReadLength; i++) { for (j = 1; j <= m_AdaptorLength; j++) { // substitution: match or wobble in adaptor m_ScoreMatrix[i][j] = m_ScoreMatrix[i - 1][j - 1] + m_SimilarityMatrix[m_Read[i - 1]][m_Adaptor[j - 1]]; m_PathMatrix[i][j] = (m_Read[i - 1] == m_Adaptor[j - 1]) ? 0 : 1; // insertion into adaptor score = m_ScoreMatrix[i - 1][j] + m_SimilarityMatrix[m_Read[i - 1]][0]; if (score > m_ScoreMatrix[i][j]) { m_ScoreMatrix[i][j] = score; m_PathMatrix[i][j] = 2; } // deletion from adaptor score = m_ScoreMatrix[i][j - 1] + m_SimilarityMatrix[0][m_Adaptor[j - 1]]; if (score > m_ScoreMatrix[i][j]) { m_ScoreMatrix[i][j] = score; m_PathMatrix[i][j] = 3; } } } } int CTrimmer::getMaxScoreAndLocation(int & iMax, // m_ScoreMatrix[iMax, jMax] is a max element int & jMax) const // along its outer boundary { // Init the max score int i = iMax = m_ReadLength; int j = jMax = m_AdaptorLength; int maxScore = m_ScoreMatrix[i][j]; // Get the max score from the last row for (j--; j > 0; j--) { if (m_ScoreMatrix[i][j] > maxScore) { maxScore = m_ScoreMatrix[i][j]; iMax = i; jMax = j; } } // Update the max score from the last column i = m_ReadLength; j = m_AdaptorLength; for (i--; i > 0; i--) { if (m_ScoreMatrix[i][j] > maxScore) { maxScore = m_ScoreMatrix[i][j]; iMax = i; jMax = j; } } return maxScore; } int CTrimmer::getSuffixPosition(int i, // m_ScoreMatrix[i, j] is a max element int j) // along its outer boundary { char tmpChar; char * pForward = m_Reverser; char * pBackward = m_Reverser; while (j > 0) { switch (m_PathMatrix[i][j]) { case 0: i--; j--; *pBackward++ = 'm'; break; case 1: i--; j--; *pBackward++ = 'w'; break; case 2: i--; *pBackward++ = 'i'; break; case 3: j--; *pBackward++ = 'd'; break; } } *pBackward-- = 0; // Null terminate // String reverse while (pForward < pBackward) { tmpChar = *pForward; *pForward++ = *pBackward; *pBackward-- = tmpChar; } return i; } void CTrimmer::deallocRun() { int i; delete [] m_Reverser; m_Reverser = 0; if (m_PathMatrix) { for (i = 0; i <= m_ReadLength; i++) { delete [] m_PathMatrix[i]; m_PathMatrix[i] = 0; } } delete [] m_PathMatrix; m_PathMatrix = 0; if (m_ScoreMatrix) { for (i = 0; i <= m_ReadLength; i++) { delete [] m_ScoreMatrix[i]; m_ScoreMatrix[i] = 0; } } delete [] m_ScoreMatrix; m_ScoreMatrix = 0; } /////////////////////////////////////////////////////////////////////////////////////////////////////////////