/*
Pollux
Copyright (C) 2014 Eric Marinier
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#ifndef UTILITY_H
#define UTILITY_H
#ifdef __cplusplus
extern "C" {
#endif
typedef int bool;
#define true 1
#define false 0
#define MAX_CORRECTIONS 30 // 'Minimum' max corrections performed.
// - More important for short reads.
struct Sequence
{
unsigned long long int* sequence; // Pointer to the actual sequence.
char* quality; // Pointer to the quality scores.
unsigned int length; // Number of nucleotides in sequence.
unsigned int blocks; // The number of blocks of memory used.
unsigned int numCorrections;
char* corrections;
int* homopolymerSize; // Associated with corrections.
char type; // Sequence (error) type.
};
/**
* This function returns a k-mer from the passed sequence array as a single
* 64-bit integer. Consequently, the means that the k-mer size cannot be larger
* than 32 nucleotide bases.
*
* @param sequence The sequence array from which to pull the k-mer.
* @param startNucleotidePosition The starting position of the k-mer in
* nucleotide bases, relative to the entire sequence (with sequence[0]
* base 0 being the start). The starting base is included in the k-mer.
* @param endNucleotidePosition The ending position of the k-mer in
* nucleotide bases, relative to the entire sequence (with sequence[0]
* base 0 being the start). The ending base is not included in the k-mer.
* @return A 64-bit integer representation of the k-mer with the first k high
* bits representing the k-mer and the 64 - (2 * k) lower bits set to 0.
*/
unsigned long long int getKMer(unsigned long long int* sequence,
unsigned int startNucleotidePosition,
unsigned int endNucleotidePosition);
/**
* This function will print to standard output a sequence of nucleotides
* defined by the function call. It will not print an end of line character.
*
* @param sequence The sequence array from which to print the nucleotides.
* @param startNucleotidePosition The starting position of the printing in
* nucleotide bases, relative to the entire sequence (with sequence[0]
* base 0 being the start). The starting base is included in the print.
* @param endNucleotidePosition The ending position of the printing in
* nucleotide bases, relative to the entire sequence (with sequence[0]
* base 0 being the start). The ending base is not included in the print.
*/
void printAsNucleotides(unsigned long long int* sequence,
unsigned int startNucleotidePosition,
unsigned int endNucleotidePosition);
void writeAsNucleotides(FILE* file, unsigned long long int* sequence,
unsigned int startNucleotidePosition,
unsigned int endNucleotidePosition);
/**
* This function will print to standard output a sequence of nucleotides
* defined by the value passed to the function call. It will interpret the
* entire value as being valid nucleotide data. This will print 32 nucleotide
* bases to standard output.
*
* @param value The 64-bit representation of 32 nucleotide bases.
*/
void printValueAsNucleotides(unsigned long long int value);
/**
* This function will print to the specified file a sequence of nucleotides
* defined by the value passed to the function call. It will interpret the
* entire value as being valid nucleotide data. This will print 32 nucleotide
* bases to standard output.
*
* @param file The file to write to. Use "stdout" for standard output.
* @param value The 64-bit representation of the 32 nucleotide bases.
*/
void writeValueAsNucleotides(FILE* file, unsigned long long int value);
/**
* This is a helper function to get max of two numbers.
*
* @param value1
* @param value2
* @return The larger of the two numbers.
*/
int getMax(int value1, int value2);
/**
* This is a helper function to get min of two numbers.
*
* @param value1
* @param value2
* @return The smaller of the two numbers.
*/
int getMin(int value1, int value2);
/**
* This function returns the reverse for a single 64bit sequence block.
*
* @param sequence The single 64bit sequence VALUE (not pointer) to reverse.
* @return The VALUE of the reverse.
*/
unsigned long long int getReverse(unsigned long long int sequence);
/**
* This function returns the reverse compliment for the passed sequence. This
* function will allocate memory. The original sequence will be unchanged. The
* remaining bits in the last block will be masked out with 0's.
*
* @param sequence The sequence to reverse compliment.
* @param length The length of the sequence (>= 1).
* @return
*/
unsigned long long int* createReverseCompliment(unsigned long long int* sequence,
unsigned int length);
char getBase(unsigned long long int* nucleotideSequence,
unsigned int nucleotidePosition);
void setBase(unsigned long long int* nucleotideSequence,
unsigned int nucleotidePosition, char nucleotide);
void changeBase(struct Sequence* sequence, unsigned int nucleotidePosition,
char nucleotide, char quality);
void deleteBase(struct Sequence* sequence, unsigned int nucleotidePosition);
void insertBase(struct Sequence* sequence, unsigned int nucleotidePosition,
char nucleotide, char quality);
unsigned int getNumMemoryBlocks(unsigned int length);
void writeAsNucleotidesSpaced(FILE* file, unsigned long long int* sequence,
unsigned int startNucleotidePosition,
unsigned int endNucleotidePosition);
int getHomopolymerLength(unsigned long long int* nucleotideSequence,
int sequenceLength, unsigned int nucleotidePosition);
int getHomopolymerLeftmostNucleotide(unsigned long long int* nucleotideSequence,
unsigned int nucleotidePosition);
void setHomopolymerLength(struct Sequence* sequence,
unsigned int nucleotidePosition, unsigned int size, char quality);
#ifdef __cplusplus
}
#endif
#endif /* UTILITY_H */