/*
Pollux
Copyright (C) 2014 Eric Marinier
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
---
Contributions: Oliver Grant
*/
#include
#include
#include
#include
#include "Encoding.h"
/*
 * Translate a nucleotide character into its 2-bit code.
 *
 *   'A' / 'a' -> 0
 *   'G' / 'g' -> 1
 *   'C' / 'c' -> 2
 *   'T' / 't' -> 3
 *
 * Every other character also yields 0, which lets callers pass an arbitrary
 * filler character when padding a word.
 */
unsigned int encode_Nucleotide(char n1) {
    switch (n1) {
    case 'G':
    case 'g':
        return 1;
    case 'C':
    case 'c':
        return 2;
    case 'T':
    case 't':
        return 3;
    case 'A':
    case 'a':
    default: // anything unrecognised shares A's code; used for filling
        return 0;
    }
}
/*
 * Append one nucleotide to a packed word.
 *
 * The word pointed to by nucl_seq is shifted left by two bits and the code
 * returned by encode_Nucleotide(n1) is added into the vacated low bits.
 */
void fill_int(unsigned long long int *nucl_seq, char n1) {
    *nucl_seq = (*nucl_seq << 2) + encode_Nucleotide(n1);
}
/*
 * Pack the nucleotide string seq into rd->sequence and record its length.
 *
 * Each nucleotide occupies two bits (see encode_Nucleotide) and 32 of them
 * are stored per array element, in input order. When the length is not a
 * multiple of 32, the last element is shifted left so the unused low
 * positions are zero, exactly as if it had been filled up with zero codes.
 *
 * Parameters:
 *   rd  - read to populate; rd->sequence receives a freshly allocated array
 *         and rd->length the number of nucleotides. This function does not
 *         release any previous rd->sequence.
 *   seq - NUL-terminated nucleotide string. A single trailing '\n' (as left
 *         by fgets) is removed in place; no other character is stripped.
 *
 * On allocation failure rd->sequence is NULL and rd->length is 0.
 */
void encode_sequence(struct read* rd, char* seq) {
    enum { NUCLEOTIDES_PER_WORD = 32 };

    size_t len = strlen(seq);

    // Strip the newline only when it really is the last character, so a
    // newline elsewhere in the buffer never costs us a valid nucleotide.
    if (len > 0 && seq[len - 1] == '\n') {
        seq[--len] = '\0';
    }

    // Integer ceiling division; avoids the round trip through double.
    size_t seq_words = (len + NUCLEOTIDES_PER_WORD - 1) / NUCLEOTIDES_PER_WORD;

    // calloc hands back zeroed words and checks the size multiplication.
    rd->sequence = calloc(seq_words, sizeof *rd->sequence);
    if (rd->sequence == NULL) {
        // Allocation failed (or a zero-sized request returned NULL):
        // leave the read in a consistent, empty state.
        rd->length = 0;
        return;
    }
    rd->length = len;

    // for each character in the sequence, encode it
    for (size_t i = 0; i < len; i++) {
        fill_int(&rd->sequence[i / NUCLEOTIDES_PER_WORD], seq[i]);
    }

    // Pad the last word with zero codes: one shift by two bits per unused
    // slot. pad is at most 31, so the shift count stays below 64.
    size_t pad = seq_words * NUCLEOTIDES_PER_WORD - len;
    if (pad > 0) {
        rd->sequence[seq_words - 1] <<= 2 * pad;
    }
}