// -*- mode: c++; indent-tabs-mode: nil; -*- // // Copyright 2009 Illumina, Inc. // // This software is covered by the "Illumina Genome Analyzer Software // License Agreement" and the "Illumina Source Code License Agreement", // and certain third party copyright/licenses, and any user of this // source file is bound by the terms therein (see accompanying files // Illumina_Genome_Analyzer_Software_License_Agreement.pdf and // Illumina_Source_Code_License_Agreement.pdf and third party // copyright/license notices). // // /// \file /// \author Chris Saunders /// #ifndef __BAM_SEQ_HH #define __BAM_SEQ_HH #include "blt_util/blt_types.hh" #include #include #include #include #include namespace BAM_BASE { enum index_t { REF = 0x0, A = 0x1, C = 0x2, G = 0x4, T = 0x8, ANY = 0xF }; } inline char get_bam_seq_char(const uint8_t a){ using namespace BAM_BASE; switch(a) { case REF: return '='; case A: return 'A'; case C: return 'C'; case G: return 'G'; case T: return 'T'; default: return 'N'; } } inline uint8_t get_bam_seq_code(const char c){ using namespace BAM_BASE; switch(c) { case '=': return REF; case 'A': return A; case 'C': return C; case 'G': return G; case 'T': return T; default: return ANY; } } // interface to bam_seq -- allows us to pass either compressed // sequences from bam files and regular strings using the same // object: // struct bam_seq_base { virtual ~bam_seq_base() {} virtual unsigned size() const = 0; virtual uint8_t get_code(pos_t i) const = 0; virtual char get_char(const pos_t i) const = 0; bool is_in_range(const pos_t i) const { return ((i>=0) and (i(size()))); } }; std::ostream& operator<<(std::ostream& os, const bam_seq_base& bs); // // struct bam_seq : public bam_seq_base { bam_seq(const uint8_t* s, const uint16_t size, const uint16_t offset=0) : _s(s), _size(size), _offset(offset) {} unsigned size() const { return _size; } uint8_t get_code(pos_t i) const { if(not is_in_range(i)) return BAM_BASE::ANY; i += static_cast(_offset); return _s[(i/2)] >> 4*(1-(i%2)) & 0xf; } char get_char(const pos_t i) const { return get_bam_seq_char(get_code(i)); } private: const uint8_t* _s; uint16_t _size; uint16_t _offset; }; // // struct string_bam_seq : public bam_seq_base { string_bam_seq(const std::string& s) : _s(s.c_str()), _size(s.size()) {} string_bam_seq(const char* s, const unsigned size) : _s(s), _size(size) {} unsigned size() const { return _size; } uint8_t get_code(pos_t i) const { return get_bam_seq_code(get_char(i)); } char get_char(const pos_t i) const { if(not is_in_range(i)) return 'N'; return _s[i]; } private: const char* _s; unsigned _size; }; #endif