# Copyright 2011 by Wibowo Arindrarto (w.arindrarto@gmail.com)
# Revisions copyright 2011-2016 by Peter Cock.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.SeqIO parser for the ABI format.

ABI is the format used by Applied Biosystem's sequencing machines to store
sequencing results.

For more details on the format specification, visit:
http://www6.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf

"""


import datetime
import struct

from os.path import basename

from Bio import Alphabet
from Bio.Alphabet.IUPAC import ambiguous_dna, unambiguous_dna
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

from Bio._py3k import _bytes_to_string
from Bio._py3k import zip

# dictionary for determining which tags goes into SeqRecord annotation
# each key is tag_name + tag_number
# if a tag entry needs to be added, just add its key and its key
# for the annotations dictionary as the value
# dictionary for tags that require preprocessing before use in creating
# seqrecords
_EXTRACT = {
    'TUBE1': 'sample_well',
    'DySN1': 'dye',
    'GTyp1': 'polymer',
    'MODL1': 'machine_model',
}


# Complete data structure representing 98% of the API. The general section
# represents the part of the API that's common to ALL instruments, whereas the
# instrument specific sections are labelled as they are in the ABIF spec
#
# Keys don't seem to clash from machine to machine, so when we parse, we look
# for ANY key, and store that in the raw ABIF data structure attached to the
# annotations, with the assumption that anyone parsing the data can look up the
# spec themself
#
# Key definitions are retained in case end users want "nice" labels pre-made
# for them for all of the available fields.
_INSTRUMENT_SPECIFIC_TAGS = {}

_INSTRUMENT_SPECIFIC_TAGS['general'] = {
    'APFN2': 'Sequencing Analysis parameters file name',
    'APXV1': 'Analysis Protocol XML schema version',
    'APrN1': 'Analysis Protocol settings name',
    'APrV1': 'Analysis Protocol settings version',
    'APrX1': 'Analysis Protocol XML string',
    'CMNT1': 'Sample Comment',
    'CTID1': 'Container Identifier, a.k.a. plate barcode',
    'CTNM1': 'Container name, usually identical to CTID, but not necessarily so',
    'CTTL1': 'Comment Title',
    'CpEP1': 'Capillary type electrophoresis. 1 for a capillary based machine. 0 for a slab gel based machine.',
    'DATA1': 'Channel 1 raw data',
    'DATA2': 'Channel 2 raw data',
    'DATA3': 'Channel 3 raw data',
    'DATA4': 'Channel 4 raw data',
    'DATA5': 'Short Array holding measured volts/10 (EP voltage) during run',
    'DATA6': 'Short Array holding measured milliAmps trace (EP current) during run',
    'DATA7': 'Short Array holding measured milliWatts trace (Laser EP Power) during run',
    'DATA8': 'Short Array holding measured oven Temperature (polymer temperature) trace during run',
    'DATA9': 'Channel 9 processed data',
    'DATA10': 'Channel 10 processed data',
    'DATA11': 'Channel 11 processed data',
    'DATA12': 'Channel 12 processed data',
    # Prism 3100/3100-Avant may provide DATA105
    #          3130/3130-XL may provide DATA105
    # 3530/3530-XL may provide DATA105-199, 9-12, 205-299
    'DSam1': 'Downsampling factor',
    'DySN1': 'Dye set name',
    'Dye#1': 'Number of dyes',
    'DyeN1': 'Dye 1 name',
    'DyeN2': 'Dye 2 name',
    'DyeN3': 'Dye 3 name',
    'DyeN4': 'Dye 4 name',
    'DyeW1': 'Dye 1 wavelength',
    'DyeW2': 'Dye 2 wavelength',
    'DyeW3': 'Dye 3 wavelength',
    'DyeW4': 'Dye 4 wavelength',
    # 'DyeN5-N': 'Dye 5-N Name',
    # 'DyeW5-N': 'Dye 5-N Wavelength',
    'EPVt1': 'Electrophoresis voltage setting (volts)',
    'EVNT1': 'Start Run event',
    'EVNT2': 'Stop Run event',
    'EVNT3': 'Start Collection event',
    'EVNT4': 'Stop Collection event',
    'FWO_1': 'Base Order. Sequencing Analysis Filter wheel order. Fixed for 3500 at "GATC"',
    'GTyp1': 'Gel or polymer Type',
    'InSc1': 'Injection time (seconds)',
    'InVt1': 'Injection voltage (volts)',
    'LANE1': 'Lane/Capillary',
    'LIMS1': 'Sample tracking ID',
    'LNTD1': 'Length to detector',
    'LsrP1': 'Laser Power setting (micro Watts)',
    'MCHN1': 'Instrument name and serial number',
    'MODF1': 'Data collection module file',
    'MODL1': 'Model number',
    'NAVG1': 'Pixels averaged per lane',
    'NLNE1': 'Number of capillaries',
    'OfSc1': 'List of scans that are marked off scale in Collection. (optional)',
    # OvrI and OrvV are listed as "1-N", and "One for each dye (unanalyzed
    # and/or analyzed data)"
    'OvrI1': 'List of scan number indexes that have values greater than 32767 but did not '
             'saturate the camera. In Genemapper samples, this can have indexes with '
             'values greater than 32000. In sequencing samples, this cannot have '
             'indexes with values greater than 32000.',
    'OvrI2': 'List of scan number indexes that have values greater than 32767 but did not '
             'saturate the camera. In Genemapper samples, this can have indexes with '
             'values greater than 32000. In sequencing samples, this cannot have '
             'indexes with values greater than 32000.',
    'OvrI3': 'List of scan number indexes that have values greater than 32767 but did not '
             'saturate the camera. In Genemapper samples, this can have indexes with '
             'values greater than 32000. In sequencing samples, this cannot have '
             'indexes with values greater than 32000.',
    'OvrI4': 'List of scan number indexes that have values greater than 32767 but did not '
             'saturate the camera. In Genemapper samples, this can have indexes with '
             'values greater than 32000. In sequencing samples, this cannot have '
             'indexes with values greater than 32000.',
    'OvrV1': 'List of color data values found at the locations listed in the OvrI tag. '
             'There must be exactly as many numbers in this array as in the OvrI array.',
    'OvrV2': 'List of color data values found at the locations listed in the OvrI tag. '
             'There must be exactly as many numbers in this array as in the OvrI array.',
    'OvrV3': 'List of color data values found at the locations listed in the OvrI tag. '
             'There must be exactly as many numbers in this array as in the OvrI array.',
    'OvrV4': 'List of color data values found at the locations listed in the OvrI tag. '
             'There must be exactly as many numbers in this array as in the OvrI array.',
    'PDMF1': 'Sequencing Analysis Mobility file name chosen in collection',
    'RMXV1': 'Run Module XML schema version',
    'RMdN1': 'Run Module name (same as MODF)',
    'RMdX1': 'Run Module XML string',
    'RPrN1': 'Run Protocol name',
    'RPrV1': 'Run Protocol version',
    'RUND1': 'Run Started Date',
    'RUND2': 'Run Stopped Date',
    'RUND3': 'Data Collection Started Date',
    'RUND4': 'Data Collection Stopped date',
    'RUNT1': 'Run Started Time',
    'RUNT2': 'Run Stopped Time',
    'RUNT3': 'Data Collection Started Time',
    'RUNT4': 'Data Collection Stopped Time',
    'Rate1': 'Scanning Rate. Milliseconds per frame.',
    'RunN1': 'Run Name',
    'SCAN1': 'Number of scans',
    'SMED1': 'Polymer lot expiration date',
    'SMLt1': 'Polymer lot number',
    'SMPL1': 'Sample name',
    'SVER1': 'Data collection software version',
    'SVER3': 'Data collection firmware version',
    'Satd1': 'Array of longs representing the scan numbers of data points, which are flagged as saturated by data collection (optional)',
    'Scal1': 'Rescaling divisor for color data',
    'Scan1': 'Number of scans (legacy - use SCAN)',
    'TUBE1': 'Well ID',
    'Tmpr1': 'Run temperature setting',
    'User1': 'Name of user who created the plate (optional)',
}

#  No instrument specific tags
# _INSTRUMENT_SPECIFIC_TAGS['abi_prism_3100/3100-Avant'] = {
# }

_INSTRUMENT_SPECIFIC_TAGS['abi_3130/3130xl'] = {
    'CTOw1': 'Container owner',
    'HCFG1': 'Instrument Class',
    'HCFG2': 'Instrument Family',
    'HCFG3': 'Official Instrument Name',
    'HCFG4': 'Instrument Parameters',
    'RMdVa1': 'Run Module version',
}

_INSTRUMENT_SPECIFIC_TAGS['abi_3530/3530xl'] = {
    'AAct1': 'Primary Analysis Audit Active indication. True if system auditing was enabled during the last write of this file, '
             'false if system auditing was disabled.',
    'ABED1': 'Anode buffer expiration date using ISO 8601 format using the patterns YYYY-MM-DDTHH:MM:SS.ss+/-HH:MM. Hundredths of a second are optional.',
    'ABID1': 'Anode buffer tray first installed date',
    'ABLt1': 'Anode buffer lot number',
    'ABRn1': 'Number of runs (injections) processed with the current Anode Buffer (runs allowed - runs remaining)',
    'ABTp1': 'Anode buffer type',
    'AEPt1': 'Analysis Ending scan number for basecalling on initial analysis',
    'AEPt2': 'Analysis Ending scan number for basecalling on last analysis',
    'APCN1': 'Amplicon name',
    'ARTN1': 'Analysis Return code. Produced only by 5 Prime basecaller 1.0b3',
    'ASPF1': 'Flag to indicate whether adaptive processing worked or not',
    'ASPt1': 'Analysis Starting scan number for first analysis',
    'ASPt2': 'Analysis Starting scan number for last analysis',
    'AUDT2': 'Audit log used across 3500 software (optional)',
    'AVld1': 'Assay validation flag (true or false)',
    'AmbT1': 'Record of ambient temperature readings',
    'AsyC1': 'The assay contents (xml format)',
    'AsyN1': 'The assay name',
    'AsyV1': 'The assay version',
    'B1Pt1': 'Reference scan number for mobility and spacing curves for first analysis',
    'B1Pt2': 'Reference scan number for mobility and spacing curves for last analysis',
    'BCTS1': 'Basecaller timestamp. Time of completion of most recent analysis',
    'BcRn1': 'Basecalling qc code',
    'BcRs1': 'Basecalling warnings, a concatenated comma separated string',
    'BcRs2': 'Basecalling errors, a concatenated comma separated string',
    'CAED1': 'Capillary array expiration',
    'CALt1': 'Capillary array lot number',
    'CARn1': 'Number of injections processed (including the one of which this sample was a part) through the capillary array',
    'CASN1': 'Capillary array serial number',
    'CBED1': 'Cathode buffer expiration date',
    'CBID1': 'Cathode buffer tray first installed date',
    'CBLt1': 'Cathode buffer lot number',
    'CBRn1': 'Number of runs (injections) processed with the current Cathode Buffer (runs allowed - runs remaining)',
    'CBTp1': 'Cathode buffer type',
    'CLRG1': 'Start of the clear range (inclusive).',
    'CLRG2': 'Clear range length',
    'CRLn1': 'Contiguous read length',
    'CRLn2': 'One of "Pass", "Fail", or "Check"',
    'CTOw1': 'The name entered as the Owner of a plate, in the plate editor',
    'CkSm1': 'File checksum',
    'DCEv1': 'A list of door-close events, separated by semicolon. Door open events are generally paired with door close events.',
    'DCHT1': 'Reserved for backward compatibility. The detection cell heater temperature setting from the Run Module. Not used for 3500.',
    'DOEv1': 'A list of door-open events, separated by semicolon. Door close events are generally paired with door open events.',
    'ESig2': 'Electronic signature record used across 3500 software',
    'FTab1': 'Feature table. Can be created by Nibbler for Clear Range.',
    'FVoc1': 'Feature table vocabulary. Can be created by Nibbler for Clear Range.',
    'Feat1': 'Features. Can be created by Nibbler for Clear Range.',
    'HCFG1': 'The Instrument Class. All upper case, no spaces. Initial valid value: CE',
    'HCFG2': 'The Instrument Family. All upper case, no spaces. Valid values: 31XX or 37XX for UDC, 35XX (for 3500)',
    'HCFG3': 'The official instrument name. Mixed case, minus any special formatting. Initial valid values: 3130, 3130xl, 3730, 3730xl, 3500, 3500xl.',
    'HCFG4': 'Instrument parameters. Contains key-value pairs of instrument configuration information, separated by semicolons. '
             'Four parameters are included initially: UnitID=<UNITD number>, CPUBoard=<board type>, '
             'ArraySize=<# of capillaries>, SerialNumber=<Instrument Serial#>.',
    'InjN1': 'Injection name',
    'LAST1': 'Parameter settings information',
    'NOIS1': 'The estimate of rms baseline noise (S/N ratio) for each dye for a successfully analyzed sample. '
             'Corresponds in order to the raw data in tags DATA 1-4. KB basecaller only.',
    'P1AM1': 'Amplitude of primary peak, which is not necessarily equal to corresponding signal strength at that position',
    'P1RL1': 'Deviation of primary peak position from (PLoc,2), times 100, rounded to integer',
    'P1WD1': 'Full-width Half-max of primary peak, times 100, rounded to integer. '
             'Corresponding signal intensity is not necessarily equal to one half of primary peak amplitude',
    'P2AM1': 'Amplitude of secondary peak, which is not necessarily equal to corresponding signal strength at that position',
    'P2BA1': 'Base of secondary peak',
    'P2RL1': 'Deviation of secondary peak position from (PLoc,2), times 100, rounded to integer',
    'PBAS1': 'Array of sequence characters edited by user',
    'PBAS2': 'Array of sequence characters as called by Basecaller',
    'PCON1': 'Array of quality Values (0-255) as edited by user',
    'PCON2': 'Array of quality values (0-255) as called by Basecaller',
    'PDMF2': 'Mobility file name chosen in most recent analysis (identical to PDMF1)',
    'PLOC1': 'Array of peak locations edited by user',
    'PLOC2': 'Array of peak locations as called by Basecaller',
    'PRJT1': 'SeqScape 2.0 project template name',
    'PROJ4': 'SeqScape 2.0 project name',
    'PSZE1': 'Plate size. The number of sample positions in the container. Current allowed values: 96, 384.',
    'PTYP1': 'Plate type. Current allowed values: 96-Well, 384-Well.',
    'PuSc1': 'Median pupscore',
    'QV201': 'QV20+ value',
    'QV202': 'One of "Pass", "Fail", or "Check"',
    'QcPa1': 'QC parameters',
    'QcRn1': 'Trimming and QC code',
    'QcRs1': 'QC warnings, a concatenated comma separated string',
    'QcRs2': 'QC errors, a concatenated comma separated string',
    'RGOw1': 'The name entered as the Owner of a Results Group, in the Results Group Editor. Implemented as the user name from the results group.',
    'RInj1': 'Reinjection number. The reinjection number that this sample belongs to. Not present if there was no reinjection.',
    'RNmF1': 'Raman normalization factor',
    'RevC1': 'for whether the sequence has been complemented',
    'RunN1': 'Run name (which, for 3500, is different from injection name)',
    'S/N%1': 'Signal strength for each dye',
    'SMID1': 'Polymer first installed date',
    'SMRn1': 'Number of runs (injections) processed with the current polymer (runs allowed - runs remaining)',
    'SPAC1': 'Average peak spacing used in last analysis',
    'SPAC2': 'Basecaller name - corresponds to name of bcp file.',
    'SPAC3': 'Average peak spacing last calculated by the Basecaller.',
    'SPEC1': 'Sequencing Analysis Specimen Name',
    'SVER2': 'Basecaller version number',
    'SVER4': 'Sample File Format Version String',
    'ScPa1': 'The parameter string of size caller',
    'ScSt1': 'Raw data start point. Set to 0 for 3500 data collection.',
    'SpeN1': 'Active spectral calibration name',
    'TrPa1': 'Timming parameters',
    'TrSc1': 'Trace score.',
    'TrSc2': 'One of "Pass", "Fail", or "Check"',
    'phAR1': 'Trace peak aria ratio',
    'phCH1': 'Chemistry type ("term", "prim", "unknown"), based on DYE_1 information',
    'phDY1': 'Dye ("big", "d-rhod", "unknown"), based on mob file information',
    'phQL1': 'Maximum Quality Value',
    'phTR1': 'Set Trim region',
    'phTR2': 'Trim probability',
}

_INSTRUMENT_SPECIFIC_TAGS['abi_3730/3730xl'] = {
    'BufT1': 'Buffer tray heater temperature (degrees C)',
}

# dictionary for data unpacking format
_BYTEFMT = {
    1: 'b',     # byte
    2: 's',     # char
    3: 'H',     # word
    4: 'h',     # short
    5: 'i',     # long
    6: '2i',    # rational, legacy unsupported
    7: 'f',     # float
    8: 'd',     # double
    10: 'h2B',  # date
    11: '4B',   # time
    12: '2i2b',  # thumb
    13: 'B',    # bool
    14: '2h',   # point, legacy unsupported
    15: '4h',   # rect, legacy unsupported
    16: '2i',   # vPoint, legacy unsupported
    17: '4i',   # vRect, legacy unsupported
    18: 's',    # pString
    19: 's',    # cString
    20: '2i',   # tag, legacy unsupported
}
# header data structure (exluding 4 byte ABIF marker)
_HEADFMT = '>H4sI2H3I'
# directory data structure
_DIRFMT = '>4sI2H4I'

__global_tag_listing = []
for tag in _INSTRUMENT_SPECIFIC_TAGS.values():
    __global_tag_listing += tag.keys()


def AbiIterator(handle, alphabet=None, trim=False):
    """Return an iterator for the Abi file format."""
    # raise exception is alphabet is not dna
    if alphabet is not None:
        if isinstance(Alphabet._get_base_alphabet(alphabet),
                      Alphabet.ProteinAlphabet):
            raise ValueError(
                "Invalid alphabet, ABI files do not hold proteins.")
        if isinstance(Alphabet._get_base_alphabet(alphabet),
                      Alphabet.RNAAlphabet):
            raise ValueError("Invalid alphabet, ABI files do not hold RNA.")

    # raise exception if handle mode is not 'rb'
    if hasattr(handle, 'mode'):
        if set('rb') != set(handle.mode.lower()):
            raise ValueError("ABI files has to be opened in 'rb' mode.")

    # check if input file is a valid Abi file
    handle.seek(0)
    marker = handle.read(4)
    if not marker:
        # handle empty file gracefully
        return
    if marker != b"ABIF":
        raise IOError('File should start ABIF, not %r' % marker)

    # dirty hack for handling time information
    times = {'RUND1': '', 'RUND2': '', 'RUNT1': '', 'RUNT2': '', }

    # initialize annotations
    annot = dict(zip(_EXTRACT.values(), [None] * len(_EXTRACT)))

    # parse header and extract data from directories
    header = struct.unpack(_HEADFMT,
                           handle.read(struct.calcsize(_HEADFMT)))

    # Set default sample ID value, which we expect to be present in most cases
    # in the SMPL1 tag, but may be missing.
    sample_id = '<unknown id>'

    raw = dict()
    for tag_name, tag_number, tag_data in _abi_parse_header(header, handle):
        key = tag_name + str(tag_number)

        raw[key] = tag_data

        # PBAS2 is base-called sequence, only available in 3530
        if key == 'PBAS2':
            seq = tag_data
            ambigs = 'KYWMRS'
            if alphabet is None:
                if set(seq).intersection(ambigs):
                    alphabet = ambiguous_dna
                else:
                    alphabet = unambiguous_dna
        # PCON2 is quality values of base-called sequence
        elif key == 'PCON2':
            qual = [ord(val) for val in tag_data]
        # SMPL1 is sample id entered before sequencing run
        elif key == 'SMPL1':
            sample_id = tag_data
        elif key in times:
            times[key] = tag_data
        else:
            if key in _EXTRACT:
                annot[_EXTRACT[key]] = tag_data

    # set time annotations
    annot['run_start'] = '%s %s' % (times['RUND1'], times['RUNT1'])
    annot['run_finish'] = '%s %s' % (times['RUND2'], times['RUNT2'])

    # raw data (for advanced end users benefit)
    annot['abif_raw'] = raw

    # fsa check
    is_fsa_file = all([tn not in raw for tn in ('PBAS1', 'PBAS2')])

    if is_fsa_file:
        try:
            file_name = basename(handle.name).replace('.fsa', '')
        except AttributeError:
            file_name = ""
        sample_id = raw.get('LIMS1') or sample_id
        description = raw.get('CTID1', '<unknown description>')
        record = SeqRecord(Seq(''),
                           id=sample_id,
                           name=file_name,
                           description=description,
                           annotations=annot)

    else:
        # use the file name as SeqRecord.name if available
        try:
            file_name = basename(handle.name).replace('.ab1', '')
        except AttributeError:
            file_name = ""
        record = SeqRecord(Seq(seq, alphabet),
                           id=sample_id, name=file_name,
                           description='',
                           annotations=annot,
                           letter_annotations={'phred_quality': qual})

    if not trim or is_fsa_file:
        yield record
    else:
        yield _abi_trim(record)


def _AbiTrimIterator(handle):
    """Return an iterator for the Abi file format that yields trimmed SeqRecord objects (PRIVATE)."""
    return AbiIterator(handle, trim=True)


def _abi_parse_header(header, handle):
    """Return directory contents (PRIVATE)."""
    # header structure (after ABIF marker):
    # file version, tag name, tag number,
    # element type code, element size, number of elements
    # data size, data offset, handle (not file handle)
    head_elem_size = header[4]
    head_elem_num = header[5]
    head_offset = header[7]
    index = 0

    while index < head_elem_num:
        start = head_offset + index * head_elem_size
        # add directory offset to tuple
        # to handle directories with data size <= 4 bytes
        handle.seek(start)
        dir_entry = struct.unpack(_DIRFMT,
                                  handle.read(struct.calcsize(_DIRFMT))) + (start,)
        index += 1
        # only parse desired dirs
        key = _bytes_to_string(dir_entry[0])
        key += str(dir_entry[1])

        tag_name = _bytes_to_string(dir_entry[0])
        tag_number = dir_entry[1]
        elem_code = dir_entry[2]
        elem_num = dir_entry[4]
        data_size = dir_entry[5]
        data_offset = dir_entry[6]
        tag_offset = dir_entry[8]
        # if data size <= 4 bytes, data is stored inside tag
        # so offset needs to be changed
        if data_size <= 4:
            data_offset = tag_offset + 20
        handle.seek(data_offset)
        data = handle.read(data_size)
        yield tag_name, tag_number, \
            _parse_tag_data(elem_code, elem_num, data)


def _abi_trim(seq_record):
    """Trims the sequence using Richard Mott's modified trimming algorithm (PRIVATE).

    Arguments:
        - seq_record - SeqRecord object to be trimmed.

    Trimmed bases are determined from their segment score, which is a
    cumulative sum of each base's score. Base scores are calculated from
    their quality values.

    More about the trimming algorithm:
    http://www.phrap.org/phredphrap/phred.html
    http://resources.qiagenbioinformatics.com/manuals/clcgenomicsworkbench/650/Quality_trimming.html
    """
    start = False   # flag for starting position of trimmed sequence
    segment = 20    # minimum sequence length
    trim_start = 0  # init start index
    cutoff = 0.05   # default cutoff value for calculating base score

    if len(seq_record) <= segment:
        return seq_record
    else:
        # calculate base score
        score_list = [cutoff - (10 ** (qual / -10.0)) for qual in
                      seq_record.letter_annotations['phred_quality']]

        # calculate cummulative score
        # if cummulative value < 0, set it to 0
        # first value is set to 0, because of the assumption that
        # the first base will always be trimmed out
        cummul_score = [0]
        for i in range(1, len(score_list)):
            score = cummul_score[-1] + score_list[i]
            if score < 0:
                cummul_score.append(0)
            else:
                cummul_score.append(score)
                if not start:
                    # trim_start = value when cummulative score is first > 0
                    trim_start = i
                    start = True

        # trim_finish = index of highest cummulative score,
        # marking the end of sequence segment with highest cummulative score
        trim_finish = cummul_score.index(max(cummul_score))

        return seq_record[trim_start:trim_finish]


def _parse_tag_data(elem_code, elem_num, raw_data):
    """Return single data value (PRIVATE).

    Arguments:
     - elem_code - What kind of data
     - elem_num - How many data points
     - raw_data - abi file object from which the tags would be unpacked

    """
    if elem_code in _BYTEFMT:
        # because '>1s' unpack differently from '>s'
        if elem_num == 1:
            num = ''
        else:
            num = str(elem_num)
        fmt = '>' + num + _BYTEFMT[elem_code]

        assert len(raw_data) == struct.calcsize(fmt)
        data = struct.unpack(fmt, raw_data)

        # no need to use tuple if len(data) == 1
        # also if data is date / time
        if elem_code not in [10, 11] and len(data) == 1:
            data = data[0]

        # account for different data types
        if elem_code == 2:
            return _bytes_to_string(data)
        elif elem_code == 10:
            return str(datetime.date(*data))
        elif elem_code == 11:
            return str(datetime.time(*data[:3]))
        elif elem_code == 13:
            return bool(data)
        elif elem_code == 18:
            return _bytes_to_string(data[1:])
        elif elem_code == 19:
            return _bytes_to_string(data[:-1])
        else:
            return data
    else:
        return None


if __name__ == '__main__':
    pass