"""Substitution matrices."""

import os
import string
import numpy

from Bio import File
from Bio import BiopythonExperimentalWarning
from Bio._py3k import raise_from

# These two can be removed once we drop python2:
import sys
import platform

import warnings
warnings.warn("Bio.Align.substitution_matrices is an experimental module "
              "which may still undergo significant changes. In particular, "
              "the location of this module may change, and the Array class "
              "defined in this module may be moved to other existing or new "
              "modules in Biopython.",
              BiopythonExperimentalWarning)


class Array(numpy.ndarray):
    """numpy array subclass indexed by integers and by letters."""

    def __new__(cls, alphabet=None, dims=None, data=None, dtype=float):
        """Create a new Array instance."""
        if isinstance(data, dict):
            if alphabet is not None:
                raise ValueError("alphabet should be None if data is a dict")
            if dims is not None:
                raise ValueError("dims should be None if data is a dict")
            alphabet = []
            for key in data:
                if isinstance(key, str):
                    if dims is None:
                        dims = 1
                    elif dims != 1:
                        raise ValueError("inconsistent dimensions in data")
                    alphabet.append(key)
                elif isinstance(key, tuple):
                    single_letters = True
                    if dims is None:
                        dims = len(key)
                    elif dims != len(key):
                        raise ValueError("inconsistent dimensions in data")
                    if dims == 1:
                        if not isinstance(key, str):
                            raise ValueError("expected string")
                        if len(key) > 1:
                            single_letters = False
                        alphabet.append(key)
                    elif dims == 2:
                        for letter in key:
                            if not isinstance(letter, str):
                                raise ValueError("expected string")
                            if len(letter) > 1:
                                single_letters = False
                            alphabet.append(letter)
                    else:
                        raise ValueError("data array should be 1- or 2- "
                                         "dimensional (found %d dimensions) "
                                         "in key" % dims)
            alphabet = sorted(set(alphabet))
            if single_letters:
                alphabet = "".join(alphabet)
            else:
                alphabet = tuple(alphabet)
            n = len(alphabet)
            if dims == 1:
                shape = (n, )
            elif dims == 2:
                shape = (n, n)
            else:  # dims is None
                raise ValueError("data is an empty dictionary")
            obj = super(Array, cls).__new__(cls, shape, dtype)
            if dims == 1:
                for i, key in enumerate(alphabet):
                    obj[i] = data.get(letter, 0.0)
            elif dims == 2:
                for i1, letter1 in enumerate(alphabet):
                    for i2, letter2 in enumerate(alphabet):
                        key = (letter1, letter2)
                        value = data.get(key, 0.0)
                        obj[i1, i2] = value
            obj._alphabet = alphabet
            return obj
        if alphabet is None:
            alphabet = string.ascii_uppercase
        elif not (isinstance(alphabet, str) or isinstance(alphabet, tuple)):
            raise ValueError("alphabet should be a string or a tuple")
        n = len(alphabet)
        if data is None:
            if dims is None:
                dims = 1
            elif dims not in (1, 2):
                raise ValueError("dims should be 1 or 2 (found %d)" % dims)
            shape = (n, ) * dims
        else:
            if dims is None:
                shape = data.shape
                dims = len(shape)
                if dims == 1:
                    pass
                elif dims == 2:
                    if shape[0] != shape[1]:
                        raise ValueError("data array is not square")
                else:
                    raise ValueError("data array should be 1- or 2- "
                                     "dimensional (found %d dimensions) "
                                     % dims)
            else:
                shape = (n, ) * dims
                if data.shape != shape:
                    raise ValueError("data shape has inconsistent shape "
                                     "(expected (%s), found (%s))"
                                     % (shape, data.shape))
        obj = super(Array, cls).__new__(cls, shape, dtype)
        if data is None:
            obj[:] = 0.0
        else:
            obj[:] = data
        obj._alphabet = alphabet
        return obj

    def __array_finalize__(self, obj):
        if obj is None:
            return
        self._alphabet = getattr(obj, "_alphabet", None)

    def _convert_key(self, key):
        if isinstance(key, tuple):
            indices = []
            for index in key:
                if isinstance(index, str):
                    try:
                        index = self._alphabet.index(index)
                    except ValueError:
                        raise_from(IndexError("'%s'" % index), None)
                indices.append(index)
            key = tuple(indices)
        elif isinstance(key, str):
            try:
                key = self._alphabet.index(key)
            except ValueError:
                raise_from(IndexError("'%s'" % key), None)
        return key

    def __getitem__(self, key):
        key = self._convert_key(key)
        value = numpy.ndarray.__getitem__(self, key)
        if value.ndim == 2:
            if self.ndim == 2:
                if value.shape != self.shape:
                    raise IndexError("Requesting truncated array")
            elif self.ndim == 1:
                length = self.shape[0]
                if value.shape[0] == length and value.shape[1] == 1:
                    pass
                elif value.shape[0] == 1 and value.shape[1] == length:
                    pass
                else:
                    raise IndexError("Requesting truncated array")
        elif value.ndim == 1:
            if value.shape[0] != self.shape[0]:
                value._alphabet = self.alphabet[key]
        return value.view(Array)

    def __setitem__(self, key, value):
        key = self._convert_key(key)
        numpy.ndarray.__setitem__(self, key, value)

    def __contains__(self, key):
        # Follow dict definition of __contains__
        return key in self.keys()

    def __array_prepare__(self, out_arr, context=None):
        # needed for numpy older than 1.13.0
        ufunc, inputs, i = context
        alphabet = self.alphabet
        for arg in inputs:
            if isinstance(arg, Array):
                if arg.alphabet != alphabet:
                    raise ValueError("alphabets are inconsistent")
        return numpy.ndarray.__array_prepare__(self, out_arr, context)

    def __array_wrap__(self, out_arr, context=None):
        if len(out_arr) == 1:
            return out_arr[0]
        return numpy.ndarray.__array_wrap__(self, out_arr, context)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        args = []
        alphabet = self._alphabet
        for arg in inputs:
            if isinstance(arg, Array):
                if arg.alphabet != alphabet:
                    raise ValueError("alphabets are inconsistent")
                args.append(arg.view(numpy.ndarray))
            else:
                args.append(arg)

        outputs = kwargs.pop("out", None)
        if outputs:
            out_args = []
            for arg in outputs:
                if isinstance(arg, Array):
                    if arg.alphabet != alphabet:
                        raise ValueError("alphabets are inconsistent")
                    out_args.append(arg.view(numpy.ndarray))
                else:
                    out_args.append(arg)
            kwargs["out"] = tuple(out_args)
        else:
            outputs = (None,) * ufunc.nout

        raw_results = super(Array, self).__array_ufunc__(ufunc, method,
                                                         *args, **kwargs)
        if raw_results is NotImplemented:
            return NotImplemented

        if method == "at":
            return

        if ufunc.nout == 1:
            raw_results = (raw_results,)

        results = []
        for raw_result, output in zip(raw_results, outputs):
            if raw_result.ndim == 0:
                result = raw_result
            elif output is None:
                result = numpy.asarray(raw_result).view(Array)
                result._alphabet = self._alphabet
            else:
                result = output
                result._alphabet = self._alphabet
            results.append(result)

        return results[0] if len(results) == 1 else results

    def transpose(self, axes=None):
        """Transpose the array."""
        other = numpy.ndarray.transpose(self, axes)
        other._alphabet = self._alphabet
        return other

    @property
    def alphabet(self):
        """Return the alphabet property."""
        return self._alphabet

    def copy(self):
        """Create and return a copy of the array."""
        other = Array(alphabet=self._alphabet, data=self)
        return other

    def get(self, key, value=None):
        """Return the value of the key if found; return value otherwise."""
        try:
            return self[key]
        except IndexError:
            return value

    def items(self):
        """Return an iterator  of (key, value) pairs in the array."""
        dims = len(self.shape)
        if dims == 1:
            for index, key in enumerate(self._alphabet):
                value = numpy.ndarray.__getitem__(self, index)
                yield key, value
        elif dims == 2:
            for i1, c1 in enumerate(self._alphabet):
                for i2, c2 in enumerate(self._alphabet):
                    key = (c1, c2)
                    value = numpy.ndarray.__getitem__(self, (i1, i2))
                    yield key, value
        else:
            raise RuntimeError("array has unexpected shape %s" % self.shape)

    def keys(self):
        """Return a tuple with the keys associated with the array."""
        dims = len(self.shape)
        alphabet = self._alphabet
        if dims == 1:
            return tuple(alphabet)
        elif dims == 2:
            return tuple((c1, c2) for c2 in alphabet for c1 in alphabet)
        else:
            raise RuntimeError("array has unexpected shape %s" % self.shape)

    def values(self):
        """Return a tuple with the values stored in the array."""
        dims = len(self.shape)
        alphabet = self._alphabet
        if dims == 1:
            return tuple(self)
        elif dims == 2:
            n1, n2 = self.shape
            return tuple(numpy.ndarray.__getitem__(self, (i1, i2)) for i2 in range(n2) for i1 in range(n1))
        else:
            raise RuntimeError("array has unexpected shape %s" % self.shape)

    def update(self, E=None, **F):
        """Update the array from dict/iterable E and F."""
        if E is not None:
            try:
                alphabet = E.keys()
            except AttributeError:
                for key, value in E:
                    self[key] = value
            else:
                for key in E:
                    self[key] = E[key]
        for key in F:
            self[key] = F[key]

    def _format_1D(self, fmt):
        _alphabet = self._alphabet
        n = len(_alphabet)
        words = [None for i in range(n)]
        lines = []
        try:
            header = self.header
        except AttributeError:
            pass
        else:
            for line in header:
                line = "#  %s\n" % line
                lines.append(line)
        maxwidth = 0
        for i, key in enumerate(_alphabet):
            value = self[key]
            word = fmt % value
            width = len(word)
            if width > maxwidth:
                maxwidth = width
            words[i] = word
        fmt2 = " %" + str(maxwidth) + "s"
        for letter, word in zip(_alphabet, words):
            word = fmt2 % word
            line = letter + word + "\n"
            lines.append(line)
        text = "".join(lines)
        return text

    def _format_2D(self, fmt):
        alphabet = self.alphabet
        n = len(alphabet)
        words = [[None for j in range(n)] for i in range(n)]
        lines = []
        try:
            header = self.header
        except AttributeError:
            pass
        else:
            for line in header:
                line = "#  %s\n" % line
                lines.append(line)
        width = max(len(c) for c in alphabet)
        line = " " * width
        for j, c2 in enumerate(alphabet):
            maxwidth = 0
            for i, c1 in enumerate(alphabet):
                key = (c1, c2)
                value = self[key]
                word = fmt % value
                width = len(word)
                if width > maxwidth:
                    maxwidth = width
                words[i][j] = word
            fmt2 = " %" + str(maxwidth) + "s"
            word = fmt2 % c2
            line += word
            for i, c1 in enumerate(alphabet):
                word = words[i][j]
                words[i][j] = fmt2 % word
        line = line.rstrip() + "\n"
        lines.append(line)
        for letter, row in zip(alphabet, words):
            line = letter + "".join(row) + "\n"
            lines.append(line)
        text = "".join(lines)
        return text

    def __format__(self, fmt):
        if fmt == "":
            if numpy.issubdtype(self.dtype, numpy.integer):
                fmt = "%i"
            else:
                fmt = "%.1f"
        n = len(self.shape)
        if n == 1:
            return self._format_1D(fmt)
        elif n == 2:
            return self._format_2D(fmt)
        else:
            raise RuntimeError("Array has unexpected rank %d" % n)

    def __str__(self):
        return self.__format__("")

    def __repr__(self):
        text = numpy.ndarray.__repr__(self)
        alphabet = self._alphabet
        if isinstance(alphabet, str):
            assert text.endswith(")")
            text = text[:-1] + ",\n         alphabet='%s')" % self._alphabet
        return text


if sys.version_info[0] < 3 and platform.python_implementation() == "PyPy":
    # For python2 on PyPy, subclassing from a numpy array, which supports the
    # buffer protocol, loses the Py_TPFLAGS_HAVE_NEWBUFFER flag on tp_flags on
    # the class type, although the subclass still supports the buffer protocol.
    # Adding this flag by hand here, as a temporary hack until we drop python2.
    from .. import _aligners
    _aligners.add_buffer_protocol_flag(Array)


def read(handle, dtype=float):
    """Parse the file and return an Array object."""
    header = []
    with File.as_handle(handle) as fp:
        for line in fp:
            if not line.startswith("#"):
                break
            header.append(line[1:].strip())
        row = line.split()
        rows = [row]
        for line in fp:
            row = line.split()
            rows.append(row)
    if len(rows[0]) == len(rows[1]) == 2:
        alphabet = [key for key, value in rows]
        for key in alphabet:
            if len(key) > 1:
                alphabet = tuple(alphabet)
                break
        else:
            alphabet = "".join(alphabet)
        matrix = Array(alphabet=alphabet, dims=1, dtype=dtype)
        matrix.update(rows)
    else:
        alphabet = rows.pop(0)
        for key in alphabet:
            if len(key) > 1:
                alphabet = tuple(alphabet)
                break
        else:
            alphabet = "".join(alphabet)
        matrix = Array(alphabet=alphabet, dims=2, dtype=dtype)
        for letter1, row in zip(alphabet, rows):
            assert letter1 == row.pop(0)
            for letter2, word in zip(alphabet, row):
                matrix[letter1, letter2] = float(word)
    matrix.header = header
    return matrix


def load(name=None):
    """Load and return a precalculated substitution matrix.

    >>> from Bio.Align import substitution_matrices
    >>> names = substitution_matrices.load()
    """
    path = os.path.realpath(__file__)
    directory = os.path.dirname(path)
    subdirectory = os.path.join(directory, "data")
    if name is None:
        filenames = os.listdir(subdirectory)
        return sorted(filenames)
    path = os.path.join(subdirectory, name)
    matrix = read(path)
    return matrix