#!/usr/bin/env python
#
# Copyright 2002 by Michael Hoffman.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Bio.GFF.easy: some functions to ease the use of Biopython (DEPRECATED)

This is part of the "old" Bio.GFF module by Michael Hoffman, which offered
access to a MySQL database holding GFF data loaded by BioPerl. This code has
now been deprecated, and will probably be removed in order to free the Bio.GFF
namespace for a new GFF parser in Biopython (including GFF3 support).

Some of the more useful ideas of Bio.GFF.easy may be reworked for Bio.GenBank,
using the standard SeqFeature objects used elsewhere in Biopython.
"""

import copy
import re
import sys

import Bio
from Bio import GenBank
from Bio.Data import IUPACData
from Bio.Seq import Seq

from Bio import SeqIO
from Bio import SeqUtils

from . import GenericTools

class FeatureDict(dict):
    """ JH:  accessing feature.qualifiers as a list is stupid.  Here's a dict that does it"""
    def __init__(self, feature_list, default=None):
        dict.__init__(self)
        self.default = default
        key_re = re.compile(r'/(\S+)=')

        for i in feature_list:
            key = key_re.match(i.key).group(1)
            val = i.value.replace('"','')
            self[key] = val
    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.default

class Location(GenericTools.VerboseList):
    """
    this is really best interfaced through LocationFromString
    fuzzy: < or >
    join: {0 = no join, 1 = join, 2 = order}
    
    >>> location = Location([Location([339]), Location([564])]) # zero-based
    >>> location
    Location(Location(339), Location(564))
    >>> print location # one-based
    340..565
    >>> print location.five_prime()
    340
    >>> location_rev = Location([Location([339]), Location([564])], 1)
    >>> print location_rev
    complement(340..565)
    >>> print location_rev.five_prime()
    565
    """
    def __init__(self, the_list, complement=0, seqname=None):
        self.complement = complement
        self.join = 0
        self.fuzzy = None
        self.seqname = seqname
        list.__init__(self, the_list)

    def _joinstr(self):
        if self.join == 1:
            label = 'join'
        elif self.join == 2:
            label = 'order'
        return "%s(%s)" % (label, ",".join(map(str, self)))
    
    def __str__(self):
        if self.seqname:
            format = "%s:%%s" % self.seqname
        else:
            format = "%s"

        if self.complement:
            format = format % "complement(%s)"

        if self.join:
            return format % self._joinstr()
            
        elif isinstance(self[0], list):
            return format % "%s..%s" % (str(self[0]), str(self[1]))
        else:
            if self.fuzzy:
                format = format % self.fuzzy + "%s"
            return format % str(self[0] + 1)

    def __repr__(self):
        return "Location(%s)" % ", ".join(map(repr, self))

    direction2index = {1: 0, -1: -1}
    def direction_and_index(self, direction):
        """
        1: 5'
        -1: 3'

        >>> loc1 = LocationFromString("join(1..3,complement(5..6))")
        >>> loc1.direction_and_index(1)
        (1, 0)
        >>> loc1.direction_and_index(-1)
        (-1, -1)
        >>> loc1.reverse()
        >>> print loc1
        complement(join(1..3,complement(5..6)))
        >>> loc1.direction_and_index(1)
        (-1, -1)
        """
        if self.complement:
            direction = direction * -1
        index = self.direction2index[direction]
        return direction, index
    
    def findside(self, direction):
        """
        >>> loc = LocationFromString("complement(join(1..5,complement(6..10)))")
        >>> loc.findside(1)
        Location(5)
        >>> loc.findside(-1)
        Location(0)
        """
        direction, index = self.direction_and_index(direction)
        if self.join or isinstance(self[0], list):
            return self[index].findside(direction)
        else:
            return self

    def findseqname_3prime(self):
        """
        >>> loc = LocationFromString("complement(join(MOOCOW:1..5,SEQ:complement(6..10)))")
        >>> loc.findseqname_3prime()
        'MOOCOW'
        """
        return self.findseqname(-1)

    def findseqname(self, direction=1): # find 5' seqname
        """
        >>> loc = LocationFromString("complement(join(MOOCOW:1..5,SEQ:complement(6..10)))")
        >>> loc.findseqname()
        'SEQ'
        >>> loc.findseqname(-1)
        'MOOCOW'
        """
        direction, index = self.direction_and_index(direction)
        if self.seqname:
            return self.seqname
        elif self.join:
            return self[index].findseqname(direction)
        else:
            raise AttributeError('no sequence name')

    def five_prime(self):
        return self.findside(1)
    def three_prime(self):
        return self.findside(-1)

    def length(self):
        """
        WARNING: doesn't deal with joins!!!!
        """
        return self.end()-self.start()

    def intersection(self, other):
        """
        WARNING: doesn't deal with joins!!!!

        >>> location1 = LocationFromString("1..50")
        >>> location2 = LocationFromString("25..200")
        >>> print location1.intersection(location2)
        25..50
        >>> print location1.intersection(location2)
        25..50
        """
        if self.start() >= other.start():
            start = self.start()
        else:
            start = other.start()
        if self.end() <= other.end():
            end = self.end()
        else:
            end = other.end()
        return Location([Location([start]), Location([end])])

    def start(self):
        # zero-based
        if self.complement:
            return self.three_prime()[0]
        else:
            return self.five_prime()[0]

    def end(self):
        # zero-based
        if self.complement:
            return self.five_prime()[0]
        else:
            return self.three_prime()[0]

    def three_prime_range(self, window):
        three_prime_loc = self.three_prime()
        if self.complement:
            return Location([three_prime_loc-window, three_prime_loc], complement=1)
        else:
            return Location([three_prime_loc, three_prime_loc+window])

    def sublocation(self, sub_location):
        """
        >>> fwd_location = LocationFromString('X:5830132..5831528')
        >>> print fwd_location.sublocation(LocationFromString('1..101'))
        X:5830132..5830232
        >>> print fwd_location.sublocation(LocationFromString('1267..1286'))
        X:5831398..5831417
        >>> rev_location = LocationFromString('I:complement(8415686..8416216)')
        >>> print rev_location.sublocation(LocationFromString('1..101'))
        I:complement(8416116..8416216)
        >>> print rev_location.sublocation(LocationFromString('100..200'))
        I:complement(8416017..8416117)
        """
        
        absolute_location = copy.deepcopy(self)
        for i in range(2):
            absolute_location[i] = self.five_prime().add(sub_location[i], self.complement)
        if absolute_location.complement:
            list.reverse(absolute_location)
        return absolute_location

    def __add__(self, addend):
        return self.add(addend)

    def add(self, addend, complement=0):
        self_copy = copy.deepcopy(self)
        if isinstance(addend, Location):
            addend = addend[0]
        if complement:
            addend *= -1
        self_copy[0] += addend
        return self_copy

    def __sub__(self, subtrahend):
        return self + -subtrahend

    def reverse(self):
        self.complement = [1, 0][self.complement]

    def reorient(self):
        """
        >>> loc1 = LocationFromString("join(I:complement(1..9000),I:complement(9001..10000))")
        >>> loc1.reorient()
        >>> print loc1
        complement(join(I:1..9000,I:9001..10000))
        >>> loc2 = LocationFromString("join(I:complement(1..9000),I:9001..10000)")
        >>> loc2.reorient()
        >>> print loc2
        join(I:complement(1..9000),I:9001..10000)
        """
        if self.join:
            if len([x for x in self if x.complement]) == len(self):
                self.reverse()
                for segment in self:
                    segment.reverse()

    def bounding(self):
        """
        works for single level non-complex joins

        >>> LOC = LocationFromString
        >>> l1 = LOC("join(alpha:1..30,alpha:50..70)")
        >>> print l1.bounding()
        join(alpha:1..70)
        >>> l2 = LOC("join(alpha:1..30,alpha:complement(50..70))")
        >>> print l2.bounding()
        join(alpha:1..30,alpha:complement(50..70))
        >>> l3 = LOC("join(alpha:1..30,alpha:complement(50..70),beta:6..20,alpha:25..45)")
        >>> print l3.bounding()
        join(alpha:1..45,alpha:complement(50..70),beta:6..20)

        """
        if not self.join:
            return

        seqdict = {}
        seqkeys = []
        for subloc in self:
            assert subloc.seqname
            assert not subloc.join
            try:
                seqdict[_hashname(subloc)].append(subloc)
            except KeyError:
                key = _hashname(subloc)
                seqdict[key] = [subloc]
                seqkeys.append(key)

        res = LocationJoin()
        for key in seqkeys:
            locations = seqdict[key]
            coords = []
            for subloc in locations:
                coords.append(subloc.start())
                coords.append(subloc.end())
            res.append(LocationFromCoords(min(coords), max(coords), locations[0].complement, locations[0].seqname))
        return res

def _hashname(location):
    return str(location.complement) + location.seqname

class LocationJoin(Location):
    """
    >>> join = LocationJoin([LocationFromCoords(339, 564, 1), LocationFromString("complement(100..339)")])
    >>> appendloc = LocationFromString("order(complement(66..99),complement(5..55))")
    >>> join.append(appendloc)
    >>> print join
    join(complement(340..565),complement(100..339),order(complement(66..99),complement(5..55)))
    >>> join2 = LocationJoin()
    >>> join2.append(LocationFromString("complement(66..99)"))
    >>> join2.append(LocationFromString("complement(5..55)"))
    >>> print join2
    join(complement(66..99),complement(5..55))
    """
    def __init__(self, the_list = [], complement=0, seqname=None):
        self.complement = complement
        self.join = 1
        self.fuzzy = None
        self.seqname = seqname
        list.__init__(self, the_list)

class LocationFromCoords(Location):
    """
    >>> print LocationFromCoords(339, 564)
    340..565
    >>> print LocationFromCoords(339, 564, seqname="I")
    I:340..565
    >>> print LocationFromCoords(999, 3234, "-", seqname="NC_343434")
    NC_343434:complement(1000..3235)
    """
    def __init__(self, start, end, strand=0, seqname=None):
        if strand == "+":
            strand = 0
        elif strand == "-":
            strand = 1
        Location.__init__(self, [Location([start]), Location([end])], strand, seqname)

# see http://www.ncbi.nlm.nih.gov/collab/FT/index.html#backus-naur
# for how this should actually be implemented
re_complement = re.compile(r"^complement\((.*)\)$")
re_seqname = re.compile(r"^(?!join|order|complement)([^\:]+?):(.*)$") # not every character is allowed by spec
re_join = re.compile(r"^(join|order)\((.*)\)$")
re_dotdot = re.compile(r"^([><]*\d+)\.\.([><]*\d+)$")
re_fuzzy = re.compile(r"^([><])(\d+)")
class LocationFromString(Location):
    """
    >>> # here are some tests from http://www.ncbi.nlm.nih.gov/collab/FT/index.html#location
    >>> print LocationFromString("467")
    467
    >>> print LocationFromString("340..565")
    340..565
    >>> print LocationFromString("<345..500")
    <345..500
    >>> print LocationFromString("<1..888")
    <1..888
    >>> # (102.110) and 123^124 syntax unimplemented
    >>> print LocationFromString("join(12..78,134..202)")
    join(12..78,134..202)
    >>> print LocationFromString("complement(join(2691..4571,4918..5163))")
    complement(join(2691..4571,4918..5163))
    >>> print LocationFromString("join(complement(4918..5163),complement(2691..4571))")
    join(complement(4918..5163),complement(2691..4571))
    >>> print LocationFromString("order(complement(4918..5163),complement(2691..4571))")
    order(complement(4918..5163),complement(2691..4571))
    >>> print LocationFromString("NC_001802x.fna:73..78")
    NC_001802x.fna:73..78
    >>> print LocationFromString("J00194:100..202")
    J00194:100..202

    >>> print LocationFromString("join(117505..118584,1..609)")
    join(117505..118584,1..609)
    >>> print LocationFromString("join(test3:complement(4..6),test3:complement(1..3))")
    join(test3:complement(4..6),test3:complement(1..3))
    >>> print LocationFromString("test3:join(test1:complement(1..3),4..6)")
    test3:join(test1:complement(1..3),4..6)
    """
    def __init__(self, location_str):
        match_seqname = re_seqname.match(location_str)
        if match_seqname:
            self.seqname = match_seqname.group(1)
            location_str = match_seqname.group(2)
        else:
            self.seqname = None
        match_complement = re_complement.match(location_str)
        if match_complement:
            self.complement = 1
            location_str = match_complement.group(1)
        else:
            self.complement = 0
        match_join = re_join.match(location_str)
        if match_join:
            self.join = {'join':1, 'order':2}[match_join.group(1)]
            list.__init__(self, [LocationFromString(x) for x in match_join.group(2).split(",")])
        else:
            self.join = 0
            match_dotdot = re_dotdot.match(location_str)
            if match_dotdot:
                list.__init__(self, [LocationFromString(match_dotdot.group(x)) for x in (1, 2)])
            else:
                match_fuzzy = re_fuzzy.match(location_str)
                if match_fuzzy:
                    self.fuzzy = match_fuzzy.group(1)
                    location_str = match_fuzzy.group(2)
                else:
                    self.fuzzy = None
                    
                list.__init__(self, [int(location_str)-1]) # zero based, nip it in the bud

def open_file(filename):
    if filename:
        return open(filename)
    else:
        return sys.stdin

def fasta_single(filename=None, string=None):
    """
    >>> record = fasta_single(string='''
    ... >gi|9629360|ref|NP_057850.1| Gag [Human immunodeficiency virus type 1]
    ... MGARASVLSGGELDRWEKIRLRPGGKKKYKLKHIVWASRELERFAVNPGLLETSEGCRQILGQLQPSLQT
    ... GSEELRSLYNTVATLYCVHQRIEIKDTKEALDKIEEEQNKSKKKAQQAAADTGHSNQVSQNYPIVQNIQG
    ... QMVHQAISPRTLNAWVKVVEEKAFSPEVIPMFSALSEGATPQDLNTMLNTVGGHQAAMQMLKETINEEAA
    ... EWDRVHPVHAGPIAPGQMREPRGSDIAGTTSTLQEQIGWMTNNPPIPVGEIYKRWIILGLNKIVRMYSPT
    ... SILDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTAC
    ... QGVGGPGHKARVLAEAMSQVTNSATIMMQRGNFRNQRKIVKCFNCGKEGHTARNCRAPRKKGCWKCGKEG
    ... HQMKDCTERQANFLGKIWPSYKGRPGNFLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLR
    ... SLFGNDPSSQ
    ... ''')
    >>> record.id
    'gi|9629360|ref|NP_057850.1|'
    >>> record.description
    'gi|9629360|ref|NP_057850.1| Gag [Human immunodeficiency virus type 1]'
    >>> record.seq[0:5]
    Seq('MGARA', SingleLetterAlphabet())
    """
    #Returns the first record in a fasta file as a SeqRecord,
    #or None if there are no records in the file.
    if string:
        import io
        handle = io.StringIO(string)
    else:
        handle = open_file(filename)
    try:
        record = next(SeqIO.parse(handle, format="fasta"))
    except StopIteration:
        record = None
    return record

def fasta_multi(filename=None):
    #Simple way is just:
    #return SeqIO.parse(open_file(filename), format="fasta")
    #However, for backwards compatibility make sure we raise
    #the StopIteration exception rather than returning None.
    reader = SeqIO.parse(open_file(filename), format="fasta")
    while True:
        record = next(reader)
        if record is None:
            raise StopIteration
        else:
            yield record

def fasta_readrecords(filename=None):
    """
    >>> records = fasta_readrecords('GFF/multi.fna')
    >>> records[0].id
    'test1'
    >>> records[2].seq
    Seq('AAACACAC', SingleLetterAlphabet())
    """
    return list(SeqIO.parse(open_file(filename), format="fasta"))

def fasta_write(filename, records):
    handle = open(filename, "w")
    SeqIO.write(records, handle, format="fasta")
    handle.close()

def genbank_single(filename):
    """
    >>> record = genbank_single("GFF/NC_001422.gbk")
    >>> record.taxonomy
    ['Viruses', 'ssDNA viruses', 'Microviridae', 'Microvirus']
    >>> cds = record.features[-4]
    >>> cds.key
    'CDS'
    >>> location = LocationFromString(cds.location)
    >>> print location
    2931..3917
    >>> subseq = record_subseq(record, location)
    >>> subseq[0:20]
    Seq('ATGTTTGGTGCTATTGCTGG', Alphabet())
    """
    return GenBank.RecordParser().parse(open(filename))

def record_subseq(record, location, *args, **keywds):
    """
    >>> from Bio.SeqRecord import SeqRecord    
    >>> record = SeqRecord(Seq("gagttttatcgcttccatga"),
    ...                    "ref|NC_001422",
    ...                    "Coliphage phiX174, complete genome",
    ...                    "bases 1-11")
    >>> record_subseq(record, LocationFromString("1..4")) # one-based
    Seq('GAGT', Alphabet())
    >>> record_subseq(record, LocationFromString("complement(1..4)")) # one-based
    Seq('ACTC', Alphabet())
    >>> record_subseq(record, LocationFromString("join(complement(1..4),1..4)")) # what an idea!
    Seq('ACTCGAGT', Alphabet())
    >>> loc = LocationFromString("complement(join(complement(5..7),1..4))")
    >>> print loc
    complement(join(complement(5..7),1..4))
    >>> record_subseq(record, loc)
    Seq('ACTCTTT', Alphabet())
    >>> print loc
    complement(join(complement(5..7),1..4))
    >>> loc.reverse()
    >>> record_subseq(record, loc)
    Seq('AAAGAGT', Alphabet())
    >>> record_subseq(record, loc, upper=1)
    Seq('AAAGAGT', Alphabet())
    """
    if location.join:
        subsequence_list = []
        if location.complement:
            location_copy = copy.copy(location)
            list.reverse(location_copy)
        else:
            location_copy = location
        for sublocation in location_copy:
            if location.complement:
                sublocation_copy = copy.copy(sublocation)
                sublocation_copy.reverse()
            else:
                sublocation_copy = sublocation
            subsequence_list.append(record_subseq(record, sublocation_copy, *args, **keywds).tostring())
        return Seq(''.join(subsequence_list), record_sequence(record).alphabet)
    else:
        return record_coords(record, location.start(), location.end()+1, location.complement, *args, **keywds)

def record_sequence(record):
    """
    returns the sequence of a record

    can be Bio.SeqRecord.SeqRecord or Bio.GenBank.Record.Record
    """
    if isinstance(record, Bio.SeqRecord.SeqRecord):
        return record.seq
    elif isinstance(record, Bio.GenBank.Record.Record):
        return Seq(record.sequence)
    else:
        raise TypeError('not Bio.SeqRecord.SeqRecord or Bio.GenBank.Record.Record')

def record_coords(record, start, end, strand=0, upper=0):
    """
    >>> from Bio.SeqRecord import SeqRecord
    >>> record = SeqRecord(Seq("gagttttatcgcttccatga"),
    ...                    "ref|NC_001422",
    ...                    "Coliphage phiX174, complete genome",
    ...                    "bases 1-11")
    >>> record_coords(record, 0, 4) # zero-based
    Seq('GAGT', Alphabet())
    >>> record_coords(record, 0, 4, "-") # zero-based
    Seq('ACTC', Alphabet())
    >>> record_coords(record, 0, 4, "-", upper=1) # zero-based
    Seq('ACTC', Alphabet())
    """

    subseq = record_sequence(record)[start:end]
    subseq_str = subseq.tostring()
    subseq_str = subseq_str.upper()
    subseq = Seq(subseq_str, subseq.alphabet)
    if strand == '-' or strand == 1:
        return subseq.reverse_complement()
    else:
        return subseq

def _test():
    """Run the Bio.GFF.easy module's doctests (PRIVATE).

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    """
    import doctest
    import os
    if os.path.isdir(os.path.join("..","..","Tests")):
        print("Runing doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(os.path.join("..","..","Tests"))
        doctest.testmod()
        os.chdir(cur_dir)
        del cur_dir
        print("Done")
    elif os.path.isdir(os.path.join("Tests")) :
        print("Runing doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(os.path.join("Tests"))
        doctest.testmod()
        os.chdir(cur_dir)
        del cur_dir
        print("Done")

if __name__ == "__main__":
    if __debug__:
        _test()