#!/usr/bin/env python
#
#      Restriction Analysis Libraries.
#      Copyright (C) 2004. Frederic Sohm.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#

import re
import itertools
from Bio.Restriction import RanaConfig as RanaConf

"""
Usage:

    PrintFormat allows you to print the results of a restriction analysis
    in 3 different formats:
    list, number or map.

    the easiest way to use it is:
    
    >>> from Bio.Restriction.PrintFormat import PrintFormat
    >>> from Bio.Restriction.Restriction import AllEnzymes
    >>> from Bio import Entrez
    >>> from Bio import SeqIO
    >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322")
    >>> pBR322 = SeqIO.read(handle, "fasta")
    >>> handle.close()
    >>> dct = AllEnzymes.search(pBR322.seq)
    >>> new = PrintFormat()
    >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')

     my pBR322 analysis
     
    AasI       :  2169, 2582.
    AatII      :  4289.
    ...
            More enzymes.
    ...
    ZraI       :  4287.
    ZrmI       :  3847.
    
     no site:
     
    AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI    
    ...
            More enzymes.
    ...
    Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI     
    Zsp2I 

    >>> new.sequence = pBR322.seq
    >>> new.print_as("map")
    >>> new.print_that(dct)
    ...
    
    Some of the methods of PrintFormat are meant to be overridden by derived
    classes.
"""

class PrintFormat(object):
    """Format the results of a restriction analysis as printable text.

    Three layouts are available (select one with print_as):
        'list'      -> one line per enzyme, sorted by enzyme.
        'number'    -> enzymes grouped by their number of sites.
        'map'       -> the sequence with the sites drawn above it.

    The 'map' layout reads self.sequence.  This class never sets that
    attribute: the caller has to assign it before asking for a map.
    """

    # Layout parameters read from the RanaConfig module.
    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth    = RanaConf.NameWidth
    MaxSize      = RanaConf.MaxSize
    # PrefWidth is the largest multiple of NameWidth fitting in ConsoleWidth.
    Cmodulo      = ConsoleWidth%NameWidth
    PrefWidth    = ConsoleWidth - Cmodulo
    Indent       = RanaConf.Indent
    # Width threshold used when wrapping enzyme names and site positions.
    linesize     = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance"""
        pass

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> print the results as specified.

        Valid formats are:
            'list'      -> alphabetical order
            'number'    -> number of sites in the sequence
            'map'       -> a map representation of the sequence with the sites.

        Any other value selects the 'list' format.

        The choice is stored by rebinding self.make_format on the instance.
        If you want more flexibility override the virtual method make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        dct is a dictionary as returned by a RestrictionBatch.search().
        If dct is empty (or None), self.results is used instead; this class
        does not define that attribute, so presumably a derived class is
        expected to provide it.

        title is the title of the map.
        It must be a formatted string, i.e. you must include the line break.

        s1 is the title separating the list of enzymes that have sites from
        those without sites.
        s1 must be a formatted string as well.

        The output is produced by self.make_format, which defaults to the
        list format (see print_as)."""
        if not dct:
            dct = self.results
        ls, nc = [], []
        # Split the enzymes into those with sites and those without.
        for k, v in dct.items():
            if v:
                ls.append((k, v))
            else:
                nc.append(k)
        print(self.make_format(ls, title, nc, s1))

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.

        cut is a list of (enzyme, sites) tuples for the cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        # None replaces the former mutable [] defaults.
        if cut is None:
            cut = []
        if nc is None:
            nc = []
        return self._make_list(cut, title, nc, s1)

###### _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls,title, nc,s1) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls,title, nc,s1) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls,title, nc,s1) -> string.

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls,title, nc,s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes; when it is empty
        a default sentence is used.  If nc is empty, s1 is returned as is.

        ls and title are not used.
        """
        if not nc:
            return s1
        pieces = [s1 or '\n   Enzymes which do not cut the sequence.\n\n']
        row = ''
        # sorted() leaves the caller's list untouched.
        for key in sorted(nc):
            row = row + str(key).ljust(self.NameWidth)
            # Start a new row once the current one exceeds the line size.
            if len(row) > self.linesize:
                pieces.append(row)
                pieces.append('\n')
                row = ''
        pieces.append(row)
        pieces.append('\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.
        ...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are not used)."""
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...


        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are not used)."""
        if not ls:
            return title
        header = "\n\nenzymes which cut %i times :\n\n"
        # Stable sort on the number of sites.  A key function is used because
        # cmp-style comparison functions are not accepted by Python 3.
        by_count = sorted(ls, key=lambda item: len(item[1]))
        # NOTE: counting starts at 1, so the 'cut 1 times' heading is emitted
        # even when no enzyme cuts exactly once (kept for stable output).
        cur_len = 1
        new_sect = []
        for name, sites in by_count:
            count = len(sites)
            if count > cur_len:
                # Flush the finished group before starting the next one.
                title += header % cur_len
                title = self.__next_section(new_sect, title)
                new_sect, cur_len = [(name, sites)], count
                continue
            new_sect.append((name, sites))
        title += header % cur_len
        return self.__next_section(new_sect, title)

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...


        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are not used).

        self.sequence must be set by the caller; it has to support len(),
        str() and complement().  The sequence is drawn 60 bases per line.
        """
        if not ls:
            return title
        width = 60
        pipe = '|'
        # Map each cut position to the names of the enzymes cutting there.
        enzymemap = {}
        for enzyme, cut in ls:
            for c in cut:
                enzymemap.setdefault(c, []).append(str(enzyme))
        remaining = sorted(enzymemap)
        length = len(self.sequence)
        # cutloc maps the 0-based start of each line to the positions drawn
        # above it.  Every line gets its own bucket, including lines that are
        # not followed by any further site.
        cutloc = {}
        x = 0
        for x in range(width, length, width):
            cutloc[x - width] = [key for key in remaining if key <= x]
            remaining = [key for key in remaining if key > x]
        # Whatever is left belongs to the last (possibly partial) line.
        cutloc[x] = remaining
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())
        parts = [title or '']
        base = 0
        # Full lines.
        for base in range(width, length, width):
            counter = base - width
            parts.append(self._site_marker_lines(cutloc[counter], enzymemap,
                                                 base, width - 1, width))
            ruler = ''.join((str(counter + 1).ljust(15), ' ' * 30,
                             str(base).rjust(15), '\n\n'))
            parts.append('\n'.join((sequence[counter:base], pipe * width,
                                    revsequence[counter:base], ruler)))
        # Last line.  A site on the final base is drawn in the column of that
        # base within this line, hence the offset by base.
        tail = length - base
        parts.append(self._site_marker_lines(cutloc[base], enzymemap,
                                             length, tail - 1, width))
        parts.append(''.join((sequence[base:length], '\n',
                              pipe * tail, '\n',
                              revsequence[base:length], '\n',
                              str(base + 1).ljust(15), ' ' * (tail - 30),
                              str(length).rjust(15), '\n\n')))
        return ''.join(parts)

    def _site_marker_lines(self, keys, enzymemap, edge_key, edge_col, width=60):
        """PF._site_marker_lines(keys, enzymemap, edge_key, edge_col) -> string.

        Helper of _make_map_only: draw the annotation lines of one map line.

        keys is the sorted list of cut positions located on that line.
        enzymemap maps a cut position to the list of enzyme names.
        edge_key is the position of the last base of the line and edge_col
        the 0-based column in which its marker is drawn.
        width is the number of bases per line.

        Each position adds two lines: 'position name1 name2' and a line of
        '|' markers for the positions drawn so far on this line."""
        line = ' ' * width
        out = []
        for key in keys:
            names = ''.join([' ' + n for n in enzymemap[key]])
            if key == edge_key:
                # key % width can be 0 here, so the column is given explicitly.
                head = line[0:edge_col]
                out.append(''.join((head, str(key), names, '\n',
                                    head, '|', '\n')))
                break
            k = key % width
            head = line[0:(k - 1)]
            out.append(''.join((head, str(key), names, '\n')))
            # Keep the markers already placed so they show on later lines.
            line = ''.join((head, '|', line[k:]))
            out.append(line + '\n')
        return ''.join(out)

###### private method to do lists:

    def __next_section(self, ls, into):
        """FP.__next_section(ls, into) -> string.

        ls is a list of tuple (string, [int, int]).
        into is a string to which the formatted ls will be added.

        Format ls as a string of lines:
        The form is:

        enzyme1     :   position1.
        enzyme2     :   position2, position3.

        Lists of positions longer than the available width are wrapped and
        the continuation lines are indented.

        then add the formatted ls to into
        return into."""
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Matches at most linesize characters followed by ',' or '.'.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        pieces = [into]
        # sorted() leaves the caller's list untouched.
        for name, sites in sorted(ls):
            positions = ', '.join([str(site) for site in sites]) + '.'
            if len(positions) > linesize:
                #
                #   cut where appropriate and add the indentation
                #
                chunks = [m.group() for m in pat.finditer(positions)]
                positions = indentation.join(chunks)
            pieces.append(''.join((str(name).ljust(self.NameWidth), ' :  ',
                                   positions, '\n')))
        return ''.join(pieces)