# Copyright 1999 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Index.py

This module provides a way to create indexes to text files.

Classes:
Index     Dictionary-like class used to store index information.

_ShelveIndex    An Index class based on the shelve module.
_InMemoryIndex  An in-memory Index class.

"""
import os
import array
import cPickle
import shelve

class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    """
    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the 
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        dict.__init__(self)
        try:
            if truncate:
                # In python 1.52 and before, dumbdbm (under shelve)
                # doesn't clear the old database.
                files = [indexname + '.dir',
                         indexname + '.dat',
                         indexname + '.bak'
                         ]
                for file in files:
                    if os.path.exists(file):
                        os.unlink(file)
                raise Exception("open a new shelf")
            self.data = shelve.open(indexname, flag='r')
        except:
            # No database exists.
            self.data = shelve.open(indexname, flag='n')
            self.data[self.__version_key] = self.__version
        else:
            # Check to make sure the database is the correct version.
            version = self.data.get(self.__version_key, None)
            if version is None:
                raise IOError("Unrecognized index format")
            elif version != self.__version:
                raise IOError("Version %s doesn't match my version %s" \
                              % (version, self.__version))
            
    def __del__(self):
        if self.__dict__.has_key('data'):
            self.data.close()

class _InMemoryIndex(dict):
    """This creates an in-memory index file.

    """
    # File Format:
    # version
    # key value
    # [...]
    
    __version = 3
    __version_key = '__version'

    def __init__(self, indexname, truncate=None):
        self._indexname = indexname
        dict.__init__(self)
        self.__changed = 0     # the index hasn't changed
        
        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            handle = open(indexname)
            version = self._toobj(handle.readline().rstrip())
            if version != self.__version:
                raise IOError("Version %s doesn't match my version %s" \
                              % (version, self.__version))
            for line in handle:
                key, value = line.split()
                key, value = self._toobj(key), self._toobj(value)
                self[key] = value
            self.__changed = 0

    def update(self, dict):
        self.__changed = 1
        dict.update(self, dict)
    def __setitem__(self, key, value):
        self.__changed = 1
        dict.__setitem__(self, key, value)
    def __delitem__(self, key):
        self.__changed = 1
        dict.__delitem__(self, key)
    def clear(self):
        self.__changed = 1
        dict.clear(self)
            
    def __del__(self):
        if self.__changed:
            handle = open(self._indexname, 'w')
            handle.write("%s\n" % self._tostr(self.__version))
            for key, value in self.items():
                handle.write("%s %s\n" %
                             (self._tostr(key), self._tostr(value)))
            handle.close()

    def _tostr(self, obj):
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each character of
        # the string to its ASCII integer value.  Then, I'm going to convert
        # the integers into strings and join them together with commas. 
        # It's not the most efficient way of storing things, but it's
        # relatively fast.
        s = cPickle.dumps(obj)
        intlist = array.array('b', s)
        strlist = map(str, intlist)
        return ','.join(strlist)

    def _toobj(self, str):
        intlist = map(int, str.split(','))
        intlist = array.array('b', intlist)
        strlist = map(chr, intlist)
        return cPickle.loads(''.join(strlist))

# The public Index name is bound to the in-memory implementation.
Index = _InMemoryIndex