# Copyright 1999 by Jeffrey Chang. All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Index text files.

This module provides a way to create indexes to text files.

Classes:
Index           Dictionary-like class used to store index information.

_ShelveIndex    An Index class based on the shelve module.
_InMemoryIndex  An in-memory Index class.

"""
import os
import array
import shelve
try:
    import cPickle as pickle  # Only available under Python 2
except ImportError:
    import pickle  # Python 3

import warnings
from Bio import BiopythonDeprecationWarning
warnings.warn("Bio.Index has been deprecated, and we intend to remove it"
              " in a future release of Biopython. If you would like to"
              " continue using Bio.Index, please contact the Biopython"
              " developers via the mailing list or GitHub.",
              BiopythonDeprecationWarning)


class _ShelveIndex(dict):
    """An index file wrapped around shelve.

    The shelf is opened by ``__init__`` and stored in ``self.data``; it is
    closed again by ``__del__``.

    NOTE(review): this class derives from ``dict`` but overrides none of the
    item-access methods, so ``index[key]`` reads and writes the built-in
    dict storage, not the shelf held in ``self.data``.  Confirm whether
    callers are expected to go through ``self.data`` for persistence.
    """

    # Without a good dbm module installed, this is pretty slow and
    # generates large files.  When generating an index on a FASTA-
    # formatted file with 82000 sequences (37Mb), the
    # index 'dat' file is 42Mb and 'dir' file is 8Mb.

    __version = 2
    __version_key = "__version"

    def __init__(self, indexname, truncate=None):
        """Open the shelf called ``indexname``, creating it when needed.

        Arguments:
         - indexname - base filename handed to ``shelve.open``.
         - truncate  - if true, delete any existing database files and
           start from a new, empty shelf.

        Raises IOError if an existing shelf carries no version entry or a
        version different from the one this class writes.
        """
        dict.__init__(self)
        if truncate:
            self._remove_shelf_files(indexname)
            self._create_shelf(indexname)
            return

        try:
            self.data = shelve.open(indexname, flag="r")
        except Exception:  # TODO: Which exception?
            # The concrete error type depends on the dbm backend in use, so
            # any failure to open is treated as "no database exists".
            self._create_shelf(indexname)
            return

        # Check to make sure the database is the correct version.
        version = self.data.get(self.__version_key)
        if version is None:
            raise IOError("Unrecognized index format")
        elif version != self.__version:
            raise IOError("Version %s doesn't match my version %s"
                          % (version, self.__version))

    @staticmethod
    def _remove_shelf_files(indexname):
        """Delete the dumbdbm files that may back ``indexname`` (PRIVATE)."""
        # In python 1.52 and before, dumbdbm (under shelve)
        # doesn't clear the old database.
        for suffix in (".dir", ".dat", ".bak"):
            path = indexname + suffix
            if os.path.exists(path):
                try:
                    os.unlink(path)
                except OSError:
                    # Best effort only: the shelf is re-created with
                    # flag="n" afterwards, which starts an empty database.
                    pass

    def _create_shelf(self, indexname):
        """Create a new, empty shelf and stamp it with the version (PRIVATE)."""
        self.data = shelve.open(indexname, flag="n")
        self.data[self.__version_key] = self.__version

    def __del__(self):
        """Close the shelf if ``__init__`` got far enough to open one."""
        if "data" in self.__dict__:
            self.data.close()


class _InMemoryIndex(dict):
    """Creates an in-memory index file (PRIVATE).

    The index lives in memory as a dict.  It is read from ``indexname`` on
    construction (when that file exists) and written back by ``__del__`` if
    any mutating method was called in between.
    """

    # File Format:
    # version
    # key value
    # [...]

    __version = 3
    __version_key = "__version"

    def __init__(self, indexname, truncate=None):
        """Load the index stored in ``indexname``, if there is one.

        Arguments:
         - indexname - path of the index file.
         - truncate  - if true, delete an existing index file and start
           with an empty index.

        Raises IOError if the version recorded in the file differs from the
        version this class writes.
        """
        self._indexname = indexname
        super(_InMemoryIndex, self).__init__()
        self.__changed = 0  # the index hasn't changed

        # Remove the database if truncate is true.
        if truncate and os.path.exists(indexname):
            os.unlink(indexname)
            self.__changed = 1

        # Load the database if it exists
        if os.path.exists(indexname):
            with open(indexname) as handle:
                version = self._toobj(handle.readline().rstrip())
                if version != self.__version:
                    raise IOError("Version %s doesn't match my version %s"
                                  % (version, self.__version))
                # Store through the base class so that loading does not flag
                # the index as changed.  Otherwise a parse error half way
                # through the file would make __del__ overwrite the file
                # with only the entries read so far.
                store = super(_InMemoryIndex, self).__setitem__
                for line in handle:
                    key, value = line.split()
                    store(self._toobj(key), self._toobj(value))
            self.__changed = 0

    def update(self, dict):
        """Merge ``dict`` into the index and mark the index as changed."""
        self.__changed = 1
        # The parameter shadows the builtin ``dict`` type, so the base class
        # has to be reached through super() rather than by name.
        super(_InMemoryIndex, self).update(dict)

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` and mark the index as changed."""
        self.__changed = 1
        super(_InMemoryIndex, self).__setitem__(key, value)

    def __delitem__(self, key):
        """Remove ``key`` and mark the index as changed."""
        self.__changed = 1
        super(_InMemoryIndex, self).__delitem__(key)

    def clear(self):
        """Remove every entry and mark the index as changed."""
        self.__changed = 1
        super(_InMemoryIndex, self).clear()

    def __del__(self):
        """Write the index back to its file if it has been modified."""
        if self.__changed:
            with open(self._indexname, "w") as handle:
                handle.write("%s\n" % self._tostr(self.__version))
                for key, value in self.items():
                    handle.write("%s %s\n"
                                 % (self._tostr(key), self._tostr(value)))

    def _tostr(self, obj):
        """Encode ``obj`` as a whitespace-free string (PRIVATE).

        Returns the pickled object as comma-separated signed byte values.
        """
        # I need a representation of the object that's saveable to
        # a file that uses whitespace as delimiters.  Thus, I'm
        # going to pickle the object, and then convert each byte of
        # the pickle to its signed integer value.  Then, I'm going to
        # convert the integers into strings and join them together with
        # commas.  It's not the most efficient way of storing things, but
        # it's relatively fast.
        s = pickle.dumps(obj)
        intlist = array.array("b", s)
        return ",".join(str(i) for i in intlist)

    def _toobj(self, str):
        """Decode a string produced by ``_tostr`` back into an object (PRIVATE).

        Raises ValueError if ``str`` is not a comma-separated list of
        integers.
        """
        intlist = array.array("b", [int(i) for i in str.split(",")])
        # Turn the signed byte values straight back into the pickled bytes.
        # chr() cannot be used for this: it rejects the negative values a
        # signed array holds and yields text, which Python 3's pickle.loads
        # refuses.  array.tostring() is the pre-3.2 spelling of tobytes().
        to_bytes = getattr(intlist, "tobytes", None) or intlist.tostring
        # SECURITY: unpickling can execute arbitrary code, so only open
        # index files that come from a trusted source.
        return pickle.loads(to_bytes())


Index = _InMemoryIndex