/* File: dict.c * Author: Richard Durbin (rd@sanger.ac.uk) * Copyright (C) J Thierry-Mieg and R Durbin, 1995 * ------------------------------------------------------------------- * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * ------------------------------------------------------------------- * This file is part of the ACEDB genome database package, written by * Richard Durbin (MRC LMB, UK) rd@mrc-lmb.cam.ac.uk, and * Jean Thierry-Mieg (CRBM du CNRS, France) mieg@kaa.cnrs-mop.fr * * Description: * Exported functions: * HISTORY: * Last edited: Sep 16 23:43 1997 (rd) * Created: Tue Jan 17 17:33:44 1995 (rd) *------------------------------------------------------------------- */ /* $Id: dict.c,v 1.16 2002/02/22 18:00:20 srk Exp $ */ #include "dict.h" /************* standard utility from Jean *************/ static int hashString (char *cp, int n, BOOL isDiff) { register int i ; register unsigned int j, x = 0 ; register int rotate = isDiff ? 21 : 13 ; register int leftover = 8*sizeof(int) - rotate ; while (*cp) x = freeupper (*cp++) ^ (( x >> leftover) | (x << rotate)) ; /* compress down to n bits */ for (j = x, i = n ; i < sizeof(int) ; i += n) j ^= (x >> i) ; j &= (1 << n) - 1 ; if (isDiff) /* return odd number */ j |= 1 ; return j ; } #ifdef DAZ static int hashString (char *cp, int n, BOOL isDiff) { register int x; register int mask = (1 << n) - 1 ; x = 0; if (isDiff) { for (;*cp;*cp++) { x = ((x * 5) + *cp) & mask ; } x |= 1 ; /* return odd number */ } else for (;*cp;*cp++) { x = ((x * 3) + *cp) & mask ; } return x ; } #endif /******************************************************/ static void dictFinalise (void *x) ; DICT *dictHandleCreate (int size, STORE_HANDLE handle) { DICT *dict = (DICT*) halloc (sizeof (DICT), handle) ; blockSetFinalise (dict, dictFinalise) ; for (dict->dim = 6, dict->max = 64 ; dict->max < size ; ++dict->dim, dict->max *= 2) ; dict->table = arrayCreate (dict->max, int) ; array (dict->table, dict->max-1, int) = 0 ; /* set arrayMax */ dict->names = arrayCreate (dict->dim/4, int) ; array(dict->names, 0, int) = 0 ; /* IMPORTANT - dummy 0 entry */ dict->nameText = stackCreate (dict->dim) ; return dict ; } DICT *dictCreate (int size) { return dictHandleCreate (size, 0) ; } void uDictDestroy (DICT *dict) { messfree (dict) ; } static void dictFinalise (void *x) /* does the work */ { DICT *dict = (DICT*)x ; arrayDestroy (dict->table) ; arrayDestroy (dict->names) ; stackDestroy (dict->nameText) ; } DICT *dictCopy (DICT *dict) { DICT *new ; if (!dict) return 0 ; new = (DICT*) messalloc (sizeof (DICT)) ; new->table = arrayCopy (dict->table) ; new->names = arrayCopy (dict->names) ; new->nameText = stackCopy (dict->nameText, 0) ; new->dim = dict->dim ; new->max = dict->max ; return new ; } void dictClear (DICT *dict) { array (dict->table, dict->max-1, int) = 0 ; /* set arrayMax */ array(dict->names, 0, int) = 0 ; /* IMPORTANT - dummy 0 entry */ } /**********************************************/ BOOL dictFind (DICT *dict, char *s, int *ip) { register int i, h, dh = 0 ; if (!dict || !s) return FALSE ; h = hashString (s, dict->dim, FALSE) ; while (TRUE) { i = arr(dict->table, h, int) ; if (!i) /* empty slot, s is unknown */ { dict->newPos = h ; return FALSE ; } if (!strcmp (s, stackText(dict->nameText, arr(dict->names, i, int)))) { if (ip) *ip = i-1 ; return TRUE ; } if (!dh) dh = hashString (s, dict->dim, TRUE) ; h += dh ; if (h >= dict->max) h -= dict->max ; } } /*****************/ static void dictReHash (DICT *dict, int newDim) { int i ; if (newDim <= dict->dim) return ; dict->dim = newDim ; dict->max = 1 << newDim ; /* remake the table */ arrayDestroy (dict->table) ; dict->table = arrayCreate (dict->max, int) ; array (dict->table, dict->max-1, int) = 0 ; /* set arrayMax */ /* reinsert all the names */ for (i = 1 ; i < arrayMax(dict->names) ; ++i) { dictFind (dict, stackText(dict->nameText, arr(dict->names, i, int)), 0) ; /* will fail, but sets dict->newPos */ arr(dict->table, dict->newPos, int) = i ; } } /****************/ BOOL dictAdd (DICT *dict, char *s, int *ip) /* always fills ip, returns TRUE if added, FALSE if known */ { int i ; if (!dict || !s) return FALSE ; if (dictFind (dict, s, ip)) /* word already known */ return FALSE ; i = arrayMax(dict->names) ; arr (dict->table, dict->newPos, int) = i ; array (dict->names, i, int) = stackMark(dict->nameText) ; pushText (dict->nameText, s) ; if (arrayMax(dict->names) > 0.4 * dict->max) dictReHash (dict, dict->dim+1) ; if (ip) *ip = i-1 ; return TRUE ; } /********************** utilities ***********************/ char *dictName (DICT *dict, int i) { if (i < 0 || ++i >= arrayMax(dict->names)) messcrash ("Call to dictName() out of bounds: %d %d", i-1, dictMax(dict)) ; return stackText (dict->nameText, arr(dict->names, i, int)) ; } int dictMax (DICT *dict) { return arrayMax(dict->names) - 1 ; } /******************** end of file **********************/