Leptonica 1.82.0
Image processing and image analysis suite
hashmap.c
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27/*
28 * \file hashmap.c
29 * <pre>
30 *
31 * Hashmap creation, destruction
32 * L_HASHMAP *l_hmapCreate()
33 * void l_hmapDestroy()
34 *
35 * Hashmap: Accessors and modifiers
36 * L_HASHITEM *l_hmapLookup()
37 * l_int32 l_hmapRehash()
38 *
39 * (1) See also hashmap.h for a brief description of the design.
40 * (2) In a typical use, a set of objects (in an array or associated
41 * with image pixels) is represented by a hashmap. A uint64 key is
42 * produced for each object. This integer is then hashed into an
43 * index in a hashtable, using the mod function with the table size
44 * which is a prime number. Each entry in the hash table is a list
45 * of hash items. In lookup, the appropriate list is traversed,
46 * looking for the object key found earlier.
47 * (3) Hash functions that map points, strings and float64 to uint64
48 * are given in utils1.c.
49 * (4) Use of the hashmap on points, strings and float64 data are
50 * given in ptafunc2.c, sarray2.c and dnafunc1.c.
51 * (5) Useful rule of thumb for hashing collisions:
52 * For a random hashing function (say, from strings to l_uint64),
53 * the probability of a collision increases as N^2 for N much
54 * less than 2^32. The quadratic behavior switches over to
55 * approaching 1.0 around 2^32, which is the square root of 2^64.
56 * So, for example, if you have 10^7 strings, the probability
57 * of a single collision using an l_uint64 key is on the order of
58 * (10^7/4x10^9)^2 ~ 10^-5.
59 * For a million strings, collisions are very rare (~10^-7 probability).
60 * </pre>
61 */
62
63#ifdef HAVE_CONFIG_H
64#include <config_auto.h>
65#endif /* HAVE_CONFIG_H */
66
67#include "allheaders.h"
68
69 /* Limit on the hashtable size */
70static const l_uint32 MaxTabsize = 50000000;
71 /* Default values for creating the hashmap. */
72static const l_int32 DefaultInitNItems = 2000;
73static const l_int32 DefaultMaxOcc = 2;
74
75/*--------------------------------------------------------------------------*
76 * Hashmap creation and destruction *
77 *--------------------------------------------------------------------------*/
101L_HASHMAP *
102l_hmapCreate(l_int32 ninit,
103 l_int32 maxocc)
104{
105l_uint32 size, tabsize;
106L_HASHMAP *hmap;
107
108 PROCNAME("l_hmapCreate");
109
110 ninit = L_MAX(ninit, DefaultInitNItems);
111 if (maxocc <= 0) maxocc = DefaultMaxOcc;
112 if (maxocc > 5) {
113 L_WARNING("maxocc = %d; non-optimal value. Set to default = %d\n",
114 procName, maxocc, DefaultMaxOcc);
115 maxocc = DefaultMaxOcc;
116 }
117 size = ninit / maxocc;
118 if (size > MaxTabsize) {
119 L_ERROR("ninit/maxocc = %d > MaxTabsize = %d\n", procName,
120 size, MaxTabsize);
121 return NULL;
122 }
123
124 hmap = (L_HASHMAP *)LEPT_CALLOC(1, sizeof(L_HASHMAP));
125 findNextLargerPrime(size, &tabsize); /* at least 101 */
126 if ((hmap->hashtab =
127 (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *))) == NULL) {
128 LEPT_FREE(hmap);
129 return (L_HASHMAP *)ERROR_PTR("hashtab not made", procName, NULL);
130 }
131
132 hmap->nitems = 0;
133 hmap->ntogo = ninit;
134 hmap->maxocc = maxocc;
135 hmap->tabsize = tabsize;
136 return hmap;
137}
138
139
146void
147l_hmapDestroy(L_HASHMAP **phmap)
148{
149l_int32 i;
150L_HASHITEM *hitem, *next;
151L_HASHMAP *hmap;
152
153 PROCNAME("l_hmapDestroy");
154
155 if (phmap == NULL) {
156 L_WARNING("ptr address is NULL!\n", procName);
157 return;
158 }
159
160 if ((hmap = *phmap) == NULL)
161 return;
162
163 for (i = 0; i < hmap->tabsize; i++) {
164 for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
165 next = hitem->next;
166 LEPT_FREE(hitem);
167 }
168 }
169 LEPT_FREE(hmap->hashtab);
170 LEPT_FREE(hmap);
171 *phmap = NULL;
172}
173
174
175/*--------------------------------------------------------------------------*
176 * Hashmap: Accessors and modifiers *
177 *--------------------------------------------------------------------------*/
203l_hmapLookup(L_HASHMAP *hmap,
204 l_uint64 key,
205 l_uint64 val,
206 l_int32 op)
207{
208l_uint32 index;
209L_HASHITEM *hlist, *hitem;
210
211 PROCNAME("l_hmapLookup");
212
213 if (!hmap)
214 return (L_HASHITEM *)ERROR_PTR("hmap not defined", procName, NULL);
215 if (op != L_HMAP_CHECK && op != L_HMAP_CREATE)
216 return (L_HASHITEM *)ERROR_PTR("invalid op", procName, NULL);
217
218 /* If found, return a ptr to the hitem (not a copy) */
219 index = key % hmap->tabsize; /* into hashtab */
220 hlist = hmap->hashtab[index]; /* head of the list */
221 for (hitem = hlist; hitem != NULL; hitem = hitem->next) {
222 if (key == hitem->key) {
223 if (op == L_HMAP_CREATE) hitem->count++;
224 return hitem;
225 }
226 }
227 if (op == L_HMAP_CHECK) return NULL;
228
229 /* Not found and %op == L_HMAP_CREATE.
230 * Make a new hitem and add to the head of the list */
231 hitem = (L_HASHITEM *)LEPT_CALLOC(1, sizeof(L_HASHITEM));
232 hitem->key = key;
233 hitem->val = val;
234 hitem->count = 1;
235 hitem->next = hlist;
236 hmap->hashtab[index] = hitem;
237 hmap->nitems++;
238 hmap->ntogo--;
239
240 /* If hmap is full based on average occupancy, rehash */
241 if (hmap->ntogo == 0)
242 l_hmapRehash(hmap);
243
244 return hitem;
245}
246
247
260l_ok
261l_hmapRehash(L_HASHMAP *hmap)
262{
263l_int32 i, index;
264l_uint32 tabsize;
265L_HASHITEM *hstore, *hitem, *next;
266
267 PROCNAME("l_hmapRehash");
268
269 if (!hmap)
270 return ERROR_INT("hmap not defined", procName, 1);
271
272 /* Put hash items in temporary storage in a single list,
273 * successively adding each to the list head. */
274 hstore = NULL; /* ptr to resulting list */
275 for (i = 0; i < hmap->tabsize; i++) {
276 for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
277 next = hitem->next;
278 hitem->next = hstore;
279 hstore = hitem;
280 }
281 }
282
283 /* Destroy the old hashtab and make a new one that is twice as big */
284 LEPT_FREE(hmap->hashtab);
285 findNextLargerPrime(2 * hmap->tabsize, &tabsize);
286 hmap->tabsize = tabsize;
287 hmap->hashtab = (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *));
288 if (hmap->hashtab == NULL) {
289 hmap->tabsize = 0;
290 return ERROR_INT("hashtab ptr array not made", procName, 1);
291 }
292 hmap->ntogo = hmap->maxocc * tabsize - hmap->nitems;
293
294 /* Populate with the stored hash items */
295 for (hitem = hstore; hitem != NULL; hitem = next) {
296 next = hitem->next;
297 index = hitem->key % tabsize; /* into new hashtab */
298 hitem->next = hmap->hashtab[index]; /* link to head of existing list */
299 hmap->hashtab[index] = hitem; /* put at head */
300 }
301
302 return 0;
303}
l_uint64 val
Definition: hashmap.h:117
l_uint64 key
Definition: hashmap.h:116
l_int32 count
Definition: hashmap.h:118
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 maxocc
Definition: hashmap.h:105
l_int32 tabsize
Definition: hashmap.h:107
l_int32 nitems
Definition: hashmap.h:102
l_int32 ntogo
Definition: hashmap.h:103
l_ok findNextLargerPrime(l_int32 start, l_uint32 *pprime)
findNextLargerPrime()
Definition: utils1.c:861