Leptonica 1.85.0
Image processing and image analysis suite
Loading...
Searching...
No Matches
sarray2.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
71#ifdef HAVE_CONFIG_H
72#include <config_auto.h>
73#endif /* HAVE_CONFIG_H */
74
75#include <string.h>
76#include "allheaders.h"
77#include "array_internal.h"
78
79/*----------------------------------------------------------------------*
80 * Sort *
81 *----------------------------------------------------------------------*/
97SARRAY *
99 SARRAY *sain,
100 l_int32 sortorder)
101{
102char **array;
103char *tmp;
104l_int32 n, i, j, gap;
105
106 if (!sain)
107 return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
108
109 /* Make saout if necessary; otherwise do in-place */
110 if (!saout)
111 saout = sarrayCopy(sain);
112 else if (sain != saout)
113 return (SARRAY *)ERROR_PTR("invalid: not in-place", __func__, NULL);
114 array = saout->array; /* operate directly on the array */
115 n = sarrayGetCount(saout);
116
117 /* Shell sort */
118 for (gap = n/2; gap > 0; gap = gap / 2) {
119 for (i = gap; i < n; i++) {
120 for (j = i - gap; j >= 0; j -= gap) {
121 if ((sortorder == L_SORT_INCREASING &&
122 stringCompareLexical(array[j], array[j + gap])) ||
123 (sortorder == L_SORT_DECREASING &&
124 stringCompareLexical(array[j + gap], array[j])))
125 {
126 tmp = array[j];
127 array[j] = array[j + gap];
128 array[j + gap] = tmp;
129 }
130 }
131 }
132 }
133
134 return saout;
135}
136
137
145SARRAY *
147 NUMA *naindex)
148{
149char *str;
150l_int32 i, n, index;
151SARRAY *saout;
152
153 if (!sain)
154 return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
155 if (!naindex)
156 return (SARRAY *)ERROR_PTR("naindex not defined", __func__, NULL);
157
158 n = sarrayGetCount(sain);
159 saout = sarrayCreate(n);
160 for (i = 0; i < n; i++) {
161 numaGetIValue(naindex, i, &index);
162 str = sarrayGetString(sain, index, L_COPY);
163 sarrayAddString(saout, str, L_INSERT);
164 }
165
166 return saout;
167}
168
169
183l_int32
184stringCompareLexical(const char *str1,
185 const char *str2)
186{
187l_int32 i, len1, len2, len;
188
189 if (!str1)
190 return ERROR_INT("str1 not defined", __func__, 1);
191 if (!str2)
192 return ERROR_INT("str2 not defined", __func__, 1);
193
194 len1 = strlen(str1);
195 len2 = strlen(str2);
196 len = L_MIN(len1, len2);
197
198 for (i = 0; i < len; i++) {
199 if (str1[i] == str2[i])
200 continue;
201 if (str1[i] > str2[i])
202 return 1;
203 else
204 return 0;
205 }
206
207 if (len1 > len2)
208 return 1;
209 else
210 return 0;
211}
212
213
214/*----------------------------------------------------------------------*
215 * Set operations using aset (rbtree) *
216 *----------------------------------------------------------------------*/
223L_ASET *
225{
226char *str;
227l_int32 i, n;
228l_uint64 hash;
229L_ASET *set;
230RB_TYPE key;
231
232 if (!sa)
233 return (L_ASET *)ERROR_PTR("sa not defined", __func__, NULL);
234
235 set = l_asetCreate(L_UINT_TYPE);
236 n = sarrayGetCount(sa);
237 for (i = 0; i < n; i++) {
238 str = sarrayGetString(sa, i, L_NOCOPY);
239 l_hashStringToUint64Fast(str, &hash);
240 key.utype = hash;
241 l_asetInsert(set, key);
242 }
243
244 return set;
245}
246
247
265l_ok
267 SARRAY **psad)
268{
269char *str;
270l_int32 i, n;
271l_uint64 hash;
272L_ASET *set;
273RB_TYPE key;
274SARRAY *sad;
275
276 if (!psad)
277 return ERROR_INT("&sad not defined", __func__, 1);
278 *psad = NULL;
279 if (!sas)
280 return ERROR_INT("sas not defined", __func__, 1);
281
282 set = l_asetCreate(L_UINT_TYPE);
283 sad = sarrayCreate(0);
284 *psad = sad;
285 n = sarrayGetCount(sas);
286 for (i = 0; i < n; i++) {
287 str = sarrayGetString(sas, i, L_NOCOPY);
288 l_hashStringToUint64Fast(str, &hash);
289 key.utype = hash;
290 if (!l_asetFind(set, key)) {
291 sarrayAddString(sad, str, L_COPY);
292 l_asetInsert(set, key);
293 }
294 }
295
296 l_asetDestroy(&set);
297 return 0;
298}
299
300
319l_ok
321 SARRAY *sa2,
322 SARRAY **psad)
323{
324SARRAY *sa3;
325
326 if (!psad)
327 return ERROR_INT("&sad not defined", __func__, 1);
328 *psad = NULL;
329 if (!sa1)
330 return ERROR_INT("sa1 not defined", __func__, 1);
331 if (!sa2)
332 return ERROR_INT("sa2 not defined", __func__, 1);
333
334 /* Join */
335 sa3 = sarrayCopy(sa1);
336 if (sarrayJoin(sa3, sa2) == 1) {
337 sarrayDestroy(&sa3);
338 return ERROR_INT("join failed for sa3", __func__, 1);
339 }
340
341 /* Eliminate duplicates */
342 sarrayRemoveDupsByAset(sa3, psad);
343 sarrayDestroy(&sa3);
344 return 0;
345}
346
347
368l_ok
370 SARRAY *sa2,
371 SARRAY **psad)
372{
373char *str;
374l_int32 n1, n2, i, n;
375l_uint64 hash;
376L_ASET *set1, *set2;
377RB_TYPE key;
378SARRAY *sa_small, *sa_big, *sad;
379
380 if (!psad)
381 return ERROR_INT("&sad not defined", __func__, 1);
382 *psad = NULL;
383 if (!sa1)
384 return ERROR_INT("sa1 not defined", __func__, 1);
385 if (!sa2)
386 return ERROR_INT("sa2 not defined", __func__, 1);
387
388 /* Put the elements of the biggest array into a set */
389 n1 = sarrayGetCount(sa1);
390 n2 = sarrayGetCount(sa2);
391 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
392 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
393 set1 = l_asetCreateFromSarray(sa_big);
394
395 /* Build up the intersection of strings */
396 sad = sarrayCreate(0);
397 *psad = sad;
398 n = sarrayGetCount(sa_small);
399 set2 = l_asetCreate(L_UINT_TYPE);
400 for (i = 0; i < n; i++) {
401 str = sarrayGetString(sa_small, i, L_NOCOPY);
402 l_hashStringToUint64Fast(str, &hash);
403 key.utype = hash;
404 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
405 sarrayAddString(sad, str, L_COPY);
406 l_asetInsert(set2, key);
407 }
408 }
409
410 l_asetDestroy(&set1);
411 l_asetDestroy(&set2);
412 return 0;
413}
414
415
416/*----------------------------------------------------------------------*
417 * Hashmap operations *
418 *----------------------------------------------------------------------*/
425L_HASHMAP *
427{
428l_int32 i, n;
429l_uint64 key;
430char *str;
431L_HASHMAP *hmap;
432
433 if (!sa)
434 return (L_HASHMAP *)ERROR_PTR("sa not defined", __func__, NULL);
435
436 n = sarrayGetCount(sa);
437 if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
438 return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL);
439 for (i = 0; i < n; i++) {
440 str = sarrayGetString(sa, i, L_NOCOPY);
441 l_hashStringToUint64Fast(str, &key);
442 l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
443 }
444 return hmap;
445}
446
447
456l_ok
458 SARRAY **psad,
459 L_HASHMAP **phmap)
460{
461l_int32 i, tabsize;
462char *str;
463SARRAY *sad;
464L_HASHITEM *hitem;
465L_HASHMAP *hmap;
466
467 if (phmap) *phmap = NULL;
468 if (!psad)
469 return ERROR_INT("&sad not defined", __func__, 1);
470 *psad = NULL;
471 if (!sas)
472 return ERROR_INT("sas not defined", __func__, 1);
473
474 /* Traverse the hashtable lists */
475 if ((hmap = l_hmapCreateFromSarray(sas)) == NULL)
476 return ERROR_INT("hmap not made", __func__, 1);
477 sad = sarrayCreate(0);
478 *psad = sad;
479 tabsize = hmap->tabsize;
480 for (i = 0; i < tabsize; i++) {
481 hitem = hmap->hashtab[i];
482 while (hitem) {
483 str = sarrayGetString(sas, hitem->val, L_COPY);
484 sarrayAddString(sad, str, L_INSERT);
485 hitem = hitem->next;
486 }
487 }
488
489 if (phmap)
490 *phmap = hmap;
491 else
492 l_hmapDestroy(&hmap);
493 return 0;
494}
495
496
505l_ok
507 SARRAY *sa2,
508 SARRAY **psad)
509{
510SARRAY *sa3;
511
512 if (!psad)
513 return ERROR_INT("&sad not defined", __func__, 1);
514 *psad = NULL;
515 if (!sa1)
516 return ERROR_INT("sa1 not defined", __func__, 1);
517 if (!sa2)
518 return ERROR_INT("sa2 not defined", __func__, 1);
519
520 sa3 = sarrayCopy(sa1);
521 if (sarrayJoin(sa3, sa2) == 1) {
522 sarrayDestroy(&sa3);
523 return ERROR_INT("sa3 join failed", __func__, 1);
524 }
525 sarrayRemoveDupsByHmap(sa3, psad, NULL);
526 sarrayDestroy(&sa3);
527 return 0;
528}
529
530
539l_ok
541 SARRAY *sa2,
542 SARRAY **psad)
543{
544l_int32 i, n1, n2, n;
545l_uint64 key;
546char *str;
547SARRAY *sa_small, *sa_big, *sa3, *sad;
548L_HASHITEM *hitem;
549L_HASHMAP *hmap;
550
551 if (!psad)
552 return ERROR_INT("&sad not defined", __func__, 1);
553 *psad = NULL;
554 if (!sa1)
555 return ERROR_INT("sa1 not defined", __func__, 1);
556 if (!sa2)
557 return ERROR_INT("sa2 not defined", __func__, 1);
558
559 /* Make a hashmap for the elements of the biggest array */
560 n1 = sarrayGetCount(sa1);
561 n2 = sarrayGetCount(sa2);
562 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
563 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
564 if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL)
565 return ERROR_INT("hmap not made", __func__, 1);
566
567 /* Remove duplicates from the smallest array. Alternatively,
568 * we can skip this step and avoid counting duplicates in
569 * sa_small by modifying the count fields in the sa_big hashitems;
570 * e.g., see l_hmapIntersectionDna(). */
571 sarrayRemoveDupsByHmap(sa_small, &sa3, NULL);
572
573 /* Go through sa3, the set of strings derived from the smallest array,
574 * hashing into the big array table. Any string found belongs to both,
575 * so add it to the output array. */
576 sad = sarrayCreate(0);
577 *psad = sad;
578 n = sarrayGetCount(sa3);
579 for (i = 0; i < n; i++) {
580 str = sarrayGetString(sa3, i, L_NOCOPY);
581 l_hashStringToUint64Fast(str, &key);
582 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
583 if (hitem)
584 sarrayAddString(sad, str, L_COPY);
585 }
586 l_hmapDestroy(&hmap);
587 sarrayDestroy(&sa3);
588 return 0;
589}
590
591
592/*----------------------------------------------------------------------*
593 * Miscellaneous operations *
594 *----------------------------------------------------------------------*/
601SARRAY *
603{
604char buf[32];
605l_int32 i;
606SARRAY *sa;
607
608 if ((sa = sarrayCreate(n)) == NULL)
609 return (SARRAY *)ERROR_PTR("sa not made", __func__, NULL);
610 for (i = 0; i < n; i++) {
611 snprintf(buf, sizeof(buf), "%d", i);
612 sarrayAddString(sa, buf, L_COPY);
613 }
614 return sa;
615}
616
617
640l_ok
642 const char *keystring,
643 char **pvalstring)
644{
645char *key, *val, *str;
646l_int32 i, n;
647SARRAY *sa1;
648
649 if (!pvalstring)
650 return ERROR_INT("&valstring not defined", __func__, 1);
651 *pvalstring = NULL;
652 if (!sa)
653 return ERROR_INT("sa not defined", __func__, 1);
654 if (!keystring)
655 return ERROR_INT("keystring not defined", __func__, 1);
656
657 n = sarrayGetCount(sa);
658 for (i = 0; i < n; i++) {
659 str = sarrayGetString(sa, i, L_NOCOPY);
660 sa1 = sarrayCreate(2);
661 sarraySplitString(sa1, str, ",");
662 if (sarrayGetCount(sa1) != 2) {
663 sarrayDestroy(&sa1);
664 continue;
665 }
666 key = sarrayGetString(sa1, 0, L_NOCOPY);
667 val = sarrayGetString(sa1, 1, L_NOCOPY);
668 if (!strcmp(key, keystring)) {
669 *pvalstring = stringNew(val);
670 sarrayDestroy(&sa1);
671 return 0;
672 }
673 sarrayDestroy(&sa1);
674 }
675
676 return 0;
677}
@ L_COPY
Definition pix.h:505
@ L_NOCOPY
Definition pix.h:503
@ L_INSERT
Definition pix.h:504
@ L_SORT_DECREASING
Definition pix.h:523
@ L_SORT_INCREASING
Definition pix.h:522
L_HASHMAP * l_hmapCreateFromSarray(SARRAY *sa)
l_hmapCreateFromSarray()
Definition sarray2.c:426
l_ok sarrayRemoveDupsByAset(SARRAY *sas, SARRAY **psad)
sarrayRemoveDupsByAset()
Definition sarray2.c:266
l_ok sarrayIntersectionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByHmap()
Definition sarray2.c:540
SARRAY * sarraySortByIndex(SARRAY *sain, NUMA *naindex)
sarraySortByIndex()
Definition sarray2.c:146
l_ok sarrayUnionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByAset()
Definition sarray2.c:320
l_int32 stringCompareLexical(const char *str1, const char *str2)
stringCompareLexical()
Definition sarray2.c:184
l_ok sarrayLookupCSKV(SARRAY *sa, const char *keystring, char **pvalstring)
sarrayLookupCSKV()
Definition sarray2.c:641
SARRAY * sarrayGenerateIntegers(l_int32 n)
sarrayGenerateIntegers()
Definition sarray2.c:602
l_ok sarrayUnionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByHmap()
Definition sarray2.c:506
l_ok sarrayIntersectionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByAset()
Definition sarray2.c:369
L_ASET * l_asetCreateFromSarray(SARRAY *sa)
l_asetCreateFromSarray()
Definition sarray2.c:224
l_ok sarrayRemoveDupsByHmap(SARRAY *sas, SARRAY **psad, L_HASHMAP **phmap)
sarrayRemoveDupsByHmap()
Definition sarray2.c:457
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
Definition sarray2.c:98
l_uint64 val
Definition hashmap.h:117
struct L_Hashitem * next
Definition hashmap.h:119
struct L_Hashitem ** hashtab
Definition hashmap.h:106
l_int32 tabsize
Definition hashmap.h:107
char ** array