Leptonica 1.82.0
Image processing and image analysis suite
sarray2.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
71#ifdef HAVE_CONFIG_H
72#include <config_auto.h>
73#endif /* HAVE_CONFIG_H */
74
75#include <string.h>
76#include "allheaders.h"
77
78/*----------------------------------------------------------------------*
79 * Sort *
80 *----------------------------------------------------------------------*/
96SARRAY *
98 SARRAY *sain,
99 l_int32 sortorder)
100{
101char **array;
102char *tmp;
103l_int32 n, i, j, gap;
104
105 PROCNAME("sarraySort");
106
107 if (!sain)
108 return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
109
110 /* Make saout if necessary; otherwise do in-place */
111 if (!saout)
112 saout = sarrayCopy(sain);
113 else if (sain != saout)
114 return (SARRAY *)ERROR_PTR("invalid: not in-place", procName, NULL);
115 array = saout->array; /* operate directly on the array */
116 n = sarrayGetCount(saout);
117
118 /* Shell sort */
119 for (gap = n/2; gap > 0; gap = gap / 2) {
120 for (i = gap; i < n; i++) {
121 for (j = i - gap; j >= 0; j -= gap) {
122 if ((sortorder == L_SORT_INCREASING &&
123 stringCompareLexical(array[j], array[j + gap])) ||
124 (sortorder == L_SORT_DECREASING &&
125 stringCompareLexical(array[j + gap], array[j])))
126 {
127 tmp = array[j];
128 array[j] = array[j + gap];
129 array[j + gap] = tmp;
130 }
131 }
132 }
133 }
134
135 return saout;
136}
137
138
146SARRAY *
148 NUMA *naindex)
149{
150char *str;
151l_int32 i, n, index;
152SARRAY *saout;
153
154 PROCNAME("sarraySortByIndex");
155
156 if (!sain)
157 return (SARRAY *)ERROR_PTR("sain not defined", procName, NULL);
158 if (!naindex)
159 return (SARRAY *)ERROR_PTR("naindex not defined", procName, NULL);
160
161 n = sarrayGetCount(sain);
162 saout = sarrayCreate(n);
163 for (i = 0; i < n; i++) {
164 numaGetIValue(naindex, i, &index);
165 str = sarrayGetString(sain, index, L_COPY);
166 sarrayAddString(saout, str, L_INSERT);
167 }
168
169 return saout;
170}
171
172
186l_int32
187stringCompareLexical(const char *str1,
188 const char *str2)
189{
190l_int32 i, len1, len2, len;
191
192 PROCNAME("sarrayCompareLexical");
193
194 if (!str1)
195 return ERROR_INT("str1 not defined", procName, 1);
196 if (!str2)
197 return ERROR_INT("str2 not defined", procName, 1);
198
199 len1 = strlen(str1);
200 len2 = strlen(str2);
201 len = L_MIN(len1, len2);
202
203 for (i = 0; i < len; i++) {
204 if (str1[i] == str2[i])
205 continue;
206 if (str1[i] > str2[i])
207 return 1;
208 else
209 return 0;
210 }
211
212 if (len1 > len2)
213 return 1;
214 else
215 return 0;
216}
217
218
219/*----------------------------------------------------------------------*
220 * Set operations using aset (rbtree) *
221 *----------------------------------------------------------------------*/
228L_ASET *
230{
231char *str;
232l_int32 i, n;
233l_uint64 hash;
234L_ASET *set;
235RB_TYPE key;
236
237 PROCNAME("l_asetCreateFromSarray");
238
239 if (!sa)
240 return (L_ASET *)ERROR_PTR("sa not defined", procName, NULL);
241
242 set = l_asetCreate(L_UINT_TYPE);
243 n = sarrayGetCount(sa);
244 for (i = 0; i < n; i++) {
245 str = sarrayGetString(sa, i, L_NOCOPY);
246 l_hashStringToUint64Fast(str, &hash);
247 key.utype = hash;
248 l_asetInsert(set, key);
249 }
250
251 return set;
252}
253
254
272l_ok
274 SARRAY **psad)
275{
276char *str;
277l_int32 i, n;
278l_uint64 hash;
279L_ASET *set;
280RB_TYPE key;
281SARRAY *sad;
282
283 PROCNAME("sarrayRemoveDupsByAset");
284
285 if (!psad)
286 return ERROR_INT("&sad not defined", procName, 1);
287 *psad = NULL;
288 if (!sas)
289 return ERROR_INT("sas not defined", procName, 1);
290
291 set = l_asetCreate(L_UINT_TYPE);
292 sad = sarrayCreate(0);
293 *psad = sad;
294 n = sarrayGetCount(sas);
295 for (i = 0; i < n; i++) {
296 str = sarrayGetString(sas, i, L_NOCOPY);
297 l_hashStringToUint64Fast(str, &hash);
298 key.utype = hash;
299 if (!l_asetFind(set, key)) {
300 sarrayAddString(sad, str, L_COPY);
301 l_asetInsert(set, key);
302 }
303 }
304
305 l_asetDestroy(&set);
306 return 0;
307}
308
309
328l_ok
330 SARRAY *sa2,
331 SARRAY **psad)
332{
333SARRAY *sa3;
334
335 PROCNAME("sarrayUnionByAset");
336
337 if (!psad)
338 return ERROR_INT("&sad not defined", procName, 1);
339 *psad = NULL;
340 if (!sa1)
341 return ERROR_INT("sa1 not defined", procName, 1);
342 if (!sa2)
343 return ERROR_INT("sa2 not defined", procName, 1);
344
345 /* Join */
346 sa3 = sarrayCopy(sa1);
347 if (sarrayJoin(sa3, sa2) == 1) {
348 sarrayDestroy(&sa3);
349 return ERROR_INT("join failed for sa3", procName, 1);
350 }
351
352 /* Eliminate duplicates */
353 sarrayRemoveDupsByAset(sa3, psad);
354 sarrayDestroy(&sa3);
355 return 0;
356}
357
358
379l_ok
381 SARRAY *sa2,
382 SARRAY **psad)
383{
384char *str;
385l_int32 n1, n2, i, n;
386l_uint64 hash;
387L_ASET *set1, *set2;
388RB_TYPE key;
389SARRAY *sa_small, *sa_big, *sad;
390
391 PROCNAME("sarrayIntersectionByAset");
392
393 if (!psad)
394 return ERROR_INT("&sad not defined", procName, 1);
395 *psad = NULL;
396 if (!sa1)
397 return ERROR_INT("sa1 not defined", procName, 1);
398 if (!sa2)
399 return ERROR_INT("sa2 not defined", procName, 1);
400
401 /* Put the elements of the biggest array into a set */
402 n1 = sarrayGetCount(sa1);
403 n2 = sarrayGetCount(sa2);
404 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
405 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
406 set1 = l_asetCreateFromSarray(sa_big);
407
408 /* Build up the intersection of strings */
409 sad = sarrayCreate(0);
410 *psad = sad;
411 n = sarrayGetCount(sa_small);
412 set2 = l_asetCreate(L_UINT_TYPE);
413 for (i = 0; i < n; i++) {
414 str = sarrayGetString(sa_small, i, L_NOCOPY);
415 l_hashStringToUint64Fast(str, &hash);
416 key.utype = hash;
417 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
418 sarrayAddString(sad, str, L_COPY);
419 l_asetInsert(set2, key);
420 }
421 }
422
423 l_asetDestroy(&set1);
424 l_asetDestroy(&set2);
425 return 0;
426}
427
428
429/*----------------------------------------------------------------------*
430 * Hashmap operations *
431 *----------------------------------------------------------------------*/
438L_HASHMAP *
440{
441l_int32 i, n;
442l_uint64 key;
443char *str;
444L_HASHITEM *hitem;
445L_HASHMAP *hmap;
446
447 PROCNAME("l_hmapCreateFromSarray");
448
449 if (!sa)
450 return (L_HASHMAP *)ERROR_PTR("sa not defined", procName, NULL);
451
452 n = sarrayGetCount(sa);
453 if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
454 return (L_HASHMAP *)ERROR_PTR("hmap not made", procName, NULL);
455 for (i = 0; i < n; i++) {
456 str = sarrayGetString(sa, i, L_NOCOPY);
457 l_hashStringToUint64Fast(str, &key);
458 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
459 }
460 return hmap;
461}
462
463
472l_ok
474 SARRAY **psad,
475 L_HASHMAP **phmap)
476{
477l_int32 i, tabsize;
478l_uint64 key;
479char *str;
480SARRAY *sad;
481L_HASHITEM *hitem;
482L_HASHMAP *hmap;
483
484 PROCNAME("sarrayRemoveDupsByHmap");
485
486 if (phmap) *phmap = NULL;
487 if (!psad)
488 return ERROR_INT("&sad not defined", procName, 1);
489 *psad = NULL;
490 if (!sas)
491 return ERROR_INT("sas not defined", procName, 1);
492
493 /* Traverse the hashtable lists */
494 if ((hmap = l_hmapCreateFromSarray(sas)) == NULL)
495 return ERROR_INT("hmap not made", procName, 1);
496 sad = sarrayCreate(0);
497 *psad = sad;
498 tabsize = hmap->tabsize;
499 for (i = 0; i < tabsize; i++) {
500 hitem = hmap->hashtab[i];
501 while (hitem) {
502 str = sarrayGetString(sas, hitem->val, L_COPY);
503 sarrayAddString(sad, str, L_INSERT);
504 hitem = hitem->next;
505 }
506 }
507
508 if (phmap)
509 *phmap = hmap;
510 else
511 l_hmapDestroy(&hmap);
512 return 0;
513}
514
515
524l_ok
526 SARRAY *sa2,
527 SARRAY **psad)
528{
529SARRAY *sa3;
530
531 PROCNAME("l_hmapUnionSarray");
532
533 if (!psad)
534 return ERROR_INT("&sad not defined", procName, 1);
535 *psad = NULL;
536 if (!sa1)
537 return ERROR_INT("sa1 not defined", procName, 1);
538 if (!sa2)
539 return ERROR_INT("sa2 not defined", procName, 1);
540
541 sa3 = sarrayCopy(sa1);
542 if (sarrayJoin(sa3, sa2) == 1) {
543 sarrayDestroy(&sa3);
544 return ERROR_INT("sa3 join failed", procName, 1);
545 }
546 sarrayRemoveDupsByHmap(sa3, psad, NULL);
547 sarrayDestroy(&sa3);
548 return 0;
549}
550
551
560l_ok
562 SARRAY *sa2,
563 SARRAY **psad)
564{
565l_int32 i, n1, n2, n;
566l_uint64 key;
567char *str;
568SARRAY *sa_small, *sa_big, *sa3, *sad;
569L_HASHITEM *hitem;
570L_HASHMAP *hmap;
571
572 PROCNAME("sarrayIntersectionByHmap");
573
574 if (!psad)
575 return ERROR_INT("&sad not defined", procName, 1);
576 *psad = NULL;
577 if (!sa1)
578 return ERROR_INT("sa1 not defined", procName, 1);
579 if (!sa2)
580 return ERROR_INT("sa2 not defined", procName, 1);
581
582 /* Make a hashmap for the elements of the biggest array */
583 n1 = sarrayGetCount(sa1);
584 n2 = sarrayGetCount(sa2);
585 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
586 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
587 if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL)
588 return ERROR_INT("hmap not made", procName, 1);
589
590 /* Remove duplicates from the smallest array. Alternatively,
591 * we can skip this step and avoid counting duplicates in
592 * sa_small by modifying the count fields in the sa_big hashitems;
593 * e.g., see l_hmapIntersectionDna(). */
594 sarrayRemoveDupsByHmap(sa_small, &sa3, NULL);
595
596 /* Go through sa3, the set of strings derived from the smallest array,
597 * hashing into the big array table. Any string found belongs to both,
598 * so add it to the output array. */
599 sad = sarrayCreate(0);
600 *psad = sad;
601 n = sarrayGetCount(sa3);
602 for (i = 0; i < n; i++) {
603 str = sarrayGetString(sa3, i, L_NOCOPY);
604 l_hashStringToUint64Fast(str, &key);
605 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
606 if (hitem)
607 sarrayAddString(sad, str, L_COPY);
608 }
609 l_hmapDestroy(&hmap);
610 sarrayDestroy(&sa3);
611 return 0;
612}
613
614
615/*----------------------------------------------------------------------*
616 * Miscellaneous operations *
617 *----------------------------------------------------------------------*/
624SARRAY *
626{
627char buf[32];
628l_int32 i;
629SARRAY *sa;
630
631 PROCNAME("sarrayGenerateIntegers");
632
633 if ((sa = sarrayCreate(n)) == NULL)
634 return (SARRAY *)ERROR_PTR("sa not made", procName, NULL);
635 for (i = 0; i < n; i++) {
636 snprintf(buf, sizeof(buf), "%d", i);
637 sarrayAddString(sa, buf, L_COPY);
638 }
639 return sa;
640}
641
642
665l_ok
667 const char *keystring,
668 char **pvalstring)
669{
670char *key, *val, *str;
671l_int32 i, n;
672SARRAY *sa1;
673
674 PROCNAME("sarrayLookupCSKV");
675
676 if (!pvalstring)
677 return ERROR_INT("&valstring not defined", procName, 1);
678 *pvalstring = NULL;
679 if (!sa)
680 return ERROR_INT("sa not defined", procName, 1);
681 if (!keystring)
682 return ERROR_INT("keystring not defined", procName, 1);
683
684 n = sarrayGetCount(sa);
685 for (i = 0; i < n; i++) {
686 str = sarrayGetString(sa, i, L_NOCOPY);
687 sa1 = sarrayCreate(2);
688 sarraySplitString(sa1, str, ",");
689 if (sarrayGetCount(sa1) != 2) {
690 sarrayDestroy(&sa1);
691 continue;
692 }
693 key = sarrayGetString(sa1, 0, L_NOCOPY);
694 val = sarrayGetString(sa1, 1, L_NOCOPY);
695 if (!strcmp(key, keystring)) {
696 *pvalstring = stringNew(val);
697 sarrayDestroy(&sa1);
698 return 0;
699 }
700 sarrayDestroy(&sa1);
701 }
702
703 return 0;
704}
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:754
@ L_COPY
Definition: pix.h:712
@ L_NOCOPY
Definition: pix.h:710
@ L_INSERT
Definition: pix.h:711
@ L_SORT_DECREASING
Definition: pix.h:730
@ L_SORT_INCREASING
Definition: pix.h:729
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:170
l_ok sarrayJoin(SARRAY *sa1, SARRAY *sa2)
sarrayJoin()
Definition: sarray1.c:969
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:451
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
Definition: sarray1.c:398
L_HASHMAP * l_hmapCreateFromSarray(SARRAY *sa)
l_hmapCreateFromSarray()
Definition: sarray2.c:439
l_ok sarrayRemoveDupsByAset(SARRAY *sas, SARRAY **psad)
sarrayRemoveDupsByAset()
Definition: sarray2.c:273
l_ok sarrayIntersectionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByHmap()
Definition: sarray2.c:561
SARRAY * sarraySortByIndex(SARRAY *sain, NUMA *naindex)
sarraySortByIndex()
Definition: sarray2.c:147
l_ok sarrayUnionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByAset()
Definition: sarray2.c:329
l_int32 stringCompareLexical(const char *str1, const char *str2)
stringCompareLexical()
Definition: sarray2.c:187
l_ok sarrayLookupCSKV(SARRAY *sa, const char *keystring, char **pvalstring)
sarrayLookupCSKV()
Definition: sarray2.c:666
SARRAY * sarrayGenerateIntegers(l_int32 n)
sarrayGenerateIntegers()
Definition: sarray2.c:625
l_ok sarrayUnionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByHmap()
Definition: sarray2.c:525
l_ok sarrayIntersectionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByAset()
Definition: sarray2.c:380
L_ASET * l_asetCreateFromSarray(SARRAY *sa)
l_asetCreateFromSarray()
Definition: sarray2.c:229
l_ok sarrayRemoveDupsByHmap(SARRAY *sas, SARRAY **psad, L_HASHMAP **phmap)
sarrayRemoveDupsByHmap()
Definition: sarray2.c:473
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
Definition: sarray2.c:97
l_uint64 val
Definition: hashmap.h:117
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 tabsize
Definition: hashmap.h:107
Definition: array.h:71
Definition: array.h:127
char ** array
Definition: array.h:131
Definition: rbtree.h:62
l_ok l_hashStringToUint64Fast(const char *str, l_uint64 *phash)
l_hashStringToUint64Fast()
Definition: utils1.c:771
char * stringNew(const char *src)
stringNew()
Definition: utils2.c:223