Leptonica 1.82.0
Image processing and image analysis suite
partition.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
45#ifdef HAVE_CONFIG_H
46#include <config_auto.h>
47#endif /* HAVE_CONFIG_H */
48
49#include "allheaders.h"
50
53 l_float32 size; /* sorting key */
54 BOX *box; /* region of the element */
55 BOXA *boxa; /* set of intersecting boxes */
56};
57typedef struct PartitionElement PARTEL;
58
59static PARTEL * partelCreate(BOX *box);
60static void partelDestroy(PARTEL **ppartel);
61static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag);
62static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim,
63 l_float32 fract);
64static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim,
65 l_float32 fract);
66static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa,
67 l_float32 maxoverlap);
68
69static const l_int32 DefaultMaxPops = 20000;
70
71
72#ifndef NO_CONSOLE_IO
73#define OUTPUT_HEAP_STATS 0
74#endif /* ~NO_CONSOLE_IO */
75
76/*------------------------------------------------------------------*
77 * Whitespace block extraction *
78 *------------------------------------------------------------------*/
191BOXA *
193 BOX *box,
194 l_int32 sortflag,
195 l_int32 maxboxes,
196 l_float32 maxoverlap,
197 l_int32 maxperim,
198 l_float32 fract,
199 l_int32 maxpops)
200{
201l_int32 i, w, h, n, nsub, npush, npop;
202BOX *boxsub;
203BOXA *boxa, *boxa4, *boxasub, *boxad;
204PARTEL *partel;
205L_HEAP *lh;
206
207 PROCNAME("boxaGetWhiteblocks");
208
209 if (!boxas)
210 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
211 if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT &&
212 sortflag != L_SORT_BY_MIN_DIMENSION &&
213 sortflag != L_SORT_BY_MAX_DIMENSION &&
214 sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA)
215 return (BOXA *)ERROR_PTR("invalid sort flag", procName, NULL);
216 if (maxboxes < 1) {
217 maxboxes = 1;
218 L_WARNING("setting maxboxes = 1\n", procName);
219 }
220 if (maxoverlap < 0.0 || maxoverlap > 1.0)
221 return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
222 if (maxpops == 0)
223 maxpops = DefaultMaxPops;
224
225 if (!box) {
226 boxaGetExtent(boxas, &w, &h, NULL);
227 box = boxCreate(0, 0, w, h);
228 }
229
230 /* Prime the heap */
232 partel = partelCreate(box);
233 partel->boxa = boxaCopy(boxas, L_CLONE);
234 partelSetSize(partel, sortflag);
235 lheapAdd(lh, partel);
236
237 npush = 1;
238 npop = 0;
239 boxad = boxaCreate(0);
240 while (1) {
241 if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */
242 break;
243
244 npop++; /* How many boxes have we retrieved from the queue? */
245 if (npop > maxpops) {
246 partelDestroy(&partel);
247 break;
248 }
249
250 /* Extract the contents */
251 boxa = boxaCopy(partel->boxa, L_CLONE);
252 box = boxClone(partel->box);
253 partelDestroy(&partel);
254
255 /* Can we output this one? */
256 n = boxaGetCount(boxa);
257 if (n == 0) {
258 if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0)
259 boxaAddBox(boxad, box, L_INSERT);
260 else
261 boxDestroy(&box);
262 boxaDestroy(&boxa);
263 if (boxaGetCount(boxad) >= maxboxes) /* we're done */
264 break;
265 continue;
266 }
267
268
269 /* Generate up to 4 subboxes and put them on the heap */
270 boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract);
271 boxDestroy(&box);
272 nsub = boxaGetCount(boxa4);
273 for (i = 0; i < nsub; i++) {
274 boxsub = boxaGetBox(boxa4, i, L_CLONE);
275 boxasub = boxaIntersectsBox(boxa, boxsub);
276 partel = partelCreate(boxsub);
277 partel->boxa = boxasub;
278 partelSetSize(partel, sortflag);
279 lheapAdd(lh, partel);
280 boxDestroy(&boxsub);
281 }
282 npush += nsub; /* How many boxes have we put on the queue? */
283
284/* boxaWriteStderr(boxa4); */
285
286 boxaDestroy(&boxa4);
287 boxaDestroy(&boxa);
288 }
289
290#if OUTPUT_HEAP_STATS
291 lept_stderr("Heap statistics:\n");
292 lept_stderr(" Number of boxes pushed: %d\n", npush);
293 lept_stderr(" Number of boxes popped: %d\n", npop);
294 lept_stderr(" Number of boxes on heap: %d\n", lheapGetCount(lh));
295#endif /* OUTPUT_HEAP_STATS */
296
297 /* Clean up the heap */
298 while ((partel = (PARTEL *)lheapRemove(lh)) != NULL)
299 partelDestroy(&partel);
300 lheapDestroy(&lh, FALSE);
301
302 return boxad;
303}
304
305
306/*------------------------------------------------------------------*
307 * Helpers *
308 *------------------------------------------------------------------*/
315static PARTEL *
317{
318PARTEL *partel;
319
320 partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL));
321 partel->box = boxCopy(box);
322 return partel;
323}
324
325
332static void
334{
335PARTEL *partel;
336
337 PROCNAME("partelDestroy");
338
339 if (ppartel == NULL) {
340 L_WARNING("ptr address is null!\n", procName);
341 return;
342 }
343
344 if ((partel = *ppartel) == NULL)
345 return;
346
347 boxDestroy(&partel->box);
348 boxaDestroy(&partel->boxa);
349 LEPT_FREE(partel);
350 *ppartel = NULL;
351 return;
352}
353
354
364static l_int32
366 l_int32 sortflag)
367{
368l_int32 w, h;
369
370 PROCNAME("partelSetSize");
371
372 if (!partel)
373 return ERROR_INT("partel not defined", procName, 1);
374
375 boxGetGeometry(partel->box, NULL, NULL, &w, &h);
376 if (sortflag == L_SORT_BY_WIDTH)
377 partel->size = (l_float32)w;
378 else if (sortflag == L_SORT_BY_HEIGHT)
379 partel->size = (l_float32)h;
380 else if (sortflag == L_SORT_BY_MIN_DIMENSION)
381 partel->size = (l_float32)L_MIN(w, h);
382 else if (sortflag == L_SORT_BY_MAX_DIMENSION)
383 partel->size = (l_float32)L_MAX(w, h);
384 else if (sortflag == L_SORT_BY_PERIMETER)
385 partel->size = (l_float32)(w + h);
386 else if (sortflag == L_SORT_BY_AREA)
387 partel->size = (l_float32)(w * h);
388 else
389 return ERROR_INT("invalid sortflag", procName, 1);
390 return 0;
391}
392
393
407static BOXA *
409 BOXA *boxa,
410 l_int32 maxperim,
411 l_float32 fract)
412{
413l_int32 x, y, w, h, xp, yp, wp, hp;
414BOX *boxp; /* pivot box */
415BOX *boxsub;
416BOXA *boxa4;
417
418 PROCNAME("boxaGenerateSubboxes");
419
420 if (!box)
421 return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
422 if (!boxa)
423 return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL);
424
425 boxa4 = boxaCreate(4);
426 boxp = boxaSelectPivotBox(box, boxa, maxperim, fract);
427 boxGetGeometry(box, &x, &y, &w, &h);
428 boxGetGeometry(boxp, &xp, &yp, &wp, &hp);
429 boxDestroy(&boxp);
430 if (xp > x) { /* left sub-box */
431 boxsub = boxCreate(x, y, xp - x, h);
432 boxaAddBox(boxa4, boxsub, L_INSERT);
433 }
434 if (yp > y) { /* top sub-box */
435 boxsub = boxCreate(x, y, w, yp - y);
436 boxaAddBox(boxa4, boxsub, L_INSERT);
437 }
438 if (xp + wp < x + w) { /* right sub-box */
439 boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h);
440 boxaAddBox(boxa4, boxsub, L_INSERT);
441 }
442 if (yp + hp < y + h) { /* bottom sub-box */
443 boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp);
444 boxaAddBox(boxa4, boxsub, L_INSERT);
445 }
446
447 return boxa4;
448}
449
450
485static BOX *
487 BOXA *boxa,
488 l_int32 maxperim,
489 l_float32 fract)
490{
491l_int32 i, n, bw, bh, w, h;
492l_int32 smallfound, minindex, perim, minsize;
493l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy;
494BOX *boxt;
495
496 PROCNAME("boxaSelectPivotBox");
497
498 if (!box)
499 return (BOX *)ERROR_PTR("box not defined", procName, NULL);
500 if (!boxa)
501 return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
502 n = boxaGetCount(boxa);
503 if (n == 0)
504 return (BOX *)ERROR_PTR("no boxes in boxa", procName, NULL);
505 if (fract < 0.0 || fract > 1.0) {
506 L_WARNING("fract out of bounds; using 0.0\n", procName);
507 fract = 0.0;
508 }
509
510 boxGetGeometry(box, NULL, NULL, &w, &h);
511 boxGetCenter(box, &x, &y);
512 threshdist = fract * (w * w + h * h);
513 mindist = 1000000000.;
514 minindex = 0;
515 smallfound = FALSE;
516 for (i = 0; i < n; i++) {
517 boxt = boxaGetBox(boxa, i, L_CLONE);
518 boxGetGeometry(boxt, NULL, NULL, &bw, &bh);
519 boxGetCenter(boxt, &cx, &cy);
520 boxDestroy(&boxt);
521 if (bw + bh > maxperim)
522 continue;
523 smallfound = TRUE;
524 delx = cx - x;
525 dely = cy - y;
526 dist = delx * delx + dely * dely;
527 if (dist <= threshdist)
528 return boxaGetBox(boxa, i, L_COPY);
529 if (dist < mindist) {
530 minindex = i;
531 mindist = dist;
532 }
533 }
534
535 /* If there are small boxes but none are within 'fract' of the
536 * centroid, return the nearest one. */
537 if (smallfound == TRUE)
538 return boxaGetBox(boxa, minindex, L_COPY);
539
540 /* No small boxes; return the smallest of the large boxes */
541 minsize = 1000000000;
542 minindex = 0;
543 for (i = 0; i < n; i++) {
544 boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
545 perim = bw + bh;
546 if (perim < minsize) {
547 minsize = perim;
548 minindex = i;
549 }
550 }
551 return boxaGetBox(boxa, minindex, L_COPY);
552}
553
554
565static l_int32
567 BOXA *boxa,
568 l_float32 maxoverlap)
569{
570l_int32 i, n, bigoverlap;
571l_float32 fract;
572BOX *boxt;
573
574 PROCNAME("boxCheckIfOverlapIsBig");
575
576 if (!box)
577 return ERROR_INT("box not defined", procName, 1);
578 if (!boxa)
579 return ERROR_INT("boxa not defined", procName, 1);
580 if (maxoverlap < 0.0 || maxoverlap > 1.0)
581 return ERROR_INT("invalid maxoverlap", procName, 1);
582
583 n = boxaGetCount(boxa);
584 if (n == 0 || maxoverlap == 1.0)
585 return 0;
586
587 bigoverlap = 0;
588 for (i = 0; i < n; i++) {
589 boxt = boxaGetBox(boxa, i, L_CLONE);
590 boxOverlapFraction(boxt, box, &fract);
591 boxDestroy(&boxt);
592 if (fract > maxoverlap) {
593 bigoverlap = 1;
594 break;
595 }
596 }
597
598 return bigoverlap;
599}
600
601
620BOXA *
622 l_float32 maxoverlap)
623{
624l_int32 i, j, n, remove;
625l_float32 fract;
626BOX *box1, *box2;
627BOXA *boxad;
628
629 PROCNAME("boxaPruneSortedOnOverlap");
630
631 if (!boxas)
632 return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
633 if (maxoverlap < 0.0 || maxoverlap > 1.0)
634 return (BOXA *)ERROR_PTR("invalid maxoverlap", procName, NULL);
635
636 n = boxaGetCount(boxas);
637 if (n == 0 || maxoverlap == 1.0)
638 return boxaCopy(boxas, L_COPY);
639
640 boxad = boxaCreate(0);
641 box2 = boxaGetBox(boxas, 0, L_COPY);
642 boxaAddBox(boxad, box2, L_INSERT);
643 for (j = 1; j < n; j++) { /* prune on j */
644 box2 = boxaGetBox(boxas, j, L_COPY);
645 remove = FALSE;
646 for (i = 0; i < j; i++) { /* test on i */
647 box1 = boxaGetBox(boxas, i, L_CLONE);
648 boxOverlapFraction(box1, box2, &fract);
649 boxDestroy(&box1);
650 if (fract > maxoverlap) {
651 remove = TRUE;
652 break;
653 }
654 }
655 if (remove == TRUE)
656 boxDestroy(&box2);
657 else
658 boxaAddBox(boxad, box2, L_INSERT);
659 }
660
661 return boxad;
662}
BOX * boxCopy(BOX *box)
boxCopy()
Definition: boxbasic.c:235
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:779
BOXA * boxaCopy(BOXA *boxa, l_int32 copyflag)
boxaCopy()
Definition: boxbasic.c:537
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:282
BOX * boxClone(BOX *box)
boxClone()
Definition: boxbasic.c:256
BOX * boxCreate(l_int32 x, l_int32 y, l_int32 w, l_int32 h)
boxCreate()
Definition: boxbasic.c:172
l_ok boxaGetBoxGeometry(BOXA *boxa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxaGetBoxGeometry()
Definition: boxbasic.c:879
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:734
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:313
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:502
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
Definition: boxbasic.c:620
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
BOXA * boxaIntersectsBox(BOXA *boxas, BOX *box)
boxaIntersectsBox()
Definition: boxfunc1.c:331
l_ok boxGetCenter(BOX *box, l_float32 *pcx, l_float32 *pcy)
boxGetCenter()
Definition: boxfunc1.c:1583
l_ok boxOverlapFraction(BOX *box1, BOX *box2, l_float32 *pfract)
boxOverlapFraction()
Definition: boxfunc1.c:810
l_ok boxaGetExtent(BOXA *boxa, l_int32 *pw, l_int32 *ph, BOX **pbox)
boxaGetExtent()
Definition: boxfunc4.c:953
void lheapDestroy(L_HEAP **plh, l_int32 freeflag)
lheapDestroy()
Definition: heap.c:154
void * lheapRemove(L_HEAP *lh)
lheapRemove()
Definition: heap.c:251
L_HEAP * lheapCreate(l_int32 n, l_int32 direction)
lheapCreate()
Definition: heap.c:112
l_int32 lheapGetCount(L_HEAP *lh)
lheapGetCount()
Definition: heap.c:283
l_ok lheapAdd(L_HEAP *lh, void *item)
lheapAdd()
Definition: heap.c:193
static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, l_float32 maxoverlap)
boxCheckIfOverlapIsBig()
Definition: partition.c:566
BOXA * boxaGetWhiteblocks(BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops)
boxaGetWhiteblocks()
Definition: partition.c:192
static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaGenerateSubboxes()
Definition: partition.c:408
static PARTEL * partelCreate(BOX *box)
partelCreate()
Definition: partition.c:316
static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag)
partelSetSize()
Definition: partition.c:365
static void partelDestroy(PARTEL **ppartel)
partelDestroy()
Definition: partition.c:333
BOXA * boxaPruneSortedOnOverlap(BOXA *boxas, l_float32 maxoverlap)
boxaPruneSortedOnOverlap()
Definition: partition.c:621
static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaSelectPivotBox()
Definition: partition.c:486
@ L_SORT_BY_AREA
Definition: pix.h:744
@ L_SORT_BY_MIN_DIMENSION
Definition: pix.h:741
@ L_SORT_BY_PERIMETER
Definition: pix.h:743
@ L_SORT_BY_WIDTH
Definition: pix.h:739
@ L_SORT_BY_HEIGHT
Definition: pix.h:740
@ L_SORT_BY_MAX_DIMENSION
Definition: pix.h:742
@ L_COPY
Definition: pix.h:712
@ L_CLONE
Definition: pix.h:713
@ L_INSERT
Definition: pix.h:711
@ L_SORT_DECREASING
Definition: pix.h:730
Definition: pix.h:481
Definition: pix.h:492
Definition: heap.h:78
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306