Leptonica 1.84.1
Image processing and image analysis suite
Loading...
Searching...
No Matches
pageseg.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
84#ifdef HAVE_CONFIG_H
85#include <config_auto.h>
86#endif /* HAVE_CONFIG_H */
87
88#include <math.h>
89#include "allheaders.h"
90#include "pix_internal.h"
91
92 /* These functions are not intended to work on very low-res images */
93static const l_int32 MinWidth = 100;
94static const l_int32 MinHeight = 100;
95
96/*------------------------------------------------------------------*
97 * Top level page segmentation *
98 *------------------------------------------------------------------*/
115l_ok
117 PIX **ppixhm,
118 PIX **ppixtm,
119 PIX **ppixtb,
120 PIXA *pixadb)
121{
122l_int32 w, h, htfound, tlfound;
123PIX *pixr, *pix1, *pix2;
124PIX *pixtext; /* text pixels only */
125PIX *pixhm2; /* halftone mask; 2x reduction */
126PIX *pixhm; /* halftone mask; */
127PIX *pixtm2; /* textline mask; 2x reduction */
128PIX *pixtm; /* textline mask */
129PIX *pixvws; /* vertical white space mask */
130PIX *pixtb2; /* textblock mask; 2x reduction */
131PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */
132PIX *pixtb; /* textblock mask */
133
134 if (ppixhm) *ppixhm = NULL;
135 if (ppixtm) *ppixtm = NULL;
136 if (ppixtb) *ppixtb = NULL;
137 if (!pixs || pixGetDepth(pixs) != 1)
138 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
139 pixGetDimensions(pixs, &w, &h, NULL);
140 if (w < MinWidth || h < MinHeight) {
141 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
142 return 1;
143 }
144
145 /* 2x reduce, to 150 -200 ppi */
146 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
147 if (pixadb) pixaAddPix(pixadb, pixr, L_COPY);
148
149 /* Get the halftone mask */
150 pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb);
151
152 /* Get the textline mask from the text pixels */
153 pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb);
154
155 /* Get the textblock mask from the textline mask */
156 pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb);
157 pixDestroy(&pixr);
158 pixDestroy(&pixtext);
159 pixDestroy(&pixvws);
160
161 /* Remove small components from the mask, where a small
162 * component is defined as one with both width and height < 60 */
163 pixtbf2 = NULL;
164 if (pixtb2) {
165 pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
166 L_SELECT_IF_GTE, NULL);
167 pixDestroy(&pixtb2);
168 if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY);
169 }
170
171 /* Expand all masks to full resolution, and do filling or
172 * small dilations for better coverage. */
173 pixhm = pixExpandReplicate(pixhm2, 2);
174 pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
175 pixOr(pixhm, pixhm, pix1);
176 pixDestroy(&pixhm2);
177 pixDestroy(&pix1);
178 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
179
180 pix1 = pixExpandReplicate(pixtm2, 2);
181 pixtm = pixDilateBrick(NULL, pix1, 3, 3);
182 pixDestroy(&pixtm2);
183 pixDestroy(&pix1);
184 if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY);
185
186 if (pixtbf2) {
187 pix1 = pixExpandReplicate(pixtbf2, 2);
188 pixtb = pixDilateBrick(NULL, pix1, 3, 3);
189 pixDestroy(&pixtbf2);
190 pixDestroy(&pix1);
191 if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY);
192 } else {
193 pixtb = pixCreateTemplate(pixs); /* empty mask */
194 }
195
196 /* Debug: identify objects that are neither text nor halftone image */
197 if (pixadb) {
198 pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */
199 pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */
200 pixaAddPix(pixadb, pix2, L_INSERT);
201 pixDestroy(&pix1);
202 }
203
204 /* Debug: display textline components with random colors */
205 if (pixadb) {
206 l_int32 w, h;
207 BOXA *boxa;
208 PIXA *pixa;
209 boxa = pixConnComp(pixtm, &pixa, 8);
210 pixGetDimensions(pixtm, &w, &h, NULL);
211 pix1 = pixaDisplayRandomCmap(pixa, w, h);
212 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
213 pixaAddPix(pixadb, pix1, L_INSERT);
214 pixaDestroy(&pixa);
215 boxaDestroy(&boxa);
216 }
217
218 /* Debug: identify the outlines of each textblock */
219 if (pixadb) {
220 PIXCMAP *cmap;
221 PTAA *ptaa;
222 ptaa = pixGetOuterBordersPtaa(pixtb);
223 lept_mkdir("lept/pageseg");
224 ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1);
225 pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
226 cmap = pixGetColormap(pix1);
227 pixcmapResetColor(cmap, 0, 130, 130, 130);
228 pixaAddPix(pixadb, pix1, L_INSERT);
229 ptaaDestroy(&ptaa);
230 }
231
232 /* Debug: get b.b. for all mask components */
233 if (pixadb) {
234 BOXA *bahm, *batm, *batb;
235 bahm = pixConnComp(pixhm, NULL, 4);
236 batm = pixConnComp(pixtm, NULL, 4);
237 batb = pixConnComp(pixtb, NULL, 4);
238 boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm);
239 boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm);
240 boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb);
241 boxaDestroy(&bahm);
242 boxaDestroy(&batm);
243 boxaDestroy(&batb);
244 }
245 if (pixadb) {
246 pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation",
247 "/tmp/lept/pageseg/debug.pdf");
248 L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__);
249 }
250
251 if (ppixhm)
252 *ppixhm = pixhm;
253 else
254 pixDestroy(&pixhm);
255 if (ppixtm)
256 *ppixtm = pixtm;
257 else
258 pixDestroy(&pixtm);
259 if (ppixtb)
260 *ppixtb = pixtb;
261 else
262 pixDestroy(&pixtb);
263
264 return 0;
265}
266
267
268/*------------------------------------------------------------------*
269 * Halftone region extraction *
270 *------------------------------------------------------------------*/
281PIX *
283 PIX **ppixtext,
284 l_int32 *phtfound,
285 l_int32 debug)
286{
287 return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL);
288}
289
290
306PIX *
308 PIX **ppixtext,
309 l_int32 *phtfound,
310 PIXA *pixadb)
311{
312l_int32 w, h, empty;
313PIX *pix1, *pix2, *pixhs, *pixhm, *pixd;
314
315 if (ppixtext) *ppixtext = NULL;
316 if (phtfound) *phtfound = 0;
317 if (!pixs || pixGetDepth(pixs) != 1)
318 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
319 pixGetDimensions(pixs, &w, &h, NULL);
320 if (w < MinWidth || h < MinHeight) {
321 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
322 return NULL;
323 }
324
325 /* Compute seed for halftone parts at 8x reduction */
326 pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0);
327 pix2 = pixOpenBrick(NULL, pix1, 5, 5);
328 pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */
329 pixDestroy(&pix1);
330 pixDestroy(&pix2);
331 if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY);
332
333 /* Compute mask for connected regions */
334 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
335 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
336
337 /* Fill seed into mask to get halftone mask */
338 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
339 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
340
341#if 0
342 pixOpenBrick(pixd, pixd, 9, 9);
343#endif
344
345 /* Check if mask is empty */
346 pixZero(pixd, &empty);
347 if (phtfound && !empty)
348 *phtfound = 1;
349
350 /* Optionally, get all pixels that are not under the halftone mask */
351 if (ppixtext) {
352 if (empty)
353 *ppixtext = pixCopy(NULL, pixs);
354 else
355 *ppixtext = pixSubtract(NULL, pixs, pixd);
356 if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY);
357 }
358
359 pixDestroy(&pixhs);
360 pixDestroy(&pixhm);
361 return pixd;
362}
363
364
365/*------------------------------------------------------------------*
366 * Textline extraction *
367 *------------------------------------------------------------------*/
387PIX *
389 PIX **ppixvws,
390 l_int32 *ptlfound,
391 PIXA *pixadb)
392{
393l_int32 w, h, empty;
394PIX *pix1, *pix2, *pixvws, *pixd;
395
396 if (ptlfound) *ptlfound = 0;
397 if (!ppixvws)
398 return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL);
399 *ppixvws = NULL;
400 if (!pixs || pixGetDepth(pixs) != 1)
401 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
402 pixGetDimensions(pixs, &w, &h, NULL);
403 if (w < MinWidth || h < MinHeight) {
404 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
405 return NULL;
406 }
407
408 /* First we need a vertical whitespace mask. Invert the image. */
409 pix1 = pixInvert(NULL, pixs);
410
411 /* The whitespace mask will break textlines where there
412 * is a large amount of white space below or above.
413 * This can be prevented by identifying regions of the
414 * inverted image that have large horizontal extent (bigger than
415 * the separation between columns) and significant
416 * vertical extent (bigger than the separation between
417 * textlines), and subtracting this from the bg. */
418 pix2 = pixMorphCompSequence(pix1, "o80.60", 0);
419 pixSubtract(pix1, pix1, pix2);
420 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
421 pixDestroy(&pix2);
422
423 /* Identify vertical whitespace by opening the remaining bg.
424 * o5.1 removes thin vertical bg lines and o1.200 extracts
425 * long vertical bg lines. */
426 pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0);
427 *ppixvws = pixvws;
428 if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY);
429 pixDestroy(&pix1);
430
431 /* Three steps to getting text line mask:
432 * (1) close the characters and words in the textlines
433 * (2) open the vertical whitespace corridors back up
434 * (3) small opening to remove noise */
435 pix1 = pixMorphSequence(pixs, "c30.1", 0);
436 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
437 pixd = pixSubtract(NULL, pix1, pixvws);
438 pixOpenBrick(pixd, pixd, 3, 3);
439 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
440 pixDestroy(&pix1);
441
442 /* Check if text line mask is empty */
443 if (ptlfound) {
444 pixZero(pixd, &empty);
445 if (!empty)
446 *ptlfound = 1;
447 }
448
449 return pixd;
450}
451
452
453/*------------------------------------------------------------------*
454 * Textblock extraction *
455 *------------------------------------------------------------------*/
477PIX *
479 PIX *pixvws,
480 PIXA *pixadb)
481{
482l_int32 w, h, empty;
483PIX *pix1, *pix2, *pix3, *pixd;
484
485 if (!pixs || pixGetDepth(pixs) != 1)
486 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
487 pixGetDimensions(pixs, &w, &h, NULL);
488 if (w < MinWidth || h < MinHeight) {
489 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
490 return NULL;
491 }
492 if (!pixvws)
493 return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL);
494
495 /* Join pixels vertically to make a textblock mask */
496 pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
497 pixZero(pix1, &empty);
498 if (empty) {
499 pixDestroy(&pix1);
500 L_INFO("no fg pixels in textblock mask\n", __func__);
501 return NULL;
502 }
503 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
504
505 /* Solidify the textblock mask and remove noise:
506 * (1) For each cc, close the blocks and dilate slightly
507 * to form a solid mask.
508 * (2) Small horizontal closing between components.
509 * (3) Open the white space between columns, again.
510 * (4) Remove small components. */
511 pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL);
512 pixCloseSafeBrick(pix2, pix2, 10, 1);
513 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
514 pix3 = pixSubtract(NULL, pix2, pixvws);
515 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
516 pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH,
517 L_SELECT_IF_GTE, NULL);
518 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
519
520 pixDestroy(&pix1);
521 pixDestroy(&pix2);
522 pixDestroy(&pix3);
523 return pixd;
524}
525
526
527/*------------------------------------------------------------------*
528 * Location and extraction of page foreground; cleaning pages *
529 *------------------------------------------------------------------*/
572PIX *
574 l_int32 lr_clear,
575 l_int32 tb_clear,
576 l_int32 edgeclean,
577 l_int32 lr_add,
578 l_int32 tb_add,
579 l_float32 maxwiden,
580 const char *debugfile,
581 BOX **pcropbox)
582{
583char cmd[64];
584l_int32 w, h, d, val;
585l_int32 left, right, top, bot, leftfinal, rightfinal, topfinal, botfinal;
586static l_int32 first_time = TRUE;
587l_float32 hscale;
588PIX *pix1, *pix2, *pix3;
589PIXA *pixa1;
590BOX *box1, *box2;
591
592 if (pcropbox) *pcropbox = NULL;
593 if (!pixs)
594 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
595 if (edgeclean < 0) edgeclean = 0;
596 if (edgeclean > 15) {
597 L_WARNING("edgeclean > 15; setting to 15\n", __func__);
598 edgeclean = 15;
599 }
600 pixGetDimensions(pixs, &w, &h, &d);
601 if (w < MinWidth || h < MinHeight) {
602 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
603 return NULL;
604 }
605 if (lr_clear < 0) lr_clear = 0;
606 if (tb_clear < 0) tb_clear = 0;
607 if (lr_add < 0) lr_add = 0;
608 if (tb_add < 0) tb_add = 0;
609 if (lr_clear > w / 6 || tb_clear > h / 6) {
610 L_ERROR("lr_clear or tb_clear too large; must be <= %d and %d\n",
611 __func__, w / 6, h / 6);
612 return NULL;
613 }
614 if (maxwiden > 1.2)
615 L_WARNING("maxwiden = %f > 1.2; suggest between 1.0 and 1.15\n",
616 __func__, maxwiden);
617 pixa1 = (debugfile) ? pixaCreate(5) : NULL;
618 if (pixa1) pixaAddPix(pixa1, pixs, L_COPY);
619
620 /* Binarize if necessary and 2x reduction */
621 pix1 = pixBackgroundNormTo1MinMax(pixs, 1, 1);
622 pix2 = pixReduceRankBinary2(pix1, 2, NULL);
623
624 /* Clear out border pixels */
625 pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2,
626 tb_clear / 2, PIX_CLR);
627 if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0), L_INSERT);
628
629 /* Optional morphological close/open and find the bounding box
630 * of the foreground pixels. */
631 if (edgeclean == 0) {
632 pixClipToForeground(pix2, NULL, &box1);
633 } else {
634 val = edgeclean + 1;
635 snprintf(cmd, 64, "c%d.%d + o%d.%d", val, val, val, val);
636 pix3 = pixMorphSequence(pix2, cmd, 0);
637 pixClipToForeground(pix3, NULL, &box1);
638 pixDestroy(&pix3);
639 }
640 pixDestroy(&pix2);
641
642 /* Transform to full resolution */
643 box2 = boxTransform(box1, 0, 0, 2.0, 2.0); /* full res */
644 if (pixa1) {
645 pix2 = pixCopy(NULL, pix1);
646 pixRenderBoxArb(pix2, box2, 5, 255, 0, 0);
647 pixaAddPix(pixa1, pix2, L_INSERT);
648 }
649
650 /* Adjust sides outward, respecting %lr_clear and %tb_clear */
651 boxGetSideLocations(box2, &left, &right, &top, &bot);
652 leftfinal = L_MAX(left - lr_add, lr_clear);
653 rightfinal = L_MIN(right + lr_add, w - lr_clear);
654 topfinal = L_MAX(top - tb_add, tb_clear);
655 botfinal = L_MIN(bot + tb_add, h - tb_clear);
656 boxSetSideLocations(box2, leftfinal, rightfinal, topfinal, botfinal);
657 boxDestroy(&box1);
658 if (pixa1) {
659 pix2 = pixCopy(NULL, pix1);
660 pixRenderBoxArb(pix2, box2, 5, 255, 0, 0);
661 pixaAddPix(pixa1, pix2, L_INSERT);
662 }
663
664 /* Crop the input image */
665 pix2 = pixClipRectangle(pix1, box2, NULL);
666
667 /* Slightly thicken long horizontal lines. This prevents loss of
668 * printed thin music staff lines due to aliasing. */
669 pix3 = pixMorphSequence(pix2, "o80.1 + d1.2", 0);
670 pixOr(pix2, pix2, pix3);
671 pixDestroy(&pix3);
672
673 /* Widen the result to fit the standard page shape (8.5 x 11 inch).
674 * Do not stretch horizontally by more than %maxwiden. */
675 pixGetDimensions(pix2, &w, &h, NULL);
676 hscale = (l_float32)h / (1.2941f * (l_float32)w);
677 if (hscale > 1.0 && maxwiden > 1.0) {
678 hscale = L_MIN(hscale, maxwiden);
679 pix3 = pixScale(pix2, hscale, 1.0);
680 if (first_time == TRUE) {
681 lept_stderr("Widening page by factor %5.3f\n", hscale);
682 first_time = FALSE;
683 }
684 } else {
685 pix3 = pixClone(pix2);
686 }
687 pixDestroy(&pix1);
688 pixDestroy(&pix2);
689
690 if (pcropbox)
691 *pcropbox = box2;
692 else
693 boxDestroy(&box2);
694 if (pixa1) {
695 pixaAddPix(pixa1, pix3, L_COPY);
696 lept_stderr("Writing debug file: %s\n", debugfile);
697 pixaConvertToPdf(pixa1, 0, 1.0, L_DEFAULT_ENCODE, 0, NULL, debugfile);
698 pixaDestroy(&pixa1);
699 }
700 return pix3;
701}
702
703
737PIX *
739 l_int32 contrast,
740 l_int32 rotation,
741 l_int32 scale,
742 l_int32 opensize)
743{
744char sequence[32];
745PIX *pix1, *pix2, *pix3, *pix4, *pix5;
746
747 if (!pixs)
748 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
749 if (rotation < 0 || rotation > 3) {
750 L_ERROR("invalid rotation = %d; rotation must be in {0,1,2,3}\n",
751 __func__, rotation);
752 return NULL;
753 }
754 if (contrast < 1 || contrast > 10) {
755 L_ERROR("invalid contrast = %d; contrast must be in [1...10]\n",
756 __func__, contrast);
757 return NULL;
758 }
759 if (scale != 1 && scale != 2) {
760 L_ERROR("invalid scale = %d; scale must be 1 or 2\n",
761 __func__, opensize);
762 return NULL;
763 }
764 if (opensize > 3) {
765 L_ERROR("invalid opensize = %d; opensize must be <= 3\n",
766 __func__, opensize);
767 return NULL;
768 }
769
770 if (pixGetDepth(pixs) == 1) {
771 if (rotation > 0)
772 pix1 = pixRotateOrth(pixs, rotation);
773 else
774 pix1 = pixClone(pixs);
775 pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL);
776 if (scale == 2)
777 pix4 = pixExpandBinaryReplicate(pix2, 2, 2);
778 else /* scale == 1 */
779 pix4 = pixClone(pix2);
780 } else {
781 pix1 = pixConvertTo8MinMax(pixs);
782 if (rotation > 0)
783 pix2 = pixRotateOrth(pix1, rotation);
784 else
785 pix2 = pixClone(pix1);
786 pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL);
787 pix4 = pixBackgroundNormTo1MinMax(pix3, contrast, scale);
788 pixDestroy(&pix3);
789 }
790
791 if (opensize == 2 || opensize == 3) {
792 snprintf(sequence, sizeof(sequence), "o%d.%d", opensize, opensize);
793 pix5 = pixMorphSequence(pix4, sequence, 0);
794 } else {
795 pix5 = pixClone(pix4);
796 }
797
798 pixDestroy(&pix1);
799 pixDestroy(&pix2);
800 pixDestroy(&pix4);
801 return pix5;
802}
803
804
840BOX *
842 l_int32 threshold,
843 l_int32 mindist,
844 l_int32 erasedist,
845 l_int32 showmorph,
846 PIXAC *pixac)
847{
848l_int32 flag, nbox, intersects;
849l_int32 w, h, bx, by, bw, bh, left, right, top, bottom;
850PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
851BOX *box, *boxfg, *boxin, *boxd;
852BOXA *ba1, *ba2;
853
854 if (!pixs)
855 return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL);
856 pixGetDimensions(pixs, &w, &h, NULL);
857 if (w < MinWidth || h < MinHeight) {
858 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
859 return NULL;
860 }
861
862 /* Binarize, downscale by 0.5, remove the noise to generate a seed,
863 * and do a seedfill back from the seed into those 8-connected
864 * components of the binarized image for which there was at least
865 * one seed pixel. */
866 flag = (showmorph) ? 100 : 0;
867 pixb = pixConvertTo1(pixs, threshold);
868 pixb2 = pixScale(pixb, 0.5, 0.5);
869 pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag);
870 pix1 = pixMorphSequence(pixb2, "o50.1", 0);
871 pixOr(pixseed, pixseed, pix1);
872 pixDestroy(&pix1);
873 pix1 = pixMorphSequence(pixb2, "o1.50", 0);
874 pixOr(pixseed, pixseed, pix1);
875 pixDestroy(&pix1);
876 pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
877 pixm = pixRemoveBorderConnComps(pixsf, 8);
878
879 /* Now, where is the main block of text? We want to remove noise near
880 * the edge of the image, but to do that, we have to be convinced that
881 * (1) there is noise and (2) it is far enough from the text block
882 * and close enough to the edge. For each edge, if the block
883 * is more than mindist from that edge, then clean 'erasedist'
884 * pixels from the edge. */
885 pix1 = pixMorphSequence(pixm, "c50.50", flag);
886 ba1 = pixConnComp(pix1, NULL, 8);
887 ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
888 pixGetDimensions(pix1, &w, &h, NULL);
889 nbox = boxaGetCount(ba2);
890 if (nbox > 1) {
891 box = boxaGetBox(ba2, 0, L_CLONE);
892 boxGetGeometry(box, &bx, &by, &bw, &bh);
893 left = (bx > mindist) ? erasedist : 0;
894 right = (w - bx - bw > mindist) ? erasedist : 0;
895 top = (by > mindist) ? erasedist : 0;
896 bottom = (h - by - bh > mindist) ? erasedist : 0;
897 pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
898 boxDestroy(&box);
899 }
900 pixDestroy(&pix1);
901 boxaDestroy(&ba1);
902 boxaDestroy(&ba2);
903
904 /* Locate the foreground region; don't bother cropping */
905 pixClipToForeground(pixm, NULL, &boxfg);
906
907 /* Sanity check the fg region. Make sure it's not confined
908 * to a thin boundary on the left and right sides of the image,
909 * in which case it is likely to be noise. */
910 if (boxfg) {
911 boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
912 boxIntersects(boxfg, boxin, &intersects);
913 boxDestroy(&boxin);
914 if (!intersects) boxDestroy(&boxfg);
915 }
916
917 boxd = NULL;
918 if (boxfg) {
919 boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */
920 boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);
921
922 /* Save the debug image showing the box for this page */
923 if (pixac) {
924 pixg2 = pixConvert1To4Cmap(pixb);
925 pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
926 pixacompAddPix(pixac, pixg2, IFF_DEFAULT);
927 pixDestroy(&pixg2);
928 }
929 }
930
931 pixDestroy(&pixb);
932 pixDestroy(&pixb2);
933 pixDestroy(&pixseed);
934 pixDestroy(&pixsf);
935 pixDestroy(&pixm);
936 boxDestroy(&boxfg);
937 return boxd;
938}
939
940
941/*------------------------------------------------------------------*
942 * Extraction of characters from image with only text *
943 *------------------------------------------------------------------*/
966l_ok
968 l_int32 minw,
969 l_int32 minh,
970 BOXA **pboxa,
971 PIXA **ppixa,
972 PIX **ppixdebug)
973{
974l_int32 ncomp, i, xoff, yoff;
975BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
976BOXAA *baa;
977PIX *pix, *pix1, *pix2, *pixdb;
978PIXA *pixa1, *pixadb;
979
980 if (pboxa) *pboxa = NULL;
981 if (ppixa) *ppixa = NULL;
982 if (ppixdebug) *ppixdebug = NULL;
983 if (!pixs || pixGetDepth(pixs) != 1)
984 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
985
986 /* Remove the small stuff */
987 pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
988 L_SELECT_IF_GT, NULL);
989
990 /* Small vertical close for consolidation */
991 pix2 = pixMorphSequence(pix1, "c1.10", 0);
992 pixDestroy(&pix1);
993
994 /* Get the 8-connected components */
995 boxa1 = pixConnComp(pix2, &pixa1, 8);
996 pixDestroy(&pix2);
997 boxaDestroy(&boxa1);
998
999 /* Split the components if obvious */
1000 ncomp = pixaGetCount(pixa1);
1001 boxa2 = boxaCreate(ncomp);
1002 pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
1003 for (i = 0; i < ncomp; i++) {
1004 pix = pixaGetPix(pixa1, i, L_CLONE);
1005 if (ppixdebug) {
1006 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
1007 if (pixdb)
1008 pixaAddPix(pixadb, pixdb, L_INSERT);
1009 } else {
1010 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
1011 }
1012 pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
1013 boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
1014 boxaJoin(boxa2, boxat2, 0, -1);
1015 pixDestroy(&pix);
1016 boxaDestroy(&boxat1);
1017 boxaDestroy(&boxat2);
1018 }
1019 pixaDestroy(&pixa1);
1020
1021 /* Generate the debug image */
1022 if (ppixdebug) {
1023 if (pixaGetCount(pixadb) > 0) {
1024 *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
1025 1.0, 0, 20, 1);
1026 }
1027 pixaDestroy(&pixadb);
1028 }
1029
1030 /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
1031 baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
1032 boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
1033 boxaaDestroy(&baa);
1034 boxaDestroy(&boxa2);
1035
1036 /* Optionally extract the pieces from the input image */
1037 if (ppixa)
1038 *ppixa = pixClipRectangles(pixs, boxad);
1039 if (pboxa)
1040 *pboxa = boxad;
1041 else
1042 boxaDestroy(&boxad);
1043 return 0;
1044}
1045
1046
1065BOXA *
1067 l_int32 delta,
1068 l_int32 mindel,
1069 PIX **ppixdebug)
1070{
1071l_int32 w, h, n2, i, firstmin, xmin, xshift;
1072l_int32 nmin, nleft, nright, nsplit, isplit, ncomp;
1073l_int32 *array1, *array2;
1074BOX *box;
1075BOXA *boxad;
1076NUMA *na1, *na2, *nasplit;
1077PIX *pix1, *pixdb;
1078
1079 if (ppixdebug) *ppixdebug = NULL;
1080 if (!pixs || pixGetDepth(pixs) != 1)
1081 return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL);
1082 pixGetDimensions(pixs, &w, &h, NULL);
1083
1084 /* Closing to consolidate characters vertically */
1085 pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);
1086
1087 /* Get extrema of column projections */
1088 boxad = boxaCreate(2);
1089 na1 = pixCountPixelsByColumn(pix1); /* w elements */
1090 pixDestroy(&pix1);
1091 na2 = numaFindExtrema(na1, delta, NULL);
1092 n2 = numaGetCount(na2);
1093 if (n2 < 3) { /* no split possible */
1094 box = boxCreate(0, 0, w, h);
1095 boxaAddBox(boxad, box, L_INSERT);
1096 numaDestroy(&na1);
1097 numaDestroy(&na2);
1098 return boxad;
1099 }
1100
1101 /* Look for sufficiently deep and narrow minima.
1102 * All minima of of interest must be surrounded by max on each
1103 * side. firstmin is the index of first possible minimum. */
1104 array1 = numaGetIArray(na1);
1105 array2 = numaGetIArray(na2);
1106 if (ppixdebug) numaWriteStderr(na2);
1107 firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
1108 nasplit = numaCreate(n2); /* will hold split locations */
1109 for (i = firstmin; i < n2 - 1; i+= 2) {
1110 xmin = array2[i];
1111 nmin = array1[xmin];
1112 if (xmin + 2 >= w) break; /* no more splits possible */
1113 nleft = array1[xmin - 2];
1114 nright = array1[xmin + 2];
1115 if (ppixdebug) {
1116 lept_stderr(
1117 "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
1118 xmin, w, nleft, nmin, nright);
1119 }
1120 if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */
1121 numaAddNumber(nasplit, xmin);
1122 }
1123 nsplit = numaGetCount(nasplit);
1124
1125#if 0
1126 if (ppixdebug && nsplit > 0) {
1127 lept_mkdir("lept/split");
1128 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL);
1129 }
1130#endif
1131
1132 numaDestroy(&na1);
1133 numaDestroy(&na2);
1134 LEPT_FREE(array1);
1135 LEPT_FREE(array2);
1136
1137 if (nsplit == 0) { /* no splitting */
1138 numaDestroy(&nasplit);
1139 box = boxCreate(0, 0, w, h);
1140 boxaAddBox(boxad, box, L_INSERT);
1141 return boxad;
1142 }
1143
1144 /* Use split points to generate b.b. after splitting */
1145 for (i = 0, xshift = 0; i < nsplit; i++) {
1146 numaGetIValue(nasplit, i, &isplit);
1147 box = boxCreate(xshift, 0, isplit - xshift, h);
1148 boxaAddBox(boxad, box, L_INSERT);
1149 xshift = isplit + 1;
1150 }
1151 box = boxCreate(xshift, 0, w - xshift, h);
1152 boxaAddBox(boxad, box, L_INSERT);
1153 numaDestroy(&nasplit);
1154
1155 if (ppixdebug) {
1156 pixdb = pixConvertTo32(pixs);
1157 ncomp = boxaGetCount(boxad);
1158 for (i = 0; i < ncomp; i++) {
1159 box = boxaGetBox(boxad, i, L_CLONE);
1160 pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
1161 boxDestroy(&box);
1162 }
1163 *ppixdebug = pixdb;
1164 }
1165
1166 return boxad;
1167}
1168
1169
1170/*------------------------------------------------------------------*
1171 * Extraction of lines of text *
1172 *------------------------------------------------------------------*/
1219PIXA *
1221 l_int32 maxw,
1222 l_int32 maxh,
1223 l_int32 minw,
1224 l_int32 minh,
1225 l_int32 adjw,
1226 l_int32 adjh,
1227 PIXA *pixadb)
1228{
1229char buf[64];
1230l_int32 res, csize, empty;
1231BOXA *boxa1, *boxa2, *boxa3;
1232PIX *pix1, *pix2, *pix3;
1233PIXA *pixa1, *pixa2, *pixa3;
1234
1235 if (!pixs)
1236 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1237
1238 /* Binarize carefully, if necessary */
1239 if (pixGetDepth(pixs) > 1) {
1240 pix2 = pixConvertTo8(pixs, FALSE);
1241 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1242 pix1 = pixThresholdToBinary(pix3, 150);
1243 pixDestroy(&pix2);
1244 pixDestroy(&pix3);
1245 } else {
1246 pix1 = pixClone(pixs);
1247 }
1248 pixZero(pix1, &empty);
1249 if (empty) {
1250 pixDestroy(&pix1);
1251 L_INFO("no fg pixels in input image\n", __func__);
1252 return NULL;
1253 }
1254 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1255
1256 /* Remove any very tall or very wide connected components */
1257 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1258 L_SELECT_IF_LT, NULL);
1259 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1260 pixDestroy(&pix1);
1261
1262 /* Filter to solidify the text lines within the x-height region.
1263 * The closing (csize) bridges gaps between words. The opening
1264 * removes isolated bridges between textlines. */
1265 if ((res = pixGetXRes(pixs)) == 0) {
1266 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1267 res = 300;
1268 }
1269 csize = L_MIN(120., 60.0 * res / 300.0);
1270 snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3);
1271 pix3 = pixMorphCompSequence(pix2, buf, 0);
1272 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1273
1274 /* Extract the connected components. These should be dilated lines */
1275 boxa1 = pixConnComp(pix3, &pixa1, 4);
1276 if (pixadb) {
1277 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1278 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1279 pixaAddPix(pixadb, pix1, L_INSERT);
1280 }
1281
1282 /* Set minw, minh if default is requested */
1283 minw = (minw != 0) ? minw : (l_int32)(0.12 * res);
1284 minh = (minh != 0) ? minh : (l_int32)(0.07 * res);
1285
1286 /* Remove line components that are too small */
1287 pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH,
1288 L_SELECT_IF_GTE, NULL);
1289 if (pixadb) {
1290 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1291 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1292 pixaAddPix(pixadb, pix1, L_INSERT);
1293 pix1 = pixConvertTo32(pix2);
1294 pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0);
1295 pixaAddPix(pixadb, pix1, L_INSERT);
1296 }
1297
1298 /* Selectively AND with the version before dilation, and save */
1299 boxa2 = pixaGetBoxa(pixa2, L_CLONE);
1300 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1301 pixa3 = pixClipRectangles(pix2, boxa3);
1302 if (pixadb) {
1303 pix1 = pixaDisplayRandomCmap(pixa3, 0, 0);
1304 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1305 pixaAddPix(pixadb, pix1, L_INSERT);
1306 }
1307
1308 pixDestroy(&pix2);
1309 pixDestroy(&pix3);
1310 pixaDestroy(&pixa1);
1311 pixaDestroy(&pixa2);
1312 boxaDestroy(&boxa1);
1313 boxaDestroy(&boxa2);
1314 boxaDestroy(&boxa3);
1315 return pixa3;
1316}
1317
1318
1357PIXA *
1359 l_int32 maxw,
1360 l_int32 maxh,
1361 l_int32 adjw,
1362 l_int32 adjh,
1363 PIXA *pixadb)
1364{
1365char buf[64];
1366l_int32 res, csize, empty;
1367BOXA *boxa1, *boxa2, *boxa3;
1368BOXAA *baa1;
1369PIX *pix1, *pix2, *pix3;
1370PIXA *pixa1, *pixa2;
1371
1372 if (!pixs)
1373 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1374
1375 /* Set maxw, maxh if default is requested */
1376 if ((res = pixGetXRes(pixs)) == 0) {
1377 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1378 res = 300;
1379 }
1380 maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res);
1381 maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res);
1382
1383 /* Binarize carefully, if necessary */
1384 if (pixGetDepth(pixs) > 1) {
1385 pix2 = pixConvertTo8(pixs, FALSE);
1386 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1387 pix1 = pixThresholdToBinary(pix3, 150);
1388 pixDestroy(&pix2);
1389 pixDestroy(&pix3);
1390 } else {
1391 pix1 = pixClone(pixs);
1392 }
1393 pixZero(pix1, &empty);
1394 if (empty) {
1395 pixDestroy(&pix1);
1396 L_INFO("no fg pixels in input image\n", __func__);
1397 return NULL;
1398 }
1399 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1400
1401 /* Remove any very tall or very wide connected components */
1402 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1403 L_SELECT_IF_LT, NULL);
1404 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1405 pixDestroy(&pix1);
1406
1407 /* Filter to solidify the text lines within the x-height region.
1408 * The closing (csize) bridges gaps between words. */
1409 csize = L_MIN(120., 60.0 * res / 300.0);
1410 snprintf(buf, sizeof(buf), "c%d.1", csize);
1411 pix3 = pixMorphCompSequence(pix2, buf, 0);
1412 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1413
1414 /* Extract the connected components. These should be dilated lines */
1415 boxa1 = pixConnComp(pix3, &pixa1, 4);
1416 if (pixadb) {
1417 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1418 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1419 pixaAddPix(pixadb, pix1, L_INSERT);
1420 }
1421
1422 /* Do a 2-d sort, and generate a bounding box for each set of text
1423 * line segments that is aligned horizontally (i.e., has vertical
1424 * overlap) into a box representing a single text line. */
1425 baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5);
1426 boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2);
1427 if (pixadb) {
1428 pix1 = pixConvertTo32(pix2);
1429 pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0);
1430 pixaAddPix(pixadb, pix1, L_INSERT);
1431 }
1432
1433 /* Optionally adjust the sides of each text line box, and then
1434 * use the boxes to generate a pixa of the text lines. */
1435 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1436 pixa2 = pixClipRectangles(pix2, boxa3);
1437 if (pixadb) {
1438 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1439 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1440 pixaAddPix(pixadb, pix1, L_INSERT);
1441 }
1442
1443 pixDestroy(&pix2);
1444 pixDestroy(&pix3);
1445 pixaDestroy(&pixa1);
1446 boxaDestroy(&boxa1);
1447 boxaDestroy(&boxa2);
1448 boxaDestroy(&boxa3);
1449 boxaaDestroy(&baa1);
1450 return pixa2;
1451}
1452
1453
1454/*------------------------------------------------------------------*
1455 * How many text columns *
1456 *------------------------------------------------------------------*/
1483l_ok
1485 l_float32 deltafract,
1486 l_float32 peakfract,
1487 l_float32 clipfract,
1488 l_int32 *pncols,
1489 PIXA *pixadb)
1490{
1491l_int32 w, h, res, i, n, npeak;
1492l_float32 scalefact, redfact, minval, maxval, val4, val5, fract;
1493BOX *box;
1494NUMA *na1, *na2, *na3, *na4, *na5;
1495PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1496
1497 if (!pncols)
1498 return ERROR_INT("&ncols not defined", __func__, 1);
1499 *pncols = -1; /* init */
1500 if (!pixs || pixGetDepth(pixs) != 1)
1501 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1502 if (deltafract < 0.15 || deltafract > 0.75)
1503 L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__);
1504 if (peakfract < 0.25 || peakfract > 0.9)
1505 L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__);
1506 if (clipfract < 0.0 || clipfract >= 0.5)
1507 return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1);
1508 if (pixadb) pixaAddPix(pixadb, pixs, L_COPY);
1509
1510 /* Scale to between 37.5 and 75 ppi */
1511 if ((res = pixGetXRes(pixs)) == 0) {
1512 L_WARNING("resolution undefined; set to 300\n", __func__);
1513 pixSetResolution(pixs, 300, 300);
1514 res = 300;
1515 }
1516 if (res < 37) {
1517 L_WARNING("resolution %d very low\n", __func__, res);
1518 scalefact = 37.5 / res;
1519 pix1 = pixScale(pixs, scalefact, scalefact);
1520 } else {
1521 redfact = (l_float32)res / 37.5;
1522 if (redfact < 2.0)
1523 pix1 = pixClone(pixs);
1524 else if (redfact < 4.0)
1525 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1526 else if (redfact < 8.0)
1527 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0);
1528 else if (redfact < 16.0)
1529 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0);
1530 else
1531 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2);
1532 }
1533 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1534
1535 /* Crop inner 80% of image */
1536 pixGetDimensions(pix1, &w, &h, NULL);
1537 box = boxCreate(clipfract * w, clipfract * h,
1538 (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h);
1539 pix2 = pixClipRectangle(pix1, box, NULL);
1540 pixGetDimensions(pix2, &w, &h, NULL);
1541 boxDestroy(&box);
1542 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1543
1544 /* Deskew */
1545 pix3 = pixDeskew(pix2, 0);
1546 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1547
1548 /* Close to increase column counts for text */
1549 pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21);
1550 if (pixadb) pixaAddPix(pixadb, pix4, L_COPY);
1551 pixInvert(pix4, pix4);
1552 na1 = pixCountByColumn(pix4, NULL);
1553
1554 if (pixadb) {
1555 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL);
1556 pix5 = pixRead("/tmp/lept/plot.png");
1557 pixaAddPix(pixadb, pix5, L_INSERT);
1558 }
1559
1560 /* Analyze the column counts. na4 gives the locations of
1561 * the extrema in normalized units (0.0 to 1.0) across the
1562 * cropped image. na5 gives the magnitude of the
1563 * extrema, normalized to the dynamic range. The peaks
1564 * are values that are at least peakfract of (max - min). */
1565 numaGetMax(na1, &maxval, NULL);
1566 numaGetMin(na1, &minval, NULL);
1567 fract = (l_float32)(maxval - minval) / h; /* is there much at all? */
1568 if (fract < 0.05) {
1569 L_INFO("very little content on page; 0 text columns\n", __func__);
1570 *pncols = 0;
1571 } else {
1572 na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3);
1573 na4 = numaTransform(na2, 0, 1.0 / w);
1574 na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval));
1575 n = numaGetCount(na4);
1576 for (i = 0, npeak = 0; i < n; i++) {
1577 numaGetFValue(na4, i, &val4);
1578 numaGetFValue(na5, i, &val5);
1579 if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) {
1580 npeak++;
1581 L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5);
1582 }
1583 }
1584 *pncols = npeak + 1;
1585 numaDestroy(&na2);
1586 numaDestroy(&na3);
1587 numaDestroy(&na4);
1588 numaDestroy(&na5);
1589 }
1590
1591 pixDestroy(&pix1);
1592 pixDestroy(&pix2);
1593 pixDestroy(&pix3);
1594 pixDestroy(&pix4);
1595 numaDestroy(&na1);
1596 return 0;
1597}
1598
1599
1600/*------------------------------------------------------------------*
1601 * Decision text vs photo *
1602 *------------------------------------------------------------------*/
1629l_ok
1631 BOX *box,
1632 l_int32 *pistext,
1633 PIXA *pixadb)
1634{
1635l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp;
1636l_float32 ratio1, ratio2;
1637L_BMF *bmf;
1638BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5;
1639PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7;
1640PIXA *pixa1;
1641SEL *sel1;
1642
1643 if (!pistext)
1644 return ERROR_INT("&istext not defined", __func__, 1);
1645 *pistext = -1;
1646 if (!pixs)
1647 return ERROR_INT("pixs not defined", __func__, 1);
1648
1649 /* Crop, convert to 1 bpp, 300 ppi */
1650 if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL)
1651 return ERROR_INT("pix1 not made", __func__, 1);
1652
1653 pixZero(pix1, &empty);
1654 if (empty) {
1655 pixDestroy(&pix1);
1656 L_INFO("pix is empty\n", __func__);
1657 return 0;
1658 }
1659 w = pixGetWidth(pix1);
1660
1661 /* Identify and remove tall, thin vertical lines (as found in tables)
1662 * that are up to 9 pixels wide. Make a hit-miss sel with an
1663 * 81 pixel vertical set of hits and with 3 pairs of misses that
1664 * are 10 pixels apart horizontally. It is necessary to use a
1665 * hit-miss transform; if we only opened with a vertical line of
1666 * hits, we would remove solid regions of pixels that are not
1667 * text or vertical lines. */
1668 pix2 = pixCreate(11, 81, 1);
1669 for (i = 0; i < 81; i++)
1670 pixSetPixel(pix2, 5, i, 1);
1671 sel1 = selCreateFromPix(pix2, 40, 5, NULL);
1672 selSetElement(sel1, 20, 0, SEL_MISS);
1673 selSetElement(sel1, 20, 10, SEL_MISS);
1674 selSetElement(sel1, 40, 0, SEL_MISS);
1675 selSetElement(sel1, 40, 10, SEL_MISS);
1676 selSetElement(sel1, 60, 0, SEL_MISS);
1677 selSetElement(sel1, 60, 10, SEL_MISS);
1678 pix3 = pixHMT(NULL, pix1, sel1);
1679 pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000);
1680 pix5 = pixXor(NULL, pix1, pix4);
1681 pixDestroy(&pix2);
1682 selDestroy(&sel1);
1683
1684 /* Convert the text lines to separate long horizontal components */
1685 pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0);
1686
1687 /* Estimate the distance to the bottom of the significant region */
1688 if (box) { /* use full height */
1689 pixGetDimensions(pix6, NULL, &h, NULL);
1690 } else { /* use height of region that has text lines */
1691 pixFindThreshFgExtent(pix6, 400, NULL, &h);
1692 }
1693
1694 if (pixadb) {
1695 bmf = bmfCreate(NULL, 6);
1696 pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary",
1697 0x0000ff00, L_ADD_BELOW);
1698 pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line",
1699 0x0000ff00, L_ADD_BELOW);
1700 pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill",
1701 0x0000ff00, L_ADD_BELOW);
1702 pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor",
1703 0x0000ff00, L_ADD_BELOW);
1704 pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components",
1705 0x0000ff00, L_ADD_BELOW);
1706 }
1707
1708 /* Extract the connected components */
1709 if (pixadb) {
1710 boxa1 = pixConnComp(pix6, &pixa1, 8);
1711 pix7 = pixaDisplayRandomCmap(pixa1, 0, 0);
1712 pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255);
1713 pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components",
1714 0x0000ff00, L_ADD_BELOW);
1715 pixDestroy(&pix7);
1716 pixaDestroy(&pixa1);
1717 bmfDestroy(&bmf);
1718 } else {
1719 boxa1 = pixConnComp(pix6, NULL, 8);
1720 }
1721
1722 /* Analyze the connected components. The following conditions
1723 * at 300 ppi must be satisfied if the image is text:
1724 * (1) There are no components that are wider than 400 pixels and
1725 * taller than 175 pixels.
1726 * (2) The second longest component is at least 60% of the
1727 * (possibly cropped) image width. This catches images
1728 * that don't have any significant content.
1729 * (3) Of the components that are at least 40% of the length
1730 * of the longest (n2), at least 80% of them must not exceed
1731 * 60 pixels in height.
1732 * (4) The number of those long, thin components (n3) must
1733 * equal or exceed a minimum that scales linearly with the
1734 * image height.
1735 * Most images that are not text fail more than one of these
1736 * conditions. */
1737 boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL);
1738 boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */
1739 boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH,
1740 L_SELECT_IF_GTE, NULL);
1741 boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT,
1742 L_SELECT_IF_LTE, NULL);
1743 boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH,
1744 L_SELECT_IF_GT, NULL);
1745 big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1;
1746 n1 = boxaGetCount(boxa1);
1747 n2 = boxaGetCount(boxa3);
1748 n3 = boxaGetCount(boxa4);
1749 ratio1 = (l_float32)maxw / (l_float32)w;
1750 ratio2 = (l_float32)n3 / (l_float32)n2;
1751 minlines = L_MAX(2, h / 125);
1752 if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines)
1753 *pistext = 0;
1754 else
1755 *pistext = 1;
1756 if (pixadb) {
1757 if (*pistext == 1) {
1758 L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, "
1759 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
1760 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
1761 maxw, ratio1, h, big_comp);
1762 } else {
1763 L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, "
1764 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
1765 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
1766 maxw, ratio1, h, big_comp);
1767 }
1768 }
1769
1770 boxaDestroy(&boxa1);
1771 boxaDestroy(&boxa2);
1772 boxaDestroy(&boxa3);
1773 boxaDestroy(&boxa4);
1774 boxaDestroy(&boxa5);
1775 pixDestroy(&pix1);
1776 pixDestroy(&pix3);
1777 pixDestroy(&pix4);
1778 pixDestroy(&pix5);
1779 pixDestroy(&pix6);
1780 return 0;
1781}
1782
1783
1793l_ok
1795 l_int32 thresh,
1796 l_int32 *ptop,
1797 l_int32 *pbot)
1798{
1799l_int32 i, n;
1800l_int32 *array;
1801NUMA *na;
1802
1803 if (ptop) *ptop = 0;
1804 if (pbot) *pbot = 0;
1805 if (!ptop && !pbot)
1806 return ERROR_INT("nothing to determine", __func__, 1);
1807 if (!pixs || pixGetDepth(pixs) != 1)
1808 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1809
1810 na = pixCountPixelsByRow(pixs, NULL);
1811 n = numaGetCount(na);
1812 array = numaGetIArray(na);
1813 if (ptop) {
1814 for (i = 0; i < n; i++) {
1815 if (array[i] >= thresh) {
1816 *ptop = i;
1817 break;
1818 }
1819 }
1820 }
1821 if (pbot) {
1822 for (i = n - 1; i >= 0; i--) {
1823 if (array[i] >= thresh) {
1824 *pbot = i;
1825 break;
1826 }
1827 }
1828 }
1829 LEPT_FREE(array);
1830 numaDestroy(&na);
1831 return 0;
1832}
1833
1834
1835/*------------------------------------------------------------------*
1836 * Decision: table vs text *
1837 *------------------------------------------------------------------*/
1881l_ok
1883 BOX *box,
1884 l_int32 orient,
1885 l_int32 *pscore,
1886 PIXA *pixadb)
1887{
1888l_int32 empty, nhb, nvb, nvw, score, htfound;
1889PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
1890
1891 if (!pscore)
1892 return ERROR_INT("&score not defined", __func__, 1);
1893 *pscore = -1;
1894 if (!pixs)
1895 return ERROR_INT("pixs not defined", __func__, 1);
1896
1897 /* Check if there is an image region. First convert to 1 bpp
1898 * at 175 ppi. If an image is found, assume there is no table. */
1899 pix1 = pixPrepare1bpp(pixs, box, 0.1, 175);
1900 pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL);
1901 if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1902 pixDestroy(&pix1);
1903 pixDestroy(&pix2);
1904 if (htfound) {
1905 *pscore = 0;
1906 L_INFO("pix has an image region\n", __func__);
1907 return 0;
1908 }
1909
1910 /* Crop, convert to 1 bpp, 75 ppi */
1911 if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL)
1912 return ERROR_INT("pix1 not made", __func__, 1);
1913
1914 pixZero(pix1, &empty);
1915 if (empty) {
1916 *pscore = 0;
1917 pixDestroy(&pix1);
1918 L_INFO("pix is empty\n", __func__);
1919 return 0;
1920 }
1921
1922 /* The 2x2 dilation on 75 ppi makes these two approaches very similar:
1923 * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution
1924 * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
1925 * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution
1926 * pix2 = pixDilateBrick(NULL, pix1, 2, 2);
1927 * But (2) is more efficient if the input image to pixPrepare1bpp()
1928 * is not at 300 ppi. */
1929 pix2 = pixDilateBrick(NULL, pix1, 2, 2);
1930
1931 /* Deskew both horizontally and vertically; rotate by 90
1932 * degrees if in landscape mode. */
1933 pix3 = pixDeskewBoth(pix2, 1);
1934 if (pixadb) {
1935 pixaAddPix(pixadb, pix2, L_COPY);
1936 pixaAddPix(pixadb, pix3, L_COPY);
1937 }
1938 if (orient == L_LANDSCAPE_MODE)
1939 pix4 = pixRotate90(pix3, 1);
1940 else
1941 pix4 = pixClone(pix3);
1942 pixDestroy(&pix1);
1943 pixDestroy(&pix2);
1944 pixDestroy(&pix3);
1945 pix1 = pixClone(pix4);
1946 pixDestroy(&pix4);
1947
1948 /* Look for horizontal and vertical lines */
1949 pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0);
1950 pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8);
1951 pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0);
1952 pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8);
1953 pix6 = pixOr(NULL, pix3, pix5);
1954 if (pixadb) {
1955 pixaAddPix(pixadb, pix2, L_COPY);
1956 pixaAddPix(pixadb, pix4, L_COPY);
1957 pixaAddPix(pixadb, pix3, L_COPY);
1958 pixaAddPix(pixadb, pix5, L_COPY);
1959 pixaAddPix(pixadb, pix6, L_COPY);
1960 }
1961 pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */
1962 pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */
1963
1964 /* Remove the lines */
1965 pixSubtract(pix1, pix1, pix6);
1966 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1967
1968 /* Remove noise pixels */
1969 pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0);
1970 if (pixadb) pixaAddPix(pixadb, pix7, L_COPY);
1971
1972 /* Look for vertical white space. Invert to convert white bg
1973 * to fg. Use a single rank-1 2x reduction, which closes small
1974 * fg holes, for the final processing at 37.5 ppi.
1975 * The vertical opening is then about 3 inches on a 300 ppi image.
1976 * We also remove vertical whitespace that is less than 5 pixels
1977 * wide at this resolution (about 0.1 inches) */
1978 pixInvert(pix7, pix7);
1979 pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0);
1980 pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH,
1981 L_SELECT_IF_GTE, NULL);
1982 pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */
1983 if (pixadb) {
1984 pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT);
1985 pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT);
1986 }
1987
1988 /* Require at least 2 of the following 4 conditions for a table.
1989 * Some tables do not have black (fg) lines, and for those we
1990 * require more than 6 long vertical whitespace (bg) lines. */
1991 score = 0;
1992 if (nhb > 1) score++;
1993 if (nvb > 2) score++;
1994 if (nvw > 3) score++;
1995 if (nvw > 6) score++;
1996 *pscore = score;
1997
1998 pixDestroy(&pix1);
1999 pixDestroy(&pix2);
2000 pixDestroy(&pix3);
2001 pixDestroy(&pix4);
2002 pixDestroy(&pix5);
2003 pixDestroy(&pix6);
2004 pixDestroy(&pix7);
2005 pixDestroy(&pix8);
2006 pixDestroy(&pix9);
2007 return 0;
2008}
2009
2010
2029PIX *
2031 BOX *box,
2032 l_float32 cropfract,
2033 l_int32 outres)
2034{
2035l_int32 w, h, res;
2036l_float32 factor;
2037BOX *box1;
2038PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2039
2040 if (!pixs)
2041 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2042
2043 /* Crop the image. If no box is given, use %cropfract to remove
2044 * pixels near the image boundary; this helps avoid false
2045 * negatives from noise that is often found there. */
2046 if (box) {
2047 pix1 = pixClipRectangle(pixs, box, NULL);
2048 } else {
2049 pixGetDimensions(pixs, &w, &h, NULL);
2050 box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h),
2051 (l_int32)((1.0 - 2 * cropfract) * w),
2052 (l_int32)((1.0 - 2 * cropfract) * h));
2053 pix1 = pixClipRectangle(pixs, box1, NULL);
2054 boxDestroy(&box1);
2055 }
2056
2057 /* Convert to 1 bpp with adaptive background cleaning */
2058 if (pixGetDepth(pixs) > 1) {
2059 pix2 = pixConvertTo8(pix1, 0);
2060 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160);
2061 pixDestroy(&pix1);
2062 pixDestroy(&pix2);
2063 if (!pix3) {
2064 L_INFO("pix cleaning failed\n", __func__);
2065 return NULL;
2066 }
2067 pix4 = pixThresholdToBinary(pix3, 200);
2068 pixDestroy(&pix3);
2069 } else {
2070 pix4 = pixClone(pix1);
2071 pixDestroy(&pix1);
2072 }
2073
2074 /* Scale the image to the requested output resolution;
2075 do not scale if %outres <= 0 */
2076 if (outres <= 0)
2077 return pix4;
2078 if ((res = pixGetXRes(pixs)) == 0) {
2079 L_WARNING("Resolution is not set: using 300 ppi\n", __func__);
2080 res = 300;
2081 }
2082 if (res != outres) {
2083 factor = (l_float32)outres / (l_float32)res;
2084 pix5 = pixScale(pix4, factor, factor);
2085 } else {
2086 pix5 = pixClone(pix4);
2087 }
2088 pixDestroy(&pix4);
2089 return pix5;
2090}
2091
2092
2093/*------------------------------------------------------------------*
2094 * Estimate the grayscale background value *
2095 *------------------------------------------------------------------*/
2112l_ok
2114 l_int32 darkthresh,
2115 l_float32 edgecrop,
2116 l_int32 *pbg)
2117{
2118l_int32 w, h, sampling;
2119l_float32 fbg;
2120BOX *box;
2121PIX *pix1, *pix2, *pixm;
2122
2123 if (!pbg)
2124 return ERROR_INT("&bg not defined", __func__, 1);
2125 *pbg = 0;
2126 if (!pixs || pixGetDepth(pixs) != 8)
2127 return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1);
2128 if (darkthresh > 128)
2129 L_WARNING("darkthresh unusually large\n", __func__);
2130 if (edgecrop < 0.0 || edgecrop >= 1.0)
2131 return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1);
2132
2133 pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
2134 pixGetDimensions(pix1, &w, &h, NULL);
2135
2136 /* Optionally crop inner part of image */
2137 if (edgecrop > 0.0) {
2138 box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h,
2139 (1.0 - edgecrop) * w, (1.0 - edgecrop) * h);
2140 pix2 = pixClipRectangle(pix1, box, NULL);
2141 boxDestroy(&box);
2142 } else {
2143 pix2 = pixClone(pix1);
2144 }
2145
2146 /* We will use no more than 50K samples */
2147 sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5));
2148
2149 /* Optionally make a mask over all pixels lighter than %darkthresh */
2150 pixm = NULL;
2151 if (darkthresh > 0) {
2152 pixm = pixThresholdToBinary(pix2, darkthresh);
2153 pixInvert(pixm, pixm);
2154 }
2155
2156 pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL);
2157 *pbg = (l_int32)(fbg + 0.5);
2158 pixDestroy(&pix1);
2159 pixDestroy(&pix2);
2160 pixDestroy(&pixm);
2161 return 0;
2162}
2163
2164
2165/*---------------------------------------------------------------------*
2166 * Largest white or black rectangles in an image *
2167 *---------------------------------------------------------------------*/
2194l_ok
2196 l_int32 polarity,
2197 l_int32 nrect,
2198 BOXA **pboxa,
2199 PIX **ppixdb)
2200{
2201l_int32 i, op, bx, by, bw, bh;
2202BOX *box;
2203BOXA *boxa;
2204PIX *pix;
2205
2206 if (ppixdb) *ppixdb = NULL;
2207 if (!pboxa)
2208 return ERROR_INT("&boxa not defined", __func__, 1);
2209 *pboxa = NULL;
2210 if (!pixs || pixGetDepth(pixs) != 1)
2211 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
2212 if (polarity != 0 && polarity != 1)
2213 return ERROR_INT("invalid polarity", __func__, 1);
2214 if (nrect > 1000) {
2215 L_WARNING("large num rectangles = %d requested; using 1000\n",
2216 __func__, nrect);
2217 nrect = 1000;
2218 }
2219
2220 pix = pixCopy(NULL, pixs);
2221 boxa = boxaCreate(nrect);
2222 *pboxa = boxa;
2223
2224 /* Sequentially find largest rectangle and fill with opposite color */
2225 for (i = 0; i < nrect; i++) {
2226 if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) {
2227 boxDestroy(&box);
2228 L_ERROR("failure in pixFindLargestRectangle\n", __func__);
2229 break;
2230 }
2231 boxaAddBox(boxa, box, L_INSERT);
2232 op = (polarity == 0) ? PIX_SET : PIX_CLR;
2233 boxGetGeometry(box, &bx, &by, &bw, &bh);
2234 pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0);
2235 }
2236
2237 if (ppixdb)
2238 *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3);
2239
2240 pixDestroy(&pix);
2241 return 0;
2242}
2243
2244
2295l_ok
2297 l_int32 polarity,
2298 BOX **pbox,
2299 PIX **ppixdb)
2300{
2301l_int32 i, j, w, h, d, wpls, val;
2302l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2;
2303l_int32 xmax, ymax; /* LR corner of the largest rectangle */
2304l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg;
2305l_int32 *lowestfg;
2306l_uint32 *datas, *lines;
2307l_uint32 **linew, **lineh;
2308BOX *box;
2309PIX *pixw, *pixh; /* keeps the width and height for the largest */
2310 /* rectangles whose LR corner is located there. */
2311
2312 if (ppixdb) *ppixdb = NULL;
2313 if (!pbox)
2314 return ERROR_INT("&box not defined", __func__, 1);
2315 *pbox = NULL;
2316 if (!pixs)
2317 return ERROR_INT("pixs not defined", __func__, 1);
2318 pixGetDimensions(pixs, &w, &h, &d);
2319 if (d != 1)
2320 return ERROR_INT("pixs not 1 bpp", __func__, 1);
2321 if (polarity != 0 && polarity != 1)
2322 return ERROR_INT("invalid polarity", __func__, 1);
2323
2324 /* Initialize lowest "fg" seen so far for each column */
2325 lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
2326 for (i = 0; i < w; i++)
2327 lowestfg[i] = -1;
2328
2329 /* The combination (val ^ polarity) is the color for which we
2330 * are searching for the maximum rectangle. For polarity == 0,
2331 * we search in the bg (white). */
2332 pixw = pixCreate(w, h, 32); /* stores width */
2333 pixh = pixCreate(w, h, 32); /* stores height */
2334 linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL);
2335 lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL);
2336 datas = pixGetData(pixs);
2337 wpls = pixGetWpl(pixs);
2338 maxarea = xmax = ymax = wmax = hmax = 0;
2339 for (i = 0; i < h; i++) {
2340 lines = datas + i * wpls;
2341 prevfg = -1;
2342 for (j = 0; j < w; j++) {
2343 val = GET_DATA_BIT(lines, j);
2344 if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. */
2345 if (i == 0 && j == 0) {
2346 wp = hp = 1;
2347 } else if (i == 0) {
2348 wp = linew[i][j - 1] + 1;
2349 hp = 1;
2350 } else if (j == 0) {
2351 wp = 1;
2352 hp = lineh[i - 1][j] + 1;
2353 } else {
2354 /* Expand #1 prev rectangle down */
2355 w1 = linew[i - 1][j];
2356 h1 = lineh[i - 1][j];
2357 horizdist = j - prevfg;
2358 wmin = L_MIN(w1, horizdist); /* width of new rectangle */
2359 area1 = wmin * (h1 + 1);
2360
2361 /* Expand #2 prev rectangle to right */
2362 w2 = linew[i][j - 1];
2363 h2 = lineh[i][j - 1];
2364 vertdist = i - lowestfg[j];
2365 hmin = L_MIN(h2, vertdist); /* height of new rectangle */
2366 area2 = hmin * (w2 + 1);
2367
2368 if (area1 > area2) {
2369 wp = wmin;
2370 hp = h1 + 1;
2371 } else {
2372 wp = w2 + 1;
2373 hp = hmin;
2374 }
2375 }
2376 } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */
2377 prevfg = j;
2378 lowestfg[j] = i;
2379 wp = hp = 0;
2380 }
2381 linew[i][j] = wp;
2382 lineh[i][j] = hp;
2383 if (wp * hp > maxarea) {
2384 maxarea = wp * hp;
2385 xmax = j;
2386 ymax = i;
2387 wmax = wp;
2388 hmax = hp;
2389 }
2390 }
2391 }
2392
2393 /* Translate from LR corner to Box coords (UL corner, w, h) */
2394 box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax);
2395 *pbox = box;
2396
2397 if (ppixdb) {
2398 *ppixdb = pixConvertTo8(pixs, TRUE);
2399 pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0);
2400 }
2401
2402 LEPT_FREE(linew);
2403 LEPT_FREE(lineh);
2404 LEPT_FREE(lowestfg);
2405 pixDestroy(&pixw);
2406 pixDestroy(&pixh);
2407 return 0;
2408}
2409
2410
2411/*---------------------------------------------------------------------*
2412 * Generate rectangle inside connected component *
2413 *---------------------------------------------------------------------*/
/*!
 * \brief   pixFindRectangleInCC()
 *
 * \param[in]    pixs     1 bpp
 * \param[in]    boxs     [optional] region of pixs to search; if NULL,
 *                        all of pixs is used
 * \param[in]    fract    in (0.0 ... 1.0]; a scanline qualifies when its
 *                        longest run is at least fract * (scan width)
 * \param[in]    dir      L_SCAN_HORIZONTAL or L_SCAN_VERTICAL; for the
 *                        latter the image is rotated by 90 degrees
 *                        before scanning and the result is rotated back
 * \param[in]    select   L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
 *                        L_LARGEST_AREA or L_SMALLEST_AREA; how the box
 *                        found scanning down and the box found scanning
 *                        up are combined
 * \param[in]    debug    if nonzero, writes fitrect.pdf and fitrect.png
 *                        to /tmp/lept/rect/
 * \return  box, translated by the UL corner of %boxs if it is given;
 *          or NULL on error or if no scanline has a long enough run
 *
 * NOTE(review): pixFindMaxHorizontalRunOnLine() is defined elsewhere;
 * from its use here it presumably returns the start and length of the
 * longest fg run on the given row -- confirm.
 */
2445BOX *
2446pixFindRectangleInCC(PIX *pixs,
2447 BOX *boxs,
2448 l_float32 fract,
2449 l_int32 dir,
2450 l_int32 select,
2451 l_int32 debug)
2452{
2453l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res;
2454l_int32 xfirst, xlast, xstart, yfirst, ylast, length;
2455BOX *box1, *box2, *box3, *box4, *box5;
2456PIX *pix1, *pix2, *pixdb1, *pixdb2;
2457PIXA *pixadb;
2458
2459 if (!pixs || pixGetDepth(pixs) != 1)
2460 return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
2461 if (fract <= 0.0 || fract > 1.0)
2462 return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
2463 if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
2464 return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
2465 if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
2466 select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
2467 return (BOX *)ERROR_PTR("invalid select", __func__, NULL);
2468
2469 /* Extract the c.c. if necessary */
 /* (x, y) is the offset used at the end to map the result back to pixs */
2470 x = y = 0;
2471 if (boxs) {
2472 pix1 = pixClipRectangle(pixs, boxs, NULL);
2473 boxGetGeometry(boxs, &x, &y, NULL, NULL);
2474 } else {
2475 pix1 = pixClone(pixs);
2476 }
2477
2478 /* All fast scans are horizontal; rotate 90 deg cw if necessary */
2479 if (dir == L_SCAN_VERTICAL)
2480 pix2 = pixRotate90(pix1, 1);
2481 else /* L_SCAN_HORIZONTAL */
2482 pix2 = pixClone(pix1);
 /* w and h are the dimensions of the image that is scanned (pix2) */
2483 pixGetDimensions(pix2, &w, &h, NULL);
2484
 /* Debug images are collected in pixadb, which exists only if %debug */
2485 pixadb = (debug) ? pixaCreate(0) : NULL;
2486 pixdb1 = NULL;
2487 if (pixadb) {
2488 lept_mkdir("lept/rect");
2489 pixaAddPix(pixadb, pix1, L_CLONE);
2490 pixdb1 = pixConvertTo32(pix2);
2491 }
2492 pixDestroy(&pix1);
2493
2494 /* Scanning down, find the first scanline with a long enough run.
2495 * That run goes from (xfirst, yfirst) to (xlast, yfirst). */
2496 found = FALSE;
2497 for (i = 0; i < h; i++) {
2498 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2499 if (length >= (l_int32)(fract * w + 0.5)) {
2500 yfirst = i;
2501 xfirst = xstart;
2502 xlast = xfirst + length - 1;
2503 found = TRUE;
2504 break;
2505 }
2506 }
 /* xfirst, xlast and yfirst are only set when a run was found */
2507 if (!found) {
2508 L_WARNING("no run of sufficient size was found\n", __func__);
2509 pixDestroy(&pix2);
2510 pixDestroy(&pixdb1);
2511 pixaDestroy(&pixadb);
2512 return NULL;
2513 }
2514
2515 /* Continue down until the condition fails */
 /* A row fails if its longest run does not cover [xfirst, xlast].
  * Reaching the last row (i == h - 1) also stops the scan, and that
  * row is then not included in the box. */
2516 w1 = xlast - xfirst + 1;
2517 h1 = h - yfirst; /* init */
2518 ylast = h - 1; /* init */
2519 for (i = yfirst + 1; i < h; i++) {
2520 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2521 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2522 i == h - 1) {
2523 ylast = i - 1;
2524 h1 = ylast - yfirst + 1;
2525 break;
2526 }
2527 }
2528 box1 = boxCreate(xfirst, yfirst, w1, h1);
2529
2530 /* Scanning up, find the first scanline with a long enough run.
2531 * That run goes from (xfirst, ylast) to (xlast, ylast). */
 /* xfirst, xlast, yfirst and ylast are reused for the second box */
2532 for (i = h - 1; i >= 0; i--) {
2533 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2534 if (length >= (l_int32)(fract * w + 0.5)) {
2535 ylast = i;
2536 xfirst = xstart;
2537 xlast = xfirst + length - 1;
2538 break;
2539 }
2540 }
2541
2542 /* Continue up until the condition fails */
 /* As above, reaching row 0 stops the scan and excludes that row */
2543 w2 = xlast - xfirst + 1;
2544 h2 = ylast + 1; /* initialize */
2545 for (i = ylast - 1; i >= 0; i--) {
2546 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2547 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2548 i == 0) {
2549 yfirst = i + 1;
2550 h2 = ylast - yfirst + 1;
2551 break;
2552 }
2553 }
2554 box2 = boxCreate(xfirst, yfirst, w2, h2);
2555 pixDestroy(&pix2);
2556
 /* pixdb1 is inserted into pixadb here, so pixadb now owns it */
2557 if (pixadb) {
2558 pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
2559 pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
2560 pixaAddPix(pixadb, pixdb1, L_INSERT);
2561 }
2562
2563 /* Select the final result from the two boxes */
2564 if (select == L_GEOMETRIC_UNION)
2565 box3 = boxBoundingRegion(box1, box2);
2566 else if (select == L_GEOMETRIC_INTERSECTION)
2567 box3 = boxOverlapRegion(box1, box2);
2568 else if (select == L_LARGEST_AREA)
2569 box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2570 else /* select == L_SMALLEST_AREA) */
2571 box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2572 boxDestroy(&box1);
2573 boxDestroy(&box2);
2574
2575 /* Rotate the box 90 degrees ccw if necessary */
 /* box3 is tested because the selection above may not produce a box */
2576 box4 = NULL;
2577 if (box3) {
2578 if (dir == L_SCAN_VERTICAL)
2579 box4 = boxRotateOrth(box3, w, h, 3);
2580 else
2581 box4 = boxCopy(box3);
2582 }
2583
2584 /* Transform back to global coordinates if %boxs exists */
2585 box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
2586 boxDestroy(&box3);
2587 boxDestroy(&box4);
2588
2589 /* Debug output */
 /* pixdb1 and pix1 are reused here for new images; pixdb2 is inserted
  * into pixadb, and the others are destroyed before returning */
2590 if (pixadb) {
2591 pixdb1 = pixConvertTo8(pixs, 0);
2592 pixAddConstantGray(pixdb1, 190);
2593 pixdb2 = pixConvertTo32(pixdb1);
2594 if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
2595 pixaAddPix(pixadb, pixdb2, L_INSERT);
2596 res = pixGetXRes(pixs);
2597 L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
2598 pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
2599 "/tmp/lept/rect/fitrect.pdf");
2600 pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
2601 pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
2602 pixDestroy(&pix1);
2603 pixDestroy(&pixdb1);
2604 pixaDestroy(&pixadb);
2605 }
2606
2607 return box5;
2608}
2609
2610/*------------------------------------------------------------------*
2611 * Automatic photoinvert for OCR *
2612 *------------------------------------------------------------------*/
2632PIX *
2634 l_int32 thresh,
2635 PIX **ppixm,
2636 PIXA *pixadb)
2637{
2638l_int32 i, n, empty, x, y, w, h;
2639l_float32 fgfract;
2640BOX *box1;
2641BOXA *boxa1;
2642PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2643
2644 if (ppixm) *ppixm = NULL;
2645 if (!pixs)
2646 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2647 if (thresh == 0) thresh = 128;
2648
2649 if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL)
2650 return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
2651 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2652
2653 /* Identify regions for photo-inversion:
2654 * (1) Start with the halftone mask.
2655 * (2) Eliminate ordinary text and halftones in the mask.
2656 * (3) Some regions of inverted text may have been removed in
2657 * steps (1) and (2). Conditionally fill holes in the mask,
2658 * but do not fill out to the bounding rect. */
2659 pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb);
2660 pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* remove noise */
2661 pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0);
2662 if (pixadb) {
2663 pixaAddPix(pixadb, pix2, L_CLONE);
2664 pixaAddPix(pixadb, pix3, L_CLONE);
2665 pixaAddPix(pixadb, pix4, L_COPY);
2666 }
2667 pixDestroy(&pix2);
2668 pixDestroy(&pix3);
2669 pixZero(pix4, &empty);
2670 if (empty) {
2671 pixDestroy(&pix4);
2672 return pix1;
2673 }
2674
2675 /* Examine each component and validate the inversion.
2676 * Require at least 60% of pixels under each component to be FG. */
2677 boxa1 = pixConnCompBB(pix4, 8);
2678 n = boxaGetCount(boxa1);
2679 for (i = 0; i < n; i++) {
2680 box1 = boxaGetBox(boxa1, i, L_COPY);
2681 pix5 = pixClipRectangle(pix1, box1, NULL);
2682 pixForegroundFraction(pix5, &fgfract);
2683 if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract);
2684 boxGetGeometry(box1, &x, &y, &w, &h);
2685 if (fgfract < 0.6) /* erase from the mask */
2686 pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0);
2687 pixDestroy(&pix5);
2688 boxDestroy(&box1);
2689 }
2690 boxaDestroy(&boxa1);
2691 pixZero(pix4, &empty);
2692 if (empty) {
2693 pixDestroy(&pix4);
2694 return pix1;
2695 }
2696
2697 /* Combine pixels of the photo-inverted pix with the binarized input */
2698 pix5 = pixInvert(NULL, pix1);
2699 pixCombineMasked(pix1, pix5, pix4);
2700
2701 if (pixadb) {
2702 pixaAddPix(pixadb, pix5, L_CLONE);
2703 pixaAddPix(pixadb, pix1, L_COPY);
2704 }
2705 pixDestroy(&pix5);
2706 if (ppixm)
2707 *ppixm = pix4;
2708 else
2709 pixDestroy(&pix4);
2710 return pix1;
2711}
PIX * pixConvertTo8MinMax(PIX *pixs)
pixConvertTo8MinMax()
Definition adaptmap.c:2983
PIX * pixBackgroundNormTo1MinMax(PIX *pixs, l_int32 contrast, l_int32 scalefactor)
pixBackgroundNormTo1MinMax()
Definition adaptmap.c:2939
PIX * pixCleanBackgroundToWhite(PIX *pixs, PIX *pixim, PIX *pixg, l_float32 gamma, l_int32 blackval, l_int32 whiteval)
pixCleanBackgroundToWhite()
Definition adaptmap.c:204
#define GET_DATA_BIT(pdata, n)
@ L_DEFAULT_ENCODE
Definition imageio.h:158
l_ok pixFindThreshFgExtent(PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot)
pixFindThreshFgExtent()
Definition pageseg.c:1794
PIX * pixGenTextblockMask(PIX *pixs, PIX *pixvws, PIXA *pixadb)
pixGenTextblockMask()
Definition pageseg.c:478
l_ok pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb)
pixGetRegionsBinary()
Definition pageseg.c:116
PIX * pixGenTextlineMask(PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb)
pixGenTextlineMask()
Definition pageseg.c:388
l_ok pixEstimateBackground(PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg)
pixEstimateBackground()
Definition pageseg.c:2113
BOX * pixFindPageForeground(PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac)
pixFindPageForeground()
Definition pageseg.c:841
PIXA * pixExtractRawTextlines(PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
pixExtractRawTextlines()
Definition pageseg.c:1358
PIX * pixAutoPhotoinvert(PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb)
pixAutoPhotoinvert()
Definition pageseg.c:2633
PIX * pixPrepare1bpp(PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres)
pixPrepare1bpp()
Definition pageseg.c:2030
PIXA * pixExtractTextlines(PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
pixExtractTextlines()
Definition pageseg.c:1220
PIX * pixCropImage(PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_add, l_int32 tb_add, l_float32 maxwiden, const char *debugfile, BOX **pcropbox)
pixCropImage()
Definition pageseg.c:573
l_ok pixFindLargeRectangles(PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb)
pixFindLargeRectangles()
Definition pageseg.c:2195
l_ok pixCountTextColumns(PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb)
pixCountTextColumns()
Definition pageseg.c:1484
l_ok pixSplitIntoCharacters(PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug)
pixSplitIntoCharacters()
Definition pageseg.c:967
PIX * pixGenHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug)
pixGenHalftoneMask()
Definition pageseg.c:282
l_ok pixFindLargestRectangle(PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb)
pixFindLargestRectangle()
Definition pageseg.c:2296
PIX * pixGenerateHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb)
pixGenerateHalftoneMask()
Definition pageseg.c:307
PIX * pixCleanImage(PIX *pixs, l_int32 contrast, l_int32 rotation, l_int32 scale, l_int32 opensize)
pixCleanImage()
Definition pageseg.c:738
l_ok pixDecideIfTable(PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb)
pixDecideIfTable()
Definition pageseg.c:1882
BOXA * pixSplitComponentWithProfile(PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug)
pixSplitComponentWithProfile()
Definition pageseg.c:1066
l_ok pixDecideIfText(PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb)
pixDecideIfText()
Definition pageseg.c:1630
@ L_SELECT_IF_LTE
Definition pix.h:577
@ L_SELECT_IF_LT
Definition pix.h:575
@ L_SELECT_IF_GT
Definition pix.h:576
@ L_SELECT_IF_GTE
Definition pix.h:578
@ L_NEG_SLOPE_LINE
Definition pix.h:809
@ L_SELECT_IF_BOTH
Definition pix.h:599
@ L_SELECT_IF_EITHER
Definition pix.h:597
@ L_SELECT_WIDTH
Definition pix.h:593
@ L_SELECT_HEIGHT
Definition pix.h:594
@ L_SORT_BY_AREA
Definition pix.h:537
@ L_SORT_BY_WIDTH
Definition pix.h:532
@ REMOVE_CMAP_TO_GRAYSCALE
Definition pix.h:381
@ L_ADD_BELOW
Definition pix.h:1003
@ L_COPY
Definition pix.h:505
@ L_CLONE
Definition pix.h:506
@ L_INSERT
Definition pix.h:504
@ L_LANDSCAPE_MODE
Definition pix.h:819
@ L_SCAN_VERTICAL
Definition pix.h:835
@ L_SCAN_HORIZONTAL
Definition pix.h:834
#define PIX_CLR
Definition pix.h:447
@ L_SORT_DECREASING
Definition pix.h:523
#define PIX_SET
Definition pix.h:448
@ L_GEOMETRIC_UNION
Definition pix.h:889
@ L_SMALLEST_AREA
Definition pix.h:892
@ L_LARGEST_AREA
Definition pix.h:891
@ L_GEOMETRIC_INTERSECTION
Definition pix.h:890
Definition bmf.h:47
struct Boxa * boxa