87#include <config_auto.h>
91#include "allheaders.h"
95static const l_int32 MinWidth = 100;
96static const l_int32 MinHeight = 100;
101 l_int32 lr_border, l_int32 tb_border,
102 l_float32 maxwiden,
PIX **ppixsc);
130l_int32 w, h, htfound, tlfound;
131PIX *pixr, *pix1, *pix2;
142 if (ppixhm) *ppixhm = NULL;
143 if (ppixtm) *ppixtm = NULL;
144 if (ppixtb) *ppixtb = NULL;
145 if (!pixs || pixGetDepth(pixs) != 1)
146 return ERROR_INT(
"pixs undefined or not 1 bpp", __func__, 1);
147 pixGetDimensions(pixs, &w, &h, NULL);
148 if (w < MinWidth || h < MinHeight) {
149 L_ERROR(
"pix too small: w = %d, h = %d\n", __func__, w, h);
154 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
155 if (pixadb) pixaAddPix(pixadb, pixr,
L_COPY);
166 pixDestroy(&pixtext);
176 if (pixadb) pixaAddPix(pixadb, pixtbf2,
L_COPY);
181 pixhm = pixExpandReplicate(pixhm2, 2);
182 pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
183 pixOr(pixhm, pixhm, pix1);
186 if (pixadb) pixaAddPix(pixadb, pixhm,
L_COPY);
188 pix1 = pixExpandReplicate(pixtm2, 2);
189 pixtm = pixDilateBrick(NULL, pix1, 3, 3);
192 if (pixadb) pixaAddPix(pixadb, pixtm,
L_COPY);
195 pix1 = pixExpandReplicate(pixtbf2, 2);
196 pixtb = pixDilateBrick(NULL, pix1, 3, 3);
197 pixDestroy(&pixtbf2);
199 if (pixadb) pixaAddPix(pixadb, pixtb,
L_COPY);
201 pixtb = pixCreateTemplate(pixs);
206 pix1 = pixSubtract(NULL, pixs, pixtm);
207 pix2 = pixSubtract(NULL, pix1, pixhm);
217 boxa = pixConnComp(pixtm, &pixa, 8);
218 pixGetDimensions(pixtm, &w, &h, NULL);
219 pix1 = pixaDisplayRandomCmap(pixa, w, h);
220 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
230 ptaa = pixGetOuterBordersPtaa(pixtb);
231 lept_mkdir(
"lept/pageseg");
232 ptaaWriteDebug(
"/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1);
233 pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
234 cmap = pixGetColormap(pix1);
235 pixcmapResetColor(cmap, 0, 130, 130, 130);
242 BOXA *bahm, *batm, *batb;
243 bahm = pixConnComp(pixhm, NULL, 4);
244 batm = pixConnComp(pixtm, NULL, 4);
245 batb = pixConnComp(pixtb, NULL, 4);
246 boxaWriteDebug(
"/tmp/lept/pageseg/htmask.boxa", bahm);
247 boxaWriteDebug(
"/tmp/lept/pageseg/textmask.boxa", batm);
248 boxaWriteDebug(
"/tmp/lept/pageseg/textblock.boxa", batb);
254 pixaConvertToPdf(pixadb, 0, 1.0, 0, 0,
"Debug page segmentation",
255 "/tmp/lept/pageseg/debug.pdf");
256 L_INFO(
"Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__);
321PIX *pix1, *pix2, *pixhs, *pixhm, *pixd;
323 if (ppixtext) *ppixtext = NULL;
324 if (phtfound) *phtfound = 0;
325 if (!pixs || pixGetDepth(pixs) != 1)
326 return (
PIX *)ERROR_PTR(
"pixs undefined or not 1 bpp", __func__, NULL);
327 pixGetDimensions(pixs, &w, &h, NULL);
328 if (w < MinWidth || h < MinHeight) {
329 L_ERROR(
"pix too small: w = %d, h = %d\n", __func__, w, h);
334 pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0);
335 pix2 = pixOpenBrick(NULL, pix1, 5, 5);
336 pixhs = pixExpandReplicate(pix2, 4);
339 if (pixadb) pixaAddPix(pixadb, pixhs,
L_COPY);
342 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
343 if (pixadb) pixaAddPix(pixadb, pixhm,
L_COPY);
346 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
347 if (pixadb) pixaAddPix(pixadb, pixd,
L_COPY);
350 pixOpenBrick(pixd, pixd, 9, 9);
354 pixZero(pixd, &empty);
355 if (phtfound && !empty)
361 *ppixtext = pixCopy(NULL, pixs);
363 *ppixtext = pixSubtract(NULL, pixs, pixd);
364 if (pixadb) pixaAddPix(pixadb, *ppixtext,
L_COPY);
402PIX *pix1, *pix2, *pixvws, *pixd;
404 if (ptlfound) *ptlfound = 0;
406 return (
PIX *)ERROR_PTR(
"&pixvws not defined", __func__, NULL);
408 if (!pixs || pixGetDepth(pixs) != 1)
409 return (
PIX *)ERROR_PTR(
"pixs undefined or not 1 bpp", __func__, NULL);
410 pixGetDimensions(pixs, &w, &h, NULL);
411 if (w < MinWidth || h < MinHeight) {
412 L_ERROR(
"pix too small: w = %d, h = %d\n", __func__, w, h);
417 pix1 = pixInvert(NULL, pixs);
426 pix2 = pixMorphCompSequence(pix1,
"o80.60", 0);
427 pixSubtract(pix1, pix1, pix2);
428 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
434 pixvws = pixMorphCompSequence(pix1,
"o5.1 + o1.200", 0);
436 if (pixadb) pixaAddPix(pixadb, pixvws,
L_COPY);
443 pix1 = pixMorphSequence(pixs,
"c30.1", 0);
444 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
445 pixd = pixSubtract(NULL, pix1, pixvws);
446 pixOpenBrick(pixd, pixd, 3, 3);
447 if (pixadb) pixaAddPix(pixadb, pixd,
L_COPY);
452 pixZero(pixd, &empty);
491PIX *pix1, *pix2, *pix3, *pixd;
493 if (!pixs || pixGetDepth(pixs) != 1)
494 return (
PIX *)ERROR_PTR(
"pixs undefined or not 1 bpp", __func__, NULL);
495 pixGetDimensions(pixs, &w, &h, NULL);
496 if (w < MinWidth || h < MinHeight) {
497 L_ERROR(
"pix too small: w = %d, h = %d\n", __func__, w, h);
501 return (
PIX *)ERROR_PTR(
"pixvws not defined", __func__, NULL);
504 pix1 = pixMorphSequence(pixs,
"c1.10 + o4.1", 0);
505 pixZero(pix1, &empty);
508 L_INFO(
"no fg pixels in textblock mask\n", __func__);
511 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
519 pix2 = pixMorphSequenceByComponent(pix1,
"c30.30 + d3.3", 8, 0, 0, NULL);
520 pixCloseSafeBrick(pix2, pix2, 10, 1);
521 if (pixadb) pixaAddPix(pixadb, pix2,
L_COPY);
522 pix3 = pixSubtract(NULL, pix2, pixvws);
523 if (pixadb) pixaAddPix(pixadb, pix3,
L_COPY);
526 if (pixadb) pixaAddPix(pixadb, pixd,
L_COPY);
613 const char *debugfile,
617l_int32 w, h, val, ret;
620PIX *pix1, *pix2, *pix3, *pix4;
623 if (pcropbox) *pcropbox = NULL;
625 return (
PIX *)ERROR_PTR(
"pixs not defined", __func__, NULL);
626 if (edgeclean > 15) {
627 L_WARNING(
"edgeclean > 15; setting to 15\n", __func__);
630 if (edgeclean < -1) {
631 lept_stderr(
"Using edgeclean = -2\n");
634 pixGetDimensions(pixs, &w, &h, NULL);
635 if (w < MinWidth || h < MinHeight) {
636 L_ERROR(
"pix too small: w = %d, h = %d\n", __func__, w, h);
639 if (lr_clear < 0) lr_clear = 0;
640 if (tb_clear < 0) tb_clear = 0;
641 if (lr_border < 0) lr_border = 0;
642 if (tb_border < 0) tb_border = 0;
643 if (lr_clear > w / 6 || tb_clear > h / 6) {
644 L_ERROR(
"lr_clear or tb_clear too large; must be <= %d and %d\n",
645 __func__, w / 6, h / 6);
649 L_WARNING(
"maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
651 if (printwiden < 0 || printwiden > 2) printwiden = 0;
652 pixa1 = (debugfile) ? pixaCreate(5) : NULL;
653 if (pixa1) pixaAddPix(pixa1, pixs,
L_COPY);
657 pix2 = pixReduceRankBinary2(pix1, 2, NULL);
660 pixSetOrClearBorder(pix2, lr_clear / 2, lr_clear / 2, tb_clear / 2,
662 if (pixa1) pixaAddPix(pixa1, pixScale(pix2, 2.0, 2.0),
L_INSERT);
670 if (edgeclean == 0) {
671 ret = pixClipToForeground(pix2, NULL, &box1);
672 }
else if (edgeclean > 0) {
674 snprintf(cmd, 64,
"c%d.%d + o%d.%d", val, val, val, val);
675 pix3 = pixMorphSequence(pix2, cmd, 0);
676 ret = pixClipToForeground(pix3, NULL, &box1);
678 }
else if (edgeclean == -1) {
685 L_ERROR(
"no returned b.b. for foreground\n", __func__);
693 box2 = boxTransform(box1, 0, 0, 2.0, 2.0);
696 pix2 = pixCopy(NULL, pix1);
697 pixRenderBoxArb(pix2, box2, 5, 255, 0, 0);
702 pix2 = pixClipRectangle(pix1, box2, NULL);
707 pix3 = pixMorphSequence(pix2,
"o80.1 + d1.2", 0);
708 pixOr(pix2, pix2, pix3);
716 pix2 = pixCopy(NULL, pix3);
724 r1 = (l_float32)h / (l_float32)w;
728 else if (printwiden == 2)
731 r2 = L_MIN(r2, 1.20);
732 lept_stderr(
"oversize h/w ratio by factor %6.3f\n", r2);
733 pix4 = pixScale(pix3, r2, 1.0);
735 pix4 = pixClone(pix3);
744 pixaAddPix(pixa1, pix4,
L_COPY);
745 lept_stderr(
"Writing debug file: %s\n", debugfile);
779l_int32 w1, h1, w2, h2, n, empty;
785 return ERROR_INT(
"pbox not defined", __func__, 1);
787 if (!pixs || pixGetDepth(pixs) != 1)
788 return ERROR_INT(
"pixs undefined or not 1 bpp", __func__, 1);
791 pix1 = pixMorphSequence(pixs,
"r11 + c3.80 + o3.80 + x4", 0);
792 pixZero(pix1, &empty);
795 return ERROR_INT(
"pix1 is empty", __func__, 1);
801 boxa1 = pixConnCompBB(pix1, 8);
804 if ((n = boxaGetCount(boxa2)) == 1) {
805 *pbox = boxaGetBox(boxa2, 0,
L_COPY);
807 box1 = boxaGetBox(boxa2, 0,
L_COPY);
808 box2 = boxaGetBox(boxa2, 1,
L_COPY);
809 boxGetGeometry(box1, NULL, NULL, &w1, &h1);
810 boxGetGeometry(box2, NULL, NULL, &w2, &h2);
811 if (((l_float32)(w2 * h2) / (l_float32)(w1 * h1)) > 0.7) {
812 *pbox = boxBoundingRegion(box1, box2);
852 return ERROR_INT(
"pbox not defined", __func__, 1);
854 if (!pixs || pixGetDepth(pixs) != 1)
855 return ERROR_INT(
"pixs undefined or not 1 bpp", __func__, 1);
858 pix1 = pixMorphSequence(pixs,
"r22 + c5.5 + o7.7", 0);
859 pixZero(pix1, &empty);
862 return ERROR_INT(
"pix1 is empty", __func__, 1);
866 pixInvert(pix1, pix1);
867 pix2 = pixMorphSequence(pix1,
"c11.11 + o11.11", 0);
869 boxa1 = pixConnCompBB(pix2, 8);
872 box1 = boxaGetBox(boxa2, 0,
L_COPY);
873 boxAdjustSides(box1, box1, 5, -5, 5, -5);
874 *pbox = boxTransform(box1, 0, 0, 4.0, 4.0);
919static l_int32 first_time = TRUE;
920l_int32 wi, hi, wmax, hmax, wn, wf, hf, xf;
921l_float32 ratio, scaleh, scalew, scalewid;
924 if (ppixsc) *ppixsc = NULL;
925 if (!pixs || pixGetDepth(pixs) != 1)
926 return (
PIX *)ERROR_PTR(
"pixs undefined or not 1 bpp", __func__, NULL);
927 if (lr_border < 0) lr_border = 0;
928 if (tb_border < 0) tb_border = 0;
929 maxwiden = L_MAX(1.0, maxwiden);
931 L_WARNING(
"maxwiden = %f > 1.15; suggest between 1.0 and 1.15\n",
943 pixGetDimensions(pixs, &wi, &hi, NULL);
944 wmax = w - 2 * lr_border;
945 hmax = h - 2 * tb_border;
946 ratio = (l_float32)(wmax * hi) / (l_float32)(hmax * wi);
948 scaleh = (l_float32)hmax / (l_float32)hi;
950 scalewid = L_MIN(maxwiden, (l_float32)wmax / (l_float32)wn);
951 scalew = scaleh * scalewid;
954 pix1 = pixScale(pixs, scalew, scaleh);
955 if (first_time == TRUE) {
956 lept_stderr(
"Width stretched by factor %5.3f\n", scalewid);
961 scalew = (l_float32)wmax / (l_float32)wi;
962 pix1 = pixScale(pixs, scalew, scalew);
970 pixd = pixCreate(w, h, 1);
971 pixRasterop(pixd, xf, tb_border, wf, hf,
PIX_SRC, pix1, 0, 0);
1022PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1025 return (
PIX *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1026 if (rotation < 0 || rotation > 3) {
1027 L_ERROR(
"invalid rotation = %d; rotation must be in {0,1,2,3}\n",
1028 __func__, rotation);
1031 if (contrast < 1 || contrast > 10) {
1032 L_ERROR(
"invalid contrast = %d; contrast must be in [1...10]\n",
1033 __func__, contrast);
1036 if (scale != 1 && scale != 2) {
1037 L_ERROR(
"invalid scale = %d; scale must be 1 or 2\n",
1038 __func__, opensize);
1042 L_ERROR(
"invalid opensize = %d; opensize must be <= 3\n",
1043 __func__, opensize);
1047 if (pixGetDepth(pixs) == 1) {
1049 pix1 = pixRotateOrth(pixs, rotation);
1051 pix1 = pixClone(pixs);
1052 pix2 = pixFindSkewAndDeskew(pix1, 2, NULL, NULL);
1054 pix4 = pixExpandBinaryReplicate(pix2, 2, 2);
1056 pix4 = pixClone(pix2);
1060 pix2 = pixRotateOrth(pix1, rotation);
1062 pix2 = pixClone(pix1);
1063 pix3 = pixFindSkewAndDeskew(pix2, 2, NULL, NULL);
1068 if (opensize == 2 || opensize == 3) {
1069 snprintf(sequence,
sizeof(sequence),
"o%d.%d", opensize, opensize);
1070 pix5 = pixMorphSequence(pix4, sequence, 0);
1072 pix5 = pixClone(pix4);
1125l_int32 flag, nbox, intersects;
1126l_int32 w, h, bx, by, bw, bh, left, right, top, bottom;
1127PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
1128BOX *box, *boxfg, *boxin, *boxd;
1132 return (
BOX *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1133 pixGetDimensions(pixs, &w, &h, NULL);
1134 if (w < MinWidth || h < MinHeight) {
1135 L_ERROR(
"pix too small: w = %d, h = %d\n", __func__, w, h);
1143 flag = (showmorph) ? 100 : 0;
1144 pixb = pixConvertTo1(pixs, threshold);
1145 pixb2 = pixScale(pixb, 0.5, 0.5);
1146 pixseed = pixMorphSequence(pixb2,
"o1.2 + c9.9 + o3.3", flag);
1147 pix1 = pixMorphSequence(pixb2,
"o50.1", 0);
1148 pixOr(pixseed, pixseed, pix1);
1150 pix1 = pixMorphSequence(pixb2,
"o1.50", 0);
1151 pixOr(pixseed, pixseed, pix1);
1153 pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
1154 pixm = pixRemoveBorderConnComps(pixsf, 8);
1162 pix1 = pixMorphSequence(pixm,
"c50.50", flag);
1163 ba1 = pixConnComp(pix1, NULL, 8);
1165 pixGetDimensions(pix1, &w, &h, NULL);
1166 nbox = boxaGetCount(ba2);
1168 box = boxaGetBox(ba2, 0,
L_CLONE);
1169 boxGetGeometry(box, &bx, &by, &bw, &bh);
1170 left = (bx > mindist) ? erasedist : 0;
1171 right = (w - bx - bw > mindist) ? erasedist : 0;
1172 top = (by > mindist) ? erasedist : 0;
1173 bottom = (h - by - bh > mindist) ? erasedist : 0;
1174 pixSetOrClearBorder(pixm, left, right, top, bottom,
PIX_CLR);
1182 pixClipToForeground(pixm, NULL, &boxfg);
1188 boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
1189 boxIntersects(boxfg, boxin, &intersects);
1191 if (!intersects) boxDestroy(&boxfg);
1196 boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2);
1197 boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);
1201 pixg2 = pixConvert1To4Cmap(pixb);
1202 pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
1203 pixacompAddPix(pixac, pixg2, IFF_DEFAULT);
1210 pixDestroy(&pixseed);
1251l_int32 ncomp, i, xoff, yoff;
1252BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
1254PIX *pix, *pix1, *pix2, *pixdb;
1255PIXA *pixa1, *pixadb;
1257 if (pboxa) *pboxa = NULL;
1258 if (ppixa) *ppixa = NULL;
1259 if (ppixdebug) *ppixdebug = NULL;
1260 if (!pixs || pixGetDepth(pixs) != 1)
1261 return ERROR_INT(
"pixs not defined or not 1 bpp", __func__, 1);
1268 pix2 = pixMorphSequence(pix1,
"c1.10", 0);
1272 boxa1 = pixConnComp(pix2, &pixa1, 8);
1274 boxaDestroy(&boxa1);
1277 ncomp = pixaGetCount(pixa1);
1278 boxa2 = boxaCreate(ncomp);
1279 pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
1280 for (i = 0; i < ncomp; i++) {
1281 pix = pixaGetPix(pixa1, i,
L_CLONE);
1285 pixaAddPix(pixadb, pixdb,
L_INSERT);
1289 pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
1290 boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
1291 boxaJoin(boxa2, boxat2, 0, -1);
1293 boxaDestroy(&boxat1);
1294 boxaDestroy(&boxat2);
1296 pixaDestroy(&pixa1);
1300 if (pixaGetCount(pixadb) > 0) {
1301 *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
1304 pixaDestroy(&pixadb);
1308 baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
1309 boxad = boxaaFlattenToBoxa(baa, NULL,
L_CLONE);
1311 boxaDestroy(&boxa2);
1315 *ppixa = pixClipRectangles(pixs, boxad);
1319 boxaDestroy(&boxad);
1348l_int32 w, h, n2, i, firstmin, xmin, xshift;
1349l_int32 nmin, nleft, nright, nsplit, isplit, ncomp;
1350l_int32 *array1, *array2;
1353NUMA *na1, *na2, *nasplit;
1356 if (ppixdebug) *ppixdebug = NULL;
1357 if (!pixs || pixGetDepth(pixs) != 1)
1358 return (
BOXA *)ERROR_PTR(
"pixa undefined or not 1 bpp", __func__, NULL);
1359 pixGetDimensions(pixs, &w, &h, NULL);
1362 pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);
1365 boxad = boxaCreate(2);
1366 na1 = pixCountPixelsByColumn(pix1);
1368 na2 = numaFindExtrema(na1, delta, NULL);
1369 n2 = numaGetCount(na2);
1371 box = boxCreate(0, 0, w, h);
1381 array1 = numaGetIArray(na1);
1382 array2 = numaGetIArray(na2);
1383 if (ppixdebug) numaWriteStderr(na2);
1384 firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
1385 nasplit = numaCreate(n2);
1386 for (i = firstmin; i < n2 - 1; i+= 2) {
1388 nmin = array1[xmin];
1389 if (xmin + 2 >= w)
break;
1390 nleft = array1[xmin - 2];
1391 nright = array1[xmin + 2];
1394 "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
1395 xmin, w, nleft, nmin, nright);
1397 if (nleft - nmin >= mindel && nright - nmin >= mindel)
1398 numaAddNumber(nasplit, xmin);
1400 nsplit = numaGetCount(nasplit);
1403 if (ppixdebug && nsplit > 0) {
1404 lept_mkdir(
"lept/split");
1405 gplotSimple1(na1, GPLOT_PNG,
"/tmp/lept/split/split", NULL);
1415 numaDestroy(&nasplit);
1416 box = boxCreate(0, 0, w, h);
1422 for (i = 0, xshift = 0; i < nsplit; i++) {
1423 numaGetIValue(nasplit, i, &isplit);
1424 box = boxCreate(xshift, 0, isplit - xshift, h);
1426 xshift = isplit + 1;
1428 box = boxCreate(xshift, 0, w - xshift, h);
1430 numaDestroy(&nasplit);
1433 pixdb = pixConvertTo32(pixs);
1434 ncomp = boxaGetCount(boxad);
1435 for (i = 0; i < ncomp; i++) {
1436 box = boxaGetBox(boxad, i,
L_CLONE);
1437 pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
1507l_int32 res, csize, empty;
1508BOXA *boxa1, *boxa2, *boxa3;
1509PIX *pix1, *pix2, *pix3;
1510PIXA *pixa1, *pixa2, *pixa3;
1513 return (
PIXA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1516 if (pixGetDepth(pixs) > 1) {
1517 pix2 = pixConvertTo8(pixs, FALSE);
1519 pix1 = pixThresholdToBinary(pix3, 150);
1523 pix1 = pixClone(pixs);
1525 pixZero(pix1, &empty);
1528 L_INFO(
"no fg pixels in input image\n", __func__);
1531 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
1536 if (pixadb) pixaAddPix(pixadb, pix2,
L_COPY);
1542 if ((res = pixGetXRes(pixs)) == 0) {
1543 L_INFO(
"Resolution is not set: setting to 300 ppi\n", __func__);
1546 csize = L_MIN(120., 60.0 * res / 300.0);
1547 snprintf(buf,
sizeof(buf),
"c%d.1 + o%d.1", csize, csize / 3);
1548 pix3 = pixMorphCompSequence(pix2, buf, 0);
1549 if (pixadb) pixaAddPix(pixadb, pix3,
L_COPY);
1552 boxa1 = pixConnComp(pix3, &pixa1, 4);
1554 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1555 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1556 pixaAddPix(pixadb, pix1,
L_INSERT);
1560 minw = (minw != 0) ? minw : (l_int32)(0.12 * res);
1561 minh = (minh != 0) ? minh : (l_int32)(0.07 * res);
1567 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1568 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1569 pixaAddPix(pixadb, pix1,
L_INSERT);
1570 pix1 = pixConvertTo32(pix2);
1571 pixRenderBoxaArb(pix1, pixa2->
boxa, 2, 255, 0, 0);
1572 pixaAddPix(pixadb, pix1,
L_INSERT);
1576 boxa2 = pixaGetBoxa(pixa2,
L_CLONE);
1577 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1578 pixa3 = pixClipRectangles(pix2, boxa3);
1580 pix1 = pixaDisplayRandomCmap(pixa3, 0, 0);
1581 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1582 pixaAddPix(pixadb, pix1,
L_INSERT);
1587 pixaDestroy(&pixa1);
1588 pixaDestroy(&pixa2);
1589 boxaDestroy(&boxa1);
1590 boxaDestroy(&boxa2);
1591 boxaDestroy(&boxa3);
1643l_int32 res, csize, empty;
1644BOXA *boxa1, *boxa2, *boxa3;
1646PIX *pix1, *pix2, *pix3;
1650 return (
PIXA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1653 if ((res = pixGetXRes(pixs)) == 0) {
1654 L_INFO(
"Resolution is not set: setting to 300 ppi\n", __func__);
1657 maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res);
1658 maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res);
1661 if (pixGetDepth(pixs) > 1) {
1662 pix2 = pixConvertTo8(pixs, FALSE);
1664 pix1 = pixThresholdToBinary(pix3, 150);
1668 pix1 = pixClone(pixs);
1670 pixZero(pix1, &empty);
1673 L_INFO(
"no fg pixels in input image\n", __func__);
1676 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
1681 if (pixadb) pixaAddPix(pixadb, pix2,
L_COPY);
1686 csize = L_MIN(120., 60.0 * res / 300.0);
1687 snprintf(buf,
sizeof(buf),
"c%d.1", csize);
1688 pix3 = pixMorphCompSequence(pix2, buf, 0);
1689 if (pixadb) pixaAddPix(pixadb, pix3,
L_COPY);
1692 boxa1 = pixConnComp(pix3, &pixa1, 4);
1694 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1695 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1696 pixaAddPix(pixadb, pix1,
L_INSERT);
1702 baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5);
1703 boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2);
1705 pix1 = pixConvertTo32(pix2);
1706 pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0);
1707 pixaAddPix(pixadb, pix1,
L_INSERT);
1712 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1713 pixa2 = pixClipRectangles(pix2, boxa3);
1715 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1716 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1717 pixaAddPix(pixadb, pix1,
L_INSERT);
1722 pixaDestroy(&pixa1);
1723 boxaDestroy(&boxa1);
1724 boxaDestroy(&boxa2);
1725 boxaDestroy(&boxa3);
1726 boxaaDestroy(&baa1);
1762 l_float32 deltafract,
1763 l_float32 peakfract,
1764 l_float32 clipfract,
1768l_int32 w, h, res, i, n, npeak;
1769l_float32 scalefact, redfact, minval, maxval, val4, val5, fract;
1771NUMA *na1, *na2, *na3, *na4, *na5;
1772PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1775 return ERROR_INT(
"&ncols not defined", __func__, 1);
1777 if (!pixs || pixGetDepth(pixs) != 1)
1778 return ERROR_INT(
"pixs not defined or not 1 bpp", __func__, 1);
1779 if (deltafract < 0.15 || deltafract > 0.75)
1780 L_WARNING(
"deltafract not in [0.15 ... 0.75]\n", __func__);
1781 if (peakfract < 0.25 || peakfract > 0.9)
1782 L_WARNING(
"peakfract not in [0.25 ... 0.9]\n", __func__);
1783 if (clipfract < 0.0 || clipfract >= 0.5)
1784 return ERROR_INT(
"clipfract not in [0.0 ... 0.5)\n", __func__, 1);
1785 if (pixadb) pixaAddPix(pixadb, pixs,
L_COPY);
1788 if ((res = pixGetXRes(pixs)) == 0) {
1789 L_WARNING(
"resolution undefined; set to 300\n", __func__);
1790 pixSetResolution(pixs, 300, 300);
1794 L_WARNING(
"resolution %d very low\n", __func__, res);
1795 scalefact = 37.5 / res;
1796 pix1 = pixScale(pixs, scalefact, scalefact);
1798 redfact = (l_float32)res / 37.5;
1800 pix1 = pixClone(pixs);
1801 else if (redfact < 4.0)
1802 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1803 else if (redfact < 8.0)
1804 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0);
1805 else if (redfact < 16.0)
1806 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0);
1808 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2);
1810 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
1813 pixGetDimensions(pix1, &w, &h, NULL);
1814 box = boxCreate(clipfract * w, clipfract * h,
1815 (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h);
1816 pix2 = pixClipRectangle(pix1, box, NULL);
1817 pixGetDimensions(pix2, &w, &h, NULL);
1819 if (pixadb) pixaAddPix(pixadb, pix2,
L_COPY);
1822 pix3 = pixDeskew(pix2, 0);
1823 if (pixadb) pixaAddPix(pixadb, pix3,
L_COPY);
1826 pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21);
1827 if (pixadb) pixaAddPix(pixadb, pix4,
L_COPY);
1828 pixInvert(pix4, pix4);
1829 na1 = pixCountByColumn(pix4, NULL);
1832 gplotSimple1(na1, GPLOT_PNG,
"/tmp/lept/plot", NULL);
1833 pix5 = pixRead(
"/tmp/lept/plot.png");
1834 pixaAddPix(pixadb, pix5,
L_INSERT);
1842 numaGetMax(na1, &maxval, NULL);
1843 numaGetMin(na1, &minval, NULL);
1844 fract = (l_float32)(maxval - minval) / h;
1846 L_INFO(
"very little content on page; 0 text columns\n", __func__);
1849 na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3);
1850 na4 = numaTransform(na2, 0, 1.0 / w);
1851 na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval));
1852 n = numaGetCount(na4);
1853 for (i = 0, npeak = 0; i < n; i++) {
1854 numaGetFValue(na4, i, &val4);
1855 numaGetFValue(na5, i, &val5);
1856 if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) {
1858 L_INFO(
"Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5);
1861 *pncols = npeak + 1;
1912l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp;
1913l_float32 ratio1, ratio2;
1915BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5;
1916PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7;
1921 return ERROR_INT(
"&istext not defined", __func__, 1);
1924 return ERROR_INT(
"pixs not defined", __func__, 1);
1928 return ERROR_INT(
"pix1 not made", __func__, 1);
1930 pixZero(pix1, &empty);
1933 L_INFO(
"pix is empty\n", __func__);
1936 w = pixGetWidth(pix1);
1945 pix2 = pixCreate(11, 81, 1);
1946 for (i = 0; i < 81; i++)
1947 pixSetPixel(pix2, 5, i, 1);
1948 sel1 = selCreateFromPix(pix2, 40, 5, NULL);
1949 selSetElement(sel1, 20, 0, SEL_MISS);
1950 selSetElement(sel1, 20, 10, SEL_MISS);
1951 selSetElement(sel1, 40, 0, SEL_MISS);
1952 selSetElement(sel1, 40, 10, SEL_MISS);
1953 selSetElement(sel1, 60, 0, SEL_MISS);
1954 selSetElement(sel1, 60, 10, SEL_MISS);
1955 pix3 = pixHMT(NULL, pix1, sel1);
1956 pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000);
1957 pix5 = pixXor(NULL, pix1, pix4);
1962 pix6 = pixMorphCompSequence(pix5,
"c30.1 + o15.1 + c60.1 + o2.2", 0);
1966 pixGetDimensions(pix6, NULL, &h, NULL);
1972 bmf = bmfCreate(NULL, 6);
1973 pixaAddPixWithText(pixadb, pix1, 1, bmf,
"threshold/crop to binary",
1975 pixaAddPixWithText(pixadb, pix3, 2, bmf,
"hit-miss for vertical line",
1977 pixaAddPixWithText(pixadb, pix4, 2, bmf,
"restricted seed-fill",
1979 pixaAddPixWithText(pixadb, pix5, 2, bmf,
"remove using xor",
1981 pixaAddPixWithText(pixadb, pix6, 2, bmf,
"make long horiz components",
1987 boxa1 = pixConnComp(pix6, &pixa1, 8);
1988 pix7 = pixaDisplayRandomCmap(pixa1, 0, 0);
1989 pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255);
1990 pixaAddPixWithText(pixadb, pix7, 2, bmf,
"show connected components",
1993 pixaDestroy(&pixa1);
1996 boxa1 = pixConnComp(pix6, NULL, 8);
2015 boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL);
2022 big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1;
2023 n1 = boxaGetCount(boxa1);
2024 n2 = boxaGetCount(boxa3);
2025 n3 = boxaGetCount(boxa4);
2026 ratio1 = (l_float32)maxw / (l_float32)w;
2027 ratio2 = (l_float32)n3 / (l_float32)n2;
2028 minlines = L_MAX(2, h / 125);
2029 if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines)
2034 if (*pistext == 1) {
2035 L_INFO(
"This is text: \n n1 = %d, n2 = %d, n3 = %d, "
2036 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
2037 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
2038 maxw, ratio1, h, big_comp);
2040 L_INFO(
"This is not text: \n n1 = %d, n2 = %d, n3 = %d, "
2041 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
2042 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
2043 maxw, ratio1, h, big_comp);
2047 boxaDestroy(&boxa1);
2048 boxaDestroy(&boxa2);
2049 boxaDestroy(&boxa3);
2050 boxaDestroy(&boxa4);
2051 boxaDestroy(&boxa5);
2080 if (ptop) *ptop = 0;
2081 if (pbot) *pbot = 0;
2083 return ERROR_INT(
"nothing to determine", __func__, 1);
2084 if (!pixs || pixGetDepth(pixs) != 1)
2085 return ERROR_INT(
"pixs not defined or not 1 bpp", __func__, 1);
2087 na = pixCountPixelsByRow(pixs, NULL);
2088 n = numaGetCount(na);
2089 array = numaGetIArray(na);
2091 for (i = 0; i < n; i++) {
2092 if (array[i] >= thresh) {
2099 for (i = n - 1; i >= 0; i--) {
2100 if (array[i] >= thresh) {
2165l_int32 empty, nhb, nvb, nvw, score, htfound;
2166PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
2169 return ERROR_INT(
"&score not defined", __func__, 1);
2172 return ERROR_INT(
"pixs not defined", __func__, 1);
2178 if (htfound && pixadb) pixaAddPix(pixadb, pix2,
L_COPY);
2183 L_INFO(
"pix has an image region\n", __func__);
2189 return ERROR_INT(
"pix1 not made", __func__, 1);
2191 pixZero(pix1, &empty);
2195 L_INFO(
"pix is empty\n", __func__);
2206 pix2 = pixDilateBrick(NULL, pix1, 2, 2);
2210 pix3 = pixDeskewBoth(pix2, 1);
2212 pixaAddPix(pixadb, pix2,
L_COPY);
2213 pixaAddPix(pixadb, pix3,
L_COPY);
2216 pix4 = pixRotate90(pix3, 1);
2218 pix4 = pixClone(pix3);
2222 pix1 = pixClone(pix4);
2226 pix2 = pixMorphSequence(pix1,
"o100.1 + c1.4", 0);
2227 pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8);
2228 pix4 = pixMorphSequence(pix1,
"o1.100 + c4.1", 0);
2229 pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8);
2230 pix6 = pixOr(NULL, pix3, pix5);
2232 pixaAddPix(pixadb, pix2,
L_COPY);
2233 pixaAddPix(pixadb, pix4,
L_COPY);
2234 pixaAddPix(pixadb, pix3,
L_COPY);
2235 pixaAddPix(pixadb, pix5,
L_COPY);
2236 pixaAddPix(pixadb, pix6,
L_COPY);
2238 pixCountConnComp(pix2, 8, &nhb);
2239 pixCountConnComp(pix4, 8, &nvb);
2242 pixSubtract(pix1, pix1, pix6);
2243 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
2246 pix7 = pixMorphSequence(pix1,
"c4.1 + o8.1", 0);
2247 if (pixadb) pixaAddPix(pixadb, pix7,
L_COPY);
2255 pixInvert(pix7, pix7);
2256 pix8 = pixMorphSequence(pix7,
"r1 + o1.100", 0);
2259 pixCountConnComp(pix9, 8, &nvw);
2261 pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0),
L_INSERT);
2262 pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0),
L_INSERT);
2269 if (nhb > 1) score++;
2270 if (nvb > 2) score++;
2271 if (nvw > 3) score++;
2272 if (nvw > 6) score++;
2309 l_float32 cropfract,
2315PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2318 return (
PIX *)ERROR_PTR(
"pixs not defined", __func__, NULL);
2324 pix1 = pixClipRectangle(pixs, box, NULL);
2326 pixGetDimensions(pixs, &w, &h, NULL);
2327 box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h),
2328 (l_int32)((1.0 - 2 * cropfract) * w),
2329 (l_int32)((1.0 - 2 * cropfract) * h));
2330 pix1 = pixClipRectangle(pixs, box1, NULL);
2335 if (pixGetDepth(pixs) > 1) {
2336 pix2 = pixConvertTo8(pix1, 0);
2341 L_INFO(
"pix cleaning failed\n", __func__);
2344 pix4 = pixThresholdToBinary(pix3, 200);
2347 pix4 = pixClone(pix1);
2355 if ((res = pixGetXRes(pixs)) == 0) {
2356 L_WARNING(
"Resolution is not set: using 300 ppi\n", __func__);
2359 if (res != outres) {
2360 factor = (l_float32)outres / (l_float32)res;
2361 pix5 = pixScale(pix4, factor, factor);
2363 pix5 = pixClone(pix4);
2395l_int32 w, h, sampling;
2398PIX *pix1, *pix2, *pixm;
2401 return ERROR_INT(
"&bg not defined", __func__, 1);
2403 if (!pixs || pixGetDepth(pixs) != 8)
2404 return ERROR_INT(
"pixs not defined or not 8 bpp", __func__, 1);
2405 if (darkthresh > 128)
2406 L_WARNING(
"darkthresh unusually large\n", __func__);
2407 if (edgecrop < 0.0 || edgecrop >= 1.0)
2408 return ERROR_INT(
"edgecrop not in [0.0 ... 1.0)", __func__, 1);
2411 pixGetDimensions(pix1, &w, &h, NULL);
2414 if (edgecrop > 0.0) {
2415 box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h,
2416 (1.0 - edgecrop) * w, (1.0 - edgecrop) * h);
2417 pix2 = pixClipRectangle(pix1, box, NULL);
2420 pix2 = pixClone(pix1);
2424 sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5));
2428 if (darkthresh > 0) {
2429 pixm = pixThresholdToBinary(pix2, darkthresh);
2430 pixInvert(pixm, pixm);
2433 pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL);
2434 *pbg = (l_int32)(fbg + 0.5);
2478l_int32 i, op, bx, by, bw, bh;
2483 if (ppixdb) *ppixdb = NULL;
2485 return ERROR_INT(
"&boxa not defined", __func__, 1);
2487 if (!pixs || pixGetDepth(pixs) != 1)
2488 return ERROR_INT(
"pixs not defined or not 1 bpp", __func__, 1);
2489 if (polarity != 0 && polarity != 1)
2490 return ERROR_INT(
"invalid polarity", __func__, 1);
2492 L_WARNING(
"large num rectangles = %d requested; using 1000\n",
2497 pix = pixCopy(NULL, pixs);
2498 boxa = boxaCreate(nrect);
2502 for (i = 0; i < nrect; i++) {
2505 L_ERROR(
"failure in pixFindLargestRectangle\n", __func__);
2510 boxGetGeometry(box, &bx, &by, &bw, &bh);
2511 pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0);
2515 *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3);
2578l_int32 i, j, w, h, d, wpls, val;
2579l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2;
2581l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg;
2583l_uint32 *datas, *lines;
2584l_uint32 **linew, **lineh;
2589 if (ppixdb) *ppixdb = NULL;
2591 return ERROR_INT(
"&box not defined", __func__, 1);
2594 return ERROR_INT(
"pixs not defined", __func__, 1);
2595 pixGetDimensions(pixs, &w, &h, &d);
2597 return ERROR_INT(
"pixs not 1 bpp", __func__, 1);
2598 if (polarity != 0 && polarity != 1)
2599 return ERROR_INT(
"invalid polarity", __func__, 1);
2602 lowestfg = (l_int32 *)LEPT_CALLOC(w,
sizeof(l_int32));
2603 for (i = 0; i < w; i++)
2609 pixw = pixCreate(w, h, 32);
2610 pixh = pixCreate(w, h, 32);
2611 linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL);
2612 lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL);
2613 datas = pixGetData(pixs);
2614 wpls = pixGetWpl(pixs);
2615 maxarea = xmax = ymax = wmax = hmax = 0;
2616 for (i = 0; i < h; i++) {
2617 lines = datas + i * wpls;
2619 for (j = 0; j < w; j++) {
2621 if ((val ^ polarity) == 0) {
2622 if (i == 0 && j == 0) {
2624 }
else if (i == 0) {
2625 wp = linew[i][j - 1] + 1;
2627 }
else if (j == 0) {
2629 hp = lineh[i - 1][j] + 1;
2632 w1 = linew[i - 1][j];
2633 h1 = lineh[i - 1][j];
2634 horizdist = j - prevfg;
2635 wmin = L_MIN(w1, horizdist);
2636 area1 = wmin * (h1 + 1);
2639 w2 = linew[i][j - 1];
2640 h2 = lineh[i][j - 1];
2641 vertdist = i - lowestfg[j];
2642 hmin = L_MIN(h2, vertdist);
2643 area2 = hmin * (w2 + 1);
2645 if (area1 > area2) {
2660 if (wp * hp > maxarea) {
2671 box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax);
2675 *ppixdb = pixConvertTo8(pixs, TRUE);
2681 LEPT_FREE(lowestfg);
2723pixFindRectangleInCC(
PIX *pixs,
2730l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res;
2731l_int32 xfirst, xlast, xstart, yfirst, ylast, length;
2732BOX *box1, *box2, *box3, *box4, *box5;
2733PIX *pix1, *pix2, *pixdb1, *pixdb2;
2736 if (!pixs || pixGetDepth(pixs) != 1)
2737 return (
BOX *)ERROR_PTR(
"pixs undefined or not 1 bpp", __func__, NULL);
2738 if (fract <= 0.0 || fract > 1.0)
2739 return (
BOX *)ERROR_PTR(
"invalid fraction", __func__, NULL);
2741 return (
BOX *)ERROR_PTR(
"invalid scan direction", __func__, NULL);
2744 return (
BOX *)ERROR_PTR(
"invalid select", __func__, NULL);
2749 pix1 = pixClipRectangle(pixs, boxs, NULL);
2750 boxGetGeometry(boxs, &x, &y, NULL, NULL);
2752 pix1 = pixClone(pixs);
2757 pix2 = pixRotate90(pix1, 1);
2759 pix2 = pixClone(pix1);
2760 pixGetDimensions(pix2, &w, &h, NULL);
2762 pixadb = (debug) ? pixaCreate(0) : NULL;
2765 lept_mkdir(
"lept/rect");
2766 pixaAddPix(pixadb, pix1,
L_CLONE);
2767 pixdb1 = pixConvertTo32(pix2);
2774 for (i = 0; i < h; i++) {
2775 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2776 if (length >= (l_int32)(fract * w + 0.5)) {
2779 xlast = xfirst + length - 1;
2785 L_WARNING(
"no run of sufficient size was found\n", __func__);
2787 pixDestroy(&pixdb1);
2788 pixaDestroy(&pixadb);
2793 w1 = xlast - xfirst + 1;
2796 for (i = yfirst + 1; i < h; i++) {
2797 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2798 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2801 h1 = ylast - yfirst + 1;
2805 box1 = boxCreate(xfirst, yfirst, w1, h1);
2809 for (i = h - 1; i >= 0; i--) {
2810 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2811 if (length >= (l_int32)(fract * w + 0.5)) {
2814 xlast = xfirst + length - 1;
2820 w2 = xlast - xfirst + 1;
2822 for (i = ylast - 1; i >= 0; i--) {
2823 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2824 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2827 h2 = ylast - yfirst + 1;
2831 box2 = boxCreate(xfirst, yfirst, w2, h2);
2835 pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
2836 pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
2837 pixaAddPix(pixadb, pixdb1,
L_INSERT);
2842 box3 = boxBoundingRegion(box1, box2);
2844 box3 = boxOverlapRegion(box1, box2);
2846 box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2848 box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2856 box4 = boxRotateOrth(box3, w, h, 3);
2858 box4 = boxCopy(box3);
2862 box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
2868 pixdb1 = pixConvertTo8(pixs, 0);
2869 pixAddConstantGray(pixdb1, 190);
2870 pixdb2 = pixConvertTo32(pixdb1);
2871 if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
2872 pixaAddPix(pixadb, pixdb2,
L_INSERT);
2873 res = pixGetXRes(pixs);
2874 L_INFO(
"Writing debug files to /tmp/lept/rect/\n", __func__);
2876 "/tmp/lept/rect/fitrect.pdf");
2877 pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
2878 pixWrite(
"/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
2880 pixDestroy(&pixdb1);
2881 pixaDestroy(&pixadb);
2915l_int32 i, n, empty, x, y, w, h;
2919PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2921 if (ppixm) *ppixm = NULL;
2923 return (
PIX *)ERROR_PTR(
"pixs not defined", __func__, NULL);
2924 if (thresh == 0) thresh = 128;
2926 if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL)
2927 return (
PIX *)ERROR_PTR(
"pix1 not made", __func__, NULL);
2928 if (pixadb) pixaAddPix(pixadb, pix1,
L_COPY);
2937 pix3 = pixMorphSequence(pix2,
"o15.15 + c25.25", 0);
2938 pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0);
2940 pixaAddPix(pixadb, pix2,
L_CLONE);
2941 pixaAddPix(pixadb, pix3,
L_CLONE);
2942 pixaAddPix(pixadb, pix4,
L_COPY);
2946 pixZero(pix4, &empty);
2954 boxa1 = pixConnCompBB(pix4, 8);
2955 n = boxaGetCount(boxa1);
2956 for (i = 0; i < n; i++) {
2957 box1 = boxaGetBox(boxa1, i,
L_COPY);
2958 pix5 = pixClipRectangle(pix1, box1, NULL);
2959 pixForegroundFraction(pix5, &fgfract);
2960 if (pixadb) lept_stderr(
"fg fraction: %5.3f\n", fgfract);
2961 boxGetGeometry(box1, &x, &y, &w, &h);
2963 pixRasterop(pix4, x, y, w, h,
PIX_CLR, NULL, 0, 0);
2967 boxaDestroy(&boxa1);
2968 pixZero(pix4, &empty);
2975 pix5 = pixInvert(NULL, pix1);
2976 pixCombineMasked(pix1, pix5, pix4);
2979 pixaAddPix(pixadb, pix5,
L_CLONE);
2980 pixaAddPix(pixadb, pix1,
L_COPY);
PIX * pixConvertTo8MinMax(PIX *pixs)
pixConvertTo8MinMax()
PIX * pixBackgroundNormTo1MinMax(PIX *pixs, l_int32 contrast, l_int32 scalefactor)
pixBackgroundNormTo1MinMax()
PIX * pixCleanBackgroundToWhite(PIX *pixs, PIX *pixim, PIX *pixg, l_float32 gamma, l_int32 blackval, l_int32 whiteval)
pixCleanBackgroundToWhite()
#define GET_DATA_BIT(pdata, n)
l_ok pixFindThreshFgExtent(PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot)
pixFindThreshFgExtent()
PIX * pixCropImage(PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_border, l_int32 tb_border, l_float32 maxwiden, l_int32 printwiden, const char *debugfile, BOX **pcropbox)
pixCropImage()
static l_ok pixMaxCompAfterVClosing(PIX *pixs, BOX **pbox)
pixMaxCompAfterVClosing()
PIX * pixGenTextblockMask(PIX *pixs, PIX *pixvws, PIXA *pixadb)
pixGenTextblockMask()
l_ok pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb)
pixGetRegionsBinary()
PIX * pixGenTextlineMask(PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb)
pixGenTextlineMask()
l_ok pixEstimateBackground(PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg)
pixEstimateBackground()
BOX * pixFindPageForeground(PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac)
pixFindPageForeground()
PIXA * pixExtractRawTextlines(PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
pixExtractRawTextlines()
PIX * pixAutoPhotoinvert(PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb)
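pixAutoPhotoinvert()
BOX * pixFindRectangleInCC(PIX *pixs, BOX *boxs, l_float32 fract, l_int32 dir, l_int32 select, l_int32 debug)
pixFindRectangleInCC()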
PIX * pixPrepare1bpp(PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres)
pixPrepare1bpp()
PIXA * pixExtractTextlines(PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
pixExtractTextlines()
static l_ok pixFindPageInsideBlackBorder(PIX *pixs, BOX **pbox)
pixFindPageInsideBlackBorder()
l_ok pixFindLargeRectangles(PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb)
pixFindLargeRectangles()
l_ok pixCountTextColumns(PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb)
pixCountTextColumns()
l_ok pixSplitIntoCharacters(PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug)
pixSplitIntoCharacters()
PIX * pixGenHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug)
pixGenHalftoneMask()
l_ok pixFindLargestRectangle(PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb)
pixFindLargestRectangle()
PIX * pixGenerateHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb)
pixGenerateHalftoneMask()
static PIX * pixRescaleForCropping(PIX *pixs, l_int32 w, l_int32 h, l_int32 lr_border, l_int32 tb_border, l_float32 maxwiden, PIX **ppixsc)
pixRescaleForCropping()
PIX * pixCleanImage(PIX *pixs, l_int32 contrast, l_int32 rotation, l_int32 scale, l_int32 opensize)
pixCleanImage()
l_ok pixDecideIfTable(PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb)
pixDecideIfTable()
BOXA * pixSplitComponentWithProfile(PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug)
pixSplitComponentWithProfile()
l_ok pixDecideIfText(PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb)
pixDecideIfText()
@ REMOVE_CMAP_TO_GRAYSCALE
@ L_GEOMETRIC_INTERSECTION