Leptonica 1.85.0
Image processing and image analysis suite
Loading...
Searching...
No Matches
baseline.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
50#ifdef HAVE_CONFIG_H
51#include <config_auto.h>
52#endif /* HAVE_CONFIG_H */
53
54#include <math.h>
55#include "allheaders.h"
56
57 /* Minimum distance to travel after finding max before abandoning peak */
58static const l_int32 MinDistInPeak = 35;
59
60 /* Thresholds for peaks and zeros, relative to the max peak */
61static const l_int32 PeakThresholdRatio = 20;
62static const l_int32 ZeroThresholdRatio = 100;
63
64 /* Default values for determining local skew */
65static const l_int32 DefaultSlices = 10;
66static const l_int32 DefaultSweepReduction = 2;
67static const l_int32 DefaultBsReduction = 1;
68static const l_float32 DefaultSweepRange = 5.; /* degrees */
69static const l_float32 DefaultSweepDelta = 1.; /* degrees */
70static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */
71
72 /* Overlap slice fraction added to top and bottom of each slice */
73static const l_float32 OverlapFraction = 0.5;
74
75 /* Minimum allowed confidence (ratio) for accepting a value */
76static const l_float32 MinAllowedConfidence = 3.0;
77
78
79/*---------------------------------------------------------------------*
80 * Locate text baselines in an image *
81 *---------------------------------------------------------------------*/
116NUMA *
118 PTA **ppta,
119 PIXA *pixadb)
120{
121l_int32 h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
122l_int32 imaxloc, peakthresh, zerothresh, inpeak;
123l_int32 mintosearch, max, maxloc, nloc, locval;
124l_int32 *array;
125l_float32 maxval;
126BOXA *boxa1, *boxa2, *boxa3;
127GPLOT *gplot;
128NUMA *nasum, *nadiff, *naloc, *naval;
129PIX *pix1, *pix2;
130PTA *pta;
131
132 if (ppta) *ppta = NULL;
133 if (!pixs || pixGetDepth(pixs) != 1)
134 return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
135
136 /* Close up the text characters, removing noise */
137 pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);
138
139 /* Estimate the resolution */
140 if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
141
142 /* Save the difference of adjacent row sums.
143 * The high positive-going peaks are the baselines */
144 if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
145 pixDestroy(&pix1);
146 return (NUMA *)ERROR_PTR("nasum not made", __func__, NULL);
147 }
148 h = pixGetHeight(pixs);
149 nadiff = numaCreate(h);
150 numaGetIValue(nasum, 0, &val2);
151 for (i = 0; i < h - 1; i++) {
152 val1 = val2;
153 numaGetIValue(nasum, i + 1, &val2);
154 numaAddNumber(nadiff, val1 - val2);
155 }
156 numaDestroy(&nasum);
157
158 if (pixadb) { /* show the difference signal */
159 lept_mkdir("lept/baseline");
160 gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
161 pix2 = pixRead("/tmp/lept/baseline/diff.png");
162 pixaAddPix(pixadb, pix2, L_INSERT);
163 }
164
165 /* Use the zeroes of the profile to locate each baseline. */
166 array = numaGetIArray(nadiff);
167 ndiff = numaGetCount(nadiff);
168 numaGetMax(nadiff, &maxval, &imaxloc);
169 numaDestroy(&nadiff);
170
171 /* Use this to begin locating a new peak: */
172 peakthresh = (l_int32)maxval / PeakThresholdRatio;
173 /* Use this to begin a region between peaks: */
174 zerothresh = (l_int32)maxval / ZeroThresholdRatio;
175
176 naloc = numaCreate(0);
177 naval = numaCreate(0);
178 inpeak = FALSE;
179 for (i = 0; i < ndiff; i++) {
180 if (inpeak == FALSE) {
181 if (array[i] > peakthresh) { /* transition to in-peak */
182 inpeak = TRUE;
183 mintosearch = i + MinDistInPeak; /* accept no zeros
184 * between i and mintosearch */
185 max = array[i];
186 maxloc = i;
187 }
188 } else { /* inpeak == TRUE; look for max */
189 if (array[i] > max) {
190 max = array[i];
191 maxloc = i;
192 mintosearch = i + MinDistInPeak;
193 } else if (i > mintosearch && array[i] <= zerothresh) { /* leave */
194 inpeak = FALSE;
195 numaAddNumber(naval, max);
196 numaAddNumber(naloc, maxloc);
197 }
198 }
199 }
200 LEPT_FREE(array);
201
202 /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
203 if (inpeak) {
204 numaAddNumber(naval, max);
205 numaAddNumber(naloc, maxloc);
206 }
207
208 if (pixadb) { /* show the raster locations for the peaks */
209 gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
210 "rasterline", "height");
211 gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
212 gplotMakeOutput(gplot);
213 gplotDestroy(&gplot);
214 pix2 = pixRead("/tmp/lept/baseline/loc.png");
215 pixaAddPix(pixadb, pix2, L_INSERT);
216 }
217 numaDestroy(&naval);
218
219 /* Generate an approximate profile of text line width.
220 * First, filter the boxes of text, where there may be
221 * more than one box for a given textline. */
222 pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
223 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
224 boxa1 = pixConnComp(pix2, NULL, 4);
225 pixDestroy(&pix1);
226 pixDestroy(&pix2);
227 if (boxaGetCount(boxa1) == 0) {
228 numaDestroy(&naloc);
229 boxaDestroy(&boxa1);
230 L_INFO("no components after filtering\n", __func__);
231 return NULL;
232 }
233 boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
234 boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
235 boxaDestroy(&boxa1);
236 boxaDestroy(&boxa2);
237
238 /* Optionally, find the baseline segments */
239 pta = NULL;
240 if (ppta) {
241 pta = ptaCreate(0);
242 *ppta = pta;
243 }
244 if (pta) {
245 nloc = numaGetCount(naloc);
246 nbox = boxaGetCount(boxa3);
247 for (i = 0; i < nbox; i++) {
248 boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
249 for (j = 0; j < nloc; j++) {
250 numaGetIValue(naloc, j, &locval);
251 if (L_ABS(locval - (by + bh)) > 25)
252 continue;
253 ptaAddPt(pta, bx, locval);
254 ptaAddPt(pta, bx + bw, locval);
255 break;
256 }
257 }
258 }
259 boxaDestroy(&boxa3);
260
261 if (pixadb && pta) { /* display baselines */
262 l_int32 npts, x1, y1, x2, y2;
263 pix1 = pixConvertTo32(pixs);
264 npts = ptaGetCount(pta);
265 for (i = 0; i < npts; i += 2) {
266 ptaGetIPt(pta, i, &x1, &y1);
267 ptaGetIPt(pta, i + 1, &x2, &y2);
268 pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
269 }
270 pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
271 pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
272 pixDestroy(&pix1);
273 }
274
275 return naloc;
276}
277
278
279/*---------------------------------------------------------------------*
280 * Projective transform to remove local skew *
281 *---------------------------------------------------------------------*/
321PIX *
323 l_int32 nslices,
324 l_int32 redsweep,
325 l_int32 redsearch,
326 l_float32 sweeprange,
327 l_float32 sweepdelta,
328 l_float32 minbsdelta)
329{
330l_int32 ret;
331PIX *pixd;
332PTA *ptas, *ptad;
333
334 if (!pixs || pixGetDepth(pixs) != 1)
335 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
336
337 /* Skew array gives skew angle (deg) as fctn of raster line
338 * where it intersects the LHS of the image */
339 ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch,
340 sweeprange, sweepdelta, minbsdelta,
341 &ptas, &ptad);
342 if (ret != 0)
343 return (PIX *)ERROR_PTR("transform pts not found", __func__, NULL);
344
345 /* Use a projective transform */
346 pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE);
347
348 ptaDestroy(&ptas);
349 ptaDestroy(&ptad);
350 return pixd;
351}
352
353
354/*---------------------------------------------------------------------*
355 * Determine the local skew *
356 *---------------------------------------------------------------------*/
388l_ok
390 l_int32 nslices,
391 l_int32 redsweep,
392 l_int32 redsearch,
393 l_float32 sweeprange,
394 l_float32 sweepdelta,
395 l_float32 minbsdelta,
396 PTA **pptas,
397 PTA **pptad)
398{
399l_int32 w, h, i;
400l_float32 deg2rad, angr, angd, dely;
401NUMA *naskew;
402PTA *ptas, *ptad;
403
404 if (!pptas || !pptad)
405 return ERROR_INT("&ptas and &ptad not defined", __func__, 1);
406 *pptas = *pptad = NULL;
407 if (!pixs || pixGetDepth(pixs) != 1)
408 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
409 if (nslices < 2 || nslices > 20)
410 nslices = DefaultSlices;
411 if (redsweep < 1 || redsweep > 8)
412 redsweep = DefaultSweepReduction;
413 if (redsearch < 1 || redsearch > redsweep)
414 redsearch = DefaultBsReduction;
415 if (sweeprange == 0.0)
416 sweeprange = DefaultSweepRange;
417 if (sweepdelta == 0.0)
418 sweepdelta = DefaultSweepDelta;
419 if (minbsdelta == 0.0)
420 minbsdelta = DefaultMinbsDelta;
421
422 naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch,
423 sweeprange, sweepdelta, minbsdelta,
424 NULL, NULL, 0);
425 if (!naskew)
426 return ERROR_INT("naskew not made", __func__, 1);
427
428 deg2rad = 3.14159265f / 180.f;
429 w = pixGetWidth(pixs);
430 h = pixGetHeight(pixs);
431 ptas = ptaCreate(4);
432 ptad = ptaCreate(4);
433 *pptas = ptas;
434 *pptad = ptad;
435
436 /* Find i for skew line that intersects LHS at i and RHS at h / 20 */
437 for (i = 0; i < h; i++) {
438 numaGetFValue(naskew, i, &angd);
439 angr = angd * deg2rad;
440 dely = w * tan(angr);
441 if (i - dely > 0.05 * h)
442 break;
443 }
444 ptaAddPt(ptas, 0, i);
445 ptaAddPt(ptas, w - 1, i - dely);
446 ptaAddPt(ptad, 0, i);
447 ptaAddPt(ptad, w - 1, i);
448
449 /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */
450 for (i = h - 1; i > 0; i--) {
451 numaGetFValue(naskew, i, &angd);
452 angr = angd * deg2rad;
453 dely = w * tan(angr);
454 if (i - dely < 0.95 * h)
455 break;
456 }
457 ptaAddPt(ptas, 0, i);
458 ptaAddPt(ptas, w - 1, i - dely);
459 ptaAddPt(ptad, 0, i);
460 ptaAddPt(ptad, w - 1, i);
461
462 numaDestroy(&naskew);
463 return 0;
464}
465
466
507NUMA *
509 l_int32 nslices,
510 l_int32 redsweep,
511 l_int32 redsearch,
512 l_float32 sweeprange,
513 l_float32 sweepdelta,
514 l_float32 minbsdelta,
515 l_float32 *pa,
516 l_float32 *pb,
517 l_int32 debug)
518{
519l_int32 w, h, hs, i, ystart, yend, ovlap, npts;
520l_float32 angle, conf, ycenter, a, b;
521BOX *box;
522GPLOT *gplot;
523NUMA *naskew, *nax, *nay;
524PIX *pix;
525PTA *pta;
526
527 if (!pixs || pixGetDepth(pixs) != 1)
528 return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
529 if (nslices < 2 || nslices > 20)
530 nslices = DefaultSlices;
531 if (redsweep < 1 || redsweep > 8)
532 redsweep = DefaultSweepReduction;
533 if (redsearch < 1 || redsearch > redsweep)
534 redsearch = DefaultBsReduction;
535 if (sweeprange == 0.0)
536 sweeprange = DefaultSweepRange;
537 if (sweepdelta == 0.0)
538 sweepdelta = DefaultSweepDelta;
539 if (minbsdelta == 0.0)
540 minbsdelta = DefaultMinbsDelta;
541
542 pixGetDimensions(pixs, &w, &h, NULL);
543 hs = h / nslices;
544 ovlap = (l_int32)(OverlapFraction * hs);
545 pta = ptaCreate(nslices);
546 for (i = 0; i < nslices; i++) {
547 ystart = L_MAX(0, hs * i - ovlap);
548 yend = L_MIN(h - 1, hs * (i + 1) + ovlap);
549 ycenter = (l_float32)(ystart + yend) / 2;
550 box = boxCreate(0, ystart, w, yend - ystart + 1);
551 pix = pixClipRectangle(pixs, box, NULL);
552 pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch,
553 sweeprange, sweepdelta, minbsdelta);
554 if (conf > MinAllowedConfidence)
555 ptaAddPt(pta, ycenter, angle);
556 pixDestroy(&pix);
557 boxDestroy(&box);
558 }
559
560 /* Do linear least squares fit */
561 if ((npts = ptaGetCount(pta)) < 2) {
562 ptaDestroy(&pta);
563 return (NUMA *)ERROR_PTR("can't fit skew", __func__, NULL);
564 }
565 ptaGetLinearLSF(pta, &a, &b, NULL);
566 if (pa) *pa = a;
567 if (pb) *pb = b;
568
569 /* Make skew angle array as function of raster line */
570 naskew = numaCreate(h);
571 for (i = 0; i < h; i++) {
572 angle = a * i + b;
573 numaAddNumber(naskew, angle);
574 }
575
576 if (debug) {
577 lept_mkdir("lept/baseline");
578 ptaGetArrays(pta, &nax, &nay);
579 gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG,
580 "skew as fctn of y", "y (in raster lines from top)",
581 "angle (in degrees)");
582 gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf");
583 gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts");
584 gplotMakeOutput(gplot);
585 gplotDestroy(&gplot);
586 numaDestroy(&nax);
587 numaDestroy(&nay);
588 }
589
590 ptaDestroy(&pta);
591 return naskew;
592}
NUMA * pixFindBaselines(PIX *pixs, PTA **ppta, PIXA *pixadb)
pixFindBaselines()
Definition baseline.c:117
NUMA * pixGetLocalSkewAngles(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 *pa, l_float32 *pb, l_int32 debug)
pixGetLocalSkewAngles()
Definition baseline.c:508
PIX * pixDeskewLocal(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta)
pixDeskewLocal()
Definition baseline.c:322
l_ok pixGetLocalSkewTransform(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, PTA **pptas, PTA **pptad)
pixGetLocalSkewTransform()
Definition baseline.c:389
@ L_SORT_BY_Y
Definition pix.h:529
@ L_COPY
Definition pix.h:505
@ L_INSERT
Definition pix.h:504
@ L_SORT_INCREASING
Definition pix.h:522
@ L_BRING_IN_WHITE
Definition pix.h:662
Definition gplot.h:77