Leptonica 1.82.0
Image processing and image analysis suite
flipdetect.c
Go to the documentation of this file.
/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/
26
201#ifdef HAVE_CONFIG_H
202#include <config_auto.h>
203#endif /* HAVE_CONFIG_H */
204
205#include <math.h>
206#include "allheaders.h"
207
/* Sels for pixOrientDetect() and pixMirrorDetect().
 *
 * Each string describes a hit-miss sel of 5 rows by 6 columns, written
 * one row per line.  The total length must therefore be exactly
 * 30 characters, because the strings are handed to
 * selCreateFromString(text, 5, 6, NULL).  Every blank inside a row is
 * significant and must not be collapsed.
 *
 * NOTE(review): per the selCreateFromString() convention, 'x' is a hit,
 * 'o' is a miss, ' ' is a don't-care and the upper-case 'O' marks the
 * sel origin (on a miss) -- confirm against sel1.c.
 *
 * textsel1 and textsel2 are left-right mirrors of each other, and
 * textsel3 / textsel4 are the top-bottom flips of textsel1 / textsel2.
 * In this file, sels 1 and 2 produce the "up" count and sels 3 and 4
 * the "down" count; for mirror detection sel 1 counts right-facing and
 * sel 2 left-facing characters. */
static const char  *textsel1 = "x  oo "
                               "x oOo "
                               "x  o  "
                               "x     "
                               "xxxxxx";

static const char  *textsel2 = " oo  x"
                               " oOo x"
                               "  o  x"
                               "     x"
                               "xxxxxx";

static const char  *textsel3 = "xxxxxx"
                               "x     "
                               "x  o  "
                               "x oOo "
                               "x  oo ";

static const char  *textsel4 = "xxxxxx"
                               "     x"
                               "  o  x"
                               " oOo x"
                               " oo  x";
232
233 /* Parameters for determining orientation */
234static const l_int32 DefaultMinUpDownCount = 70;
235static const l_float32 DefaultMinUpDownConf = 8.0;
236static const l_float32 DefaultMinUpDownRatio = 2.5;
237
238 /* Parameters for determining mirror flip */
239static const l_int32 DefaultMinMirrorFlipCount = 100;
240static const l_float32 DefaultMinMirrorFlipConf = 5.0;
241
242 /* Static debug function */
243static void pixDebugFlipDetect(const char *filename, PIX *pixs,
244 PIX *pixhm, l_int32 enable);
245
246
/*----------------------------------------------------------------*
 *        High-level interface for detection and correction       *
 *----------------------------------------------------------------*/
273PIX *
275 l_float32 minupconf,
276 l_float32 minratio,
277 l_float32 *pupconf,
278 l_float32 *pleftconf,
279 l_int32 *protation,
280 l_int32 debug)
281{
282l_int32 orient;
283l_float32 upconf, leftconf;
284PIX *pix1;
285
286 PROCNAME("pixOrientCorrect");
287
288 if (!pixs || pixGetDepth(pixs) != 1)
289 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
290
291 /* Get confidences for orientation */
292 pixUpDownDetect(pixs, &upconf, 0, 0, debug);
293 pix1 = pixRotate90(pixs, 1);
294 pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
295 pixDestroy(&pix1);
296 if (pupconf) *pupconf = upconf;
297 if (pleftconf) *pleftconf = leftconf;
298
299 /* Decide what to do */
300 makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
301
302 /* Do it */
303 switch (orient)
304 {
306 L_INFO("text orientation not determined; no rotation\n", procName);
307 if (protation) *protation = 0;
308 return pixCopy(NULL, pixs);
309 break;
310 case L_TEXT_ORIENT_UP:
311 L_INFO("text is oriented up; no rotation\n", procName);
312 if (protation) *protation = 0;
313 return pixCopy(NULL, pixs);
314 break;
316 L_INFO("landscape; text oriented left; 90 cw rotation\n", procName);
317 if (protation) *protation = 90;
318 return pixRotateOrth(pixs, 1);
319 break;
321 L_INFO("text oriented down; 180 cw rotation\n", procName);
322 if (protation) *protation = 180;
323 return pixRotateOrth(pixs, 2);
324 break;
326 L_INFO("landscape; text oriented right; 270 cw rotation\n", procName);
327 if (protation) *protation = 270;
328 return pixRotateOrth(pixs, 3);
329 break;
330 default:
331 L_ERROR("invalid orient flag!\n", procName);
332 return pixCopy(NULL, pixs);
333 }
334}
335
336
/*----------------------------------------------------------------*
 *         Orientation detection (four 90 degree angles)          *
 *----------------------------------------------------------------*/
404l_ok
406 l_float32 *pupconf,
407 l_float32 *pleftconf,
408 l_int32 mincount,
409 l_int32 debug)
410{
411PIX *pix1;
412
413 PROCNAME("pixOrientDetect");
414
415 if (!pixs || pixGetDepth(pixs) != 1)
416 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
417 if (!pupconf && !pleftconf)
418 return ERROR_INT("nothing to do", procName, 1);
419 if (mincount == 0)
420 mincount = DefaultMinUpDownCount;
421
422 if (pupconf)
423 pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
424 if (pleftconf) {
425 pix1 = pixRotate90(pixs, 1);
426 pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
427 pixDestroy(&pix1);
428 }
429
430 return 0;
431}
432
433
463l_ok
464makeOrientDecision(l_float32 upconf,
465 l_float32 leftconf,
466 l_float32 minupconf,
467 l_float32 minratio,
468 l_int32 *porient,
469 l_int32 debug)
470{
471l_float32 absupconf, absleftconf;
472
473 PROCNAME("makeOrientDecision");
474
475 if (!porient)
476 return ERROR_INT("&orient not defined", procName, 1);
477 *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */
478 if (upconf == 0.0 || leftconf == 0.0) {
479 L_INFO("not enough confidence to get orientation\n", procName);
480 return 0;
481 }
482
483 if (minupconf == 0.0)
484 minupconf = DefaultMinUpDownConf;
485 if (minratio == 0.0)
486 minratio = DefaultMinUpDownRatio;
487 absupconf = L_ABS(upconf);
488 absleftconf = L_ABS(leftconf);
489
490 /* Here are the four possible orientation decisions, based
491 * on satisfaction of two threshold constraints. */
492 if (upconf > minupconf && absupconf > minratio * absleftconf)
493 *porient = L_TEXT_ORIENT_UP;
494 else if (leftconf > minupconf && absleftconf > minratio * absupconf)
495 *porient = L_TEXT_ORIENT_LEFT;
496 else if (upconf < -minupconf && absupconf > minratio * absleftconf)
497 *porient = L_TEXT_ORIENT_DOWN;
498 else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
499 *porient = L_TEXT_ORIENT_RIGHT;
500
501 if (debug) {
502 lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
503 if (*porient == L_TEXT_ORIENT_UNKNOWN)
504 lept_stderr("Confidence is low; no determination is made\n");
505 else if (*porient == L_TEXT_ORIENT_UP)
506 lept_stderr("Text is rightside-up\n");
507 else if (*porient == L_TEXT_ORIENT_LEFT)
508 lept_stderr("Text is rotated 90 deg ccw\n");
509 else if (*porient == L_TEXT_ORIENT_DOWN)
510 lept_stderr("Text is upside-down\n");
511 else /* *porient == L_TEXT_ORIENT_RIGHT */
512 lept_stderr("Text is rotated 90 deg cw\n");
513 }
514
515 return 0;
516}
517
518
557l_ok
559 l_float32 *pconf,
560 l_int32 mincount,
561 l_int32 npixels,
562 l_int32 debug)
563{
564l_int32 countup, countdown, nmax;
565l_float32 nup, ndown;
566PIX *pix0, *pix1, *pix2, *pix3, *pixm;
567SEL *sel1, *sel2, *sel3, *sel4;
568
569 PROCNAME("pixUpDownDetect");
570
571 if (!pconf)
572 return ERROR_INT("&conf not defined", procName, 1);
573 *pconf = 0.0;
574 if (!pixs || pixGetDepth(pixs) != 1)
575 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
576 if (mincount == 0)
577 mincount = DefaultMinUpDownCount;
578 if (npixels < 0)
579 npixels = 0;
580
581 if (debug) {
582 lept_mkdir("lept/orient");
583 }
584
585 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
586 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
587 sel3 = selCreateFromString(textsel3, 5, 6, NULL);
588 sel4 = selCreateFromString(textsel4, 5, 6, NULL);
589
590 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
591 * This closes holes in x-height characters and joins them at
592 * the x-height. There is more noise in the descender detection
593 * from this, but it works fairly well. */
594 pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
595
596 /* Optionally, make a mask of the word bounding boxes, shortening
597 * each of them by a fixed amount at each end. */
598 pixm = NULL;
599 if (npixels > 0) {
600 l_int32 i, nbox, x, y, w, h;
601 BOX *box;
602 BOXA *boxa;
603 pix1 = pixMorphSequence(pix0, "o10.1", 0);
604 boxa = pixConnComp(pix1, NULL, 8);
605 pixm = pixCreateTemplate(pix1);
606 pixDestroy(&pix1);
607 nbox = boxaGetCount(boxa);
608 for (i = 0; i < nbox; i++) {
609 box = boxaGetBox(boxa, i, L_CLONE);
610 boxGetGeometry(box, &x, &y, &w, &h);
611 if (w > 2 * npixels)
612 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
613 PIX_SET, NULL, 0, 0);
614 boxDestroy(&box);
615 }
616 boxaDestroy(&boxa);
617 }
618
619 /* Find the ascenders and optionally filter with pixm.
620 * For an explanation of the procedure used for counting the result
621 * of the HMT, see comments at the beginning of this function. */
622 pix1 = pixHMT(NULL, pix0, sel1);
623 pix2 = pixHMT(NULL, pix0, sel2);
624 pixOr(pix1, pix1, pix2);
625 if (pixm)
626 pixAnd(pix1, pix1, pixm);
627 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
628 pixCountPixels(pix3, &countup, NULL);
629 pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
630 pixDestroy(&pix1);
631 pixDestroy(&pix2);
632 pixDestroy(&pix3);
633
634 /* Find the ascenders and optionally filter with pixm. */
635 pix1 = pixHMT(NULL, pix0, sel3);
636 pix2 = pixHMT(NULL, pix0, sel4);
637 pixOr(pix1, pix1, pix2);
638 if (pixm)
639 pixAnd(pix1, pix1, pixm);
640 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
641 pixCountPixels(pix3, &countdown, NULL);
642 pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
643 pixDestroy(&pix1);
644 pixDestroy(&pix2);
645 pixDestroy(&pix3);
646
647 /* Evaluate statistically, generating a confidence that is
648 * related to the probability with a gaussian distribution. */
649 nup = (l_float32)(countup);
650 ndown = (l_float32)(countdown);
651 nmax = L_MAX(countup, countdown);
652 if (nmax > mincount)
653 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
654
655 if (debug) {
656 if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
657 lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
658 nup, ndown, *pconf);
659 if (*pconf > DefaultMinUpDownConf)
660 lept_stderr("Text is rightside-up\n");
661 if (*pconf < -DefaultMinUpDownConf)
662 lept_stderr("Text is upside-down\n");
663 }
664
665 pixDestroy(&pix0);
666 pixDestroy(&pixm);
667 selDestroy(&sel1);
668 selDestroy(&sel2);
669 selDestroy(&sel3);
670 selDestroy(&sel4);
671 return 0;
672}
673
674
/*----------------------------------------------------------------*
 *                     Left-right mirror detection                *
 *----------------------------------------------------------------*/
717l_ok
719 l_float32 *pconf,
720 l_int32 mincount,
721 l_int32 debug)
722{
723l_int32 count1, count2, nmax;
724l_float32 nleft, nright;
725PIX *pix0, *pix1, *pix2, *pix3;
726SEL *sel1, *sel2;
727
728 PROCNAME("pixMirrorDetect");
729
730 if (!pconf)
731 return ERROR_INT("&conf not defined", procName, 1);
732 *pconf = 0.0;
733 if (!pixs || pixGetDepth(pixs) != 1)
734 return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
735 if (mincount == 0)
736 mincount = DefaultMinMirrorFlipCount;
737
738 if (debug) {
739 lept_mkdir("lept/orient");
740 }
741
742 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
743 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
744
745 /* Fill x-height characters but not space between them, sort of. */
746 pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
747 pixXor(pix3, pix3, pixs);
748 pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
749 pixXor(pix0, pix0, pixs);
750 pixAnd(pix0, pix0, pix3);
751 pixOr(pix0, pix0, pixs);
752 pixDestroy(&pix3);
753
754 /* Filter the right-facing characters. */
755 pix1 = pixHMT(NULL, pix0, sel1);
756 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
757 pixCountPixels(pix3, &count1, NULL);
758 pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
759 pixDestroy(&pix1);
760 pixDestroy(&pix3);
761
762 /* Filter the left-facing characters. */
763 pix2 = pixHMT(NULL, pix0, sel2);
764 pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
765 pixCountPixels(pix3, &count2, NULL);
766 pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
767 pixDestroy(&pix2);
768 pixDestroy(&pix3);
769
770 nright = (l_float32)count1;
771 nleft = (l_float32)count2;
772 nmax = L_MAX(count1, count2);
773 pixDestroy(&pix0);
774 selDestroy(&sel1);
775 selDestroy(&sel2);
776
777 if (nmax > mincount)
778 *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
779
780 if (debug) {
781 lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
782 if (*pconf > DefaultMinMirrorFlipConf)
783 lept_stderr("Text is not mirror reversed\n");
784 if (*pconf < -DefaultMinMirrorFlipConf)
785 lept_stderr("Text is mirror reversed\n");
786 }
787
788 return 0;
789}
790
791
792/*----------------------------------------------------------------*
793 * Static debug helper *
794 *----------------------------------------------------------------*/
795/*
796 * \brief pixDebugFlipDetect()
797 *
798 * \param[in] filename for output debug file
799 * \param[in] pixs input to pix*Detect
800 * \param[in] pixhm hit-miss result from ascenders or descenders
801 * \param[in] enable 1 to enable this function; 0 to disable
802 * \return void
803 */
804static void
805pixDebugFlipDetect(const char *filename,
806 PIX *pixs,
807 PIX *pixhm,
808 l_int32 enable)
809{
810PIX *pixt, *pixthm;
811
812 if (!enable) return;
813
814 /* Display with red dot at counted locations */
815 pixt = pixConvert1To4Cmap(pixs);
816 pixthm = pixMorphSequence(pixhm, "d5.5", 0);
817 pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
818
819 pixWriteDebug(filename, pixt, IFF_PNG);
820 pixDestroy(&pixthm);
821 pixDestroy(&pixt);
822 return;
823}
PIX * pixReduceRankBinaryCascade(PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4)
pixReduceRankBinaryCascade()
Definition: binreduce.c:152
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:779
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:282
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:734
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:313
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:151
l_ok makeOrientDecision(l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug)
makeOrientDecision()
Definition: flipdetect.c:464
l_ok pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
pixMirrorDetect()
Definition: flipdetect.c:718
l_ok pixUpDownDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
pixUpDownDetect()
Definition: flipdetect.c:558
l_ok pixOrientDetect(PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
pixOrientDetect()
Definition: flipdetect.c:405
PIX * pixOrientCorrect(PIX *pixs, l_float32 minupconf, l_float32 minratio, l_float32 *pupconf, l_float32 *pleftconf, l_int32 *protation, l_int32 debug)
pixOrientCorrect()
Definition: flipdetect.c:274
PIX * pixHMT(PIX *pixd, PIX *pixs, SEL *sel)
pixHMT()
Definition: morph.c:342
PIX * pixMorphCompSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphCompSequence()
Definition: morphseq.c:304
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
Definition: morphseq.c:137
l_ok pixSetMaskedCmap(PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 rval, l_int32 gval, l_int32 bval)
pixSetMaskedCmap()
Definition: paintcmap.c:698
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
PIX * pixCreateTemplate(const PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:383
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:705
PIX * pixOr(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixOr()
Definition: pix3.c:1560
l_ok pixCountPixels(PIX *pixs, l_int32 *pcount, l_int32 *tab8)
pixCountPixels()
Definition: pix3.c:1937
PIX * pixAnd(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixAnd()
Definition: pix3.c:1624
PIX * pixXor(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixXor()
Definition: pix3.c:1688
@ L_CLONE
Definition: pix.h:713
@ L_TEXT_ORIENT_DOWN
Definition: pix.h:994
@ L_TEXT_ORIENT_UP
Definition: pix.h:992
@ L_TEXT_ORIENT_LEFT
Definition: pix.h:993
@ L_TEXT_ORIENT_RIGHT
Definition: pix.h:995
@ L_TEXT_ORIENT_UNKNOWN
Definition: pix.h:991
#define PIX_SET
Definition: pix.h:334
PIX * pixConvert1To4Cmap(PIX *pixs)
pixConvert1To4Cmap()
Definition: pixconv.c:2237
l_ok pixRasterop(PIX *pixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, PIX *pixs, l_int32 sx, l_int32 sy)
pixRasterop()
Definition: rop.c:204
PIX * pixRotate90(PIX *pixs, l_int32 direction)
pixRotate90()
Definition: rotateorth.c:166
PIX * pixRotateOrth(PIX *pixs, l_int32 quads)
pixRotateOrth()
Definition: rotateorth.c:75
void selDestroy(SEL **psel)
selDestroy()
Definition: sel1.c:340
SEL * selCreateFromString(const char *text, l_int32 h, l_int32 w, const char *name)
selCreateFromString()
Definition: sel1.c:1607
Definition: pix.h:481
Definition: pix.h:492
Definition: pix.h:139
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
Definition: utils2.c:2218