Leptonica 1.84.1
Image processing and image analysis suite
Loading...
Searching...
No Matches
flipdetect.c
Go to the documentation of this file.
/*====================================================================*
 -  Copyright (C) 2001 Leptonica. All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/
26
201#ifdef HAVE_CONFIG_H
202#include <config_auto.h>
203#endif /* HAVE_CONFIG_H */
204
205#include <math.h>
206#include "allheaders.h"
207
/* Sels for pixOrientDetect() and pixMirrorDetect().
 *
 * Each string is a 5 row x 6 column hit-miss sel laid out row by row,
 * so every string must hold exactly 30 characters to match the
 * selCreateFromString(textselN, 5, 6, NULL) calls in this file.
 * Per selCreateFromString(): 'x' is a hit, 'o' is a miss, ' ' is
 * don't-care, and the single upper-case 'O' marks the sel origin.
 *
 * textsel2 is the left-right mirror of textsel1; textsel3 and textsel4
 * are the top-bottom mirrors of textsel1 and textsel2, respectively. */
static const char  *textsel1 = "x  oo "
                               "x oOo "
                               "x  o  "
                               "x     "
                               "xxxxxx";

static const char  *textsel2 = " oo  x"
                               " oOo x"
                               "  o  x"
                               "     x"
                               "xxxxxx";

static const char  *textsel3 = "xxxxxx"
                               "x     "
                               "x  o  "
                               "x oOo "
                               "x  oo ";

static const char  *textsel4 = "xxxxxx"
                               "     x"
                               "  o  x"
                               " oOo x"
                               " oo  x";
232
233 /* Parameters for determining orientation */
234static const l_int32 DefaultMinUpDownCount = 70;
235static const l_float32 DefaultMinUpDownConf = 8.0;
236static const l_float32 DefaultMinUpDownRatio = 2.5;
237
238 /* Parameters for determining mirror flip */
239static const l_int32 DefaultMinMirrorFlipCount = 100;
240static const l_float32 DefaultMinMirrorFlipConf = 5.0;
241
242 /* Static debug function */
243static void pixDebugFlipDetect(const char *filename, PIX *pixs,
244 PIX *pixhm, l_int32 enable);
245
246
/*----------------------------------------------------------------*
 *       High-level interface for detection and correction        *
 *----------------------------------------------------------------*/
273PIX *
275 l_float32 minupconf,
276 l_float32 minratio,
277 l_float32 *pupconf,
278 l_float32 *pleftconf,
279 l_int32 *protation,
280 l_int32 debug)
281{
282l_int32 orient;
283l_float32 upconf, leftconf;
284PIX *pix1;
285
286 if (!pixs || pixGetDepth(pixs) != 1)
287 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
288
289 /* Get confidences for orientation */
290 pixUpDownDetect(pixs, &upconf, 0, 0, debug);
291 pix1 = pixRotate90(pixs, 1);
292 pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
293 pixDestroy(&pix1);
294 if (pupconf) *pupconf = upconf;
295 if (pleftconf) *pleftconf = leftconf;
296
297 /* Decide what to do */
298 makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
299
300 /* Do it */
301 switch (orient)
302 {
304 L_INFO("text orientation not determined; no rotation\n", __func__);
305 if (protation) *protation = 0;
306 return pixCopy(NULL, pixs);
307 break;
308 case L_TEXT_ORIENT_UP:
309 L_INFO("text is oriented up; no rotation\n", __func__);
310 if (protation) *protation = 0;
311 return pixCopy(NULL, pixs);
312 break;
314 L_INFO("landscape; text oriented left; 90 cw rotation\n", __func__);
315 if (protation) *protation = 90;
316 return pixRotateOrth(pixs, 1);
317 break;
319 L_INFO("text oriented down; 180 cw rotation\n", __func__);
320 if (protation) *protation = 180;
321 return pixRotateOrth(pixs, 2);
322 break;
324 L_INFO("landscape; text oriented right; 270 cw rotation\n", __func__);
325 if (protation) *protation = 270;
326 return pixRotateOrth(pixs, 3);
327 break;
328 default:
329 L_ERROR("invalid orient flag!\n", __func__);
330 return pixCopy(NULL, pixs);
331 }
332}
333
334
/*----------------------------------------------------------------*
 *         Orientation detection (four 90 degree angles)          *
 *----------------------------------------------------------------*/
402l_ok
404 l_float32 *pupconf,
405 l_float32 *pleftconf,
406 l_int32 mincount,
407 l_int32 debug)
408{
409PIX *pix1;
410
411 if (!pixs || pixGetDepth(pixs) != 1)
412 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
413 if (!pupconf && !pleftconf)
414 return ERROR_INT("nothing to do", __func__, 1);
415 if (mincount == 0)
416 mincount = DefaultMinUpDownCount;
417
418 if (pupconf)
419 pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
420 if (pleftconf) {
421 pix1 = pixRotate90(pixs, 1);
422 pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
423 pixDestroy(&pix1);
424 }
425
426 return 0;
427}
428
429
459l_ok
460makeOrientDecision(l_float32 upconf,
461 l_float32 leftconf,
462 l_float32 minupconf,
463 l_float32 minratio,
464 l_int32 *porient,
465 l_int32 debug)
466{
467l_float32 absupconf, absleftconf;
468
469 if (!porient)
470 return ERROR_INT("&orient not defined", __func__, 1);
471 *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */
472 if (upconf == 0.0 || leftconf == 0.0) {
473 L_INFO("not enough confidence to get orientation\n", __func__);
474 return 0;
475 }
476
477 if (minupconf == 0.0)
478 minupconf = DefaultMinUpDownConf;
479 if (minratio == 0.0)
480 minratio = DefaultMinUpDownRatio;
481 absupconf = L_ABS(upconf);
482 absleftconf = L_ABS(leftconf);
483
484 /* Here are the four possible orientation decisions, based
485 * on satisfaction of two threshold constraints. */
486 if (upconf > minupconf && absupconf > minratio * absleftconf)
487 *porient = L_TEXT_ORIENT_UP;
488 else if (leftconf > minupconf && absleftconf > minratio * absupconf)
489 *porient = L_TEXT_ORIENT_LEFT;
490 else if (upconf < -minupconf && absupconf > minratio * absleftconf)
491 *porient = L_TEXT_ORIENT_DOWN;
492 else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
493 *porient = L_TEXT_ORIENT_RIGHT;
494
495 if (debug) {
496 lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
497 if (*porient == L_TEXT_ORIENT_UNKNOWN)
498 lept_stderr("Confidence is low; no determination is made\n");
499 else if (*porient == L_TEXT_ORIENT_UP)
500 lept_stderr("Text is rightside-up\n");
501 else if (*porient == L_TEXT_ORIENT_LEFT)
502 lept_stderr("Text is rotated 90 deg ccw\n");
503 else if (*porient == L_TEXT_ORIENT_DOWN)
504 lept_stderr("Text is upside-down\n");
505 else /* *porient == L_TEXT_ORIENT_RIGHT */
506 lept_stderr("Text is rotated 90 deg cw\n");
507 }
508
509 return 0;
510}
511
512
551l_ok
553 l_float32 *pconf,
554 l_int32 mincount,
555 l_int32 npixels,
556 l_int32 debug)
557{
558l_int32 countup, countdown, nmax;
559l_float32 nup, ndown;
560PIX *pix0, *pix1, *pix2, *pix3, *pixm;
561SEL *sel1, *sel2, *sel3, *sel4;
562
563 if (!pconf)
564 return ERROR_INT("&conf not defined", __func__, 1);
565 *pconf = 0.0;
566 if (!pixs || pixGetDepth(pixs) != 1)
567 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
568 if (mincount == 0)
569 mincount = DefaultMinUpDownCount;
570 if (npixels < 0)
571 npixels = 0;
572
573 if (debug) {
574 lept_mkdir("lept/orient");
575 }
576
577 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
578 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
579 sel3 = selCreateFromString(textsel3, 5, 6, NULL);
580 sel4 = selCreateFromString(textsel4, 5, 6, NULL);
581
582 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
583 * This closes holes in x-height characters and joins them at
584 * the x-height. There is more noise in the descender detection
585 * from this, but it works fairly well. */
586 pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
587
588 /* Optionally, make a mask of the word bounding boxes, shortening
589 * each of them by a fixed amount at each end. */
590 pixm = NULL;
591 if (npixels > 0) {
592 l_int32 i, nbox, x, y, w, h;
593 BOX *box;
594 BOXA *boxa;
595 pix1 = pixMorphSequence(pix0, "o10.1", 0);
596 boxa = pixConnComp(pix1, NULL, 8);
597 pixm = pixCreateTemplate(pix1);
598 pixDestroy(&pix1);
599 nbox = boxaGetCount(boxa);
600 for (i = 0; i < nbox; i++) {
601 box = boxaGetBox(boxa, i, L_CLONE);
602 boxGetGeometry(box, &x, &y, &w, &h);
603 if (w > 2 * npixels)
604 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
605 PIX_SET, NULL, 0, 0);
606 boxDestroy(&box);
607 }
608 boxaDestroy(&boxa);
609 }
610
611 /* Find the ascenders and optionally filter with pixm.
612 * For an explanation of the procedure used for counting the result
613 * of the HMT, see comments at the beginning of this function. */
614 pix1 = pixHMT(NULL, pix0, sel1);
615 pix2 = pixHMT(NULL, pix0, sel2);
616 pixOr(pix1, pix1, pix2);
617 if (pixm)
618 pixAnd(pix1, pix1, pixm);
619 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
620 pixCountPixels(pix3, &countup, NULL);
621 pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
622 pixDestroy(&pix1);
623 pixDestroy(&pix2);
624 pixDestroy(&pix3);
625
626 /* Find the ascenders and optionally filter with pixm. */
627 pix1 = pixHMT(NULL, pix0, sel3);
628 pix2 = pixHMT(NULL, pix0, sel4);
629 pixOr(pix1, pix1, pix2);
630 if (pixm)
631 pixAnd(pix1, pix1, pixm);
632 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
633 pixCountPixels(pix3, &countdown, NULL);
634 pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
635 pixDestroy(&pix1);
636 pixDestroy(&pix2);
637 pixDestroy(&pix3);
638
639 /* Evaluate statistically, generating a confidence that is
640 * related to the probability with a gaussian distribution. */
641 nup = (l_float32)(countup);
642 ndown = (l_float32)(countdown);
643 nmax = L_MAX(countup, countdown);
644 if (nmax > mincount)
645 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
646
647 if (debug) {
648 if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
649 lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
650 nup, ndown, *pconf);
651 if (*pconf > DefaultMinUpDownConf)
652 lept_stderr("Text is rightside-up\n");
653 if (*pconf < -DefaultMinUpDownConf)
654 lept_stderr("Text is upside-down\n");
655 }
656
657 pixDestroy(&pix0);
658 pixDestroy(&pixm);
659 selDestroy(&sel1);
660 selDestroy(&sel2);
661 selDestroy(&sel3);
662 selDestroy(&sel4);
663 return 0;
664}
665
666
/*----------------------------------------------------------------*
 *                  Left-right mirror detection                   *
 *----------------------------------------------------------------*/
709l_ok
711 l_float32 *pconf,
712 l_int32 mincount,
713 l_int32 debug)
714{
715l_int32 count1, count2, nmax;
716l_float32 nleft, nright;
717PIX *pix0, *pix1, *pix2, *pix3;
718SEL *sel1, *sel2;
719
720 if (!pconf)
721 return ERROR_INT("&conf not defined", __func__, 1);
722 *pconf = 0.0;
723 if (!pixs || pixGetDepth(pixs) != 1)
724 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
725 if (mincount == 0)
726 mincount = DefaultMinMirrorFlipCount;
727
728 if (debug) {
729 lept_mkdir("lept/orient");
730 }
731
732 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
733 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
734
735 /* Fill x-height characters but not space between them, sort of. */
736 pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
737 pixXor(pix3, pix3, pixs);
738 pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
739 pixXor(pix0, pix0, pixs);
740 pixAnd(pix0, pix0, pix3);
741 pixOr(pix0, pix0, pixs);
742 pixDestroy(&pix3);
743
744 /* Filter the right-facing characters. */
745 pix1 = pixHMT(NULL, pix0, sel1);
746 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
747 pixCountPixels(pix3, &count1, NULL);
748 pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
749 pixDestroy(&pix1);
750 pixDestroy(&pix3);
751
752 /* Filter the left-facing characters. */
753 pix2 = pixHMT(NULL, pix0, sel2);
754 pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
755 pixCountPixels(pix3, &count2, NULL);
756 pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
757 pixDestroy(&pix2);
758 pixDestroy(&pix3);
759
760 nright = (l_float32)count1;
761 nleft = (l_float32)count2;
762 nmax = L_MAX(count1, count2);
763 pixDestroy(&pix0);
764 selDestroy(&sel1);
765 selDestroy(&sel2);
766
767 if (nmax > mincount)
768 *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
769
770 if (debug) {
771 lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
772 if (*pconf > DefaultMinMirrorFlipConf)
773 lept_stderr("Text is not mirror reversed\n");
774 if (*pconf < -DefaultMinMirrorFlipConf)
775 lept_stderr("Text is mirror reversed\n");
776 }
777
778 return 0;
779}
780
781
/*----------------------------------------------------------------*
 *                      Static debug helper                       *
 *----------------------------------------------------------------*/
785/*
786 * \brief pixDebugFlipDetect()
787 *
788 * \param[in] filename for output debug file
789 * \param[in] pixs input to pix*Detect
790 * \param[in] pixhm hit-miss result from ascenders or descenders
791 * \param[in] enable 1 to enable this function; 0 to disable
792 * \return void
793 */
794static void
795pixDebugFlipDetect(const char *filename,
796 PIX *pixs,
797 PIX *pixhm,
798 l_int32 enable)
799{
800PIX *pixt, *pixthm;
801
802 if (!enable) return;
803
804 /* Display with red dot at counted locations */
805 pixt = pixConvert1To4Cmap(pixs);
806 pixthm = pixMorphSequence(pixhm, "d5.5", 0);
807 pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
808
809 pixWriteDebug(filename, pixt, IFF_PNG);
810 pixDestroy(&pixthm);
811 pixDestroy(&pixt);
812 return;
813}
l_ok makeOrientDecision(l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug)
makeOrientDecision()
Definition flipdetect.c:460
l_ok pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
pixMirrorDetect()
Definition flipdetect.c:710
l_ok pixUpDownDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
pixUpDownDetect()
Definition flipdetect.c:552
l_ok pixOrientDetect(PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
pixOrientDetect()
Definition flipdetect.c:403
PIX * pixOrientCorrect(PIX *pixs, l_float32 minupconf, l_float32 minratio, l_float32 *pupconf, l_float32 *pleftconf, l_int32 *protation, l_int32 debug)
pixOrientCorrect()
Definition flipdetect.c:274
@ L_CLONE
Definition pix.h:506
@ L_TEXT_ORIENT_DOWN
Definition pix.h:787
@ L_TEXT_ORIENT_UP
Definition pix.h:785
@ L_TEXT_ORIENT_LEFT
Definition pix.h:786
@ L_TEXT_ORIENT_RIGHT
Definition pix.h:788
@ L_TEXT_ORIENT_UNKNOWN
Definition pix.h:784
#define PIX_SET
Definition pix.h:448