Leptonica 1.85.0
Image processing and image analysis suite
Loading...
Searching...
No Matches
flipdetect.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
215#ifdef HAVE_CONFIG_H
216#include <config_auto.h>
217#endif /* HAVE_CONFIG_H */
218
219#include <math.h>
220#include "allheaders.h"
221
/* Hit-miss sels used by pixUpDownDetect() and pixMirrorDetect().
 *
 * Each sel is written as 5 rows of 6 characters (one string literal
 * per row), so every string must be exactly 30 characters long to
 * match the selCreateFromString(..., 5, 6, NULL) calls in this file.
 * Per selCreateFromString(): 'x' is a hit, 'o' is a miss, ' ' is a
 * don't-care, and the single upper-case character marks the origin.
 *
 * textsel1 and textsel2 are left/right mirror images of each other;
 * textsel3 and textsel4 are the same two sels flipped top/bottom. */
static const char *textsel1 = "x  oo "
                              "x oOo "
                              "x  o  "
                              "x     "
                              "xxxxxx";

static const char *textsel2 = " oo  x"
                              " oOo x"
                              "  o  x"
                              "     x"
                              "xxxxxx";

static const char *textsel3 = "xxxxxx"
                              "x     "
                              "x  o  "
                              "x oOo "
                              "x  oo ";

static const char *textsel4 = "xxxxxx"
                              "     x"
                              "  o  x"
                              " oOo x"
                              " oo  x";
246
247 /* Parameters for determining orientation */
248static const l_int32 DefaultMinUpDownCount = 70;
249static const l_float32 DefaultMinUpDownConf = 8.0;
250static const l_float32 DefaultMinUpDownRatio = 2.5;
251
252 /* Parameters for determining mirror flip */
253static const l_int32 DefaultMinMirrorFlipCount = 100;
254static const l_float32 DefaultMinMirrorFlipConf = 5.0;
255
256 /* Static debug function */
257static void pixDebugFlipDetect(const char *filename, PIX *pixs,
258 PIX *pixhm, l_int32 enable);
259
260
261/*----------------------------------------------------------------*
262 * High-level interface for detection and correction *
263 *----------------------------------------------------------------*/
288PIX *
290 l_float32 minupconf,
291 l_float32 minratio,
292 l_float32 *pupconf,
293 l_float32 *pleftconf,
294 l_int32 *protation,
295 l_int32 debug)
296{
297l_int32 orient;
298l_float32 upconf, leftconf;
299PIX *pix1;
300
301 if (!pixs || pixGetDepth(pixs) != 1)
302 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
303
304 /* Get confidences for orientation */
305 pixUpDownDetect(pixs, &upconf, 0, 0, debug);
306 pix1 = pixRotate90(pixs, 1);
307 pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
308 pixDestroy(&pix1);
309 if (pupconf) *pupconf = upconf;
310 if (pleftconf) *pleftconf = leftconf;
311
312 /* Decide what to do */
313 makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
314
315 /* Do it */
316 switch (orient)
317 {
319 L_INFO("text orientation not determined; no rotation\n", __func__);
320 if (protation) *protation = 0;
321 return pixCopy(NULL, pixs);
322 break;
323 case L_TEXT_ORIENT_UP:
324 L_INFO("text is oriented up; no rotation\n", __func__);
325 if (protation) *protation = 0;
326 return pixCopy(NULL, pixs);
327 break;
329 L_INFO("landscape; text oriented left; 90 cw rotation\n", __func__);
330 if (protation) *protation = 90;
331 return pixRotateOrth(pixs, 1);
332 break;
334 L_INFO("text oriented down; 180 cw rotation\n", __func__);
335 if (protation) *protation = 180;
336 return pixRotateOrth(pixs, 2);
337 break;
339 L_INFO("landscape; text oriented right; 270 cw rotation\n", __func__);
340 if (protation) *protation = 270;
341 return pixRotateOrth(pixs, 3);
342 break;
343 default:
344 L_ERROR("invalid orient flag!\n", __func__);
345 return pixCopy(NULL, pixs);
346 }
347}
348
349
350/*----------------------------------------------------------------*
351 * Orientation detection (four 90 degree angles) *
352 *----------------------------------------------------------------*/
418l_ok
420 l_float32 *pupconf,
421 l_float32 *pleftconf,
422 l_int32 mincount,
423 l_int32 debug)
424{
425PIX *pix1;
426
427 if (!pixs || pixGetDepth(pixs) != 1)
428 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
429 if (!pupconf && !pleftconf)
430 return ERROR_INT("nothing to do", __func__, 1);
431 if (mincount == 0)
432 mincount = DefaultMinUpDownCount;
433
434 if (pupconf)
435 pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
436 if (pleftconf) {
437 pix1 = pixRotate90(pixs, 1);
438 pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
439 pixDestroy(&pix1);
440 }
441
442 return 0;
443}
444
445
475l_ok
476makeOrientDecision(l_float32 upconf,
477 l_float32 leftconf,
478 l_float32 minupconf,
479 l_float32 minratio,
480 l_int32 *porient,
481 l_int32 debug)
482{
483l_float32 absupconf, absleftconf;
484
485 if (!porient)
486 return ERROR_INT("&orient not defined", __func__, 1);
487 *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */
488 if (upconf == 0.0 || leftconf == 0.0) {
489 L_INFO("not enough confidence to get orientation\n", __func__);
490 return 0;
491 }
492
493 if (minupconf == 0.0)
494 minupconf = DefaultMinUpDownConf;
495 if (minratio == 0.0)
496 minratio = DefaultMinUpDownRatio;
497 absupconf = L_ABS(upconf);
498 absleftconf = L_ABS(leftconf);
499
500 /* Here are the four possible orientation decisions, based
501 * on satisfaction of two threshold constraints. */
502 if (upconf > minupconf && absupconf > minratio * absleftconf)
503 *porient = L_TEXT_ORIENT_UP;
504 else if (leftconf > minupconf && absleftconf > minratio * absupconf)
505 *porient = L_TEXT_ORIENT_LEFT;
506 else if (upconf < -minupconf && absupconf > minratio * absleftconf)
507 *porient = L_TEXT_ORIENT_DOWN;
508 else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
509 *porient = L_TEXT_ORIENT_RIGHT;
510
511 if (debug) {
512 lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
513 if (*porient == L_TEXT_ORIENT_UNKNOWN)
514 lept_stderr("Confidence is low; no determination is made\n");
515 else if (*porient == L_TEXT_ORIENT_UP)
516 lept_stderr("Text is rightside-up\n");
517 else if (*porient == L_TEXT_ORIENT_LEFT)
518 lept_stderr("Text is rotated 90 deg ccw\n");
519 else if (*porient == L_TEXT_ORIENT_DOWN)
520 lept_stderr("Text is upside-down\n");
521 else /* *porient == L_TEXT_ORIENT_RIGHT */
522 lept_stderr("Text is rotated 90 deg cw\n");
523 }
524
525 return 0;
526}
527
528
568l_ok
570 l_float32 *pconf,
571 l_int32 mincount,
572 l_int32 npixels,
573 l_int32 debug)
574{
575l_int32 countup, countdown, nmax;
576l_float32 nup, ndown;
577PIX *pix0, *pix1, *pix2, *pix3, *pixm;
578SEL *sel1, *sel2, *sel3, *sel4;
579
580 if (!pconf)
581 return ERROR_INT("&conf not defined", __func__, 1);
582 *pconf = 0.0;
583 if (!pixs || pixGetDepth(pixs) != 1)
584 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
585 if (mincount == 0)
586 mincount = DefaultMinUpDownCount;
587 if (npixels < 0)
588 npixels = 0;
589
590 if (debug) {
591 lept_mkdir("lept/orient");
592 }
593
594 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
595 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
596 sel3 = selCreateFromString(textsel3, 5, 6, NULL);
597 sel4 = selCreateFromString(textsel4, 5, 6, NULL);
598
599 /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
600 * This closes holes in x-height characters and joins them at
601 * the x-height. There is more noise in the descender detection
602 * from this, but it works fairly well. */
603 pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
604
605 /* Optionally, make a mask of the word bounding boxes, shortening
606 * each of them by a fixed amount at each end. */
607 pixm = NULL;
608 if (npixels > 0) {
609 l_int32 i, nbox, x, y, w, h;
610 BOX *box;
611 BOXA *boxa;
612 pix1 = pixMorphSequence(pix0, "o10.1", 0);
613 boxa = pixConnComp(pix1, NULL, 8);
614 pixm = pixCreateTemplate(pix1);
615 pixDestroy(&pix1);
616 nbox = boxaGetCount(boxa);
617 for (i = 0; i < nbox; i++) {
618 box = boxaGetBox(boxa, i, L_CLONE);
619 boxGetGeometry(box, &x, &y, &w, &h);
620 if (w > 2 * npixels)
621 pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
622 PIX_SET, NULL, 0, 0);
623 boxDestroy(&box);
624 }
625 boxaDestroy(&boxa);
626 }
627
628 /* Find the ascenders and optionally filter with pixm.
629 * For an explanation of the procedure used for counting the result
630 * of the HMT, see comments at the beginning of this function. */
631 pix1 = pixHMT(NULL, pix0, sel1);
632 pix2 = pixHMT(NULL, pix0, sel2);
633 pixOr(pix1, pix1, pix2);
634 if (pixm)
635 pixAnd(pix1, pix1, pixm);
636 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
637 pixCountPixels(pix3, &countup, NULL);
638 pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
639 pixDestroy(&pix1);
640 pixDestroy(&pix2);
641 pixDestroy(&pix3);
642
643 /* Find the ascenders and optionally filter with pixm. */
644 pix1 = pixHMT(NULL, pix0, sel3);
645 pix2 = pixHMT(NULL, pix0, sel4);
646 pixOr(pix1, pix1, pix2);
647 if (pixm)
648 pixAnd(pix1, pix1, pixm);
649 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
650 pixCountPixels(pix3, &countdown, NULL);
651 pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
652 pixDestroy(&pix1);
653 pixDestroy(&pix2);
654 pixDestroy(&pix3);
655
656 /* Evaluate statistically, generating a confidence that is
657 * related to the probability with a gaussian distribution. */
658 nup = (l_float32)(countup);
659 ndown = (l_float32)(countdown);
660 nmax = L_MAX(countup, countdown);
661 if (nmax > mincount)
662 *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
663
664 if (debug) {
665 if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
666 lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
667 nup, ndown, *pconf);
668 if (*pconf > DefaultMinUpDownConf)
669 lept_stderr("Text is rightside-up\n");
670 if (*pconf < -DefaultMinUpDownConf)
671 lept_stderr("Text is upside-down\n");
672 }
673
674 pixDestroy(&pix0);
675 pixDestroy(&pixm);
676 selDestroy(&sel1);
677 selDestroy(&sel2);
678 selDestroy(&sel3);
679 selDestroy(&sel4);
680 return 0;
681}
682
683
684/*----------------------------------------------------------------*
685 * Left-right mirror detection *
686 *----------------------------------------------------------------*/
727l_ok
729 l_float32 *pconf,
730 l_int32 mincount,
731 l_int32 debug)
732{
733l_int32 count1, count2, nmax;
734l_float32 nleft, nright;
735PIX *pix0, *pix1, *pix2, *pix3;
736SEL *sel1, *sel2;
737
738 if (!pconf)
739 return ERROR_INT("&conf not defined", __func__, 1);
740 *pconf = 0.0;
741 if (!pixs || pixGetDepth(pixs) != 1)
742 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
743 if (mincount == 0)
744 mincount = DefaultMinMirrorFlipCount;
745
746 if (debug) {
747 lept_mkdir("lept/orient");
748 }
749
750 sel1 = selCreateFromString(textsel1, 5, 6, NULL);
751 sel2 = selCreateFromString(textsel2, 5, 6, NULL);
752
753 /* Fill x-height characters but not space between them, sort of. */
754 pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
755 pixXor(pix3, pix3, pixs);
756 pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
757 pixXor(pix0, pix0, pixs);
758 pixAnd(pix0, pix0, pix3);
759 pixOr(pix0, pix0, pixs);
760 pixDestroy(&pix3);
761
762 /* Filter the right-facing characters. */
763 pix1 = pixHMT(NULL, pix0, sel1);
764 pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
765 pixCountPixels(pix3, &count1, NULL);
766 pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
767 pixDestroy(&pix1);
768 pixDestroy(&pix3);
769
770 /* Filter the left-facing characters. */
771 pix2 = pixHMT(NULL, pix0, sel2);
772 pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
773 pixCountPixels(pix3, &count2, NULL);
774 pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
775 pixDestroy(&pix2);
776 pixDestroy(&pix3);
777
778 nright = (l_float32)count1;
779 nleft = (l_float32)count2;
780 nmax = L_MAX(count1, count2);
781 pixDestroy(&pix0);
782 selDestroy(&sel1);
783 selDestroy(&sel2);
784
785 if (nmax > mincount)
786 *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
787
788 if (debug) {
789 lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
790 if (*pconf > DefaultMinMirrorFlipConf)
791 lept_stderr("Text is not mirror reversed\n");
792 if (*pconf < -DefaultMinMirrorFlipConf)
793 lept_stderr("Text is mirror reversed\n");
794 }
795
796 return 0;
797}
798
799
800/*----------------------------------------------------------------*
801 * Static debug helper *
802 *----------------------------------------------------------------*/
803/*
804 * \brief pixDebugFlipDetect()
805 *
806 * \param[in] filename for output debug file
807 * \param[in] pixs input to pix*Detect
808 * \param[in] pixhm hit-miss result from ascenders or descenders
809 * \param[in] enable 1 to enable this function; 0 to disable
810 * \return void
811 */
812static void
813pixDebugFlipDetect(const char *filename,
814 PIX *pixs,
815 PIX *pixhm,
816 l_int32 enable)
817{
818PIX *pixt, *pixthm;
819
820 if (!enable) return;
821
822 /* Display with red dot at counted locations */
823 pixt = pixConvert1To4Cmap(pixs);
824 pixthm = pixMorphSequence(pixhm, "d5.5", 0);
825 pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
826
827 pixWriteDebug(filename, pixt, IFF_PNG);
828 pixDestroy(&pixthm);
829 pixDestroy(&pixt);
830 return;
831}
l_ok makeOrientDecision(l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug)
makeOrientDecision()
Definition flipdetect.c:476
l_ok pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
pixMirrorDetect()
Definition flipdetect.c:728
l_ok pixUpDownDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
pixUpDownDetect()
Definition flipdetect.c:569
l_ok pixOrientDetect(PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
pixOrientDetect()
Definition flipdetect.c:419
PIX * pixOrientCorrect(PIX *pixs, l_float32 minupconf, l_float32 minratio, l_float32 *pupconf, l_float32 *pleftconf, l_int32 *protation, l_int32 debug)
pixOrientCorrect()
Definition flipdetect.c:289
@ L_CLONE
Definition pix.h:506
@ L_TEXT_ORIENT_DOWN
Definition pix.h:787
@ L_TEXT_ORIENT_UP
Definition pix.h:785
@ L_TEXT_ORIENT_LEFT
Definition pix.h:786
@ L_TEXT_ORIENT_RIGHT
Definition pix.h:788
@ L_TEXT_ORIENT_UNKNOWN
Definition pix.h:784
#define PIX_SET
Definition pix.h:448