Leptonica 1.85.0
Image processing and image analysis suite
Loading...
Searching...
No Matches
pageseg.c File Reference
#include <math.h>
#include "allheaders.h"
#include "pix_internal.h"

Go to the source code of this file.

Functions

static l_ok pixMaxCompAfterVClosing (PIX *pixs, BOX **pbox)
 
static l_ok pixFindPageInsideBlackBorder (PIX *pixs, BOX **pbox)
 
static PIX * pixRescaleForCropping (PIX *pixs, l_int32 w, l_int32 h, l_int32 lr_border, l_int32 tb_border, l_float32 maxwiden, PIX **ppixsc)
 
l_ok pixGetRegionsBinary (PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb)
 
PIX * pixGenHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug)
 
PIX * pixGenerateHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb)
 
PIX * pixGenTextlineMask (PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb)
 
PIX * pixGenTextblockMask (PIX *pixs, PIX *pixvws, PIXA *pixadb)
 
PIX * pixCropImage (PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_border, l_int32 tb_border, l_float32 maxwiden, l_int32 printwiden, const char *debugfile, BOX **pcropbox)
 
PIX * pixCleanImage (PIX *pixs, l_int32 contrast, l_int32 rotation, l_int32 scale, l_int32 opensize)
 
BOX * pixFindPageForeground (PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac)
 
l_ok pixSplitIntoCharacters (PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug)
 
BOXA * pixSplitComponentWithProfile (PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug)
 
PIXA * pixExtractTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
 
PIXA * pixExtractRawTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
 
l_ok pixCountTextColumns (PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb)
 
l_ok pixDecideIfText (PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb)
 
l_ok pixFindThreshFgExtent (PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot)
 
l_ok pixDecideIfTable (PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb)
 
PIX * pixPrepare1bpp (PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres)
 
l_ok pixEstimateBackground (PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg)
 
l_ok pixFindLargeRectangles (PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb)
 
l_ok pixFindLargestRectangle (PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb)
 
PIX * pixAutoPhotoinvert (PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb)
 

Variables

static const l_int32 MinWidth = 100
 
static const l_int32 MinHeight = 100
 

Detailed Description


     Top level page segmentation
         l_int32   pixGetRegionsBinary()

     Halftone region extraction
         PIX      *pixGenHalftoneMask()    **Deprecated wrapper**
         PIX      *pixGenerateHalftoneMask()

     Textline extraction
         PIX      *pixGenTextlineMask()

     Textblock extraction
         PIX      *pixGenTextblockMask()

     Location and extraction of page foreground; cleaning pages
         PIX            *pixCropImage()
         static l_int32  pixMaxCompAfterVClosing()
         static l_int32  pixFindPageInsideBlackBorder()
         static PIX     *pixRescaleForCropping()
         PIX            *pixCleanImage()
         BOX            *pixFindPageForeground()

     Extraction of characters from image with only text
         l_int32   pixSplitIntoCharacters()
         BOXA     *pixSplitComponentWithProfile()

     Extraction of lines of text
         PIXA     *pixExtractTextlines()
         PIXA     *pixExtractRawTextlines()

     How many text columns
         l_int32   pixCountTextColumns()

     Decision: text vs photo
         l_int32   pixDecideIfText()
         l_int32   pixFindThreshFgExtent()

     Decision: table vs text
         l_int32   pixDecideIfTable()
         PIX      *pixPrepare1bpp()

     Estimate the grayscale background value
         l_int32   pixEstimateBackground()

     Largest white or black rectangles in an image
         l_int32   pixFindLargeRectangles()
         l_int32   pixFindLargestRectangle()

     Generate rectangle inside connected component
         BOX      *pixFindRectangleInCC()

     Automatic photoinvert for OCR
         PIX      *pixAutoPhotoinvert()

Definition in file pageseg.c.

Function Documentation

◆ pixAutoPhotoinvert()

PIX * pixAutoPhotoinvert ( PIX * pixs,
l_int32 thresh,
PIX ** ppixm,
PIXA * pixadb )

pixFindRectangleInCC()

Parameters
[in] pixs: 1 bpp, with sufficient closings to make the fg be a single c.c. that is a convex hull
[in] boxs: [optional] if NULL, pixs should be a minimum container of a single c.c.
[in] fract: first and all consecutive lines found must be at least this fraction of the fast scan dimension
[in] dir: L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of fast scan
[in] select: L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, L_LARGEST_AREA, L_SMALLEST_AREA
[in] debug: if 1, generates output pdf showing intermediate computation and final result
Returns
box of included rectangle, or NULL on error
   Notes:
        (1) Computation is similar to pixFindLargestRectangle(), but allows
            a different set of results to choose from.
        (2) Select the fast scan direction.  Then, scanning in the slow
            direction, find the longest run of ON pixels in the fast
            scan direction and look for the first run that is longer
            than fract of the dimension.  Continue until a shorter run
            is found.  This generates a box of ON pixels fitting into the c.c.
        (3) Do this from both slow scan directions and use select to get
            a resulting box from these two.
        (4) The extracted rectangle is not necessarily the largest that
            can fit in the c.c.  To get that, use pixFindLargestRectangle().
 */
/*
 * pixFindRectangleInCC()
 *
 *   Finds a rectangle of ON pixels inside a 1 bpp image (optionally
 *   restricted to the region boxs), by looking for scanlines whose
 *   longest horizontal run of ON pixels is at least fract of the
 *   fast-scan dimension.
 *
 *      pixs     1 bpp
 *      boxs     [optional] region of pixs to analyze; NULL for all of pixs
 *      fract    required run length as a fraction of the fast-scan
 *               dimension; must be in (0.0, 1.0]
 *      dir      L_SCAN_HORIZONTAL or L_SCAN_VERTICAL (fast scan direction)
 *      select   L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
 *               L_LARGEST_AREA or L_SMALLEST_AREA; how the box found
 *               scanning from the top and the box found scanning from
 *               the bottom are combined
 *      debug    if nonzero, writes /tmp/lept/rect/fitrect.pdf and
 *               /tmp/lept/rect/fitrect.png
 *      return   box in the coordinates of pixs, or NULL on error, if no
 *               sufficiently long run exists, or if the selected
 *               combination of the two boxes is empty
 *
 *   The caller owns the returned box.
 */
BOX *
pixFindRectangleInCC(PIX       *pixs,
                     BOX       *boxs,
                     l_float32  fract,
                     l_int32    dir,
                     l_int32    select,
                     l_int32    debug)
{
l_int32  x, y, i, w, h, w1, h1, w2, h2, found, res, minrun;
l_int32  xfirst, xlast, xstart, yfirst, ylast, length;
BOX     *boxc, *box1, *box2, *box3, *box4, *box5;
PIX     *pix1, *pix2, *pixdb1, *pixdb2;
PIXA    *pixadb;

    if (!pixs || pixGetDepth(pixs) != 1)
        return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
    if (fract <= 0.0 || fract > 1.0)
        return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
    if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
        return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
    if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
        select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
        return (BOX *)ERROR_PTR("invalid select", __func__, NULL);

        /* Extract the c.c. if necessary.  The offset (x, y) used later to
         * map the result back into pixs coordinates is taken from the box
         * that was actually clipped, not from boxs: if boxs extends past
         * the top or left edge of pixs, the clipped region starts at a
         * different location than boxs does. */
    x = y = 0;
    boxc = NULL;
    if (boxs) {
        pix1 = pixClipRectangle(pixs, boxs, &boxc);
        if (boxc)
            boxGetGeometry(boxc, &x, &y, NULL, NULL);
        boxDestroy(&boxc);
    } else {
        pix1 = pixClone(pixs);
    }
    if (!pix1)
        return (BOX *)ERROR_PTR("pix1 not made", __func__, NULL);

        /* All fast scans are horizontal; rotate 90 deg cw if necessary */
    if (dir == L_SCAN_VERTICAL)
        pix2 = pixRotate90(pix1, 1);
    else  /* L_SCAN_HORIZONTAL */
        pix2 = pixClone(pix1);
    if (!pix2) {
        pixDestroy(&pix1);
        return (BOX *)ERROR_PTR("pix2 not made", __func__, NULL);
    }
    pixGetDimensions(pix2, &w, &h, NULL);

        /* Minimum acceptable run length.  This is loop invariant, so it
         * is computed once.  It is clamped to 1 so that for very narrow
         * images an empty scanline (run length 0) is never accepted,
         * which would otherwise yield a zero-width box. */
    minrun = (l_int32)(fract * w + 0.5);
    if (minrun < 1) minrun = 1;

    pixadb = (debug) ? pixaCreate(0) : NULL;
    pixdb1 = NULL;
    if (pixadb) {
        lept_mkdir("lept/rect");
        pixaAddPix(pixadb, pix1, L_CLONE);
        pixdb1 = pixConvertTo32(pix2);
    }
    pixDestroy(&pix1);

        /* Scanning down, find the first scanline with a long enough run.
           That run goes from (xfirst, yfirst) to (xlast, yfirst).  */
    found = FALSE;
    xfirst = xlast = yfirst = 0;
    xstart = length = 0;
    for (i = 0; i < h; i++) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (length >= minrun) {
            yfirst = i;
            xfirst = xstart;
            xlast = xfirst + length - 1;
            found = TRUE;
            break;
        }
    }
    if (!found) {
        L_WARNING("no run of sufficient size was found\n", __func__);
        pixDestroy(&pix2);
        pixDestroy(&pixdb1);
        pixaDestroy(&pixadb);
        return NULL;
    }

         /* Continue down until the longest run on a line no longer
          * covers [xfirst, xlast].
          * NOTE(review): the (i == h - 1) test stops the box one row
          * above the last scanline even when that scanline qualifies;
          * kept as is to preserve existing results -- confirm intent. */
    w1 = xlast - xfirst + 1;
    h1 = h - yfirst;  /* init */
    ylast = h - 1;  /* init */
    for (i = yfirst + 1; i < h; i++) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
            i == h - 1) {
            ylast = i - 1;
            h1 = ylast - yfirst + 1;
            break;
        }
    }
    box1 = boxCreate(xfirst, yfirst, w1, h1);

        /* Scanning up, find the first scanline with a long enough run.
           That run goes from (xfirst, ylast) to (xlast, ylast).
           Because the downward scan succeeded, at least one scanline
           satisfies the condition, so this loop always breaks. */
    for (i = h - 1; i >= 0; i--) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (length >= minrun) {
            ylast = i;
            xfirst = xstart;
            xlast = xfirst + length - 1;
            break;
        }
    }

         /* Continue up until the condition fails.  If ylast == 0 the
          * loop does not run and yfirst keeps its value from the
          * downward scan, which must also be 0 in that case.
          * NOTE(review): as above, (i == 0) excludes row 0 even when
          * it qualifies; kept as is. */
    w2 = xlast - xfirst + 1;
    h2 = ylast + 1;  /* initialize */
    for (i = ylast - 1; i >= 0; i--) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
            i == 0) {
            yfirst = i + 1;
            h2 = ylast - yfirst + 1;
            break;
        }
    }
    box2 = boxCreate(xfirst, yfirst, w2, h2);
    pixDestroy(&pix2);

    if (pixadb) {
        pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
        pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
        pixaAddPix(pixadb, pixdb1, L_INSERT);  /* pixadb now owns pixdb1 */
    }

        /* Select the final result from the two boxes */
    if (select == L_GEOMETRIC_UNION)
        box3 = boxBoundingRegion(box1, box2);
    else if (select == L_GEOMETRIC_INTERSECTION)
        box3 = boxOverlapRegion(box1, box2);
    else if (select == L_LARGEST_AREA)
        box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
    else  /* select == L_SMALLEST_AREA) */
        box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
    boxDestroy(&box1);
    boxDestroy(&box2);

        /* Rotate the box 90 degrees ccw if necessary; w and h are the
         * dimensions of the (rotated) image the box was found in. */
    box4 = NULL;
    if (box3) {
        if (dir == L_SCAN_VERTICAL)
            box4 = boxRotateOrth(box3, w, h, 3);
        else
            box4 = boxCopy(box3);
    }

        /* Transform back to global coordinates if boxs exists */
    box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
    boxDestroy(&box3);
    boxDestroy(&box4);

        /* Debug output */
    if (pixadb) {
            /* The earlier pixdb1 was inserted into pixadb, so the
             * variable can be reused for a new image here. */
        pixdb1 = pixConvertTo8(pixs, 0);
        pixAddConstantGray(pixdb1, 190);
        pixdb2 = pixConvertTo32(pixdb1);
        if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
        pixaAddPix(pixadb, pixdb2, L_INSERT);
        res = pixGetXRes(pixs);
        L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
        pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
                        "/tmp/lept/rect/fitrect.pdf");
        pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
        pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
        pixDestroy(&pix1);
        pixDestroy(&pixdb1);
        pixaDestroy(&pixadb);
    }

    return box5;
}

/*------------------------------------------------------------------*
                      Automatic photoinvert for OCR                 *
 *------------------------------------------------------------------*/
/*!

pixAutoPhotoinvert()

Parameters
[in]pixs any depth, colormap ok
[in]thresh binarization threshold; use 0 for default
[out]ppixm [optional] image regions to be inverted
[out]pixadb [optional] debug; input NULL to skip
Returns
pixd 1 bpp image to be sent to OCR, or NULL on error
   Notes:
        (1) A 1 bpp image is returned, where pixels in image regions are
            photo-inverted.
        (2) If there is light text with a dark background, this will
            identify the region and photoinvert the pixels there if
            there are at least 60% fg pixels in the region.
        (3) For debug output, input a (typically empty) pixadb.
   

Definition at line 2910 of file pageseg.c.

References L_CLONE, L_COPY, PIX_CLR, and pixGenerateHalftoneMask().

◆ pixCleanImage()

PIX * pixCleanImage ( PIX * pixs,
l_int32 contrast,
l_int32 rotation,
l_int32 scale,
l_int32 opensize )

pixCleanImage()

Parameters
[in]pixsfull resolution (any type or depth)
[in]contrastvary contrast: 1 = lightest; 10 = darkest; suggest 1 unless light features are being lost
[in]rotationcw by 90 degrees: {0,1,2,3} represent 0, 90, 180 and 270 degree cw rotations
[in]scale1 (no scaling) or 2 (2x upscaling)
[in]opensizeopening size of structuring element for noise removal: {0 or 1 to skip; 2, 3 for opening}
Returns
cleaned pix, or NULL on error
Notes:
   (1) This deskews, optionally rotates and darkens, cleans background
       to white, binarizes and optionally removes small noise.
   (2) For color and grayscale input, local background normalization is
       done to 200, and a threshold of 180 sets the maximum foreground
       value in the normalized image.
   (3) The contrast parameter adjusts the binarization to avoid losing
       lighter input pixels.  Contrast is increased as contrast increases
       from 1 to 10.
   (4) The scale parameter controls the thresholding to 1 bpp. Two values:
           1 = threshold
           2 = linear interpolated 2x upscaling before threshold.
   (5) The #opensize parameter is the size of a square SEL used with
       opening to remove small speckle noise.  Allowed open sizes are 2,3.
       If this is to be used, try 2 before 3.
   (6) This does the image processing for cleanTo1bppFilesToPdf() and
       prog/cleanpdf.c.

Definition at line 1015 of file pageseg.c.

References pixBackgroundNormTo1MinMax(), and pixConvertTo8MinMax().

◆ pixCountTextColumns()

l_ok pixCountTextColumns ( PIX * pixs,
l_float32 deltafract,
l_float32 peakfract,
l_float32 clipfract,
l_int32 * pncols,
PIXA * pixadb )

pixCountTextColumns()

Parameters
[in]pixs1 bpp
[in]deltafractfraction of (max - min) to be used in the delta for extrema finding; typ 0.3
[in]peakfractfraction of (max - min) to be used to threshold the peak value; typ. 0.5
[in] clipfract: fraction of image dimension removed on each side; typ. 0.1, which leaves w and h each reduced to 0.8 of their original size
[out]pncolsnumber of columns; -1 if not determined
[in]pixadb[optional] pre-allocated, for showing intermediate computation; use null to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is assumed that pixs has the correct resolution set.
         If the resolution is 0, we set to 300 and issue a warning.
     (2) If necessary, the image is scaled to between 37 and 75 ppi;
         most of the processing is done at this resolution.
     (3) If no text is found (essentially a blank page),
         this returns ncols = 0.
     (4) For debug output, input a pre-allocated pixa.

Definition at line 1761 of file pageseg.c.

References L_COPY, and L_INSERT.

◆ pixCropImage()

PIX * pixCropImage ( PIX * pixs,
l_int32 lr_clear,
l_int32 tb_clear,
l_int32 edgeclean,
l_int32 lr_border,
l_int32 tb_border,
l_float32 maxwiden,
l_int32 printwiden,
const char * debugfile,
BOX ** pcropbox )

pixCropImage()

Parameters
[in]pixsfull resolution (any type or depth)
[in]lr_clearfull res pixels cleared at left and right sides
[in]tb_clearfull res pixels cleared at top and bottom sides
[in] edgeclean: parameter for removing edge noise (-2 to 15); default = 0 (no removal); 15 is maximally aggressive for random noise; -1 for aggressively removing side noise; -2 to extract a page embedded in a black background
[in]lr_borderfull res final "added" pixels on left and right
[in]tb_borderfull res final "added" pixels on top and bottom
[in]maxwidenmax fractional horizontal stretch allowed
[in]printwiden0 to skip, 1 for 8.5x11, 2 for A4
[in]*debugfile[optional] usually is NULL
[out]*pcropbox[optional] crop box at full resolution
Returns
cropped pix, or NULL on error
Notes:
     (1) This binarizes and crops a page image.
         (a) Binarizes if necessary and does 2x reduction.
         (b) Clears near the border by lr_clear and tb_clear full
             resolution pixels.  (This is done at 2x reduction.)
         (c) If edgeclean > 0, it removes isolated sets of pixels,
             using a close/open operation of size edgeclean + 1.
             If edgeclean == -1, it uses a large vertical morphological
             close/open and the extraction of either the largest
             resulting connected component (or the largest two components
             if the page has 2 columns), to eliminate noise on left
             and right sides.
             If edgeclean == -2, it extracts the page region from a
             possible exterior black surround.
         (d) Find the bounding box of remaining fg pixels and scales
             the box up 2x back to full resolution.
         (e) Crops the binarized image to the bounding box.
         (f) Slightly thickens long horizontal lines.
         (g) Rescales this image to fit within the original image,
             less lr_border on the sides and tb_border above and below.
             The rescaling is done isomorphically with a (possible)
             optional additional widening.  Suggest the additional
             widening factor not exceed 1.15.
         (h) Optionally do additional horizontal stretch if needed to
             better fill a printed page.  Default is 0 to skip; 1 to
             widen for 8.5x11 page, 2 for A4 page.
         Note that (b) - (d) are done at 2x reduction for efficiency.
     (2) Side clearing must not exceed 1/6 of the dimension on that side.
     (3) The clear and border pixel parameters must be >= 0.
     (4) The "clear" parameters act on the input image, whereas the
         "border" parameters act to give a white border to the final
         image.  They are not literally added, because the input and final
         images are the same size.  If the resulting images are to be
         printed, it is useful to have border pixel parameters of at
         least 60 at 300 ppi, to avoid losing content at the edges.
     (5) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (6) Step (f) above helps with orthographically-produced music notation,
         where the horizontal staff lines can be very thin and thus
         subject to printer alias.
     (7) If you are not concerned with printing on paper, use the
         default value 0 for printwiden.  Widening only takes place
         if the ratio h/w exceeds the specified paper size by 3%,
         and the horizontal scaling factor will not exceed 1.25.

Definition at line 605 of file pageseg.c.

References L_COPY, L_DEFAULT_ENCODE, L_INSERT, PIX_CLR, pixBackgroundNormTo1MinMax(), pixFindPageInsideBlackBorder(), pixMaxCompAfterVClosing(), and pixRescaleForCropping().

◆ pixDecideIfTable()

l_ok pixDecideIfTable ( PIX * pixs,
BOX * box,
l_int32 orient,
l_int32 * pscore,
PIXA * pixadb )

pixDecideIfTable()

Parameters
[in]pixsany depth, any resolution >= 75 ppi
[in]box[optional] if null, use entire pixs
[in]orientL_PORTRAIT_MODE, L_LANDSCAPE_MODE
[out]pscore0 - 4; -1 if not determined
[in]pixadb[optional] pre-allocated, for showing intermediate computation; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is assumed that pixs has the correct resolution set.
         If the resolution is 0, we assume it is 300 ppi and issue a warning.
     (2) If orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
         clockwise before being analyzed.
     (3) The interpretation of the returned score:
           -1     undetermined
            0     no table
            1     unlikely to have a table
            2     likely to have a table
            3     even more likely to have a table
            4     extremely likely to have a table
         * Setting the condition for finding a table at score >= 2 works
           well, except for false positives on kanji and landscape text.
         * These false positives can be removed by setting the condition
           at score >= 3, but recall is lowered because it will not find
           tables without either horizontal or vertical lines.
     (4) Most of the processing takes place at 75 ppi.
     (5) Internally, three numbers are determined, for horizontal and
         vertical fg lines, and for vertical bg lines.  From these,
         four tests are made to decide if there is a table occupying
         a significant part of the image.
     (6) Images have arbitrary content and would be likely to trigger
         this detector, so they are checked for first, and if found,
         return with a 0 (no table) score.
     (7) Musical scores (tablature) are likely to trigger the detector.
     (8) Tables of contents with more than 2 columns are likely to
         trigger the detector.
     (9) For debug output, input a pre-allocated pixa.

Definition at line 2159 of file pageseg.c.

References L_COPY, L_INSERT, L_LANDSCAPE_MODE, L_SELECT_IF_GTE, L_SELECT_WIDTH, pixGenerateHalftoneMask(), and pixPrepare1bpp().

◆ pixDecideIfText()

l_ok pixDecideIfText ( PIX * pixs,
BOX * box,
l_int32 * pistext,
PIXA * pixadb )

pixDecideIfText()

Parameters
[in]pixsany depth
[in]box[optional] if null, use entire pixs
[out]pistext1 if text; 0 if photo; -1 if not determined or empty
[in]pixadb[optional] pre-allocated, for showing intermediate computation; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is assumed that pixs has the correct resolution set.
         If the resolution is 0, we set to 300 and issue a warning.
     (2) If necessary, the image is scaled to 300 ppi; most of the
         processing is done at this resolution.
     (3) Text is assumed to be in horizontal lines.
     (4) Because thin vertical lines are removed before filtering for
         text lines, this should identify tables as text.
     (5) If box is null and pixs contains both text lines and line art,
         this function might return istext == true.
     (6) If the input pixs is empty, or for some other reason the
         result can not be determined, return -1.
     (7) For debug output, input a pre-allocated pixa.

Definition at line 1907 of file pageseg.c.

References L_ADD_BELOW, L_SELECT_HEIGHT, L_SELECT_IF_BOTH, L_SELECT_IF_GT, L_SELECT_IF_GTE, L_SELECT_IF_LTE, L_SELECT_WIDTH, L_SORT_BY_WIDTH, L_SORT_DECREASING, pixFindThreshFgExtent(), and pixPrepare1bpp().

◆ pixEstimateBackground()

l_ok pixEstimateBackground ( PIX * pixs,
l_int32 darkthresh,
l_float32 edgecrop,
l_int32 * pbg )

pixEstimateBackground()

Parameters
[in]pixs8 bpp, with or without colormap
[in]darkthreshpixels below this value are never considered part of the background; typ. 70; use 0 to skip
[in]edgecropfraction of half-width on each side, and of half-height at top and bottom, that are cropped
[out]pbgestimated background, or 0 on error
Returns
0 if OK, 1 on error
Notes:
     (1) Caller should check that return bg value is > 0.

Definition at line 2390 of file pageseg.c.

References REMOVE_CMAP_TO_GRAYSCALE.

◆ pixExtractRawTextlines()

PIXA * pixExtractRawTextlines ( PIX * pixs,
l_int32 maxw,
l_int32 maxh,
l_int32 adjw,
l_int32 adjh,
PIXA * pixadb )

pixExtractRawTextlines()

Parameters
[in]pixsany depth, assumed to have nearly horizontal text
[in] maxw, maxh: initial filtering: remove any components in pixs with dimensions larger than maxw or maxh; use 0 for default values.
[in]adjw,adjhfinal adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount.
[in]pixadbpixa for saving intermediate steps; NULL to omit
Returns
pixa of textline images, including bounding boxes, or NULL on error
Notes:
     (1) This function assumes that textlines have sufficient
         vertical separation and small enough skew so that a
         horizontal dilation sufficient to join words will not join
         textlines.  It aggressively joins textlines across multiple
         columns, so if that is not desired, you must either (a) make
         sure that pixs is a single column of text or (b) use instead
         pixExtractTextlines(), which is more conservative
         about joining text fragments that have vertical overlap.
     (2) This first removes components from pixs that are either
         very wide (> maxw) or very tall (> maxh).
     (3) For reasonable accuracy, the resolution of pixs should be
         at least 100 ppi.  For reasonable efficiency, the resolution
         should not exceed 600 ppi.
     (4) This can be used to determine if some region of a scanned
         image is horizontal text.
     (5) As an example, for a pix with resolution 300 ppi, a reasonable
         set of parameters is:
            pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
     (6) The output pixa is composed of subimages, one for each textline,
         and the boxa in the pixa tells where in pixs each textline goes.

Definition at line 1635 of file pageseg.c.

References L_COPY, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_LT, and pixCleanBackgroundToWhite().

◆ pixExtractTextlines()

PIXA * pixExtractTextlines ( PIX * pixs,
l_int32 maxw,
l_int32 maxh,
l_int32 minw,
l_int32 minh,
l_int32 adjw,
l_int32 adjh,
PIXA * pixadb )

pixExtractTextlines()

Parameters
[in]pixsany depth, assumed to have nearly horizontal text
[in] maxw, maxh: initial filtering: remove any components in pixs with dimensions larger than maxw or maxh
[in]minw,minhfinal filtering: remove extracted 'lines' with sizes smaller than minw or minh; use 0 for default.
[in]adjw,adjhfinal adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount.
[in]pixadbpixa for saving intermediate steps; NULL to omit
Returns
pixa of textline images, including bounding boxes, or NULL on error
Notes:
     (1) This function assumes that textline fragments have sufficient
         vertical separation and small enough skew so that a
         horizontal dilation sufficient to join words will not join
         textlines.  It does not guarantee that horizontally adjacent
         textline fragments on the same line will be joined.
     (2) For images with multiple columns, it attempts to avoid joining
         textlines across the space between columns.  If that is not
         a concern, you can also use pixExtractRawTextlines(),
         which will join them with alacrity.
     (3) This first removes components from pixs that are either
         wide (> maxw) or tall (> maxh).
     (4) A final filtering operation removes small components, such
         that width < minw or height < minh.
     (5) For reasonable accuracy, the resolution of pixs should be
         at least 100 ppi.  For reasonable efficiency, the resolution
         should not exceed 600 ppi.
     (6) This can be used to determine if some region of a scanned
         image is horizontal text.
     (7) As an example, for a pix with resolution 300 ppi, a reasonable
         set of parameters is:
            pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
         The defaults minw and minh for 300 ppi are about 36 and 20,
         so the same result is obtained with:
            pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
     (8) The output pixa is composed of subimages, one for each textline,
         and the boxa in the pixa tells where in pixs each textline goes.

Definition at line 1497 of file pageseg.c.

References Pixa::boxa, L_CLONE, L_COPY, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, L_SELECT_IF_LT, and pixCleanBackgroundToWhite().

◆ pixFindLargeRectangles()

l_ok pixFindLargeRectangles ( PIX * pixs,
l_int32 polarity,
l_int32 nrect,
BOXA ** pboxa,
PIX ** ppixdb )

pixFindLargeRectangles()

Parameters
[in]pixs1 bpp
[in]polarity0 within background, 1 within foreground
[in]nrectnumber of rectangles to be found
[out]pboxalargest rectangles, sorted by decreasing area
[in,out]ppixdboptional return output with rectangles drawn on it
Returns
0 if OK, 1 on error
Notes:
     (1) This does a greedy search to find the largest rectangles,
         either black or white and without overlaps, in pix.
     (2) See pixFindLargestRectangle(), which is called multiple
         times, for details.  On each call, the largest rectangle
         found is painted, so that none of its pixels can be
         used later, before calling it again.
     (3) This function is surprisingly fast.  Although
         pixFindLargestRectangle() runs at about 50 MPix/sec, when it
         is run multiple times by pixFindLargeRectangles(), it processes
         at 150 - 250 MPix/sec, and the time is approximately linear
         in nrect.  For example, for a 1 MPix image, searching for
         the largest 50 boxes takes about 0.2 seconds.

Definition at line 2472 of file pageseg.c.

References L_INSERT, PIX_CLR, PIX_SET, and pixFindLargestRectangle().

◆ pixFindLargestRectangle()

l_ok pixFindLargestRectangle ( PIX * pixs,
l_int32 polarity,
BOX ** pbox,
PIX ** ppixdb )

pixFindLargestRectangle()

Parameters
[in]pixs1 bpp
[in]polarity0 within background, 1 within foreground
[out]pboxlargest area rectangle
[in,out]ppixdboptional return output with rectangle drawn on it
Returns
0 if OK, 1 on error
Notes:
     (1) This is a simple and elegant solution to a problem in
         computational geometry that at first appears to be quite
         difficult: what is the largest rectangle that can be
         placed in the image, covering only pixels of one polarity
         (bg or fg)?  The solution is O(n), where n is the number
         of pixels in the image, and it requires nothing more than
         using a simple recursion relation in a single sweep of the image.
     (2) In a sweep from UL to LR with left-to-right being the fast
         direction, calculate the largest white rectangle at (x, y),
         using previously calculated values at pixels #1 and #2:
            #1:    (x, y - 1)
            #2:    (x - 1, y)
         We also need the most recent "black" pixels that were seen
         in the current row and column.
         Consider the largest area.  There are only two possibilities:
            (a)  Min(w(1), horizdist) * (h(1) + 1)
            (b)  Min(h(2), vertdist) * (w(2) + 1)
         where
            horizdist: the distance from the rightmost "black" pixel seen
                       in the current row across to the current pixel
            vertdist: the distance from the lowest "black" pixel seen
                      in the current column down to the current pixel
         and we choose the Max of (a) and (b).
     (3) To convince yourself that these recursion relations are correct,
         it helps to draw the maximum rectangles at #1 and #2.
         Then for #1, you try to extend the rectangle down one line,
         so that the height is h(1) + 1.  Do you get the full
         width of #1, w(1)?  It depends on where the black pixels are
         in the current row.  You know the final width is bounded by w(1)
         and w(2) + 1, but the actual value depends on the distribution
         of black pixels in the current row that are at a distance
         from the current pixel that is between these limits.
         We call that value "horizdist", and the area is then given
         by the expression (a) above.  Using similar reasoning for #2,
         where you attempt to extend the rectangle to the right
         by 1 pixel, you arrive at (b).  The largest rectangle is
         then found by taking the Max.

Definition at line 2573 of file pageseg.c.

References GET_DATA_BIT, and L_NEG_SLOPE_LINE.

Referenced by pixFindLargeRectangles().

◆ pixFindPageForeground()

BOX * pixFindPageForeground ( PIX * pixs,
l_int32 threshold,
l_int32 mindist,
l_int32 erasedist,
l_int32 showmorph,
PIXAC * pixac )

pixFindPageForeground()

Parameters
[in]pixsfull resolution (any type or depth)
[in]thresholdfor binarization; typically about 128
[in]mindistmin distance of text from border to allow cleaning near border; at 2x reduction, this should be larger than 50; typically about 70
[in]erasedistwhen conditions are satisfied, erase anything within this distance of the edge; typically 20-30 at 2x reduction
[in]showmorphdebug: set to a negative integer to show steps in generating masks; this is typically used for debugging region extraction
[in]pixacdebug: allocate outside and pass this in to accumulate results of each call to this function, which can be displayed in a mosaic or a pdf.
Returns
box region including foreground, with some pixel noise removed, or NULL if not found
Notes:
     (1) This doesn't simply crop to the fg.  It attempts to remove
         pixel noise and junk at the edge of the image before cropping.
         The input threshold is used if pixs is not 1 bpp.
     (2) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (3) Debug: set showmorph to display the intermediate image in
         the morphological operations on this page.
     (4) Debug: to get pdf output of results when called repeatedly,
         call with an existing pixac, which will add an image of this page,
         with the fg outlined.  If no foreground is found, there is
         no output for this page image.

Definition at line 1118 of file pageseg.c.

References L_CLONE, L_SORT_BY_AREA, L_SORT_DECREASING, and PIX_CLR.

◆ pixFindPageInsideBlackBorder()

static l_ok pixFindPageInsideBlackBorder ( PIX * pixs,
BOX ** pbox )
static

pixFindPageInsideBlackBorder()

Parameters
[in] pixs — 1 bpp (input at 2x reduction)
[out] **pbox — page region at input resolution (2x reduction)
Returns
0 if OK, 1 on error
Notes:
     (1) This extracts the page region from the image.  It is designed
         to work when the page is within a fairly solid black border.
     (2) It returns a bounding box for the page region at the input res.
     (3) The input pixs is expected to be at a resolution 100 - 150 ppi.
     (4) This is used as an option to pixCropImage(), when given an
         edgeclean parameter of -2.

Definition at line 843 of file pageseg.c.

References L_COPY, L_SORT_BY_AREA, and L_SORT_DECREASING.

Referenced by pixCropImage().

◆ pixFindThreshFgExtent()

l_ok pixFindThreshFgExtent ( PIX * pixs,
l_int32 thresh,
l_int32 * ptop,
l_int32 * pbot )

pixFindThreshFgExtent()

Parameters
[in] pixs — 1 bpp
[in] thresh — threshold number of pixels in row
[out] ptop — [optional] location of top of region
[out] pbot — [optional] location of bottom of region
Returns
0 if OK, 1 on error

Definition at line 2071 of file pageseg.c.

Referenced by pixDecideIfText().

◆ pixGenerateHalftoneMask()

PIX * pixGenerateHalftoneMask ( PIX * pixs,
PIX ** ppixtext,
l_int32 * phtfound,
PIXA * pixadb )

pixGenerateHalftoneMask()

Parameters
[in] pixs — 1 bpp, assumed to be 150 to 200 ppi
[out] ppixtext — [optional] text part of pixs
[out] phtfound — [optional] 1 if the mask is not empty
[in] pixadb — input for collecting debug pix; use NULL to skip
Returns
pixd halftone mask, or NULL on error
Notes:
     (1) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.

Definition at line 315 of file pageseg.c.

References L_COPY.

Referenced by pixAutoPhotoinvert(), pixDecideIfTable(), pixGenHalftoneMask(), and pixGetRegionsBinary().

◆ pixGenHalftoneMask()

PIX * pixGenHalftoneMask ( PIX * pixs,
PIX ** ppixtext,
l_int32 * phtfound,
l_int32 debug )

pixGenHalftoneMask()

Deprecated:
  This wrapper avoids an ABI change with tesseract 3.0.4.
  It should be removed when we no longer need to support 3.0.4.
  The debug parameter is ignored (assumed 0).

Definition at line 290 of file pageseg.c.

References pixGenerateHalftoneMask().

◆ pixGenTextblockMask()

PIX * pixGenTextblockMask ( PIX * pixs,
PIX * pixvws,
PIXA * pixadb )

pixGenTextblockMask()

Parameters
[in] pixs — 1 bpp, textline mask, assumed to be 150 to 200 ppi
[in] pixvws — vertical white space mask
[in] pixadb — input for collecting debug pix; use NULL to skip
Returns
pixd textblock mask, or NULL if empty or on error
Notes:
     (1) Both the input masks (textline and vertical white space) and
         the returned textblock mask are at the same resolution.
     (2) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (3) The result is somewhat noisy, in that small "blocks" of
         text may be included.  These can be removed by post-processing,
         using, e.g.,
            pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
                            L_SELECT_IF_GTE, NULL);

Definition at line 486 of file pageseg.c.

References L_COPY, L_SELECT_IF_BOTH, and L_SELECT_IF_GTE.

Referenced by pixGetRegionsBinary().

◆ pixGenTextlineMask()

PIX * pixGenTextlineMask ( PIX * pixs,
PIX ** ppixvws,
l_int32 * ptlfound,
PIXA * pixadb )

pixGenTextlineMask()

Parameters
[in] pixs — 1 bpp, assumed to be 150 to 200 ppi
[out] ppixvws — vertical whitespace mask
[out] ptlfound — [optional] 1 if the mask is not empty
[in] pixadb — input for collecting debug pix; use NULL to skip
Returns
pixd textline mask, or NULL on error
Notes:
     (1) The input pixs should be deskewed.
     (2) pixs should have no halftone pixels.
     (3) This is not intended to work on small thumbnails.  The
         dimensions of pixs must be at least MinWidth x MinHeight.
     (4) Both the input image and the returned textline mask
         are at the same resolution.

Definition at line 396 of file pageseg.c.

References L_COPY.

Referenced by pixGetRegionsBinary().

◆ pixGetRegionsBinary()

l_ok pixGetRegionsBinary ( PIX * pixs,
PIX ** ppixhm,
PIX ** ppixtm,
PIX ** ppixtb,
PIXA * pixadb )

pixGetRegionsBinary()

Parameters
[in] pixs — 1 bpp, assumed to be 300 to 400 ppi
[out] ppixhm — [optional] halftone mask
[out] ppixtm — [optional] textline mask
[out] ppixtb — [optional] textblock mask
[in] pixadb — input for collecting debug pix; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) It is best to deskew the image before segmenting.
     (2) Passing in pixadb enables debug output.

Definition at line 124 of file pageseg.c.

References L_COPY, L_INSERT, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, pixGenerateHalftoneMask(), pixGenTextblockMask(), and pixGenTextlineMask().

◆ pixMaxCompAfterVClosing()

static l_ok pixMaxCompAfterVClosing ( PIX * pixs,
BOX ** pbox )
static

pixMaxCompAfterVClosing()

Parameters
[in] pixs — 1 bpp (input at 2x reduction)
[out] **pbox — main region at input resolution (2x reduction)
Returns
0 if OK, 1 on error
Notes:
     (1) This removes foreground noise along left and right edges,
         returning a bounding box for the remaining foreground pixels
         at the input resolution.
     (2) The input pixs should be at a resolution 100 - 150 ppi.
     (3) It does two 2x level1 rank binary reductions, followed
         by a large vertical close/open, with a very small horizontal
         close/open, and then a 4x expansion back to the input resolution.
     (4) To work properly with 2-column layout, if the largest and
         second-largest regions are comparable in size, both are included.
     (5) This is used as an option to pixCropImage(), when given
         an edgeclean parameter of -1.

Definition at line 776 of file pageseg.c.

References L_COPY, L_SORT_BY_AREA, and L_SORT_DECREASING.

Referenced by pixCropImage().

◆ pixPrepare1bpp()

PIX * pixPrepare1bpp ( PIX * pixs,
BOX * box,
l_float32 cropfract,
l_int32 outres )

pixPrepare1bpp()

Parameters
[in] pixs — any depth
[in] box — [optional] if null, use entire pixs
[in] cropfract — fraction to be removed from the boundary; use 0.0 to retain the entire image
[in] outres — desired resolution of output image; if the input image resolution is not set, assume 300 ppi; use 0 to skip scaling.
Returns
pixd if OK, NULL on error
Notes:
     (1) This handles some common pre-processing operations,
         where the page segmentation algorithm takes a 1 bpp image.

Definition at line 2307 of file pageseg.c.

References pixCleanBackgroundToWhite().

Referenced by pixDecideIfTable(), and pixDecideIfText().

◆ pixRescaleForCropping()

static PIX * pixRescaleForCropping ( PIX * pixs,
l_int32 w,
l_int32 h,
l_int32 lr_border,
l_int32 tb_border,
l_float32 maxwiden,
PIX ** ppixsc )
static

pixRescaleForCropping()

Parameters
[in] pixs — 1 bpp
[in] w — width of output image
[in] h — height of output image
[in] lr_border — cleared final border pixels on left and right
[in] tb_border — cleared final border pixels on top and bottom
[in] maxwiden — max fractional horizontal stretch allowed; >= 1.0
[out] *ppixsc — [optional] rescaled foreground region
Returns
pixd output image, or NULL on error
Notes:
     (1) This rescales pixs to fit maximally within an image of
         size (w x h), under two conditions:
         (a) the final image has cleared border regions given by the
             input parameters lr_border and tb_border, and
         (b) the input image is first isotropically scaled to fit
             maximally within the allowed final region, and then further
             maximally widened, subject to the constraints of the
             cleared border and the maxwiden parameter.
     (2) The cleared border pixel parameters must be >= 0.
     (3) If there is extra horizontal stretching by a factor
         maxwiden larger than about 1.15, the appearance may be
         unpleasingly distorted; hence the suggestion not to exceed it.

Definition at line 911 of file pageseg.c.

References PIX_SRC.

Referenced by pixCropImage().

◆ pixSplitComponentWithProfile()

BOXA * pixSplitComponentWithProfile ( PIX * pixs,
l_int32 delta,
l_int32 mindel,
PIX ** ppixdebug )

pixSplitComponentWithProfile()

Parameters
[in] pixs — 1 bpp, exactly one connected component
[in] delta — distance used in extrema finding in a numa; typ. 10
[in] mindel — minimum required difference between profile minimum and profile values +2 and -2 away; typ. 7
[out] ppixdebug — [optional] debug image of splitting
Returns
boxa of c.c. after splitting, or NULL on error
Notes:
     (1) This will split the most obvious cases of touching characters.
         The split points it is searching for are narrow and deep
         minima in the vertical pixel projection profile, after a
         large vertical closing has been applied to the component.

Definition at line 1343 of file pageseg.c.

References L_CLONE, and L_INSERT.

Referenced by pixSplitIntoCharacters().

◆ pixSplitIntoCharacters()

l_ok pixSplitIntoCharacters ( PIX * pixs,
l_int32 minw,
l_int32 minh,
BOXA ** pboxa,
PIXA ** ppixa,
PIX ** ppixdebug )

pixSplitIntoCharacters()

Parameters
[in] pixs — 1 bpp, contains only deskewed text
[in] minw — min component width for initial filtering; typ. 4
[in] minh — min component height for initial filtering; typ. 4
[out] pboxa — [optional] character bounding boxes
[out] ppixa — [optional] character images
[out] ppixdebug — [optional] showing splittings
Returns
0 if OK, 1 on error
Notes:
     (1) This is a simple function that attempts to find split points
         based on vertical pixel profiles.
     (2) It should be given an image that has an arbitrary number
         of text characters.
     (3) The returned pixa includes the boxes from which the
         (possibly split) components are extracted.

Definition at line 1244 of file pageseg.c.

References L_CLONE, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_GT, and pixSplitComponentWithProfile().

Variable Documentation

◆ MinHeight

const l_int32 MinHeight = 100
static

Definition at line 96 of file pageseg.c.

◆ MinWidth

const l_int32 MinWidth = 100
static

Definition at line 95 of file pageseg.c.