![]() |
Leptonica 1.85.0
Image processing and image analysis suite
|
Go to the source code of this file.
Functions | |
| static l_ok | pixMaxCompAfterVClosing (PIX *pixs, BOX **pbox) |
| static l_ok | pixFindPageInsideBlackBorder (PIX *pixs, BOX **pbox) |
| static PIX * | pixRescaleForCropping (PIX *pixs, l_int32 w, l_int32 h, l_int32 lr_border, l_int32 tb_border, l_float32 maxwiden, PIX **ppixsc) |
| l_ok | pixGetRegionsBinary (PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb) |
| PIX * | pixGenHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug) |
| PIX * | pixGenerateHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb) |
| PIX * | pixGenTextlineMask (PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb) |
| PIX * | pixGenTextblockMask (PIX *pixs, PIX *pixvws, PIXA *pixadb) |
| PIX * | pixCropImage (PIX *pixs, l_int32 lr_clear, l_int32 tb_clear, l_int32 edgeclean, l_int32 lr_border, l_int32 tb_border, l_float32 maxwiden, l_int32 printwiden, const char *debugfile, BOX **pcropbox) |
| PIX * | pixCleanImage (PIX *pixs, l_int32 contrast, l_int32 rotation, l_int32 scale, l_int32 opensize) |
| BOX * | pixFindPageForeground (PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac) |
| l_ok | pixSplitIntoCharacters (PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug) |
| BOXA * | pixSplitComponentWithProfile (PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug) |
| PIXA * | pixExtractTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb) |
| PIXA * | pixExtractRawTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb) |
| l_ok | pixCountTextColumns (PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb) |
| l_ok | pixDecideIfText (PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb) |
| l_ok | pixFindThreshFgExtent (PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot) |
| l_ok | pixDecideIfTable (PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb) |
| PIX * | pixPrepare1bpp (PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres) |
| l_ok | pixEstimateBackground (PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg) |
| l_ok | pixFindLargeRectangles (PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb) |
| l_ok | pixFindLargestRectangle (PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb) |
| PIX * | pixAutoPhotoinvert (PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb) |
Variables | |
| static const l_int32 | MinWidth = 100 |
| static const l_int32 | MinHeight = 100 |
Top level page segmentation
l_int32 pixGetRegionsBinary()
Halftone region extraction
PIX *pixGenHalftoneMask() **Deprecated wrapper**
PIX *pixGenerateHalftoneMask()
Textline extraction
PIX *pixGenTextlineMask()
Textblock extraction
PIX *pixGenTextblockMask()
Location and extraction of page foreground; cleaning pages
PIX *pixCropImage()
static l_int32 pixMaxCompAfterVClosing()
static l_int32 pixFindPageInsideBlackBorder()
static PIX *pixRescaleForCropping()
PIX *pixCleanImage()
BOX *pixFindPageForeground()
Extraction of characters from image with only text
l_int32 pixSplitIntoCharacters()
BOXA *pixSplitComponentWithProfile()
Extraction of lines of text
PIXA *pixExtractTextlines()
PIXA *pixExtractRawTextlines()
How many text columns
l_int32 pixCountTextColumns()
Decision: text vs photo
l_int32 pixDecideIfText()
l_int32 pixFindThreshFgExtent()
Decision: table vs text
l_int32 pixDecideIfTable()
PIX *pixPrepare1bpp()
Estimate the grayscale background value
l_int32 pixEstimateBackground()
Largest white or black rectangles in an image
l_int32 pixFindLargeRectangles()
l_int32 pixFindLargestRectangle()
Generate rectangle inside connected component
BOX *pixFindRectangleInCC()
Automatic photoinvert for OCR
PIX *pixAutoPhotoinvert()
Definition in file pageseg.c.
pixFindRectangleInCC()
| [in] | pixs | 1 bpp, with sufficient closings to make the fg be a single c.c. that is a convex hull |
| [in] | boxs | [optional] if NULL, pixs should be a minimum container of a single c.c. |
| [in] | fract | first and all consecutive lines found must be at least this fraction of the fast scan dimension |
| [in] | dir | L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of fast scan |
| [in] | select | L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, L_LARGEST_AREA, L_SMALLEST_AREA |
| [in] | debug | if 1, generates output pdf showing intermediate computation and final result |
Notes:
(1) Computation is similar to pixFindLargestRectangle(), but allows
a different set of results to choose from.
(2) Select the fast scan direction. Then, scanning in the slow
direction, find the longest run of ON pixels in the fast
scan direction and look for the first run that is longer
than fract of the dimension. Continue until a shorter run
is found. This generates a box of ON pixels fitting into the c.c.
(3) Do this from both slow scan directions and use select to get
a resulting box from these two.
(4) The extracted rectangle is not necessarily the largest that
can fit in the c.c. To get that, use pixFindLargestRectangle().
*/
/*
 * pixFindRectangleInCC()
 *
 *   Finds a rectangle of ON pixels inside the (single) connected
 *   component of a 1 bpp image.
 *
 *   pixs     1 bpp
 *   boxs     [optional] region of pixs to use; NULL to use all of pixs
 *   fract    in (0.0 ... 1.0]; the first accepted scanline must have a
 *            run of ON pixels at least this fraction of the fast scan
 *            dimension
 *   dir      L_SCAN_HORIZONTAL or L_SCAN_VERTICAL (fast scan direction)
 *   select   L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION,
 *            L_LARGEST_AREA or L_SMALLEST_AREA; how the two candidate
 *            boxes (scanning down, scanning up) are combined
 *   debug    nonzero to write debug images under /tmp/lept/rect/
 *
 *   Returns the selected box in the coordinate system of pixs, or NULL
 *   on invalid input, if no scanline has a long enough run, or if the
 *   selection step yields no box.
 */
BOX *
pixFindRectangleInCC(PIX       *pixs,
                     BOX       *boxs,
                     l_float32  fract,
                     l_int32    dir,
                     l_int32    select,
                     l_int32    debug)
{
l_int32  x, y, i, w, h, w1, h1, w2, h2, found, res, minrun;
l_int32  xfirst, xlast, xstart, yfirst, ylast, length;
BOX     *boxc, *box1, *box2, *box3, *box4, *box5;
PIX     *pix1, *pix2, *pixdb1, *pixdb2;
PIXA    *pixadb;

    if (!pixs || pixGetDepth(pixs) != 1)
        return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
    if (fract <= 0.0 || fract > 1.0)
        return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
    if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
        return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
    if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
        select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
        return (BOX *)ERROR_PTR("invalid select", __func__, NULL);

        /* Extract the c.c. if necessary.  The offset (x, y) used later
         * to map the result back into pixs coordinates is taken from
         * the box that was actually clipped, not from boxs itself, so
         * that a boxs extending beyond the top or left edge of pixs
         * does not shift the returned box. */
    x = y = 0;
    if (boxs) {
        boxc = NULL;
        pix1 = pixClipRectangle(pixs, boxs, &boxc);
        if (pix1)
            boxGetGeometry(boxc, &x, &y, NULL, NULL);
        boxDestroy(&boxc);
    } else {
        pix1 = pixClone(pixs);
    }
    if (!pix1)
        return (BOX *)ERROR_PTR("pix1 not made", __func__, NULL);

        /* All fast scans are horizontal; rotate 90 deg cw if necessary */
    if (dir == L_SCAN_VERTICAL)
        pix2 = pixRotate90(pix1, 1);
    else  /* L_SCAN_HORIZONTAL */
        pix2 = pixClone(pix1);
    if (!pix2) {
        pixDestroy(&pix1);
        return (BOX *)ERROR_PTR("pix2 not made", __func__, NULL);
    }
    pixGetDimensions(pix2, &w, &h, NULL);

        /* Minimum acceptable run length; invariant over all scanlines */
    minrun = (l_int32)(fract * w + 0.5);

    pixadb = (debug) ? pixaCreate(0) : NULL;
    pixdb1 = NULL;
    if (pixadb) {
        lept_mkdir("lept/rect");
        pixaAddPix(pixadb, pix1, L_CLONE);
        pixdb1 = pixConvertTo32(pix2);
    }
    pixDestroy(&pix1);

        /* Scanning down, find the first scanline with a long enough run.
         * That run goes from (xfirst, yfirst) to (xlast, yfirst). */
    found = FALSE;
    xfirst = xlast = yfirst = 0;
    xstart = length = 0;
    for (i = 0; i < h; i++) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (length >= minrun) {
            yfirst = i;
            xfirst = xstart;
            xlast = xfirst + length - 1;
            found = TRUE;
            break;
        }
    }
    if (!found) {
        L_WARNING("no run of sufficient size was found\n", __func__);
        pixDestroy(&pix2);
        pixDestroy(&pixdb1);
        pixaDestroy(&pixadb);
        return NULL;
    }

        /* Continue down until the longest run on a line no longer
         * covers [xfirst ... xlast].
         * NOTE(review): the (i == h - 1) test ends the box one row
         * above the bottom row even when that row still qualifies;
         * this looks like an off-by-one but is kept as-is to preserve
         * existing results -- confirm intent. */
    w1 = xlast - xfirst + 1;
    h1 = h - yfirst;  /* init; used only when yfirst is the last row */
    ylast = h - 1;  /* init */
    for (i = yfirst + 1; i < h; i++) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
            i == h - 1) {
            ylast = i - 1;
            h1 = ylast - yfirst + 1;
            break;
        }
    }
    box1 = boxCreate(xfirst, yfirst, w1, h1);

        /* Scanning up, find the first scanline with a long enough run.
         * That run goes from (xfirst, ylast) to (xlast, ylast).
         * Because the downward scan succeeded, at least one row
         * satisfies this test. */
    for (i = h - 1; i >= 0; i--) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (length >= minrun) {
            ylast = i;
            xfirst = xstart;
            xlast = xfirst + length - 1;
            break;
        }
    }

        /* Continue up until the condition fails.
         * NOTE(review): symmetric to the downward pass, the (i == 0)
         * test always excludes the top row -- confirm intent.
         * If this loop does not execute (ylast == 0), yfirst keeps the
         * value from the downward scan, which is then also 0. */
    w2 = xlast - xfirst + 1;
    h2 = ylast + 1;  /* initialize */
    for (i = ylast - 1; i >= 0; i--) {
        pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
        if (xstart > xfirst || (xstart + length - 1 < xlast) ||
            i == 0) {
            yfirst = i + 1;
            h2 = ylast - yfirst + 1;
            break;
        }
    }
    box2 = boxCreate(xfirst, yfirst, w2, h2);
    pixDestroy(&pix2);

    if (pixadb) {
        pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
        pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
        pixaAddPix(pixadb, pixdb1, L_INSERT);  /* pixadb now owns pixdb1 */
    }

        /* Select the final result from the two boxes */
    if (select == L_GEOMETRIC_UNION)
        box3 = boxBoundingRegion(box1, box2);
    else if (select == L_GEOMETRIC_INTERSECTION)
        box3 = boxOverlapRegion(box1, box2);
    else if (select == L_LARGEST_AREA)
        box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
    else  /* select == L_SMALLEST_AREA) */
        box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
    boxDestroy(&box1);
    boxDestroy(&box2);

        /* Rotate the box 90 degrees ccw if necessary, undoing the
         * cw rotation applied to the image for vertical scans */
    box4 = NULL;
    if (box3) {
        if (dir == L_SCAN_VERTICAL)
            box4 = boxRotateOrth(box3, w, h, 3);
        else
            box4 = boxCopy(box3);
    }

        /* Transform back to global coordinates if boxs exists;
         * (x, y) is (0, 0) when boxs is NULL */
    box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
    boxDestroy(&box3);
    boxDestroy(&box4);

        /* Debug output */
    if (pixadb) {
        pixdb1 = pixConvertTo8(pixs, 0);
        pixAddConstantGray(pixdb1, 190);
        pixdb2 = pixConvertTo32(pixdb1);
        if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
        pixaAddPix(pixadb, pixdb2, L_INSERT);
        res = pixGetXRes(pixs);
        L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
        pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
                         "/tmp/lept/rect/fitrect.pdf");
        pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
        pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
        pixDestroy(&pix1);
        pixDestroy(&pixdb1);
        pixaDestroy(&pixadb);
    }

    return box5;
}
/*------------------------------------------------------------------*
Automatic photoinvert for OCR *
*------------------------------------------------------------------*/
/*!
pixAutoPhotoinvert()
| [in] | pixs | any depth, colormap ok |
| [in] | thresh | binarization threshold; use 0 for default |
| [out] | ppixm | [optional] image regions to be inverted |
| [out] | pixadb | [optional] debug; input NULL to skip |
Notes:
(1) A 1 bpp image is returned, where pixels in image regions are
photo-inverted.
(2) If there is light text with a dark background, this will
identify the region and photoinvert the pixels there if
there are at least 60% fg pixels in the region.
(3) For debug output, input a (typically empty) pixadb.
Definition at line 2910 of file pageseg.c.
References L_CLONE, L_COPY, PIX_CLR, and pixGenerateHalftoneMask().
| PIX * pixCleanImage | ( | PIX * | pixs, |
| l_int32 | contrast, | ||
| l_int32 | rotation, | ||
| l_int32 | scale, | ||
| l_int32 | opensize ) |
| [in] | pixs | full resolution (any type or depth) |
| [in] | contrast | vary contrast: 1 = lightest; 10 = darkest; suggest 1 unless light features are being lost |
| [in] | rotation | cw by 90 degrees: {0,1,2,3} represent 0, 90, 180 and 270 degree cw rotations |
| [in] | scale | 1 (no scaling) or 2 (2x upscaling) |
| [in] | opensize | opening size of structuring element for noise removal: {0 or 1 to skip; 2, 3 for opening} |
Notes:
(1) This deskews, optionally rotates and darkens, cleans background
to white, binarizes and optionally removes small noise.
(2) For color and grayscale input, local background normalization is
done to 200, and a threshold of 180 sets the maximum foreground
value in the normalized image.
(3) The contrast parameter adjusts the binarization to avoid losing
lighter input pixels. Contrast is increased as contrast increases
from 1 to 10.
(4) The scale parameter controls the thresholding to 1 bpp. Two values:
1 = threshold
2 = linear interpolated 2x upscaling before threshold.
(5) The #opensize parameter is the size of a square SEL used with
opening to remove small speckle noise. Allowed open sizes are 2,3.
If this is to be used, try 2 before 3.
(6) This does the image processing for cleanTo1bppFilesToPdf() and
prog/cleanpdf.c.
Definition at line 1015 of file pageseg.c.
References pixBackgroundNormTo1MinMax(), and pixConvertTo8MinMax().
| l_ok pixCountTextColumns | ( | PIX * | pixs, |
| l_float32 | deltafract, | ||
| l_float32 | peakfract, | ||
| l_float32 | clipfract, | ||
| l_int32 * | pncols, | ||
| PIXA * | pixadb ) |
| [in] | pixs | 1 bpp |
| [in] | deltafract | fraction of (max - min) to be used in the delta for extrema finding; typ 0.3 |
| [in] | peakfract | fraction of (max - min) to be used to threshold the peak value; typ. 0.5 |
| [in] | clipfract | fraction of image dimension removed on each side; typ. 0.1, which leaves w and h reduced by 0.8 |
| [out] | pncols | number of columns; -1 if not determined |
| [in] | pixadb | [optional] pre-allocated, for showing intermediate computation; use null to skip |
Notes:
(1) It is assumed that pixs has the correct resolution set.
If the resolution is 0, we set to 300 and issue a warning.
(2) If necessary, the image is scaled to between 37 and 75 ppi;
most of the processing is done at this resolution.
(3) If no text is found (essentially a blank page),
this returns ncols = 0.
(4) For debug output, input a pre-allocated pixa.
| PIX * pixCropImage | ( | PIX * | pixs, |
| l_int32 | lr_clear, | ||
| l_int32 | tb_clear, | ||
| l_int32 | edgeclean, | ||
| l_int32 | lr_border, | ||
| l_int32 | tb_border, | ||
| l_float32 | maxwiden, | ||
| l_int32 | printwiden, | ||
| const char * | debugfile, | ||
| BOX ** | pcropbox ) |
| [in] | pixs | full resolution (any type or depth) |
| [in] | lr_clear | full res pixels cleared at left and right sides |
| [in] | tb_clear | full res pixels cleared at top and bottom sides |
| [in] | edgeclean | parameter for removing edge noise (-2 to 15); default = 0 (no removal); 15 is maximally aggressive for random noise; -1 for aggressively removing side noise; -2 to extract page embedded in black background |
| [in] | lr_border | full res final "added" pixels on left and right |
| [in] | tb_border | full res final "added" pixels on top and bottom |
| [in] | maxwiden | max fractional horizontal stretch allowed |
| [in] | printwiden | 0 to skip, 1 for 8.5x11, 2 for A4 |
| [in] | *debugfile | [optional] usually is NULL |
| [out] | *pcropbox | [optional] crop box at full resolution |
Notes:
(1) This binarizes and crops a page image.
(a) Binarizes if necessary and does 2x reduction.
(b) Clears near the border by lr_clear and tb_clear full
resolution pixels. (This is done at 2x reduction.)
(c) If edgeclean > 0, it removes isolated sets of pixels,
using a close/open operation of size edgeclean + 1.
If edgeclean == -1, it uses a large vertical morphological
close/open and the extraction of either the largest
resulting connected component (or the largest two components
if the page has 2 columns), to eliminate noise on left
and right sides.
If edgeclean == -2, it extracts the page region from a
possible exterior black surround.
(d) Find the bounding box of remaining fg pixels and scales
the box up 2x back to full resolution.
(e) Crops the binarized image to the bounding box.
(f) Slightly thickens long horizontal lines.
(g) Rescales this image to fit within the original image,
less lr_border on the sides and tb_border above and below.
The rescaling is done isomorphically with a (possible)
optional additional widening. Suggest the additional
widening factor not exceed 1.15.
(h) Optionally do additional horizontal stretch if needed to
better fill a printed page. Default is 0 to skip; 1 to
widen for 8.5x11 page, 2 for A4 page.
Note that (b) - (d) are done at 2x reduction for efficiency.
(2) Side clearing must not exceed 1/6 of the dimension on that side.
(3) The clear and border pixel parameters must be >= 0.
(4) The "clear" parameters act on the input image, whereas the
"border" parameters act to give a white border to the final
image. They are not literally added, because the input and final
images are the same size. If the resulting images are to be
printed, it is useful to have border pixel parameters of at
least 60 at 300 ppi, to avoid losing content at the edges.
(5) This is not intended to work on small thumbnails. The
dimensions of pixs must be at least MinWidth x MinHeight.
(6) Step (f) above helps with orthographically-produced music notation,
where the horizontal staff lines can be very thin and thus
subject to printer alias.
(7) If you are not concerned with printing on paper, use the
default value 0 for printwiden. Widening only takes place
if the ratio h/w exceeds the specified paper size by 3%,
and the horizontal scaling factor will not exceed 1.25.
Definition at line 605 of file pageseg.c.
References L_COPY, L_DEFAULT_ENCODE, L_INSERT, PIX_CLR, pixBackgroundNormTo1MinMax(), pixFindPageInsideBlackBorder(), pixMaxCompAfterVClosing(), and pixRescaleForCropping().
| [in] | pixs | any depth, any resolution >= 75 ppi |
| [in] | box | [optional] if null, use entire pixs |
| [in] | orient | L_PORTRAIT_MODE, L_LANDSCAPE_MODE |
| [out] | pscore | 0 - 4; -1 if not determined |
| [in] | pixadb | [optional] pre-allocated, for showing intermediate computation; use NULL to skip |
Notes:
(1) It is assumed that pixs has the correct resolution set.
If the resolution is 0, we assume it is 300 ppi and issue a warning.
(2) If orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees
clockwise before being analyzed.
(3) The interpretation of the returned score:
-1 undetermined
0 no table
1 unlikely to have a table
2 likely to have a table
3 even more likely to have a table
4 extremely likely to have a table
* Setting the condition for finding a table at score >= 2 works
well, except for false positives on kanji and landscape text.
* These false positives can be removed by setting the condition
at score >= 3, but recall is lowered because it will not find
tables without either horizontal or vertical lines.
(4) Most of the processing takes place at 75 ppi.
(5) Internally, three numbers are determined, for horizontal and
vertical fg lines, and for vertical bg lines. From these,
four tests are made to decide if there is a table occupying
a significant part of the image.
(6) Images have arbitrary content and would be likely to trigger
this detector, so they are checked for first, and if found,
return with a 0 (no table) score.
(7) Musical scores (tablature) are likely to trigger the detector.
(8) Tables of content with more than 2 columns are likely to
trigger the detector.
(9) For debug output, input a pre-allocated pixa.
Definition at line 2159 of file pageseg.c.
References L_COPY, L_INSERT, L_LANDSCAPE_MODE, L_SELECT_IF_GTE, L_SELECT_WIDTH, pixGenerateHalftoneMask(), and pixPrepare1bpp().
| [in] | pixs | any depth |
| [in] | box | [optional] if null, use entire pixs |
| [out] | pistext | 1 if text; 0 if photo; -1 if not determined or empty |
| [in] | pixadb | [optional] pre-allocated, for showing intermediate computation; use NULL to skip |
Notes:
(1) It is assumed that pixs has the correct resolution set.
If the resolution is 0, we set to 300 and issue a warning.
(2) If necessary, the image is scaled to 300 ppi; most of the
processing is done at this resolution.
(3) Text is assumed to be in horizontal lines.
(4) Because thin vertical lines are removed before filtering for
text lines, this should identify tables as text.
(5) If box is null and pixs contains both text lines and line art,
this function might return istext == true.
(6) If the input pixs is empty, or for some other reason the
result can not be determined, return -1.
(7) For debug output, input a pre-allocated pixa.
Definition at line 1907 of file pageseg.c.
References L_ADD_BELOW, L_SELECT_HEIGHT, L_SELECT_IF_BOTH, L_SELECT_IF_GT, L_SELECT_IF_GTE, L_SELECT_IF_LTE, L_SELECT_WIDTH, L_SORT_BY_WIDTH, L_SORT_DECREASING, pixFindThreshFgExtent(), and pixPrepare1bpp().
| l_ok pixEstimateBackground | ( | PIX * | pixs, |
| l_int32 | darkthresh, | ||
| l_float32 | edgecrop, | ||
| l_int32 * | pbg ) |
| [in] | pixs | 8 bpp, with or without colormap |
| [in] | darkthresh | pixels below this value are never considered part of the background; typ. 70; use 0 to skip |
| [in] | edgecrop | fraction of half-width on each side, and of half-height at top and bottom, that are cropped |
| [out] | pbg | estimated background, or 0 on error |
Notes:
(1) Caller should check that return bg value is > 0.
Definition at line 2390 of file pageseg.c.
References REMOVE_CMAP_TO_GRAYSCALE.
| PIXA * pixExtractRawTextlines | ( | PIX * | pixs, |
| l_int32 | maxw, | ||
| l_int32 | maxh, | ||
| l_int32 | adjw, | ||
| l_int32 | adjh, | ||
| PIXA * | pixadb ) |
| [in] | pixs | any depth, assumed to have nearly horizontal text |
| [in] | maxw,maxh | initial filtering: remove any components in pixs with dimensions larger than maxw or maxh; use 0 for default values. |
| [in] | adjw,adjh | final adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount. |
| [in] | pixadb | pixa for saving intermediate steps; NULL to omit |
Notes:
(1) This function assumes that textlines have sufficient
vertical separation and small enough skew so that a
horizontal dilation sufficient to join words will not join
textlines. It aggressively joins textlines across multiple
columns, so if that is not desired, you must either (a) make
sure that pixs is a single column of text or (b) use instead
pixExtractTextlines(), which is more conservative
about joining text fragments that have vertical overlap.
(2) This first removes components from pixs that are either
very wide (> maxw) or very tall (> maxh).
(3) For reasonable accuracy, the resolution of pixs should be
at least 100 ppi. For reasonable efficiency, the resolution
should not exceed 600 ppi.
(4) This can be used to determine if some region of a scanned
image is horizontal text.
(5) As an example, for a pix with resolution 300 ppi, a reasonable
set of parameters is:
pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL);
(6) The output pixa is composed of subimages, one for each textline,
and the boxa in the pixa tells where in pixs each textline goes.
Definition at line 1635 of file pageseg.c.
References L_COPY, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_LT, and pixCleanBackgroundToWhite().
| PIXA * pixExtractTextlines | ( | PIX * | pixs, |
| l_int32 | maxw, | ||
| l_int32 | maxh, | ||
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| l_int32 | adjw, | ||
| l_int32 | adjh, | ||
| PIXA * | pixadb ) |
| [in] | pixs | any depth, assumed to have nearly horizontal text |
| [in] | maxw,maxh | initial filtering: remove any components in pixs with dimensions larger than maxw or maxh |
| [in] | minw,minh | final filtering: remove extracted 'lines' with sizes smaller than minw or minh; use 0 for default. |
| [in] | adjw,adjh | final adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount. |
| [in] | pixadb | pixa for saving intermediate steps; NULL to omit |
Notes:
(1) This function assumes that textline fragments have sufficient
vertical separation and small enough skew so that a
horizontal dilation sufficient to join words will not join
textlines. It does not guarantee that horizontally adjacent
textline fragments on the same line will be joined.
(2) For images with multiple columns, it attempts to avoid joining
textlines across the space between columns. If that is not
a concern, you can also use pixExtractRawTextlines(),
which will join them with alacrity.
(3) This first removes components from pixs that are either
wide (> maxw) or tall (> maxh).
(4) A final filtering operation removes small components, such
that width < minw or height < minh.
(5) For reasonable accuracy, the resolution of pixs should be
at least 100 ppi. For reasonable efficiency, the resolution
should not exceed 600 ppi.
(6) This can be used to determine if some region of a scanned
image is horizontal text.
(7) As an example, for a pix with resolution 300 ppi, a reasonable
set of parameters is:
pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL);
The defaults minw and minh for 300 ppi are about 36 and 20,
so the same result is obtained with:
pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL);
(8) The output pixa is composed of subimages, one for each textline,
and the boxa in the pixa tells where in pixs each textline goes.
Definition at line 1497 of file pageseg.c.
References Pixa::boxa, L_CLONE, L_COPY, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, L_SELECT_IF_LT, and pixCleanBackgroundToWhite().
| l_ok pixFindLargeRectangles | ( | PIX * | pixs, |
| l_int32 | polarity, | ||
| l_int32 | nrect, | ||
| BOXA ** | pboxa, | ||
| PIX ** | ppixdb ) |
| [in] | pixs | 1 bpp |
| [in] | polarity | 0 within background, 1 within foreground |
| [in] | nrect | number of rectangles to be found |
| [out] | pboxa | largest rectangles, sorted by decreasing area |
| [in,out] | ppixdb | optional return output with rectangles drawn on it |
Notes:
(1) This does a greedy search to find the largest rectangles,
either black or white and without overlaps, in pix.
(2) See pixFindLargestRectangle(), which is called multiple
times, for details. On each call, the largest rectangle
found is painted, so that none of its pixels can be
used later, before calling it again.
(3) This function is surprisingly fast. Although
pixFindLargestRectangle() runs at about 50 MPix/sec, when it
is run multiple times by pixFindLargeRectangles(), it processes
at 150 - 250 MPix/sec, and the time is approximately linear
in nrect. For example, for a 1 MPix image, searching for
the largest 50 boxes takes about 0.2 seconds.
Definition at line 2472 of file pageseg.c.
References L_INSERT, PIX_CLR, PIX_SET, and pixFindLargestRectangle().
| [in] | pixs | 1 bpp |
| [in] | polarity | 0 within background, 1 within foreground |
| [out] | pbox | largest area rectangle |
| [in,out] | ppixdb | optional return output with rectangle drawn on it |
Notes:
(1) This is a simple and elegant solution to a problem in
computational geometry that at first appears to be quite
difficult: what is the largest rectangle that can be
placed in the image, covering only pixels of one polarity
(bg or fg)? The solution is O(n), where n is the number
of pixels in the image, and it requires nothing more than
using a simple recursion relation in a single sweep of the image.
(2) In a sweep from UL to LR with left-to-right being the fast
direction, calculate the largest white rectangle at (x, y),
using previously calculated values at pixels #1 and #2:
#1: (x, y - 1)
#2: (x - 1, y)
We also need the most recent "black" pixels that were seen
in the current row and column.
Consider the largest area. There are only two possibilities:
(a) Min(w(1), horizdist) * (h(1) + 1)
(b) Min(h(2), vertdist) * (w(2) + 1)
where
horizdist: the distance from the rightmost "black" pixel seen
in the current row across to the current pixel
vertdist: the distance from the lowest "black" pixel seen
in the current column down to the current pixel
and we choose the Max of (a) and (b).
(3) To convince yourself that these recursion relations are correct,
it helps to draw the maximum rectangles at #1 and #2.
Then for #1, you try to extend the rectangle down one line,
so that the height is h(1) + 1. Do you get the full
width of #1, w(1)? It depends on where the black pixels are
in the current row. You know the final width is bounded by w(1)
and w(2) + 1, but the actual value depends on the distribution
of black pixels in the current row that are at a distance
from the current pixel that is between these limits.
We call that value "horizdist", and the area is then given
by the expression (a) above. Using similar reasoning for #2,
where you attempt to extend the rectangle to the right
by 1 pixel, you arrive at (b). The largest rectangle is
then found by taking the Max.
Definition at line 2573 of file pageseg.c.
References GET_DATA_BIT, and L_NEG_SLOPE_LINE.
Referenced by pixFindLargeRectangles().
| BOX * pixFindPageForeground | ( | PIX * | pixs, |
| l_int32 | threshold, | ||
| l_int32 | mindist, | ||
| l_int32 | erasedist, | ||
| l_int32 | showmorph, | ||
| PIXAC * | pixac ) |
| [in] | pixs | full resolution (any type or depth) |
| [in] | threshold | for binarization; typically about 128 |
| [in] | mindist | min distance of text from border to allow cleaning near border; at 2x reduction, this should be larger than 50; typically about 70 |
| [in] | erasedist | when conditions are satisfied, erase anything within this distance of the edge; typically 20-30 at 2x reduction |
| [in] | showmorph | debug: set to a negative integer to show steps in generating masks; this is typically used for debugging region extraction |
| [in] | pixac | debug: allocate outside and pass this in to accumulate results of each call to this function, which can be displayed in a mosaic or a pdf. |
Notes:
(1) This doesn't simply crop to the fg. It attempts to remove
pixel noise and junk at the edge of the image before cropping.
The input threshold is used if pixs is not 1 bpp.
(2) This is not intended to work on small thumbnails. The
dimensions of pixs must be at least MinWidth x MinHeight.
(3) Debug: set showmorph to display the intermediate image in
the morphological operations on this page.
(4) Debug: to get pdf output of results when called repeatedly,
call with an existing pixac, which will add an image of this page,
with the fg outlined. If no foreground is found, there is
no output for this page image.
Definition at line 1118 of file pageseg.c.
References L_CLONE, L_SORT_BY_AREA, L_SORT_DECREASING, and PIX_CLR.
pixFindPageInsideBlackBorder()
| [in] | pixs | 1 bpp (input at 2x reduction) |
| [out] | **pbox | page region at input resolution (2x reduction) |
Notes:
(1) This extracts the page region from the image. It is designed
to work when the page is within a fairly solid black border.
(2) It returns a bounding box for the page region at the input res.
(3) The input pixs is expected to be at a resolution 100 - 150 ppi.
(4) This is used as an option to pixCropImage(), when given an
edgeclean parameter of -2.
Definition at line 843 of file pageseg.c.
References L_COPY, L_SORT_BY_AREA, and L_SORT_DECREASING.
Referenced by pixCropImage().
| l_ok pixFindThreshFgExtent | ( | PIX * | pixs, |
| l_int32 | thresh, | ||
| l_int32 * | ptop, | ||
| l_int32 * | pbot ) |
| [in] | pixs | 1 bpp |
| [in] | thresh | threshold number of pixels in row |
| [out] | ptop | [optional] location of top of region |
| [out] | pbot | [optional] location of bottom of region |
Definition at line 2071 of file pageseg.c.
Referenced by pixDecideIfText().
| [in] | pixs | 1 bpp, assumed to be 150 to 200 ppi |
| [out] | ppixtext | [optional] text part of pixs |
| [out] | phtfound | [optional] 1 if the mask is not empty |
| [in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes:
(1) This is not intended to work on small thumbnails. The
dimensions of pixs must be at least MinWidth x MinHeight.
Definition at line 315 of file pageseg.c.
References L_COPY.
Referenced by pixAutoPhotoinvert(), pixDecideIfTable(), pixGenHalftoneMask(), and pixGetRegionsBinary().
Deprecated: This wrapper avoids an ABI change with tesseract 3.0.4. It should be removed when we no longer need to support 3.0.4. The debug parameter is ignored (assumed 0).
Definition at line 290 of file pageseg.c.
References pixGenerateHalftoneMask().
| [in] | pixs | 1 bpp, textline mask, assumed to be 150 to 200 ppi |
| [in] | pixvws | vertical white space mask |
| [in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes:
(1) Both the input masks (textline and vertical white space) and
the returned textblock mask are at the same resolution.
(2) This is not intended to work on small thumbnails. The
dimensions of pixs must be at least MinWidth x MinHeight.
(3) The result is somewhat noisy, in that small "blocks" of
text may be included. These can be removed by post-processing,
using, e.g.,
pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
L_SELECT_IF_GTE, NULL);
Definition at line 486 of file pageseg.c.
References L_COPY, L_SELECT_IF_BOTH, and L_SELECT_IF_GTE.
Referenced by pixGetRegionsBinary().
| [in] | pixs | 1 bpp, assumed to be 150 to 200 ppi |
| [out] | ppixvws | vertical whitespace mask |
| [out] | ptlfound | [optional] 1 if the mask is not empty |
| [in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes:
(1) The input pixs should be deskewed.
(2) pixs should have no halftone pixels.
(3) This is not intended to work on small thumbnails. The
dimensions of pixs must be at least MinWidth x MinHeight.
(4) Both the input image and the returned textline mask
are at the same resolution.
Definition at line 396 of file pageseg.c.
References L_COPY.
Referenced by pixGetRegionsBinary().
| [in] | pixs | 1 bpp, assumed to be 300 to 400 ppi |
| [out] | ppixhm | [optional] halftone mask |
| [out] | ppixtm | [optional] textline mask |
| [out] | ppixtb | [optional] textblock mask |
| [in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes:
(1) It is best to deskew the image before segmenting.
(2) Passing in pixadb enables debug output.
Definition at line 124 of file pageseg.c.
References L_COPY, L_INSERT, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, pixGenerateHalftoneMask(), pixGenTextblockMask(), and pixGenTextlineMask().
| [in] | pixs | 1 bpp (input at 2x reduction) |
| [out] | **pbox | main region at input resolution (2x reduction) |
Notes:
(1) This removes foreground noise along left and right edges,
returning a bounding box for the remaining foreground pixels
at the input resolution.
(2) The input pixs should be at a resolution 100 - 150 ppi.
(3) It does two 2x level1 rank binary reductions, followed
by a large vertical close/open, with a very small horizontal
close/open, and then a 4x expansion back to the input resolution.
(4) To work properly with 2-column layout, if the largest and
second-largest regions are comparable in size, both are included.
(5) This is used as an option to pixCropImage(), when given
an edgeclean parameter of -1.
Definition at line 776 of file pageseg.c.
References L_COPY, L_SORT_BY_AREA, and L_SORT_DECREASING.
Referenced by pixCropImage().
| [in] | pixs | any depth |
| [in] | box | [optional] if null, use entire pixs |
| [in] | cropfract | fraction to be removed from the boundary; use 0.0 to retain the entire image |
| [in] | outres | desired resolution of output image; if the input image resolution is not set, assume 300 ppi; use 0 to skip scaling. |
Notes:
(1) This handles some common pre-processing operations,
where the page segmentation algorithm takes a 1 bpp image.
Definition at line 2307 of file pageseg.c.
References pixCleanBackgroundToWhite().
Referenced by pixDecideIfTable(), and pixDecideIfText().
|
static |
| [in] | pixs | 1 bpp |
| [in] | w | width of output image |
| [in] | h | height of output image |
| [in] | lr_border | cleared final border pixels on left and right |
| [in] | tb_border | cleared final border pixels on top and bottom |
| [in] | maxwiden | max fractional horizontal stretch allowed; >= 1.0 |
| [out] | *ppixsc | [optional] rescaled foreground region |
Notes:
(1) This rescales pixs to fit maximally within an image of
size (w x h), under two conditions:
(a) the final image has cleared border regions given by the
input parameters lr_border and tb_border, and
(b) the input image is first isotropically scaled to fit
maximally within the allowed final region, and then further
maximally widened, subject to the constraints of the
cleared border and the maxwiden parameter.
(2) The cleared border pixel parameters must be >= 0.
(3) If there is extra horizontal stretching by a factor
maxwiden larger than about 1.15, the appearance may be
unpleasingly distorted; hence the suggestion not to exceed it.
Definition at line 911 of file pageseg.c.
References PIX_SRC.
Referenced by pixCropImage().
pixSplitComponentWithProfile()
| [in] | pixs | 1 bpp, exactly one connected component |
| [in] | delta | distance used in extrema finding in a numa; typ. 10 |
| [in] | mindel | minimum required difference between profile minimum and profile values +2 and -2 away; typ. 7 |
| [out] | ppixdebug | [optional] debug image of splitting |
Notes:
(1) This will split the most obvious cases of touching characters.
The split points it is searching for are narrow and deep
minima in the vertical pixel projection profile, after a
large vertical closing has been applied to the component.
Definition at line 1343 of file pageseg.c.
References L_CLONE, and L_INSERT.
Referenced by pixSplitIntoCharacters().
| l_ok pixSplitIntoCharacters | ( | PIX * | pixs, |
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| BOXA ** | pboxa, | ||
| PIXA ** | ppixa, | ||
| PIX ** | ppixdebug ) |
| [in] | pixs | 1 bpp, contains only deskewed text |
| [in] | minw | min component width for initial filtering; typ. 4 |
| [in] | minh | min component height for initial filtering; typ. 4 |
| [out] | pboxa | [optional] character bounding boxes |
| [out] | ppixa | [optional] character images |
| [out] | ppixdebug | [optional] showing splittings |
Notes:
(1) This is a simple function that attempts to find split points
based on vertical pixel profiles.
(2) It should be given an image that has an arbitrary number
of text characters.
(3) The returned pixa includes the boxes from which the
(possibly split) components are extracted.
Definition at line 1244 of file pageseg.c.
References L_CLONE, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_GT, and pixSplitComponentWithProfile().