![]() |
Leptonica 1.85.0
Image processing and image analysis suite
|
#include <string.h>
#include "allheaders.h"
Go to the source code of this file.
Macros | |
| #define | L_BUF_SIZE 512 |
Functions | |
| static l_int32 | testLineAlignmentX (NUMA *na1, NUMA *na2, l_int32 shiftx, l_int32 delx, l_int32 nperline) |
| static l_int32 | countAlignedMatches (NUMA *nai1, NUMA *nai2, NUMA *nasx, NUMA *nasy, l_int32 n1, l_int32 n2, l_int32 delx, l_int32 dely, l_int32 nreq, l_int32 *psame, l_int32 debugflag) |
| static void | printRowIndices (l_int32 *index1, l_int32 n1, l_int32 *index2, l_int32 n2) |
| l_ok | jbCorrelation (const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag) |
| l_ok | jbRankHaus (const char *dirin, l_int32 size, l_float32 rank, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag) |
| JBCLASSER * | jbWordsInTextlines (const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages) |
| l_ok | pixGetWordsInTextlines (PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai) |
| l_ok | pixGetWordBoxesInTextlines (PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, NUMA **pnai) |
| l_ok | pixFindWordAndCharacterBoxes (PIX *pixs, BOX *boxs, l_int32 thresh, BOXA **pboxaw, BOXAA **pboxaac, const char *debugdir) |
| NUMAA * | boxaExtractSortedPattern (BOXA *boxa, NUMA *na) |
| l_ok | numaaCompareImagesByBoxes (NUMAA *naa1, NUMAA *naa2, l_int32 nperline, l_int32 nreq, l_int32 maxshiftx, l_int32 maxshifty, l_int32 delx, l_int32 dely, l_int32 *psame, l_int32 debugflag) |
Variables | |
| static const l_int32 | JB_WORDS_MIN_WIDTH = 5 |
| static const l_int32 | JB_WORDS_MIN_HEIGHT = 3 |
Top-level jb2 correlation and rank-hausdorff
l_ok jbCorrelation()
l_ok jbRankHaus()
Extract and classify words in textline order
JBCLASSER *jbWordsInTextlines()
l_ok pixGetWordsInTextlines()
l_ok pixGetWordBoxesInTextlines()
Extract word and character bounding boxes
l_ok pixFindWordAndCharacterBoxes()
Use word bounding boxes to compare page images
NUMAA *boxaExtractSortedPattern()
l_ok numaaCompareImagesByBoxes()
static l_int32 testLineAlignmentX()
static l_int32 countAlignedMatches()
static void printRowIndices()
Definition in file classapp.c.
| #define L_BUF_SIZE 512 |
size of filename buffer
Definition at line 59 of file classapp.c.
Referenced by fhmtautogen1(), fhmtautogen2(), fmorphautogen1(), jbCorrelation(), jbRankHaus(), makeBarrelshiftString(), makeBarrelshiftString(), sarrayMakeInnerLoopDWACode(), sarrayMakeInnerLoopDWACode(), selaAddBasic(), selaAddCrossJunctions(), selaAddDwaCombs(), selaAddDwaLinear(), selaAddTJunctions(), zlibCompress(), and zlibUncompress().
| NUMAA * boxaExtractSortedPattern | ( | BOXA * | boxa, |
| NUMA * | na ) |
| [in] | boxa | typ. of word bounding boxes, in textline order |
| [in] | na | index of textline for each box in boxa |
Notes:
(1) The input is expected to come from pixGetWordBoxesInTextlines().
(2) Each numa in the output consists of an average y coordinate
of the first box in the textline, followed by pairs of
x coordinates representing the left and right edges of each
of the boxes in the textline.
Definition at line 658 of file classapp.c.
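|
static l_int32 countAlignedMatches (NUMA *nai1, NUMA *nai2, NUMA *nasx, NUMA *nasy, l_int32 n1, l_int32 n2, l_int32 delx, l_int32 dely, l_int32 nreq, l_int32 *psame, l_int32 debugflag) |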
Definition at line 927 of file classapp.c.
| l_ok jbCorrelation | ( | const char * | dirin, |
| l_float32 | thresh, | ||
| l_float32 | weight, | ||
| l_int32 | components, | ||
| const char * | rootname, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages, | ||
| l_int32 | renderflag ) |
| [in] | dirin | directory of input images |
| [in] | thresh | typically ~0.8 |
| [in] | weight | typically ~0.6 |
| [in] | components | JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS |
| [in] | rootname | for output files |
| [in] | firstpage | 0-based |
| [in] | npages | use 0 for all pages in dirin |
| [in] | renderflag | 1 to render from templates; 0 to skip |
Notes:
(1) The images must be 1 bpp. If they are not, you can convert
them using convertFilesTo1bpp().
(2) See prog/jbcorrelation for generating more output (e.g.,
for debugging)
Definition at line 99 of file classapp.c.
References L_BUF_SIZE, and L_CLONE.
| l_ok jbRankHaus | ( | const char * | dirin, |
| l_int32 | size, | ||
| l_float32 | rank, | ||
| l_int32 | components, | ||
| const char * | rootname, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages, | ||
| l_int32 | renderflag ) |
| [in] | dirin | directory of input images |
| [in] | size | of Sel used for dilation; typ. 2 |
| [in] | rank | rank value of match; typ. 0.97 |
| [in] | components | JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS |
| [in] | rootname | for output files |
| [in] | firstpage | 0-based |
| [in] | npages | use 0 for all pages in dirin |
| [in] | renderflag | 1 to render from templates; 0 to skip |
Notes:
(1) See prog/jbrankhaus for generating more output (e.g.,
for debugging)
Definition at line 179 of file classapp.c.
References L_BUF_SIZE, and L_CLONE.
| JBCLASSER * jbWordsInTextlines | ( | const char * | dirin, |
| l_int32 | reduction, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| l_float32 | thresh, | ||
| l_float32 | weight, | ||
| NUMA ** | pnatl, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages ) |
| [in] | dirin | directory of input pages |
| [in] | reduction | 1 for full res; 2 for half-res |
| [in] | maxwidth | of word mask components, to be kept |
| [in] | maxheight | of word mask components, to be kept |
| [in] | thresh | on correlation; 0.80 is reasonable |
| [in] | weight | for handling thick text; 0.6 is reasonable |
| [out] | pnatl | numa with textline index for each component |
| [in] | firstpage | 0-based |
| [in] | npages | use 0 for all pages in dirin |
Notes:
(1) This is a high-level function. See prog/jbwords for example
of usage.
(2) Typically, use input of 75 - 150 ppi for finding words.
Definition at line 265 of file classapp.c.
References JbClasser::h, JB_WORDS_MIN_HEIGHT, JB_WORDS_MIN_WIDTH, L_NOCOPY, pixGetWordsInTextlines(), JbClasser::safiles, and JbClasser::w.
| l_ok numaaCompareImagesByBoxes | ( | NUMAA * | naa1, |
| NUMAA * | naa2, | ||
| l_int32 | nperline, | ||
| l_int32 | nreq, | ||
| l_int32 | maxshiftx, | ||
| l_int32 | maxshifty, | ||
| l_int32 | delx, | ||
| l_int32 | dely, | ||
| l_int32 * | psame, | ||
| l_int32 | debugflag ) |
| [in] | naa1 | for image 1, formatted by boxaExtractSortedPattern() |
| [in] | naa2 | for image 2, formatted by boxaExtractSortedPattern() |
| [in] | nperline | number of box regions to be used in each textline |
| [in] | nreq | number of complete row matches required |
| [in] | maxshiftx | max allowed x shift between two patterns, in pixels |
| [in] | maxshifty | max allowed y shift between two patterns, in pixels |
| [in] | delx | max allowed difference in x data, after alignment |
| [in] | dely | max allowed difference in y data, after alignment |
| [out] | psame | 1 if nreq row matches are found; 0 otherwise |
| [in] | debugflag | 1 for debug output |
Notes:
(1) Each input numaa describes a set of sorted bounding boxes
(sorted by textline and, within each textline, from
left to right) in the images from which they are derived.
See boxaExtractSortedPattern() for a description of the data
format in each of the input numaa.
(2) This function does an alignment between the input
descriptions of bounding boxes for two images. The
input parameter nperline specifies the number of boxes
to consider in each line when testing for a match, and
nreq is the required number of lines that must be well-aligned
to get a match.
(3) Testing by alignment has 3 steps:
(a) Generating the location of word bounding boxes from the
images (prior to calling this function).
(b) Listing all possible pairs of aligned rows, based on
tolerances in horizontal and vertical positions of
the boxes. Specifically, all pairs of rows are enumerated
whose first nperline boxes can be brought into close
alignment, based on the delx parameter for boxes in the
line and within the overall maxshiftx and maxshifty
constraints.
(c) Each pair, starting with the first, is used to search
for a set of nreq - 1 other pairs that can all be aligned
with a difference in global translation of not more
than (delx, dely).
Definition at line 744 of file classapp.c.
References L_CLONE.
| l_ok pixFindWordAndCharacterBoxes | ( | PIX * | pixs, |
| BOX * | boxs, | ||
| l_int32 | thresh, | ||
| BOXA ** | pboxaw, | ||
| BOXAA ** | pboxaac, | ||
| const char * | debugdir ) |
pixFindWordAndCharacterBoxes()
| [in] | pixs | 2, 4, 8 or 32 bpp; colormap OK; typ. 300 ppi |
| [in] | boxs | [optional] region to select in pixs |
| [in] | thresh | binarization threshold (typ. 100 - 150) |
| [out] | pboxaw | return the word boxes |
| [out] | pboxaac | return the character boxes |
| [in] | debugdir | [optional] for debug images; use NULL to skip |
Notes:
(1) If boxs == NULL, the entire input image is used.
(2) Having an input pix that is not 1bpp is necessary to reduce
touching characters by using a low binarization threshold.
Suggested thresholds are between 100 and 150.
(3) The coordinates in the output boxes are global, with respect
to the input image.
Definition at line 516 of file classapp.c.
References L_COPY, L_INSERT, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, L_SORT_BY_X, L_SORT_INCREASING, and pixGetWordBoxesInTextlines().
| l_ok pixGetWordBoxesInTextlines | ( | PIX * | pixs, |
| l_int32 | minwidth, | ||
| l_int32 | minheight, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| BOXA ** | pboxad, | ||
| NUMA ** | pnai ) |
| [in] | pixs | 1 bpp, typ. 75 - 150 ppi |
| [in] | minwidth | of saved components; smaller are discarded |
| [in] | minheight | of saved components; smaller are discarded |
| [in] | maxwidth | of saved components; larger are discarded |
| [in] | maxheight | of saved components; larger are discarded |
| [out] | pboxad | word boxes sorted in textline order |
| [out] | pnai | [optional] index of textline for each word |
Notes:
(1) The input should be at a resolution of between 75 and 150 ppi.
(2) This is a special version of pixGetWordsInTextlines(), that
just finds the word boxes in line order, with a numa
giving the textline index for each word.
See pixGetWordsInTextlines() for more details.
Definition at line 452 of file classapp.c.
References L_CLONE.
Referenced by pixFindWordAndCharacterBoxes().
| l_ok pixGetWordsInTextlines | ( | PIX * | pixs, |
| l_int32 | minwidth, | ||
| l_int32 | minheight, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| BOXA ** | pboxad, | ||
| PIXA ** | ppixad, | ||
| NUMA ** | pnai ) |
| [in] | pixs | 1 bpp, typ. 75 - 150 ppi |
| [in] | minwidth | of saved components; smaller are discarded |
| [in] | minheight | of saved components; smaller are discarded |
| [in] | maxwidth | of saved components; larger are discarded |
| [in] | maxheight | of saved components; larger are discarded |
| [out] | pboxad | word boxes sorted in textline order |
| [out] | ppixad | word images sorted in textline order |
| [out] | pnai | index of textline for each word |
Notes:
(1) The input should be at a resolution of between 75 and 150 ppi.
(2) The four size constraints on saved components are all
scaled by reduction.
(3) The results are word images (and their b.b.), extracted in
textline order, at either full res or 2x reduction,
and with a numa giving the textline index for each word.
(4) The pixa and boxa interfaces should make this type of
application simple to put together. The steps are:
~ generate first estimate of word masks
~ get b.b. of these, and remove the small and big ones
~ extract pixa of the word images, using the b.b.
~ sort actual word images in textline order (2d)
~ flatten them to a pixa (1d), saving the textline index
for each pix
(5) In an actual application, it may be desirable to pre-filter
the input image to remove large components, to extract
single columns of text, and to deskew them. For example,
to remove both large components and small noisy components
that can interfere with the statistics used to estimate
parameters for segmenting by words, but still retain text lines,
the following image preprocessing can be done:
Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
pixAnd(pixf, pixf, pixs); // the filtered image
The closing turns text lines into long blobs, but does not
significantly increase their height. But if there are many
small connected components in a dense texture, this is likely
to generate tall components that will be eliminated in pixf.
Definition at line 377 of file classapp.c.
References L_CLONE, and L_COPY.
Referenced by jbWordsInTextlines().
|
static void printRowIndices (l_int32 *index1, l_int32 n1, l_int32 *index2, l_int32 n2) |
Definition at line 1014 of file classapp.c.
|
static l_int32 testLineAlignmentX (NUMA *na1, NUMA *na2, l_int32 shiftx, l_int32 delx, l_int32 nperline) |
Definition at line 877 of file classapp.c.
|
static const l_int32 JB_WORDS_MIN_HEIGHT = 3 |
min. word height in pixels
Definition at line 61 of file classapp.c.
Referenced by jbWordsInTextlines().
|
static const l_int32 JB_WORDS_MIN_WIDTH = 5 |
min. word width in pixels
Definition at line 60 of file classapp.c.
Referenced by jbWordsInTextlines().