|
| static l_int32 | pixCorrelationBestShift (PIX *pix1, PIX *pix2, NUMA *nasum1, NUMA *namoment1, l_int32 area2, l_int32 ycent2, l_int32 maxyshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag) |
| |
| static L_RCH * | rchCreate (l_int32 index, l_float32 score, char *text, l_int32 sample, l_int32 xloc, l_int32 yloc, l_int32 width) |
| |
| static L_RCHA * | rchaCreate () |
| |
| static l_int32 | transferRchToRcha (L_RCH *rch, L_RCHA *rcha) |
| |
| static PIX * | recogPreSplittingFilter (L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 debug) |
| |
| static l_int32 | recogSplittingFilter (L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 *premove, l_int32 debug) |
| |
| static void | l_showIndicatorSplitValues (NUMA *na1, NUMA *na2, NUMA *na3, NUMA *na4, NUMA *na5, NUMA *na6) |
| |
| l_ok | recogIdentifyMultiple (L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, PIX **ppixdb, l_int32 debugsplit) |
| |
| l_ok | recogSplitIntoCharacters (L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, l_int32 debug) |
| |
| l_ok | recogCorrelationBestRow (L_RECOG *recog, PIX *pixs, BOXA **pboxa, NUMA **pnascore, NUMA **pnaindex, SARRAY **psachar, l_int32 debug) |
| |
| l_ok | recogCorrelationBestChar (L_RECOG *recog, PIX *pixs, BOX **pbox, l_float32 *pscore, l_int32 *pindex, char **pcharstr, PIX **ppixdb) |
| |
| l_ok | recogIdentifyPixa (L_RECOG *recog, PIXA *pixa, PIX **ppixdb) |
| |
| l_ok | recogIdentifyPix (L_RECOG *recog, PIX *pixs, PIX **ppixdb) |
| |
| l_ok | recogSkipIdentify (L_RECOG *recog) |
| |
| void | rchaDestroy (L_RCHA **prcha) |
| |
| void | rchDestroy (L_RCH **prch) |
| |
| l_ok | rchaExtract (L_RCHA *rcha, NUMA **pnaindex, NUMA **pnascore, SARRAY **psatext, NUMA **pnasample, NUMA **pnaxloc, NUMA **pnayloc, NUMA **pnawidth) |
| |
| l_ok | rchExtract (L_RCH *rch, l_int32 *pindex, l_float32 *pscore, char **ptext, l_int32 *psample, l_int32 *pxloc, l_int32 *pyloc, l_int32 *pwidth) |
| |
| PIX * | recogProcessToIdentify (L_RECOG *recog, PIX *pixs, l_int32 pad) |
| |
| SARRAY * | recogExtractNumbers (L_RECOG *recog, BOXA *boxas, l_float32 scorethresh, l_int32 spacethresh, BOXAA **pbaa, NUMAA **pnaa) |
| |
| PIXA * | showExtractNumbers (PIX *pixs, SARRAY *sa, BOXAA *baa, NUMAA *naa, PIX **ppixdb) |
| |
Top-level identification
l_int32 recogIdentifyMultiple()
Segmentation and noise removal
l_int32 recogSplitIntoCharacters()
Greedy character splitting
l_int32 recogCorrelationBestRow()
l_int32 recogCorrelationBestChar()
static l_int32 pixCorrelationBestShift()
Low-level identification of single characters
l_int32 recogIdentifyPixa()
l_int32 recogIdentifyPix()
l_int32 recogSkipIdentify()
Operations for handling identification results
static L_RCHA *rchaCreate()
void rchaDestroy()
static L_RCH *rchCreate()
void rchDestroy()
l_int32 rchaExtract()
l_int32 rchExtract()
static l_int32 transferRchToRcha()
Preprocessing and filtering
PIX *recogProcessToIdentify()
static PIX *recogPreSplittingFilter()
static l_int32 recogSplittingFilter()
Postprocessing
SARRAY *recogExtractNumbers()
PIXA *showExtractNumbers()
Static debug helper
static void l_showIndicatorSplitValues()
See recogbasic.c for examples of training a recognizer, which is
required before it can be used for identification.
The character splitter repeatedly does a greedy correlation with each
averaged unscaled template, at all pixel locations along the text to
be identified. The vertical alignment is between the template
centroid and the (moving) windowed centroid, including a delta of
1 pixel above and below. The best match then removes part of the
input image, leaving 1 or 2 pieces, which, after filtering,
are put in a queue. The process ends when the queue is empty.
The filtering is based on the size and aspect ratio of the
remaining pieces; the intent is to remove anything that is
unlikely to be text, such as small pieces and line graphics.
After splitting, the selected segments are identified using
the input parameters that were initially specified for the
recognizer. Unlike the splitter, which uses the averaged
templates from the unscaled input, the recognizer can use
either all training examples or averaged templates, and these
can be either scaled or unscaled. These choices are specified
when the recognizer is constructed.
Definition in file recogident.c.
| static l_int32 pixCorrelationBestShift |
( |
PIX * |
pix1, |
|
|
PIX * |
pix2, |
|
|
NUMA * |
nasum1, |
|
|
NUMA * |
namoment1, |
|
|
l_int32 |
area2, |
|
|
l_int32 |
ycent2, |
|
|
l_int32 |
maxyshift, |
|
|
l_int32 * |
tab8, |
|
|
l_int32 * |
pdelx, |
|
|
l_int32 * |
pdely, |
|
|
l_float32 * |
pscore, |
|
|
l_int32 |
debugflag |
|
) |
| |
|
static |
pixCorrelationBestShift()
- Parameters
-
| [in] | pix1 | 1 bpp, the unknown image; typically larger |
| [in] | pix2 | 1 bpp, the matching template image |
| [in] | nasum1 | vertical column pixel sums for pix1 |
| [in] | namoment1 | vertical column first moment of pixels for pix1 |
| [in] | area2 | number of on pixels in pix2 |
| [in] | ycent2 | y component of centroid of pix2 |
| [in] | maxyshift | max y shift of pix2 around the location where the centroids of pix2 and a windowed part of pix1 are vertically aligned |
| [in] | tab8 | [optional] sum tab for ON pixels in byte; can be NULL |
| [out] | pdelx | [optional] best x shift of pix2 relative to pix1 |
| [out] | pdely | [optional] best y shift of pix2 relative to pix1 |
| [out] | pscore | [optional] maximum score found; can be NULL |
| [in] | debugflag | <= 0 to skip; positive to generate output; the integer is used to label the debug image. |
- Returns
- 0 if OK, 1 on error
Notes:
(1) This maximizes the correlation score between two 1 bpp images,
one of which is typically wider. In a typical example,
pix1 is a bitmap of 2 or more touching characters and pix2 is
a single character template. This finds the location of pix2
that gives the largest correlation.
(2) The windowed area of fg pixels and windowed first moment
in the y direction are computed from the input sum and moment
column arrays, nasum1 and namoment1.
(3) This is a brute force operation. We compute the correlation
at every x shift for which pix2 fits entirely within pix1,
and where the centroid of pix2 is aligned, within +-maxyshift,
with the centroid of a window of pix1 of the same width.
The correlation is taken over the full height of pix1.
This can be made more efficient.
Definition at line 729 of file recogident.c.
| static L_RCH * rchCreate |
( |
l_int32 |
index, |
|
|
l_float32 |
score, |
|
|
char * |
text, |
|
|
l_int32 |
sample, |
|
|
l_int32 |
xloc, |
|
|
l_int32 |
yloc, |
|
|
l_int32 |
width |
|
) |
| |
|
static |
rchCreate()
- Parameters
-
| [in] | index | index of best template |
| [in] | score | correlation score of best template |
| [in] | text | character string of best template |
| [in] | sample | index of best sample; -1 if averages are used |
| [in] | xloc | x-location of template: delx + shiftx |
| [in] | yloc | y-location of template: dely + shifty |
| [in] | width | width of best template |
- Returns
- rch, or NULL on error
Notes:
(1) Be sure to destroy any existing rch before assigning this.
(2) This stores the text string, not a copy of it, so the
caller must not destroy the string.
Definition at line 1217 of file recogident.c.
References L_Rch::index, L_Rch::sample, L_Rch::score, L_Rch::text, L_Rch::width, L_Rch::xloc, and L_Rch::yloc.
Referenced by recogSkipIdentify().
| l_ok rchExtract |
( |
L_RCH * |
rch, |
|
|
l_int32 * |
pindex, |
|
|
l_float32 * |
pscore, |
|
|
char ** |
ptext, |
|
|
l_int32 * |
psample, |
|
|
l_int32 * |
pxloc, |
|
|
l_int32 * |
pyloc, |
|
|
l_int32 * |
pwidth |
|
) |
| |
rchExtract()
- Parameters
-
| [in] | rch | |
| [out] | pindex | [optional] index of best template |
| [out] | pscore | [optional] correlation score of best template |
| [out] | ptext | [optional] character string of best template |
| [out] | psample | [optional] index of best sample |
| [out] | pxloc | [optional] x-location of template |
| [out] | pyloc | [optional] y-location of template |
| [out] | pwidth | [optional] width of best template |
- Returns
- 0 if OK, 1 on error
Definition at line 1329 of file recogident.c.
References L_Rch::index, L_Rch::sample, L_Rch::score, stringNew(), L_Rch::text, L_Rch::width, L_Rch::xloc, and L_Rch::yloc.
Referenced by recogDebugAverages(), recogIdentifyPixa(), recogRescoreDidResult(), recogShowMatchesInRange(), and recogTrainFromBoot().
| l_ok recogCorrelationBestChar |
( |
L_RECOG * |
recog, |
|
|
PIX * |
pixs, |
|
|
BOX ** |
pbox, |
|
|
l_float32 * |
pscore, |
|
|
l_int32 * |
pindex, |
|
|
char ** |
pcharstr, |
|
|
PIX ** |
ppixdb |
|
) |
| |
recogCorrelationBestChar()
- Parameters
-
| [in] | recog | with LUT's pre-computed |
| [in] | pixs | can be of multiple touching characters, 1 bpp |
| [out] | pbox | bounding box of best fit character |
| [out] | pscore | correlation score |
| [out] | pindex | [optional] index of class |
| [out] | pcharstr | [optional] character string of class |
| [out] | ppixdb | [optional] debug pix showing input and best fit |
- Returns
- 0 if OK, 1 on error
Notes:
(1) Basic matching character splitter. Finds the best match among
all templates to some region of the image. This can result
in splitting the image into two parts. This is "image decoding"
without dynamic programming, because we don't use a setwidth
and compute the best matching score for the entire image.
(2) Matching is to the average templates, without character scaling.
Definition at line 587 of file recogident.c.
| SARRAY * recogExtractNumbers |
( |
L_RECOG * |
recog, |
|
|
BOXA * |
boxas, |
|
|
l_float32 |
scorethresh, |
|
|
l_int32 |
spacethresh, |
|
|
BOXAA ** |
pbaa, |
|
|
NUMAA ** |
pnaa |
|
) |
| |
recogExtractNumbers()
- Parameters
-
| [in] | recog | |
| [in] | boxas | location of components |
| [in] | scorethresh | min score for which we accept a component |
| [in] | spacethresh | max horizontal distance allowed between digits; use -1 for default |
| [out] | pbaa | [optional] bounding boxes of identified numbers |
| [out] | pnaa | [optional] scores of identified digits |
- Returns
- sa of identified numbers, or NULL on error
Notes:
(1) This extracts digit data after recogIdentifyMultiple() or
lower-level identification has taken place.
(2) Each string in the returned sa contains a sequence of ascii
digits in a number.
(3) The horizontal distance between boxes (limited by spacethresh)
is the negative of the horizontal overlap.
(4) Components with a score less than scorethresh, which may
be hyphens or other small characters, will signal the
end of the current sequence of digits in the number. A typical
value for scorethresh is 0.60.
(5) We allow two digits to be combined if these conditions apply:
(a) the first is to the left of the second
(b) the second has a horizontal separation less than spacethresh
(c) the vertical overlap >= 0 (vertical separation < 0)
(d) both have a score that exceeds scorethresh
(6) Each numa in the optionally returned naa contains the digit
scores of a number. Each boxa in the optionally returned baa
contains the bounding boxes of the digits in the number.
Definition at line 1630 of file recogident.c.
References boxaaAddBoxa(), boxaaCreate(), boxaAddBox(), boxaaDestroy(), boxaCreate(), boxaGetBox(), boxDestroy(), boxGetGeometry(), boxOverlapDistance(), L_CLONE, L_COPY, L_INSERT, L_NOCOPY, L_Recog::maxheight_u, numaaAddNuma(), numaaCreate(), numaAddNumber(), numaaDestroy(), numaCreate(), numaDestroy(), numaGetCount(), numaGetFValue(), L_Recog::rcha, rchaExtract(), sarrayAddString(), sarrayCreate(), sarrayDestroy(), sarrayGetCount(), sarrayGetString(), and sarrayToString().
| l_ok recogIdentifyPix |
( |
L_RECOG * |
recog, |
|
|
PIX * |
pixs, |
|
|
PIX ** |
ppixdb |
|
) |
| |
recogIdentifyPix()
- Parameters
-
| [in] | recog | with LUT's pre-computed |
| [in] | pixs | of a single character, 1 bpp |
| [out] | ppixdb | [optional] debug pix showing input and best fit |
- Returns
- 0 if OK, 1 on error
Notes:
(1) Basic recognition function for a single character.
(2) If templ_use == L_USE_ALL_TEMPLATES, which is the default
situation, matching is attempted to every bitmap in the recog,
and the identity of the best match is returned.
(3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and
matching is only attempted to the averaged bitmaps. For this
case, the index of the bestsample is meaningless (0 is returned
if requested).
(4) The score is related to the confidence (probability of correct
identification), in that a higher score is correlated with
a higher probability. However, the actual relation between
the correlation (score) and the probability is not known;
we call this a "score" because "confidence" can be misinterpreted
as an actual probability.
Definition at line 975 of file recogident.c.
Referenced by recogDebugAverages(), recogIdentifyPixa(), recogRescoreDidResult(), recogShowMatchesInRange(), and recogTrainFromBoot().
| l_ok recogIdentifyPixa |
( |
L_RECOG * |
recog, |
|
|
PIXA * |
pixa, |
|
|
PIX ** |
ppixdb |
|
) |
| |
recogIdentifyPixa()
- Parameters
-
| [in] | recog | |
| [in] | pixa | of 1 bpp images to match |
| [out] | ppixdb | [optional] pix showing inputs and best fits |
- Returns
- 0 if OK, 1 on error
Notes:
(1) This should be called by recogIdentifyMultiple(), which
binarizes and splits characters before sending pixa here.
(2) This calls recogIdentifyPix(), which does the same operation
on each pix in pixa, and optionally returns the arrays
of results (scores, class index and character string)
for the best correlation match.
Definition at line 882 of file recogident.c.
References L_CLONE, pixaCreate(), pixaGetCount(), pixaGetPix(), pixDestroy(), pixSetText(), L_Recog::rch, L_Recog::rcha, rchaCreate(), rchaDestroy(), rchExtract(), recogIdentifyPix(), recogShowMatch(), and recogSkipIdentify().
| l_ok recogSplitIntoCharacters |
( |
L_RECOG * |
recog, |
|
|
PIX * |
pixs, |
|
|
l_int32 |
minh, |
|
|
l_int32 |
skipsplit, |
|
|
BOXA ** |
pboxa, |
|
|
PIXA ** |
ppixa, |
|
|
l_int32 |
debug |
|
) |
| |
recogSplitIntoCharacters()
- Parameters
-
| [in] | recog | |
| [in] | pixs | 1 bpp, contains only mostly deskewed text |
| [in] | minh | remove shorter components; use 0 for default |
| [in] | skipsplit | 1 to skip the splitting step |
| [out] | pboxa | character bounding boxes |
| [out] | ppixa | character images |
| [in] | debug | 1 for results written to pixadb_split |
- Returns
- 0 if OK, 1 on error or if no components are returned
Notes:
(1) This can be given an image that has an arbitrary number
of text characters. It optionally splits connected
components based on document image decoding in recogDecode().
The returned pixa includes the boxes from which the
(possibly split) components are extracted.
(2) After noise filtering, the resulting components are put in
row-major (2D) order, and the smaller of overlapping
components are removed if they satisfy conditions of
relative size and fractional overlap.
(3) Note that the splitting function uses unscaled templates
and does not bother returning the class results and scores.
These are more accurately found later using the scaled templates.
Definition at line 250 of file recogident.c.
References lept_mkdir(), and L_Recog::train_done.
showExtractNumbers()
- Parameters
-
| [in] | pixs | input 1 bpp image |
| [in] | sa | recognized text strings |
| [in] | baa | boxa array for location of characters in each string |
| [in] | naa | numa array for scores of characters in each string |
| [out] | ppixdb | [optional] input pixs with identified chars outlined |
- Returns
- pixa of identified strings with text and scores, or NULL on error
Notes:
(1) This is a debugging routine on digit identification; e.g.:
recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0);
sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa);
pixa = showExtractNumbers(pixs, sa, baa, naa, NULL);
Definition at line 1765 of file recogident.c.
References bmfCreate(), bmfDestroy(), boxaaGetBoxa(), boxaDestroy(), boxAdjustSides(), boxaGetExtent(), boxDestroy(), L_ADD_BELOW, L_CLONE, L_INSERT, L_NOCOPY, L_SET_WHITE, numaaGetNuma(), numaDestroy(), numaGetCount(), numaGetFValue(), pixaAddPix(), pixaCreate(), pixAddBlackOrWhiteBorder(), pixAddTextlines(), pixClipRectangle(), pixConvertTo8(), pixDestroy(), pixRenderBoxArb(), sarrayGetCount(), sarrayGetString(), and stringJoinIP().
| static l_int32 transferRchToRcha |
( |
L_RCH * |
rch, |
|
|
L_RCHA * |
rcha |
|
) |
| |
|
static |
transferRchToRcha()
- Parameters
-
| [in] | rch | source of data |
| [in] | rcha | append to arrays in this destination |
- Returns
- 0 if OK, 1 on error
Notes:
(1) This is used to transfer the results of a single character
identification to an rcha array for the array of characters.
Definition at line 1375 of file recogident.c.
References L_Rch::index, L_COPY, L_Rcha::naindex, L_Rcha::nasample, L_Rcha::nascore, L_Rcha::nawidth, L_Rcha::naxloc, L_Rcha::nayloc, numaAddNumber(), L_Rch::sample, sarrayAddString(), L_Rcha::satext, L_Rch::score, L_Rch::text, L_Rch::width, L_Rch::xloc, and L_Rch::yloc.