![]() |
Leptonica 1.85.0
Image processing and image analysis suite
|
#include <math.h>
#include "allheaders.h"
Go to the source code of this file.
Macros | |
| #define | DEBUG_HISTO 0 |
| #define | DEBUG_CROSSINGS 0 |
| #define | DEBUG_FREQUENCY 0 |
Functions | |
| NUMA * | numaErode (NUMA *nas, l_int32 size) |
| NUMA * | numaDilate (NUMA *nas, l_int32 size) |
| NUMA * | numaOpen (NUMA *nas, l_int32 size) |
| NUMA * | numaClose (NUMA *nas, l_int32 size) |
| NUMA * | numaTransform (NUMA *nas, l_float32 shift, l_float32 scale) |
| l_ok | numaSimpleStats (NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar) |
| l_ok | numaWindowedStats (NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv) |
| NUMA * | numaWindowedMean (NUMA *nas, l_int32 wc) |
| NUMA * | numaWindowedMeanSquare (NUMA *nas, l_int32 wc) |
| l_ok | numaWindowedVariance (NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv) |
| NUMA * | numaWindowedMedian (NUMA *nas, l_int32 halfwin) |
| NUMA * | numaConvertToInt (NUMA *nas) |
| NUMA * | numaMakeHistogram (NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart) |
| NUMA * | numaMakeHistogramAuto (NUMA *na, l_int32 maxbins) |
| NUMA * | numaMakeHistogramClipped (NUMA *na, l_float32 binsize, l_float32 maxsize) |
| NUMA * | numaRebinHistogram (NUMA *nas, l_int32 newsize) |
| NUMA * | numaNormalizeHistogram (NUMA *nas, l_float32 tsum) |
| l_ok | numaGetStatsUsingHistogram (NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto) |
| l_ok | numaGetHistogramStats (NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance) |
| l_ok | numaGetHistogramStatsOnInterval (NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance) |
| l_ok | numaMakeRankFromHistogram (l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay) |
| l_ok | numaHistogramGetRankFromVal (NUMA *na, l_float32 rval, l_float32 *prank) |
| l_ok | numaHistogramGetValFromRank (NUMA *na, l_float32 rank, l_float32 *prval) |
| l_ok | numaDiscretizeSortedInBins (NUMA *na, l_int32 nbins, NUMA **pnabinval) |
| l_ok | numaDiscretizeHistoInBins (NUMA *na, l_int32 nbins, NUMA **pnabinval, NUMA **pnarank) |
| l_ok | numaGetRankBinValues (NUMA *na, l_int32 nbins, NUMA **pnam) |
| NUMA * | numaGetUniformBinSizes (l_int32 ntotal, l_int32 nbins) |
| l_ok | numaSplitDistribution (NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore) |
| l_ok | grayHistogramsToEMD (NUMAA *naa1, NUMAA *naa2, NUMA **pnad) |
| l_ok | numaEarthMoverDistance (NUMA *na1, NUMA *na2, l_float32 *pdist) |
| l_ok | grayInterHistogramStats (NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv) |
| NUMA * | numaFindPeaks (NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2) |
| NUMA * | numaFindExtrema (NUMA *nas, l_float32 delta, NUMA **pnav) |
| l_ok | numaFindLocForThreshold (NUMA *na, l_int32 skip, l_int32 *pthresh, l_float32 *pfract) |
| l_ok | numaCountReversals (NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd) |
| l_ok | numaSelectCrossingThreshold (NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh) |
| NUMA * | numaCrossingsByThreshold (NUMA *nax, NUMA *nay, l_float32 thresh) |
| NUMA * | numaCrossingsByPeaks (NUMA *nax, NUMA *nay, l_float32 delta) |
| l_ok | numaEvalBestHaarParameters (NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore) |
| l_ok | numaEvalHaarSum (NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore) |
| NUMA * | genConstrainedNumaInRange (l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs) |
Variables | |
| static const l_int32 | BinSizeArray [] |
| static const l_int32 | NBinSizes = 24 |
--------------------------------------
This file has these Numa utilities:
- morphological operations
- arithmetic transforms
- windowed statistical operations
- histogram extraction
- histogram comparison
- extrema finding
- frequency and crossing analysis
--------------------------------------
Morphological (min/max) operations
NUMA *numaErode()
NUMA *numaDilate()
NUMA *numaOpen()
NUMA *numaClose()
Other transforms
NUMA *numaTransform()
l_int32 numaSimpleStats()
l_int32 numaWindowedStats()
NUMA *numaWindowedMean()
NUMA *numaWindowedMeanSquare()
l_int32 numaWindowedVariance()
NUMA *numaWindowedMedian()
NUMA *numaConvertToInt()
Histogram generation and statistics
NUMA *numaMakeHistogram()
NUMA *numaMakeHistogramAuto()
NUMA *numaMakeHistogramClipped()
NUMA *numaRebinHistogram()
NUMA *numaNormalizeHistogram()
l_int32 numaGetStatsUsingHistogram()
l_int32 numaGetHistogramStats()
l_int32 numaGetHistogramStatsOnInterval()
l_int32 numaMakeRankFromHistogram()
l_int32 numaHistogramGetRankFromVal()
l_int32 numaHistogramGetValFromRank()
l_int32 numaDiscretizeSortedInBins()
l_int32 numaDiscretizeHistoInBins()
l_int32 numaGetRankBinValues()
NUMA *numaGetUniformBinSizes()
Splitting a distribution
l_int32 numaSplitDistribution()
Comparing histograms
l_int32 grayHistogramsToEMD()
l_int32 numaEarthMoverDistance()
l_int32 grayInterHistogramStats()
Extrema finding
NUMA *numaFindPeaks()
NUMA *numaFindExtrema()
l_int32 numaFindLocForThreshold()
l_int32 numaCountReversals()
Threshold crossings and frequency analysis
l_int32 numaSelectCrossingThreshold()
NUMA *numaCrossingsByThreshold()
NUMA *numaCrossingsByPeaks()
l_int32 numaEvalBestHaarParameters()
l_int32 numaEvalHaarSum()
Generating numbers in a range under constraints
NUMA *genConstrainedNumaInRange()
Things to remember when using the Numa:
(1) The numa is a struct, not an array. Always use accessors
(see numabasic.c), never the fields directly.
(2) The number array holds l_float32 values. It can also
be used to store l_int32 values. See numabasic.c for
details on using the accessors. Integers larger than
about 10M will lose accuracy on retrieval due to round-off.
For large integers, use the dna (array of l_float64) instead.
(3) Occasionally, in the comments we denote the i-th element of a
numa by na[i]. This is conceptual only -- the numa is not an array!
Some general comments on histograms:
(1) Histograms are the generic statistical representation of
the data about some attribute. Typically they're not
normalized -- they simply give the number of occurrences
within each range of values of the attribute. This range
of values is referred to as a 'bucket'. For example,
the histogram could specify how many connected components
are found for each value of their width; in that case,
the bucket size is 1.
(2) In leptonica, all buckets have the same size. Histograms
are therefore specified by a numa of occurrences, along
with two other numbers: the 'value' associated with the
occupants of the first bucket and the size (i.e., 'width')
of each bucket. These two numbers then allow us to calculate
the value associated with the occupants of each bucket.
These numbers are fields in the numa, initialized to
a startx value of 0.0 and a binsize of 1.0. Accessors for
these fields are functions numa*Parameters(). All histograms
must have these two numbers properly set.
Definition in file numafunc2.c.
| #define DEBUG_CROSSINGS 0 |
Definition at line 154 of file numafunc2.c.
| #define DEBUG_FREQUENCY 0 |
Definition at line 155 of file numafunc2.c.
| #define DEBUG_HISTO 0 |
Definition at line 153 of file numafunc2.c.
| NUMA * genConstrainedNumaInRange | ( | l_int32 | first, |
| l_int32 | last, | ||
| l_int32 | nmax, | ||
| l_int32 | use_pairs ) |
| [in] | first | first number to choose; >= 0 |
| [in] | last | biggest possible number to reach; >= first |
| [in] | nmax | maximum number of numbers to select; > 0 |
| [in] | use_pairs | 1 = select pairs of adjacent numbers; 0 = select individual numbers |
Notes:
(1) Selection is made uniformly in the range. This can be used
to select pages distributed as uniformly as possible
through a book, where you are constrained to:
~ choose between [first, ... biggest],
~ choose no more than nmax numbers,
and you have the option of requiring pairs of adjacent numbers.
Definition at line 3223 of file numafunc2.c.
| [in] | naa1,naa2 | two numaa, each with one or more 256-element histograms |
| [out] | pnad | nad of EM distances for each histogram |
Notes:
(1) The two numaas must be the same size and have corresponding
256-element histograms. Pairs do not need to be normalized
to the same sum.
(2) This is typically used on two sets of histograms from
corresponding tiles of two images. The similarity of two
images can be found with the scoring function used in
pixCompareGrayByHisto():
score S = 1.0 - k * D, where
k is a constant, say in the range 5-10
D = EMD
for each tile; for multiple tiles, take the Min(S) over
the set of tiles to be the final score.
Definition at line 2134 of file numafunc2.c.
References L_CLONE, and numaEarthMoverDistance().
| l_ok grayInterHistogramStats | ( | NUMAA * | naa, |
| l_int32 | wc, | ||
| NUMA ** | pnam, | ||
| NUMA ** | pnams, | ||
| NUMA ** | pnav, | ||
| NUMA ** | pnarv ) |
| [in] | naa | numaa with two or more 256-element histograms |
| [in] | wc | half-width of the smoothing window |
| [out] | pnam | [optional] mean values |
| [out] | pnams | [optional] mean square values |
| [out] | pnav | [optional] variances |
| [out] | pnarv | [optional] rms deviations from the mean |
Notes:
(1) The naa has two or more 256-element numa histograms, which
are to be compared value-wise at each of the 256 gray levels.
The results are stats (mean, mean square, variance, root variance)
aggregated across the set of histograms, and each is output
as a 256 entry numa. Think of these histograms as a matrix,
where each histogram is one row of the array. The stats are
then aggregated column-wise, between the histograms.
(2) These stats are:
~ average value: <v> (nam)
~ average squared value: <v*v> (nams)
~ variance: <(v - <v>)*(v - <v>)> = <v*v> - <v>*<v> (nav)
~ square-root of variance: (narv)
where the brackets < .. > indicate that the average value is
to be taken over each column of the array.
(3) The input histograms are optionally smoothed before these
statistical operations.
(4) The input histograms are normalized to a sum of 10000. By
doing this, the resulting numbers are independent of the
number of samples used in building the individual histograms.
(5) A typical application is on a set of histograms from tiles
of an image, to distinguish between text/tables and photo
regions. If the tiles are much larger than the text line
spacing, text/table regions typically have smaller variance
across tiles than photo regions. For this application, it
may be useful to ignore values near white, which are large for
text and would magnify the variance due to variations in
illumination. However, because the variance of a drawing or
a light photo can be similar to that of grayscale text, this
function is only a discriminator between darker photos/drawings
and light photos/text/line-graphics.
Definition at line 2287 of file numafunc2.c.
References L_CLONE, L_COPY, numaNormalizeHistogram(), numaSimpleStats(), and numaWindowedMean().
| [in] | nas | |
| [in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes:
(1) The structuring element (sel) is linear, all "hits"
(2) If size == 1, this returns a copy
(3) We add a border before doing this operation, for the same
reason that we add a border to a pix before doing a safe closing.
Without the border, a small component near the border gets
clipped at the border on dilation, and can be entirely removed
by the following erosion, violating the basic extensivity
property of closing.
Definition at line 362 of file numafunc2.c.
References numaDilate(), and numaErode().
| [in] | nas | source numa |
Definition at line 811 of file numafunc2.c.
Referenced by numaMakeHistogram().
| l_ok numaCountReversals | ( | NUMA * | nas, |
| l_float32 | minreversal, | ||
| l_int32 * | pnr, | ||
| l_float32 * | prd ) |
| [in] | nas | input values |
| [in] | minreversal | relative amount to resolve peaks and valleys |
| [out] | pnr | [optional] number of reversals |
| [out] | prd | [optional] reversal density: reversals/length |
Notes:
(1) The input numa can be generated from pixExtractAlongLine().
If so, the x parameters can be used to find the reversal
frequency along a line.
(2) If the input numa was generated from a 1 bpp pix, the
values will be 0 and 1. Use minreversal == 1 to get
the number of pixel flips. If the only values are 0 and 1,
but minreversal > 1, set the reversal count to 0 and
issue a warning.
Definition at line 2709 of file numafunc2.c.
References numaFindExtrema().
| [in] | nax | [optional] numa of abscissa values |
| [in] | nay | numa of ordinate values, corresponding to nax |
| [in] | delta | parameter used to identify when a new peak can be found |
Notes:
(1) If nax == NULL, we use startx and delx from nay to compute
the crossing values in nad.
Definition at line 2976 of file numafunc2.c.
References numaFindExtrema().
| [in] | nax | [optional] numa of abscissa values; can be NULL |
| [in] | nay | numa of ordinate values, corresponding to nax |
| [in] | thresh | threshold value for nay |
Notes:
(1) If nax == NULL, we use startx and delx from nay to compute
the crossing values in nad.
Definition at line 2912 of file numafunc2.c.
Referenced by numaSelectCrossingThreshold().
| [in] | nas | |
| [in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes:
(1) The structuring element (sel) is linear, all "hits"
(2) If size == 1, this returns a copy
Definition at line 250 of file numafunc2.c.
References L_NOCOPY.
Referenced by numaClose(), and numaOpen().
| [in] | na | histogram |
| [in] | nbins | number of equal population bins (> 1) |
| [out] | pnabinval | average "gray" values in each bin |
| [out] | pnarank | [optional] rank value of input histogram; this is a cumulative norm histogram. |
Notes:
(1) With nbins == 100, nabinval is the average gray value in
each of the 100 equally populated bins. It is the function
gray[100 * rank].
Thus it is the inverse of
rank[gray]
which is optionally returned in narank.
(2) The "gray value" is the index into the input histogram.
(3) The two output arrays give the following mappings, where the
input is an un-normalized histogram of array values:
bin number --> average array value in bin (nabinval)
array values --> cumulative normalized histogram (narank)
Definition at line 1736 of file numafunc2.c.
References numaGetUniformBinSizes(), and numaNormalizeHistogram().
Referenced by numaGetRankBinValues().
| [in] | na | sorted |
| [in] | nbins | number of equal population bins (> 1) |
| [out] | pnabinval | average "gray" values in each bin |
Notes:
(1) The input na is sorted in increasing value.
(2) The output array has the following mapping:
bin number --> average array value in bin (nabinval)
(3) With nbins == 100, nabinval is the average gray value in
each of the 100 equally populated bins. It is the function
gray[100 * rank].
Thus it is the inverse of
rank[gray]
(4) Contrast with numaDiscretizeHistoInBins(), where the input na
is a histogram.
Definition at line 1661 of file numafunc2.c.
References numaGetUniformBinSizes().
Referenced by numaGetRankBinValues().
| [in] | na1,na2 | two numas of the same size, typically histograms |
| [out] | pdist | earthmover distance |
Notes:
(1) The two numas must have the same size. They do not need to be
normalized to the same sum before applying the function.
(2) For a 1D discrete function, the implementation of the EMD
is trivial. Just keep filling or emptying buckets in one numa
to match the amount in the other, moving sequentially along
both arrays.
(3) We divide the sum of the absolute value of everything moved
(by 1 unit at a time) by the sum of the numa (amount of "earth")
to get the average distance that the "earth" was moved.
This is the value returned here.
(4) The caller can do a further normalization, by the number of
buckets (minus 1), to get the EM distance as a fraction of
the maximum possible distance, which is n-1. This fraction
is 1.0 for the situation where all the 'earth' in the first
array is at one end, and all in the second array is at the
other end.
Definition at line 2198 of file numafunc2.c.
References L_NOCOPY, and numaTransform().
Referenced by grayHistogramsToEMD().
| [in] | nas | |
| [in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes:
(1) The structuring element (sel) is linear, all "hits"
(2) If size == 1, this returns a copy
(3) General comment. The morphological operations are equivalent
to those that would be performed on a 1-dimensional fpix.
However, because we have not implemented morphological
operations on fpix, we do this here. Because it is only
1 dimensional, there is no reason to use the more
complicated van Herk/Gil-Werman algorithm, and we do it
by brute force.
Definition at line 183 of file numafunc2.c.
References L_NOCOPY.
Referenced by numaClose(), and numaOpen().
| l_ok numaEvalBestHaarParameters | ( | NUMA * | nas, |
| l_float32 | relweight, | ||
| l_int32 | nwidth, | ||
| l_int32 | nshift, | ||
| l_float32 | minwidth, | ||
| l_float32 | maxwidth, | ||
| l_float32 * | pbestwidth, | ||
| l_float32 * | pbestshift, | ||
| l_float32 * | pbestscore ) |
| [in] | nas | numa of non-negative signal values |
| [in] | relweight | relative weight of (-1 comb) / (+1 comb) contributions to the 'convolution'. In effect, the convolution kernel is a comb consisting of alternating +1 and -weight. |
| [in] | nwidth | number of widths to consider |
| [in] | nshift | number of shifts to consider for each width |
| [in] | minwidth | smallest width to consider |
| [in] | maxwidth | largest width to consider |
| [out] | pbestwidth | width giving largest score |
| [out] | pbestshift | shift giving largest score |
| [out] | pbestscore | [optional] convolution with "Haar"-like comb |
Notes:
(1) This does a linear sweep of widths, evaluating at nshift
shifts for each width, computing the score from a convolution
with a long comb, and finding the (width, shift) pair that
gives the maximum score. The best width is the "half-wavelength"
of the signal.
(2) The convolving function is a comb of alternating values
+1 and -1 * relweight, separated by the width and phased by
the shift. This is similar to a Haar transform, except
there the convolution is performed with a square wave.
(3) The function is useful for finding the line spacing
and strength of line signal from pixel sum projections.
(4) The score is normalized to the size of nas divided by
the number of half-widths. For image applications, the input is
typically an array of pixel projections, so one should
normalize by dividing the score by the image width in the
pixel projection direction.
Definition at line 3085 of file numafunc2.c.
References numaEvalHaarSum().
| l_ok numaEvalHaarSum | ( | NUMA * | nas, |
| l_float32 | width, | ||
| l_float32 | shift, | ||
| l_float32 | relweight, | ||
| l_float32 * | pscore ) |
| [in] | nas | numa of non-negative signal values |
| [in] | width | distance between +1 and -1 in convolution comb |
| [in] | shift | phase of the comb: location of first +1 |
| [in] | relweight | relative weight of (-1 comb) / (+1 comb) contributions to the 'convolution'. In effect, the convolution kernel is a comb consisting of alternating +1 and -weight. |
| [out] | pscore | convolution with "Haar"-like comb |
Notes:
(1) This does a convolution with a comb of alternating values
+1 and -relweight, separated by the width and phased by the shift.
This is similar to a Haar transform, except that for Haar,
(1) the convolution kernel is symmetric about 0, so the
relweight is 1.0, and
(2) the convolution is performed with a square wave.
(2) The score is normalized to the size of nas divided by
twice the "width". For image applications, the input is
typically an array of pixel projections, so one should
normalize by dividing the score by the image width in the
pixel projection direction.
(3) To get a Haar-like result, use relweight = 1.0. For detecting
signals where you expect every other sample to be close to
zero, as with barcodes or filtered text lines, you can
use relweight > 1.0.
Definition at line 3168 of file numafunc2.c.
Referenced by numaEvalBestHaarParameters().
| [in] | nas | input values |
| [in] | delta | relative amount to resolve peaks and valleys |
| [out] | pnav | [optional] values of extrema |
Notes:
(1) This returns a sequence of extrema (peaks and valleys).
(2) The algorithm is analogous to that for determining
mountain peaks. Suppose we have a local peak, with
bumps on the side. Under what conditions can we consider
those 'bumps' to be actual peaks? The answer: if the
bump is separated from the peak by a saddle that is at
least 500 feet below the bump.
(3) Operationally, suppose we are trying to identify a peak.
We have a previous valley, and also the largest value that
we have seen since that valley. We can identify this as
a peak if we find a value that is delta BELOW it. When
we find such a value, label the peak, use the current
value to label the starting point for the search for
a valley, and do the same operation in reverse. Namely,
keep track of the lowest point seen, and look for a value
that is delta ABOVE it. Once found, the lowest point is
labeled the valley, and continue, looking for the next peak.
Definition at line 2491 of file numafunc2.c.
Referenced by numaCountReversals(), and numaCrossingsByPeaks().
| l_ok numaFindLocForThreshold | ( | NUMA * | na, |
| l_int32 | skip, | ||
| l_int32 * | pthresh, | ||
| l_float32 * | pfract ) |
| [in] | na | input histogram |
| [in] | skip | look-ahead distance to avoid false minima; use 0 for default |
| [out] | pthresh | threshold value |
| [out] | pfract | [optional] fraction below or at threshold |
Notes:
(1) This finds a good place to set a threshold for a histogram
of values that has two peaks. The peaks can differ greatly
in area underneath them. The number of buckets in the
histogram is expected to be 256 (e.g., from an 8 bpp gray image).
(2) The input histogram should have been smoothed with a window
to avoid false peak and valley detection due to noise. For
example, see pixThresholdByHisto().
(3) A skip value can be input to determine the look-ahead distance
to ignore a false peak on the rise or descent from the first peak.
Input 0 to use the default value (it assumes a histo size of 256).
(4) Optionally, the fractional area under the first peak can
be returned.
Definition at line 2597 of file numafunc2.c.
References L_NOCOPY.
| [in] | nas | source numa |
| [in] | nmax | max number of peaks to be found |
| [in] | fract1 | min fraction of peak value |
| [in] | fract2 | min slope |
Notes:
(1) The returned na consists of sets of four numbers representing
the peak, in the following order:
left edge; peak center; right edge; normalized peak area
Definition at line 2375 of file numafunc2.c.
| l_ok numaGetHistogramStats | ( | NUMA * | nahisto, |
| l_float32 | startx, | ||
| l_float32 | deltax, | ||
| l_float32 * | pxmean, | ||
| l_float32 * | pxmedian, | ||
| l_float32 * | pxmode, | ||
| l_float32 * | pxvariance ) |
| [in] | nahisto | histogram: y(x(i)), i = 0 ... nbins - 1 |
| [in] | startx | x value of first bin: x(0) |
| [in] | deltax | x increment between bins; the bin size; x(1) - x(0) |
| [out] | pxmean | [optional] mean value of histogram |
| [out] | pxmedian | [optional] median value of histogram |
| [out] | pxmode | [optional] mode value of histogram: xmode = x(imode), where y(xmode) >= y(x(i)) for all i != imode |
| [out] | pxvariance | [optional] variance of x |
Notes:
(1) If the histogram represents the relation y(x), the
computed values that are returned are the x values.
These are NOT the bucket indices i; they are related to the
bucket indices by
x(i) = startx + i * deltax
Definition at line 1316 of file numafunc2.c.
References numaGetHistogramStatsOnInterval().
Referenced by numaSplitDistribution().
| l_ok numaGetHistogramStatsOnInterval | ( | NUMA * | nahisto, |
| l_float32 | startx, | ||
| l_float32 | deltax, | ||
| l_int32 | ifirst, | ||
| l_int32 | ilast, | ||
| l_float32 * | pxmean, | ||
| l_float32 * | pxmedian, | ||
| l_float32 * | pxmode, | ||
| l_float32 * | pxvariance ) |
numaGetHistogramStatsOnInterval()
| [in] | nahisto | histogram: y(x(i)), i = 0 ... nbins - 1 |
| [in] | startx | x value of first bin: x(0) |
| [in] | deltax | x increment between bins; the bin size; x(1) - x(0) |
| [in] | ifirst | first bin to use for collecting stats |
| [in] | ilast | last bin for collecting stats; -1 to go to the end |
| [out] | pxmean | [optional] mean value of histogram |
| [out] | pxmedian | [optional] median value of histogram |
| [out] | pxmode | [optional] mode value of histogram: xmode = x(imode), where y(xmode) >= y(x(i)) for all i != imode |
| [out] | pxvariance | [optional] variance of x |
Notes:
(1) If the histogram represents the relation y(x), the
computed values that are returned are the x values.
These are NOT the bucket indices i; they are related to the
bucket indices by
x(i) = startx + i * deltax
Definition at line 1363 of file numafunc2.c.
Referenced by numaGetHistogramStats().
| [in] | na | an array of values |
| [in] | nbins | number of bins at which the rank is divided |
| [out] | pnam | mean intensity in a bin vs rank bin value, with nbins of discretized rank values |
Notes:
(1) Simple interface for getting a binned rank representation
of an input array of values. This returns:
rank bin number --> average array value in each rank bin (nam)
(2) Uses either a sorted array or a histogram, depending on
the values in the array and the size of the array.
Definition at line 1822 of file numafunc2.c.
References L_SHELL_SORT, L_SORT_INCREASING, numaDiscretizeHistoInBins(), numaDiscretizeSortedInBins(), and numaMakeHistogram().
| l_ok numaGetStatsUsingHistogram | ( | NUMA * | na, |
| l_int32 | maxbins, | ||
| l_float32 * | pmin, | ||
| l_float32 * | pmax, | ||
| l_float32 * | pmean, | ||
| l_float32 * | pvariance, | ||
| l_float32 * | pmedian, | ||
| l_float32 | rank, | ||
| l_float32 * | prval, | ||
| NUMA ** | phisto ) |
| [in] | na | an arbitrary set of numbers; not ordered and not a histogram |
| [in] | maxbins | the maximum number of bins to be allowed in the histogram; use an integer larger than the largest number in na for consecutive integer bins |
| [out] | pmin | [optional] min value of set |
| [out] | pmax | [optional] max value of set |
| [out] | pmean | [optional] mean value of set |
| [out] | pvariance | [optional] variance |
| [out] | pmedian | [optional] median value of set |
| [in] | rank | in [0.0 ... 1.0]; median has a rank 0.5; ignored if prval == NULL |
| [out] | prval | [optional] value in na corresponding to rank |
| [out] | phisto | [optional] Numa histogram; use NULL to prevent |
Notes:
(1) This is a simple interface for gathering statistics
from a numa, where a histogram is used 'under the covers'
to avoid sorting if a rank value is requested. In that case,
by using a histogram we are trading speed for accuracy, because
the values in na are quantized to the center of a set of bins.
(2) If the median, other rank value, or histogram are not requested,
the calculation is all performed on the input Numa.
(3) The variance is the average of the square of the
difference from the mean. The median is the value in na
with rank 0.5.
(4) There are two situations where this gives rank results with
accuracy comparable to computing statistics directly on the input
data, without binning into a histogram:
(a) the data is integers and the range of data is less than
maxbins, and
(b) the data is floats and the range is small compared to
maxbins, so that the binsize is much less than 1.
(5) If a histogram is used and the numbers in the Numa extend
over a large range, you can limit the required storage by
specifying the maximum number of bins in the histogram.
Use maxbins == 0 to force the bin size to be 1.
(6) This optionally returns the median and one arbitrary rank value.
If you need several rank values, return the histogram and use
numaHistogramGetValFromRank(nah, rank, &rval)
multiple times.
Definition at line 1227 of file numafunc2.c.
References numaHistogramGetValFromRank(), and numaMakeHistogramAuto().
| NUMA * numaGetUniformBinSizes | ( | l_int32 | ntotal, |
| l_int32 | nbins ) |
| [in] | ntotal | number of values to be split up |
| [in] | nbins | number of bins |
Notes:
(1) The numbers in the bins can differ by 1. The sum of
bin numbers in naeach is ntotal.
Definition at line 1891 of file numafunc2.c.
Referenced by numaDiscretizeHistoInBins(), and numaDiscretizeSortedInBins().
| l_ok numaHistogramGetRankFromVal | ( | NUMA * | na, |
| l_float32 | rval, | ||
| l_float32 * | prank ) |
| [in] | na | histogram |
| [in] | rval | value of input sample for which we want the rank |
| [out] | prank | fraction of total samples below rval |
Notes:
(1) If we think of the histogram as a function y(x), normalized
to 1, for a given input value of x, this computes the
rank of x, which is the integral of y(x) from the start
value of x to the input value.
(2) This function only makes sense when applied to a Numa that
is a histogram. The values in the histogram can be ints and
floats, and are computed as floats. The rank is returned
as a float between 0.0 and 1.0.
(3) The numa parameters startx and binsize are used to
compute x from the Numa index i.
Definition at line 1521 of file numafunc2.c.
| l_ok numaHistogramGetValFromRank | ( | NUMA * | na, |
| l_float32 | rank, | ||
| l_float32 * | prval ) |
| [in] | na | histogram |
| [in] | rank | fraction of total samples |
| [out] | prval | approx. to the bin value |
Notes:
(1) If we think of the histogram as a function y(x), this returns
the value x such that the integral of y(x) from the start
value to x gives the fraction 'rank' of the integral
of y(x) over all bins.
(2) This function only makes sense when applied to a Numa that
is a histogram. The values in the histogram can be ints and
floats, and are computed as floats. The val is returned
as a float, even though the buckets are of integer width.
(3) The numa parameters startx and binsize are used to
compute x from the Numa index i.
Definition at line 1590 of file numafunc2.c.
Referenced by numaGetStatsUsingHistogram().
| [in] | na | |
| [in] | maxbins | max number of histogram bins |
| [out] | pbinsize | [optional] size of histogram bins |
| [out] | pbinstart | [optional] start val of minimum bin; input NULL to force start at 0 |
Notes:
(1) This simple interface is designed for integer data.
The bins are of integer width and start on integer boundaries,
so the results on float data will not have high precision.
(2) Specify the max number of input bins. Then binsize,
the size of bins necessary to accommodate the input data,
is optionally returned. It is one of the sequence:
{1, 2, 5, 10, 20, 50, ...}.
(3) If &binstart is given, all values are accommodated,
and the min value of the starting bin is returned.
Otherwise, all negative values are discarded and
the histogram bins start at 0.
Definition at line 861 of file numafunc2.c.
References numaConvertToInt().
Referenced by numaGetRankBinValues().
| [in] | na | numa of floats; these may be integers |
| [in] | maxbins | max number of histogram bins; >= 1 |
Notes:
(1) This simple interface is designed for accurate binning
of both integer and float data.
(2) If the array data is integers, and the range of integers
is smaller than maxbins, they are binned as they fall,
with binsize = 1.
(3) If the range of data, (maxval - minval), is larger than
maxbins, or if the data is floats, they are binned into
exactly maxbins bins.
(4) Unlike numaMakeHistogram(), these bins in general have
non-integer location and width, even for integer data.
Definition at line 972 of file numafunc2.c.
Referenced by numaGetStatsUsingHistogram().
| [in] | na | |
| [in] | binsize | typically 1.0 |
| [in] | maxsize | of histogram ordinate |
Notes:
(1) This simple function generates a histogram of values
from na, discarding all values < 0.0 or greater than
min(maxsize, maxval), where maxval is the maximum value in na.
The histogram data is put in bins of size delx = binsize,
starting at x = 0.0. We use as many bins as are
needed to hold the data.
Definition at line 1054 of file numafunc2.c.
| l_ok numaMakeRankFromHistogram | ( | l_float32 | startx, |
| l_float32 | deltax, | ||
| NUMA * | nasy, | ||
| l_int32 | npts, | ||
| NUMA ** | pnax, | ||
| NUMA ** | pnay ) |
| [in] | startx | xval corresponding to first element in nasy |
| [in] | deltax | x increment between array elements in nasy |
| [in] | nasy | input histogram, assumed equally spaced |
| [in] | npts | number of points to evaluate rank function |
| [out] | pnax | [optional] array of x values in range |
| [out] | pnay | rank array of specified npts |
Definition at line 1455 of file numafunc2.c.
References L_LINEAR_INTERP, and numaNormalizeHistogram().
| [in] | nas | input histogram |
| [in] | tsum | target sum of all numbers in dest histogram; e.g., use tsum= 1.0 if this represents a probability distribution |
Definition at line 1147 of file numafunc2.c.
Referenced by grayInterHistogramStats(), numaDiscretizeHistoInBins(), and numaMakeRankFromHistogram().
| [in] | nas | |
| [in] | size | of sel; greater than 0, odd. The origin is implicitly in the center. |
Notes:
(1) The structuring element (sel) is linear, all "hits"
(2) If size == 1, this returns a copy
Definition at line 317 of file numafunc2.c.
References numaDilate(), and numaErode().
| [in] | nas | input histogram |
| [in] | newsize | number of old bins contained in each new bin |
Definition at line 1101 of file numafunc2.c.
| l_ok numaSelectCrossingThreshold | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_float32 | estthresh, | ||
| l_float32 * | pbestthresh ) |
| [in] | nax | [optional] numa of abscissa values; can be NULL |
| [in] | nay | signal |
| [in] | estthresh | estimated pixel threshold for crossing: e.g., for images, white <--> black; typ. ~120 |
| [out] | pbestthresh | robust estimate of threshold to use |
Notes:
(1) When a valid threshold is used, the number of crossings is
a maximum, because none are missed. If no threshold intersects
all the crossings, the crossings must be determined with
numaCrossingsByPeaks().
(2) estthresh is an input estimate of the threshold that should
be used. We compute the crossings with 41 thresholds
(20 below and 20 above). There is a range in which the
number of crossings is a maximum. Return a threshold
in the center of this stable plateau of crossings.
This can then be used with numaCrossingsByThreshold()
to get a good estimate of crossing locations.
(3) If the count of nay is less than 2, a warning is issued.
Definition at line 2803 of file numafunc2.c.
References numaCrossingsByThreshold().
| l_ok numaSimpleStats | ( | NUMA * | na, |
| l_int32 | first, | ||
| l_int32 | last, | ||
| l_float32 * | pmean, | ||
| l_float32 * | pvar, | ||
| l_float32 * | prvar ) |
| [in] | na | input numa |
| [in] | first | first element to use |
| [in] | last | last element to use; -1 to go to the end |
| [out] | pmean | [optional] mean value |
| [out] | pvar | [optional] variance |
| [out] | prvar | [optional] rms deviation from the mean |
Definition at line 442 of file numafunc2.c.
Referenced by grayInterHistogramStats().
| l_ok numaSplitDistribution | ( | NUMA * | na, |
| l_float32 | scorefract, | ||
| l_int32 * | psplitindex, | ||
| l_float32 * | pave1, | ||
| l_float32 * | pave2, | ||
| l_float32 * | pnum1, | ||
| l_float32 * | pnum2, | ||
| NUMA ** | pnascore ) |
| [in] | na | histogram |
| [in] | scorefract | fraction of the max score, used to determine range over which the histogram min is searched |
| [out] | psplitindex | [optional] index for splitting |
| [out] | pave1 | [optional] average of lower distribution |
| [out] | pave2 | [optional] average of upper distribution |
| [out] | pnum1 | [optional] population of lower distribution |
| [out] | pnum2 | [optional] population of upper distribution |
| [out] | pnascore | [optional] for debugging; otherwise use NULL |
Notes:
(1) This function is intended to be used on a distribution of
values that represent two sets, such as a histogram of
pixel values for an image with a fg and bg, and the goal
is to determine the averages of the two sets and the
best splitting point.
(2) The Otsu method finds a split point that divides the distribution
into two parts by maximizing a score function that is the
product of two terms:
(a) the square of the difference of centroids, (ave1 - ave2)^2
(b) fract1 * (1 - fract1)
where fract1 is the fraction in the lower distribution.
(3) This works well for images where the fg and bg are
each relatively homogeneous and well-separated in color.
However, if the actual fg and bg sets are very different
in size, and the bg is highly varied, as can occur in some
scanned document images, this will bias the split point
into the larger "bump" (i.e., toward the point where the
(b) term reaches its maximum of 0.25 at fract1 = 0.5).
To avoid this, we define a range of values near the
maximum of the score function, and choose the value within
this range such that the histogram itself has a minimum value.
The range is determined by scorefract: we include all abscissa
values to the left and right of the value that maximizes the
score, such that the score stays above (1 - scorefract) * maxscore.
The intuition behind this modification is to try to find
a split point that both has a high variance score and is
at or near a minimum in the histogram, so that the histogram
slope is small at the split point.
(4) We normalize the score so that if the two distributions
were of equal size and at opposite ends of the numa, the
score would be 1.0.
Definition at line 1974 of file numafunc2.c.
References numaGetHistogramStats().
| [in] | nas | |
| [in] | shift | add this to each number |
| [in] | scale | multiply each number by this |
Notes:
(1) Each number is shifted before scaling.
Definition at line 407 of file numafunc2.c.
Referenced by numaEarthMoverDistance().
| [in] | nas | |
| [in] | wc | half width of the convolution window |
Notes:
(1) This is a convolution. The window has width = 2 * wc + 1.
(2) We add a mirrored border of size wc to each end of the array.
Definition at line 574 of file numafunc2.c.
References L_MIRRORED_BORDER, and L_NOCOPY.
Referenced by grayInterHistogramStats(), and numaWindowedStats().
| [in] | nas | |
| [in] | wc | half width of the window |
Notes:
(1) The window has width = 2 * wc + 1.
(2) We add a mirrored border of size wc to each end of the array.
Definition at line 632 of file numafunc2.c.
References L_MIRRORED_BORDER, and L_NOCOPY.
Referenced by numaWindowedStats().
| [in] | nas | |
| [in] | halfwin | half width of window over which the median is found |
Notes:
(1) The requested window has width = 2 * halfwin + 1.
(2) If the input nas has fewer than 3 elements, return a copy.
(3) If the filter is too small (halfwin <= 0), return a copy.
(4) If the filter is too large, it is reduced in size.
(5) We add a mirrored border of size halfwin to each end of
the array to simplify the calculation by avoiding end-effects.
Definition at line 764 of file numafunc2.c.
References L_MIRRORED_BORDER.
| l_ok numaWindowedStats | ( | NUMA * | nas, |
| l_int32 | wc, | ||
| NUMA ** | pnam, | ||
| NUMA ** | pnams, | ||
| NUMA ** | pnav, | ||
| NUMA ** | pnarv ) |
| [in] | nas | input numa |
| [in] | wc | half width of the window |
| [out] | pnam | [optional] mean value in window |
| [out] | pnams | [optional] mean square value in window |
| [out] | pnav | [optional] variance in window |
| [out] | pnarv | [optional] rms deviation from the mean |
Notes:
(1) This is a high-level convenience function for calculating
any or all of these derived arrays.
(2) These statistical measures over the values in the
rectangular window are:
~ average value: [x] (nam)
~ average squared value: [x*x] (nams)
~ variance: [(x - [x])*(x - [x])] = [x*x] - [x]*[x] (nav)
~ square-root of variance: (narv)
where the brackets [ .. ] indicate that the average value is
to be taken over the window.
(3) Note that the variance is just the mean square difference from
the mean value; and the square root of the variance is the
root mean square difference from the mean, sometimes also
called the 'standard deviation'.
(4) Internally, use mirrored borders to handle values near the
end of each array.
Definition at line 525 of file numafunc2.c.
References numaWindowedMean(), numaWindowedMeanSquare(), and numaWindowedVariance().
| [in] | nam | windowed mean values |
| [in] | nams | windowed mean square values |
| [out] | pnav | [optional] numa of variance – the mean square deviation from the mean |
| [out] | pnarv | [optional] numa of rms deviation from the mean |
Notes:
(1) The numas of windowed mean and mean square are precomputed,
using numaWindowedMean() and numaWindowedMeanSquare().
(2) Either or both of the variance and square-root of variance
are returned, where the variance is the average over the
window of the mean square difference of the pixel value
from the mean:
[(x - [x])*(x - [x])] = [x*x] - [x]*[x]
Definition at line 698 of file numafunc2.c.
References L_NOCOPY.
Referenced by numaWindowedStats().
|
static |
Definition at line 145 of file numafunc2.c.
|
static |
Definition at line 149 of file numafunc2.c.