Leptonica 1.82.0
Image processing and image analysis suite
numafunc2.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
137#ifdef HAVE_CONFIG_H
138#include <config_auto.h>
139#endif /* HAVE_CONFIG_H */
140
141#include <math.h>
142#include "allheaders.h"
143
144 /* bin sizes in numaMakeHistogram() */
145static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\
146 2000, 5000, 10000, 20000, 50000, 100000, 200000,\
147 500000, 1000000, 2000000, 5000000, 10000000,\
148 200000000, 50000000, 100000000};
149static const l_int32 NBinSizes = 24;
150
151
152#ifndef NO_CONSOLE_IO
153#define DEBUG_HISTO 0
154#define DEBUG_CROSSINGS 0
155#define DEBUG_FREQUENCY 0
156#endif /* ~NO_CONSOLE_IO */
157
158/*----------------------------------------------------------------------*
159 * Morphological operations *
160 *----------------------------------------------------------------------*/
182NUMA *
184 l_int32 size)
185{
186l_int32 i, j, n, hsize, len;
187l_float32 minval;
188l_float32 *fa, *fas, *fad;
189NUMA *nad;
190
191 PROCNAME("numaErode");
192
193 if (!nas)
194 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
195 if (size <= 0)
196 return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
197 if ((size & 1) == 0 ) {
198 L_WARNING("sel size must be odd; increasing by 1\n", procName);
199 size++;
200 }
201
202 if (size == 1)
203 return numaCopy(nas);
204
205 /* Make a source fa (fas) that has an added (size / 2) boundary
206 * on left and right, contains a copy of nas in the interior region
207 * (between 'size' and 'size + n', and has large values
208 * inserted in the boundary (because it is an erosion). */
209 n = numaGetCount(nas);
210 hsize = size / 2;
211 len = n + 2 * hsize;
212 if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
213 return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
214 for (i = 0; i < hsize; i++)
215 fas[i] = 1.0e37;
216 for (i = hsize + n; i < len; i++)
217 fas[i] = 1.0e37;
218 fa = numaGetFArray(nas, L_NOCOPY);
219 for (i = 0; i < n; i++)
220 fas[hsize + i] = fa[i];
221
222 nad = numaMakeConstant(0, n);
223 numaCopyParameters(nad, nas);
224 fad = numaGetFArray(nad, L_NOCOPY);
225 for (i = 0; i < n; i++) {
226 minval = 1.0e37; /* start big */
227 for (j = 0; j < size; j++)
228 minval = L_MIN(minval, fas[i + j]);
229 fad[i] = minval;
230 }
231
232 LEPT_FREE(fas);
233 return nad;
234}
235
236
251NUMA *
253 l_int32 size)
254{
255l_int32 i, j, n, hsize, len;
256l_float32 maxval;
257l_float32 *fa, *fas, *fad;
258NUMA *nad;
259
260 PROCNAME("numaDilate");
261
262 if (!nas)
263 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
264 if (size <= 0)
265 return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
266 if ((size & 1) == 0 ) {
267 L_WARNING("sel size must be odd; increasing by 1\n", procName);
268 size++;
269 }
270
271 if (size == 1)
272 return numaCopy(nas);
273
274 /* Make a source fa (fas) that has an added (size / 2) boundary
275 * on left and right, contains a copy of nas in the interior region
276 * (between 'size' and 'size + n', and has small values
277 * inserted in the boundary (because it is a dilation). */
278 n = numaGetCount(nas);
279 hsize = size / 2;
280 len = n + 2 * hsize;
281 if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
282 return (NUMA *)ERROR_PTR("fas not made", procName, NULL);
283 for (i = 0; i < hsize; i++)
284 fas[i] = -1.0e37;
285 for (i = hsize + n; i < len; i++)
286 fas[i] = -1.0e37;
287 fa = numaGetFArray(nas, L_NOCOPY);
288 for (i = 0; i < n; i++)
289 fas[hsize + i] = fa[i];
290
291 nad = numaMakeConstant(0, n);
292 numaCopyParameters(nad, nas);
293 fad = numaGetFArray(nad, L_NOCOPY);
294 for (i = 0; i < n; i++) {
295 maxval = -1.0e37; /* start small */
296 for (j = 0; j < size; j++)
297 maxval = L_MAX(maxval, fas[i + j]);
298 fad[i] = maxval;
299 }
300
301 LEPT_FREE(fas);
302 return nad;
303}
304
305
320NUMA *
322 l_int32 size)
323{
324NUMA *nat, *nad;
325
326 PROCNAME("numaOpen");
327
328 if (!nas)
329 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
330 if (size <= 0)
331 return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
332 if ((size & 1) == 0 ) {
333 L_WARNING("sel size must be odd; increasing by 1\n", procName);
334 size++;
335 }
336
337 if (size == 1)
338 return numaCopy(nas);
339
340 nat = numaErode(nas, size);
341 nad = numaDilate(nat, size);
342 numaDestroy(&nat);
343 return nad;
344}
345
346
367NUMA *
369 l_int32 size)
370{
371NUMA *nab, *nat1, *nat2, *nad;
372
373 PROCNAME("numaClose");
374
375 if (!nas)
376 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
377 if (size <= 0)
378 return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);
379 if ((size & 1) == 0 ) {
380 L_WARNING("sel size must be odd; increasing by 1\n", procName);
381 size++;
382 }
383
384 if (size == 1)
385 return numaCopy(nas);
386
387 nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */
388 nat1 = numaDilate(nab, size);
389 nat2 = numaErode(nat1, size);
390 nad = numaRemoveBorder(nat2, size, size);
391 numaDestroy(&nab);
392 numaDestroy(&nat1);
393 numaDestroy(&nat2);
394 return nad;
395}
396
397
398/*----------------------------------------------------------------------*
399 * Other transforms *
400 *----------------------------------------------------------------------*/
414NUMA *
416 l_float32 shift,
417 l_float32 scale)
418{
419l_int32 i, n;
420l_float32 val;
421NUMA *nad;
422
423 PROCNAME("numaTransform");
424
425 if (!nas)
426 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
427 n = numaGetCount(nas);
428 if ((nad = numaCreate(n)) == NULL)
429 return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
430 numaCopyParameters(nad, nas);
431 for (i = 0; i < n; i++) {
432 numaGetFValue(nas, i, &val);
433 val = scale * (val + shift);
434 numaAddNumber(nad, val);
435 }
436 return nad;
437}
438
439
451l_ok
453 l_int32 first,
454 l_int32 last,
455 l_float32 *pmean,
456 l_float32 *pvar,
457 l_float32 *prvar)
458{
459l_int32 i, n, ni;
460l_float32 sum, sumsq, val, mean, var;
461
462 PROCNAME("numaSimpleStats");
463
464 if (pmean) *pmean = 0.0;
465 if (pvar) *pvar = 0.0;
466 if (prvar) *prvar = 0.0;
467 if (!pmean && !pvar && !prvar)
468 return ERROR_INT("nothing requested", procName, 1);
469 if (!na)
470 return ERROR_INT("na not defined", procName, 1);
471 if ((n = numaGetCount(na)) == 0)
472 return ERROR_INT("na is empty", procName, 1);
473 first = L_MAX(0, first);
474 if (last < 0) last = n - 1;
475 if (first >= n)
476 return ERROR_INT("invalid first", procName, 1);
477 if (last >= n) {
478 L_WARNING("last = %d is beyond max index = %d; adjusting\n",
479 procName, last, n - 1);
480 last = n - 1;
481 }
482 if (first > last)
483 return ERROR_INT("first > last\n", procName, 1);
484 ni = last - first + 1;
485 sum = sumsq = 0.0;
486 for (i = first; i <= last; i++) {
487 numaGetFValue(na, i, &val);
488 sum += val;
489 sumsq += val * val;
490 }
491
492 mean = sum / ni;
493 if (pmean)
494 *pmean = mean;
495 if (pvar || prvar) {
496 var = sumsq / ni - mean * mean;
497 if (pvar) *pvar = var;
498 if (prvar) *prvar = sqrtf(var);
499 }
500
501 return 0;
502}
503
504
536l_ok
538 l_int32 wc,
539 NUMA **pnam,
540 NUMA **pnams,
541 NUMA **pnav,
542 NUMA **pnarv)
543{
544NUMA *nam, *nams;
545
546 PROCNAME("numaWindowedStats");
547
548 if (!nas)
549 return ERROR_INT("nas not defined", procName, 1);
550 if (2 * wc + 1 > numaGetCount(nas))
551 L_WARNING("filter wider than input array!\n", procName);
552
553 if (!pnav && !pnarv) {
554 if (pnam) *pnam = numaWindowedMean(nas, wc);
555 if (pnams) *pnams = numaWindowedMeanSquare(nas, wc);
556 return 0;
557 }
558
559 nam = numaWindowedMean(nas, wc);
560 nams = numaWindowedMeanSquare(nas, wc);
561 numaWindowedVariance(nam, nams, pnav, pnarv);
562 if (pnam)
563 *pnam = nam;
564 else
565 numaDestroy(&nam);
566 if (pnams)
567 *pnams = nams;
568 else
569 numaDestroy(&nams);
570 return 0;
571}
572
573
587NUMA *
589 l_int32 wc)
590{
591l_int32 i, n, n1, width;
592l_float32 sum, norm;
593l_float32 *fa1, *fad, *suma;
594NUMA *na1, *nad;
595
596 PROCNAME("numaWindowedMean");
597
598 if (!nas)
599 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
600 n = numaGetCount(nas);
601 width = 2 * wc + 1; /* filter width */
602 if (width > n)
603 L_WARNING("filter wider than input array!\n", procName);
604
605 na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
606 n1 = n + 2 * wc;
607 fa1 = numaGetFArray(na1, L_NOCOPY);
608 nad = numaMakeConstant(0, n);
609 fad = numaGetFArray(nad, L_NOCOPY);
610
611 /* Make sum array; note the indexing */
612 if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
613 numaDestroy(&na1);
614 numaDestroy(&nad);
615 return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
616 }
617 sum = 0.0;
618 suma[0] = 0.0;
619 for (i = 0; i < n1; i++) {
620 sum += fa1[i];
621 suma[i + 1] = sum;
622 }
623
624 norm = 1. / (2 * wc + 1);
625 for (i = 0; i < n; i++)
626 fad[i] = norm * (suma[width + i] - suma[i]);
627
628 LEPT_FREE(suma);
629 numaDestroy(&na1);
630 return nad;
631}
632
633
647NUMA *
649 l_int32 wc)
650{
651l_int32 i, n, n1, width;
652l_float32 sum, norm;
653l_float32 *fa1, *fad, *suma;
654NUMA *na1, *nad;
655
656 PROCNAME("numaWindowedMeanSquare");
657
658 if (!nas)
659 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
660 n = numaGetCount(nas);
661 width = 2 * wc + 1; /* filter width */
662 if (width > n)
663 L_WARNING("filter wider than input array!\n", procName);
664
665 na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
666 n1 = n + 2 * wc;
667 fa1 = numaGetFArray(na1, L_NOCOPY);
668 nad = numaMakeConstant(0, n);
669 fad = numaGetFArray(nad, L_NOCOPY);
670
671 /* Make sum array; note the indexing */
672 if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
673 numaDestroy(&na1);
674 numaDestroy(&nad);
675 return (NUMA *)ERROR_PTR("suma not made", procName, NULL);
676 }
677 sum = 0.0;
678 suma[0] = 0.0;
679 for (i = 0; i < n1; i++) {
680 sum += fa1[i] * fa1[i];
681 suma[i + 1] = sum;
682 }
683
684 norm = 1. / (2 * wc + 1);
685 for (i = 0; i < n; i++)
686 fad[i] = norm * (suma[width + i] - suma[i]);
687
688 LEPT_FREE(suma);
689 numaDestroy(&na1);
690 return nad;
691}
692
693
715l_ok
717 NUMA *nams,
718 NUMA **pnav,
719 NUMA **pnarv)
720{
721l_int32 i, nm, nms;
722l_float32 var;
723l_float32 *fam, *fams, *fav, *farv;
724NUMA *nav, *narv; /* variance and square root of variance */
725
726 PROCNAME("numaWindowedVariance");
727
728 if (pnav) *pnav = NULL;
729 if (pnarv) *pnarv = NULL;
730 if (!pnav && !pnarv)
731 return ERROR_INT("neither &nav nor &narv are defined", procName, 1);
732 if (!nam)
733 return ERROR_INT("nam not defined", procName, 1);
734 if (!nams)
735 return ERROR_INT("nams not defined", procName, 1);
736 nm = numaGetCount(nam);
737 nms = numaGetCount(nams);
738 if (nm != nms)
739 return ERROR_INT("sizes of nam and nams differ", procName, 1);
740
741 if (pnav) {
742 nav = numaMakeConstant(0, nm);
743 *pnav = nav;
744 fav = numaGetFArray(nav, L_NOCOPY);
745 }
746 if (pnarv) {
747 narv = numaMakeConstant(0, nm);
748 *pnarv = narv;
749 farv = numaGetFArray(narv, L_NOCOPY);
750 }
751 fam = numaGetFArray(nam, L_NOCOPY);
752 fams = numaGetFArray(nams, L_NOCOPY);
753
754 for (i = 0; i < nm; i++) {
755 var = fams[i] - fam[i] * fam[i];
756 if (pnav)
757 fav[i] = var;
758 if (pnarv)
759 farv[i] = sqrtf(var);
760 }
761
762 return 0;
763}
764
765
783NUMA *
785 l_int32 halfwin)
786{
787l_int32 i, n;
788l_float32 medval;
789NUMA *na1, *na2, *nad;
790
791 PROCNAME("numaWindowedMedian");
792
793 if (!nas)
794 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
795 if ((n = numaGetCount(nas)) < 3)
796 return numaCopy(nas);
797 if (halfwin <= 0) {
798 L_ERROR("filter too small; returning a copy\n", procName);
799 return numaCopy(nas);
800 }
801
802 if (halfwin > (n - 1) / 2) {
803 halfwin = (n - 1) / 2;
804 L_INFO("reducing filter to halfwin = %d\n", procName, halfwin);
805 }
806
807 /* Add a border to both ends */
808 na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER);
809
810 /* Get the median value at the center of each window, corresponding
811 * to locations in the input nas. */
812 nad = numaCreate(n);
813 for (i = 0; i < n; i++) {
814 na2 = numaClipToInterval(na1, i, i + 2 * halfwin);
815 numaGetMedian(na2, &medval);
816 numaAddNumber(nad, medval);
817 numaDestroy(&na2);
818 }
819
820 numaDestroy(&na1);
821 return nad;
822}
823
824
832NUMA *
834{
835l_int32 i, n, ival;
836NUMA *nad;
837
838 PROCNAME("numaConvertToInt");
839
840 if (!nas)
841 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
842
843 n = numaGetCount(nas);
844 if ((nad = numaCreate(n)) == NULL)
845 return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
846 numaCopyParameters(nad, nas);
847 for (i = 0; i < n; i++) {
848 numaGetIValue(nas, i, &ival);
849 numaAddNumber(nad, ival);
850 }
851 return nad;
852}
853
854
855/*----------------------------------------------------------------------*
856 * Histogram generation and statistics *
857 *----------------------------------------------------------------------*/
884NUMA *
886 l_int32 maxbins,
887 l_int32 *pbinsize,
888 l_int32 *pbinstart)
889{
890l_int32 i, n, ival, hval;
891l_int32 iminval, imaxval, range, binsize, nbins, ibin;
892l_float32 val, ratio;
893NUMA *nai, *nahist;
894
895 PROCNAME("numaMakeHistogram");
896
897 if (pbinsize) *pbinsize = 0;
898 if (pbinstart) *pbinstart = 0;
899 if (!na)
900 return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
901 if (maxbins < 1)
902 return (NUMA *)ERROR_PTR("maxbins < 1", procName, NULL);
903
904 /* Determine input range */
905 numaGetMin(na, &val, NULL);
906 iminval = (l_int32)(val + 0.5);
907 numaGetMax(na, &val, NULL);
908 imaxval = (l_int32)(val + 0.5);
909 if (pbinstart == NULL) { /* clip negative vals; start from 0 */
910 iminval = 0;
911 if (imaxval < 0)
912 return (NUMA *)ERROR_PTR("all values < 0", procName, NULL);
913 }
914
915 /* Determine binsize */
916 range = imaxval - iminval + 1;
917 if (range > maxbins - 1) {
918 ratio = (l_float64)range / (l_float64)maxbins;
919 binsize = 0;
920 for (i = 0; i < NBinSizes; i++) {
921 if (ratio < BinSizeArray[i]) {
922 binsize = BinSizeArray[i];
923 break;
924 }
925 }
926 if (binsize == 0)
927 return (NUMA *)ERROR_PTR("numbers too large", procName, NULL);
928 } else {
929 binsize = 1;
930 }
931 if (pbinsize) *pbinsize = binsize;
932 nbins = 1 + range / binsize; /* +1 seems to be sufficient */
933
934 /* Redetermine iminval */
935 if (pbinstart && binsize > 1) {
936 if (iminval >= 0)
937 iminval = binsize * (iminval / binsize);
938 else
939 iminval = binsize * ((iminval - binsize + 1) / binsize);
940 }
941 if (pbinstart) *pbinstart = iminval;
942
943#if DEBUG_HISTO
944 lept_stderr(" imaxval = %d, range = %d, nbins = %d\n",
945 imaxval, range, nbins);
946#endif /* DEBUG_HISTO */
947
948 /* Use integerized data for input */
949 if ((nai = numaConvertToInt(na)) == NULL)
950 return (NUMA *)ERROR_PTR("nai not made", procName, NULL);
951 n = numaGetCount(nai);
952
953 /* Make histogram, converting value in input array
954 * into a bin number for this histogram array. */
955 if ((nahist = numaCreate(nbins)) == NULL) {
956 numaDestroy(&nai);
957 return (NUMA *)ERROR_PTR("nahist not made", procName, NULL);
958 }
959 numaSetCount(nahist, nbins);
960 numaSetParameters(nahist, iminval, binsize);
961 for (i = 0; i < n; i++) {
962 numaGetIValue(nai, i, &ival);
963 ibin = (ival - iminval) / binsize;
964 if (ibin >= 0 && ibin < nbins) {
965 numaGetIValue(nahist, ibin, &hval);
966 numaSetValue(nahist, ibin, hval + 1.0);
967 }
968 }
969
970 numaDestroy(&nai);
971 return nahist;
972}
973
974
997NUMA *
999 l_int32 maxbins)
1000{
1001l_int32 i, n, imin, imax, irange, ibin, ival, allints;
1002l_float32 minval, maxval, range, binsize, fval;
1003NUMA *nah;
1004
1005 PROCNAME("numaMakeHistogramAuto");
1006
1007 if (!na)
1008 return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1009 maxbins = L_MAX(1, maxbins);
1010
1011 /* Determine input range */
1012 numaGetMin(na, &minval, NULL);
1013 numaGetMax(na, &maxval, NULL);
1014
1015 /* Determine if values are all integers */
1016 n = numaGetCount(na);
1017 numaHasOnlyIntegers(na, &allints);
1018
1019 /* Do simple integer binning if possible */
1020 if (allints && (maxval - minval < maxbins)) {
1021 imin = (l_int32)minval;
1022 imax = (l_int32)maxval;
1023 irange = imax - imin + 1;
1024 nah = numaCreate(irange);
1025 numaSetCount(nah, irange); /* init */
1026 numaSetParameters(nah, minval, 1.0);
1027 for (i = 0; i < n; i++) {
1028 numaGetIValue(na, i, &ival);
1029 ibin = ival - imin;
1030 numaGetIValue(nah, ibin, &ival);
1031 numaSetValue(nah, ibin, ival + 1.0);
1032 }
1033
1034 return nah;
1035 }
1036
1037 /* Do float binning, even if the data is integers. */
1038 range = maxval - minval;
1039 binsize = range / (l_float32)maxbins;
1040 if (range == 0.0) {
1041 nah = numaCreate(1);
1042 numaSetParameters(nah, minval, binsize);
1043 numaAddNumber(nah, n);
1044 return nah;
1045 }
1046 nah = numaCreate(maxbins);
1047 numaSetCount(nah, maxbins);
1048 numaSetParameters(nah, minval, binsize);
1049 for (i = 0; i < n; i++) {
1050 numaGetFValue(na, i, &fval);
1051 ibin = (l_int32)((fval - minval) / binsize);
1052 ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */
1053 numaGetIValue(nah, ibin, &ival);
1054 numaSetValue(nah, ibin, ival + 1.0);
1055 }
1056
1057 return nah;
1058}
1059
1060
1081NUMA *
1083 l_float32 binsize,
1084 l_float32 maxsize)
1085{
1086l_int32 i, n, nbins, ival, ibin;
1087l_float32 val, maxval;
1088NUMA *nad;
1089
1090 PROCNAME("numaMakeHistogramClipped");
1091
1092 if (!na)
1093 return (NUMA *)ERROR_PTR("na not defined", procName, NULL);
1094 if (binsize <= 0.0)
1095 return (NUMA *)ERROR_PTR("binsize must be > 0.0", procName, NULL);
1096 if (binsize > maxsize)
1097 binsize = maxsize; /* just one bin */
1098
1099 numaGetMax(na, &maxval, NULL);
1100 n = numaGetCount(na);
1101 maxsize = L_MIN(maxsize, maxval);
1102 nbins = (l_int32)(maxsize / binsize) + 1;
1103
1104/* lept_stderr("maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
1105
1106 if ((nad = numaCreate(nbins)) == NULL)
1107 return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1108 numaSetParameters(nad, 0.0, binsize);
1109 numaSetCount(nad, nbins); /* interpret zeroes in bins as data */
1110 for (i = 0; i < n; i++) {
1111 numaGetFValue(na, i, &val);
1112 ibin = (l_int32)(val / binsize);
1113 if (ibin >= 0 && ibin < nbins) {
1114 numaGetIValue(nad, ibin, &ival);
1115 numaSetValue(nad, ibin, ival + 1.0);
1116 }
1117 }
1118
1119 return nad;
1120}
1121
1122
1130NUMA *
1132 l_int32 newsize)
1133{
1134l_int32 i, j, ns, nd, index, count, val;
1135l_float32 start, oldsize;
1136NUMA *nad;
1137
1138 PROCNAME("numaRebinHistogram");
1139
1140 if (!nas)
1141 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1142 if (newsize <= 1)
1143 return (NUMA *)ERROR_PTR("newsize must be > 1", procName, NULL);
1144 if ((ns = numaGetCount(nas)) == 0)
1145 return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
1146
1147 nd = (ns + newsize - 1) / newsize;
1148 if ((nad = numaCreate(nd)) == NULL)
1149 return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1150 numaGetParameters(nad, &start, &oldsize);
1151 numaSetParameters(nad, start, oldsize * newsize);
1152
1153 for (i = 0; i < nd; i++) { /* new bins */
1154 count = 0;
1155 index = i * newsize;
1156 for (j = 0; j < newsize; j++) {
1157 if (index < ns) {
1158 numaGetIValue(nas, index, &val);
1159 count += val;
1160 index++;
1161 }
1162 }
1163 numaAddNumber(nad, count);
1164 }
1165
1166 return nad;
1167}
1168
1169
1178NUMA *
1180 l_float32 tsum)
1181{
1182l_int32 i, ns;
1183l_float32 sum, factor, fval;
1184NUMA *nad;
1185
1186 PROCNAME("numaNormalizeHistogram");
1187
1188 if (!nas)
1189 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
1190 if (tsum <= 0.0)
1191 return (NUMA *)ERROR_PTR("tsum must be > 0.0", procName, NULL);
1192 if ((ns = numaGetCount(nas)) == 0)
1193 return (NUMA *)ERROR_PTR("no bins in nas", procName, NULL);
1194
1195 numaGetSum(nas, &sum);
1196 factor = tsum / sum;
1197
1198 if ((nad = numaCreate(ns)) == NULL)
1199 return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
1200 numaCopyParameters(nad, nas);
1201
1202 for (i = 0; i < ns; i++) {
1203 numaGetFValue(nas, i, &fval);
1204 fval *= factor;
1205 numaAddNumber(nad, fval);
1206 }
1207
1208 return nad;
1209}
1210
1211
1260l_ok
1262 l_int32 maxbins,
1263 l_float32 *pmin,
1264 l_float32 *pmax,
1265 l_float32 *pmean,
1266 l_float32 *pvariance,
1267 l_float32 *pmedian,
1268 l_float32 rank,
1269 l_float32 *prval,
1270 NUMA **phisto)
1271{
1272l_int32 i, n;
1273l_float32 minval, maxval, fval, mean, sum;
1274NUMA *nah;
1275
1276 PROCNAME("numaGetStatsUsingHistogram");
1277
1278 if (pmin) *pmin = 0.0;
1279 if (pmax) *pmax = 0.0;
1280 if (pmean) *pmean = 0.0;
1281 if (pvariance) *pvariance = 0.0;
1282 if (pmedian) *pmedian = 0.0;
1283 if (prval) *prval = 0.0;
1284 if (phisto) *phisto = NULL;
1285 if (!na)
1286 return ERROR_INT("na not defined", procName, 1);
1287 if ((n = numaGetCount(na)) == 0)
1288 return ERROR_INT("numa is empty", procName, 1);
1289
1290 numaGetMin(na, &minval, NULL);
1291 numaGetMax(na, &maxval, NULL);
1292 if (pmin) *pmin = minval;
1293 if (pmax) *pmax = maxval;
1294 if (pmean || pvariance) {
1295 sum = 0.0;
1296 for (i = 0; i < n; i++) {
1297 numaGetFValue(na, i, &fval);
1298 sum += fval;
1299 }
1300 mean = sum / (l_float32)n;
1301 if (pmean) *pmean = mean;
1302 }
1303 if (pvariance) {
1304 sum = 0.0;
1305 for (i = 0; i < n; i++) {
1306 numaGetFValue(na, i, &fval);
1307 sum += fval * fval;
1308 }
1309 *pvariance = sum / (l_float32)n - mean * mean;
1310 }
1311
1312 if (!pmedian && !prval && !phisto)
1313 return 0;
1314
1315 nah = numaMakeHistogramAuto(na, maxbins);
1316 if (pmedian)
1317 numaHistogramGetValFromRank(nah, 0.5, pmedian);
1318 if (prval)
1319 numaHistogramGetValFromRank(nah, rank, prval);
1320 if (phisto)
1321 *phisto = nah;
1322 else
1323 numaDestroy(&nah);
1324 return 0;
1325}
1326
1327
1351l_ok
1353 l_float32 startx,
1354 l_float32 deltax,
1355 l_float32 *pxmean,
1356 l_float32 *pxmedian,
1357 l_float32 *pxmode,
1358 l_float32 *pxvariance)
1359{
1360 PROCNAME("numaGetHistogramStats");
1361
1362 if (pxmean) *pxmean = 0.0;
1363 if (pxmedian) *pxmedian = 0.0;
1364 if (pxmode) *pxmode = 0.0;
1365 if (pxvariance) *pxvariance = 0.0;
1366 if (!nahisto)
1367 return ERROR_INT("nahisto not defined", procName, 1);
1368
1369 return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1,
1370 pxmean, pxmedian, pxmode,
1371 pxvariance);
1372}
1373
1374
1400l_ok
1402 l_float32 startx,
1403 l_float32 deltax,
1404 l_int32 ifirst,
1405 l_int32 ilast,
1406 l_float32 *pxmean,
1407 l_float32 *pxmedian,
1408 l_float32 *pxmode,
1409 l_float32 *pxvariance)
1410{
1411l_int32 i, n, imax;
1412l_float32 sum, sumval, halfsum, moment, var, x, y, ymax;
1413
1414 PROCNAME("numaGetHistogramStatsOnInterval");
1415
1416 if (pxmean) *pxmean = 0.0;
1417 if (pxmedian) *pxmedian = 0.0;
1418 if (pxmode) *pxmode = 0.0;
1419 if (pxvariance) *pxvariance = 0.0;
1420 if (!nahisto)
1421 return ERROR_INT("nahisto not defined", procName, 1);
1422 if (!pxmean && !pxmedian && !pxmode && !pxvariance)
1423 return ERROR_INT("nothing to compute", procName, 1);
1424
1425 n = numaGetCount(nahisto);
1426 ifirst = L_MAX(0, ifirst);
1427 if (ilast < 0) ilast = n - 1;
1428 if (ifirst >= n)
1429 return ERROR_INT("invalid ifirst", procName, 1);
1430 if (ilast >= n) {
1431 L_WARNING("ilast = %d is beyond max index = %d; adjusting\n",
1432 procName, ilast, n - 1);
1433 ilast = n - 1;
1434 }
1435 if (ifirst > ilast)
1436 return ERROR_INT("ifirst > ilast", procName, 1);
1437 for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
1438 x = startx + i * deltax;
1439 numaGetFValue(nahisto, i, &y);
1440 sum += y;
1441 moment += x * y;
1442 var += x * x * y;
1443 }
1444 if (sum == 0.0) {
1445 L_INFO("sum is 0\n", procName);
1446 return 0;
1447 }
1448
1449 if (pxmean)
1450 *pxmean = moment / sum;
1451 if (pxvariance)
1452 *pxvariance = var / sum - moment * moment / (sum * sum);
1453
1454 if (pxmedian) {
1455 halfsum = sum / 2.0;
1456 for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
1457 numaGetFValue(nahisto, i, &y);
1458 sumval += y;
1459 if (sumval >= halfsum) {
1460 *pxmedian = startx + i * deltax;
1461 break;
1462 }
1463 }
1464 }
1465
1466 if (pxmode) {
1467 imax = -1;
1468 ymax = -1.0e10;
1469 for (i = ifirst; i <= ilast; i++) {
1470 numaGetFValue(nahisto, i, &y);
1471 if (y > ymax) {
1472 ymax = y;
1473 imax = i;
1474 }
1475 }
1476 *pxmode = startx + imax * deltax;
1477 }
1478
1479 return 0;
1480}
1481
1482
1494l_ok
1496 l_float32 deltax,
1497 NUMA *nasy,
1498 l_int32 npts,
1499 NUMA **pnax,
1500 NUMA **pnay)
1501{
1502l_int32 i, n;
1503l_float32 sum, fval;
1504NUMA *nan, *nar;
1505
1506 PROCNAME("numaMakeRankFromHistogram");
1507
1508 if (pnax) *pnax = NULL;
1509 if (!pnay)
1510 return ERROR_INT("&nay not defined", procName, 1);
1511 *pnay = NULL;
1512 if (!nasy)
1513 return ERROR_INT("nasy not defined", procName, 1);
1514 if ((n = numaGetCount(nasy)) == 0)
1515 return ERROR_INT("no bins in nas", procName, 1);
1516
1517 /* Normalize and generate the rank array corresponding to
1518 * the binned histogram. */
1519 nan = numaNormalizeHistogram(nasy, 1.0);
1520 nar = numaCreate(n + 1); /* rank numa corresponding to nan */
1521 sum = 0.0;
1522 numaAddNumber(nar, sum); /* first element is 0.0 */
1523 for (i = 0; i < n; i++) {
1524 numaGetFValue(nan, i, &fval);
1525 sum += fval;
1526 numaAddNumber(nar, sum);
1527 }
1528
1529 /* Compute rank array on full range with specified
1530 * number of points and correspondence to x-values. */
1531 numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
1532 startx, startx + n * deltax, npts,
1533 pnax, pnay);
1534 numaDestroy(&nan);
1535 numaDestroy(&nar);
1536 return 0;
1537}
1538
1539
1562l_ok
1564 l_float32 rval,
1565 l_float32 *prank)
1566{
1567l_int32 i, ibinval, n;
1568l_float32 startval, binsize, binval, maxval, fractval, total, sum, val;
1569
1570 PROCNAME("numaHistogramGetRankFromVal");
1571
1572 if (!prank)
1573 return ERROR_INT("prank not defined", procName, 1);
1574 *prank = 0.0;
1575 if (!na)
1576 return ERROR_INT("na not defined", procName, 1);
1577 numaGetParameters(na, &startval, &binsize);
1578 n = numaGetCount(na);
1579 if (rval < startval)
1580 return 0;
1581 maxval = startval + n * binsize;
1582 if (rval > maxval) {
1583 *prank = 1.0;
1584 return 0;
1585 }
1586
1587 binval = (rval - startval) / binsize;
1588 ibinval = (l_int32)binval;
1589 if (ibinval >= n) {
1590 *prank = 1.0;
1591 return 0;
1592 }
1593 fractval = binval - (l_float32)ibinval;
1594
1595 sum = 0.0;
1596 for (i = 0; i < ibinval; i++) {
1597 numaGetFValue(na, i, &val);
1598 sum += val;
1599 }
1600 numaGetFValue(na, ibinval, &val);
1601 sum += fractval * val;
1602 numaGetSum(na, &total);
1603 *prank = sum / total;
1604
1605/* lept_stderr("binval = %7.3f, rank = %7.3f\n", binval, *prank); */
1606
1607 return 0;
1608}
1609
1610
1633l_ok
1635 l_float32 rank,
1636 l_float32 *prval)
1637{
1638l_int32 i, n;
1639l_float32 startval, binsize, rankcount, total, sum, fract, val;
1640
1641 PROCNAME("numaHistogramGetValFromRank");
1642
1643 if (!prval)
1644 return ERROR_INT("prval not defined", procName, 1);
1645 *prval = 0.0;
1646 if (!na)
1647 return ERROR_INT("na not defined", procName, 1);
1648 if (rank < 0.0) {
1649 L_WARNING("rank < 0; setting to 0.0\n", procName);
1650 rank = 0.0;
1651 }
1652 if (rank > 1.0) {
1653 L_WARNING("rank > 1.0; setting to 1.0\n", procName);
1654 rank = 1.0;
1655 }
1656
1657 n = numaGetCount(na);
1658 numaGetParameters(na, &startval, &binsize);
1659 numaGetSum(na, &total);
1660 rankcount = rank * total; /* count that corresponds to rank */
1661 sum = 0.0;
1662 for (i = 0; i < n; i++) {
1663 numaGetFValue(na, i, &val);
1664 if (sum + val >= rankcount)
1665 break;
1666 sum += val;
1667 }
1668 if (val <= 0.0) /* can == 0 if rank == 0.0 */
1669 fract = 0.0;
1670 else /* sum + fract * val = rankcount */
1671 fract = (rankcount - sum) / val;
1672
1673 /* The use of the fraction of a bin allows a simple calculation
1674 * for the histogram value at the given rank. */
1675 *prval = startval + binsize * ((l_float32)i + fract);
1676
1677/* lept_stderr("rank = %7.3f, val = %7.3f\n", rank, *prval); */
1678
1679 return 0;
1680}
1681
1682
1705l_ok
1707 l_int32 nbins,
1708 NUMA **pnabinval)
1709{
1710NUMA *nabinval; /* average gray value in the bins */
1711NUMA *naeach;
1712l_int32 i, ntot, count, bincount, binindex, binsize;
1713l_float32 sum, val, ave;
1714
1715 PROCNAME("numaDiscretizeSortedInBins");
1716
1717 if (!pnabinval)
1718 return ERROR_INT("&nabinval not defined", procName, 1);
1719 *pnabinval = NULL;
1720 if (!na)
1721 return ERROR_INT("na not defined", procName, 1);
1722 if (nbins < 2)
1723 return ERROR_INT("nbins must be > 1", procName, 1);
1724
1725 /* Get the number of items in each bin */
1726 ntot = numaGetCount(na);
1727 if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL)
1728 return ERROR_INT("naeach not made", procName, 1);
1729
1730 /* Get the average value in each bin */
1731 sum = 0.0;
1732 bincount = 0;
1733 binindex = 0;
1734 numaGetIValue(naeach, 0, &binsize);
1735 nabinval = numaCreate(nbins);
1736 for (i = 0; i < ntot; i++) {
1737 numaGetFValue(na, i, &val);
1738 bincount++;
1739 sum += val;
1740 if (bincount == binsize) { /* add bin entry */
1741 ave = sum / binsize;
1742 numaAddNumber(nabinval, ave);
1743 sum = 0.0;
1744 bincount = 0;
1745 binindex++;
1746 if (binindex == nbins) break;
1747 numaGetIValue(naeach, binindex, &binsize);
1748 }
1749 }
1750 *pnabinval = nabinval;
1751
1752 numaDestroy(&naeach);
1753 return 0;
1754}
1755
1756
1782l_ok
1784 l_int32 nbins,
1785 NUMA **pnabinval,
1786 NUMA **pnarank)
1787{
1788NUMA *nabinval; /* average gray value in the bins */
1789NUMA *narank; /* rank value as function of input value */
1790NUMA *naeach, *nan;
1791l_int32 i, j, k, nxvals, occup, count, bincount, binindex, binsize;
1792l_float32 sum, ave, ntot;
1793
1794 PROCNAME("numaDiscretizeHistoInBins");
1795
1796 if (pnarank) *pnarank = NULL;
1797 if (!pnabinval)
1798 return ERROR_INT("&nabinval not defined", procName, 1);
1799 *pnabinval = NULL;
1800 if (!na)
1801 return ERROR_INT("na not defined", procName, 1);
1802 if (nbins < 2)
1803 return ERROR_INT("nbins must be > 1", procName, 1);
1804
1805 nxvals = numaGetCount(na);
1806 numaGetSum(na, &ntot);
1807 occup = ntot / nxvals;
1808 if (occup < 1) L_INFO("average occupancy %d < 1\n", procName, occup);
1809
1810 /* Get the number of items in each bin */
1811 if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL)
1812 return ERROR_INT("naeach not made", procName, 1);
1813
1814 /* Get the average value in each bin */
1815 sum = 0.0;
1816 bincount = 0;
1817 binindex = 0;
1818 numaGetIValue(naeach, 0, &binsize);
1819 nabinval = numaCreate(nbins);
1820 k = 0; /* count up to ntot */
1821 for (i = 0; i < nxvals; i++) {
1822 numaGetIValue(na, i, &count);
1823 for (j = 0; j < count; j++) {
1824 k++;
1825 bincount++;
1826 sum += i;
1827 if (bincount == binsize) { /* add bin entry */
1828 ave = sum / binsize;
1829 numaAddNumber(nabinval, ave);
1830 sum = 0.0;
1831 bincount = 0;
1832 binindex++;
1833 if (binindex == nbins) break;
1834 numaGetIValue(naeach, binindex, &binsize);
1835 }
1836 }
1837 if (binindex == nbins) break;
1838 }
1839 *pnabinval = nabinval;
1840 if (binindex != nbins)
1841 L_ERROR("binindex = %d != nbins = %d\n", procName, binindex, nbins);
1842
1843 /* Get cumulative normalized histogram (rank[gray value]).
1844 * This is the partial sum operating on the normalized histogram. */
1845 if (pnarank) {
1846 nan = numaNormalizeHistogram(na, 1.0);
1847 *pnarank = numaGetPartialSums(nan);
1848 numaDestroy(&nan);
1849 }
1850 numaDestroy(&naeach);
1851 return 0;
1852}
1853
1854
1873l_ok
1875 l_int32 nbins,
1876 NUMA **pnam)
1877{
1878NUMA *na1;
1879l_int32 maxbins, type;
1880l_float32 maxval, delx;
1881
1882 PROCNAME("numaGetRankBinValues");
1883
1884 if (!pnam)
1885 return ERROR_INT("&pnam not defined", procName, 1);
1886 *pnam = NULL;
1887 if (!na)
1888 return ERROR_INT("na not defined", procName, 1);
1889 if (numaGetCount(na) == 0)
1890 return ERROR_INT("na is empty", procName, 1);
1891 if (nbins < 2)
1892 return ERROR_INT("nbins must be > 1", procName, 1);
1893
1894 /* Choose between sorted array and a histogram.
1895 * If the input array is has a small number of numbers with
1896 * a large maximum, we will sort it. At the other extreme, if
1897 * the array has many numbers with a small maximum, such as the
1898 * values of pixels in an 8 bpp grayscale image, generate a histogram.
1899 * If type comes back as L_BIN_SORT, use a histogram. */
1900 type = numaChooseSortType(na);
1901 if (type == L_SHELL_SORT) { /* sort the array */
1902 L_INFO("sort the array: input size = %d\n", procName, numaGetCount(na));
1903 na1 = numaSort(NULL, na, L_SORT_INCREASING);
1904 numaDiscretizeSortedInBins(na1, nbins, pnam);
1905 numaDestroy(&na1);
1906 return 0;
1907 }
1908
1909 /* Make the histogram. Assuming there are no negative values
1910 * in the array, if the max value in the array does not exceed
1911 * about 100000, the bin size for generating the histogram will
1912 * be 1; maxbins refers to the number of entries in the histogram. */
1913 L_INFO("use a histogram: input size = %d\n", procName, numaGetCount(na));
1914 numaGetMax(na, &maxval, NULL);
1915 maxbins = L_MIN(100002, (l_int32)maxval + 2);
1916 na1 = numaMakeHistogram(na, maxbins, NULL, NULL);
1917
1918 /* Warn if there is a scale change. This shouldn't happen
1919 * unless the max value is above 100000. */
1920 numaGetParameters(na1, NULL, &delx);
1921 if (delx > 1.0)
1922 L_WARNING("scale change: delx = %6.2f\n", procName, delx);
1923
1924 /* Rank bin the results */
1925 numaDiscretizeHistoInBins(na1, nbins, pnam, NULL);
1926 numaDestroy(&na1);
1927 return 0;
1928}
1929
1930
1944NUMA *
1946 l_int32 nbins)
1947{
1948l_int32 i, start, end;
1949NUMA *naeach;
1950
1951 PROCNAME("numaGetUniformBinSizes");
1952
1953 if (ntotal <= 0)
1954 return (NUMA *)ERROR_PTR("ntotal <= 0", procName, NULL);
1955 if (nbins <= 0)
1956 return (NUMA *)ERROR_PTR("nbins <= 0", procName, NULL);
1957
1958 if ((naeach = numaCreate(nbins)) == NULL)
1959 return (NUMA *)ERROR_PTR("naeach not made", procName, NULL);
1960 start = 0;
1961 for (i = 0; i < nbins; i++) {
1962 end = ntotal * (i + 1) / nbins;
1963 numaAddNumber(naeach, end - start);
1964 start = end;
1965 }
1966 return naeach;
1967}
1968
1969
1970/*----------------------------------------------------------------------*
1971 * Splitting a distribution *
1972 *----------------------------------------------------------------------*/
2022l_ok
2024 l_float32 scorefract,
2025 l_int32 *psplitindex,
2026 l_float32 *pave1,
2027 l_float32 *pave2,
2028 l_float32 *pnum1,
2029 l_float32 *pnum2,
2030 NUMA **pnascore)
2031{
2032l_int32 i, n, bestsplit, minrange, maxrange, maxindex;
2033l_float32 ave1, ave2, ave1prev, ave2prev;
2034l_float32 num1, num2, num1prev, num2prev;
2035l_float32 val, minval, sum, fract1;
2036l_float32 norm, score, minscore, maxscore;
2037NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2;
2038
2039 PROCNAME("numaSplitDistribution");
2040
2041 if (psplitindex) *psplitindex = 0;
2042 if (pave1) *pave1 = 0.0;
2043 if (pave2) *pave2 = 0.0;
2044 if (pnum1) *pnum1 = 0.0;
2045 if (pnum2) *pnum2 = 0.0;
2046 if (pnascore) *pnascore = NULL;
2047 if (!na)
2048 return ERROR_INT("na not defined", procName, 1);
2049
2050 n = numaGetCount(na);
2051 if (n <= 1)
2052 return ERROR_INT("n = 1 in histogram", procName, 1);
2053 numaGetSum(na, &sum);
2054 if (sum <= 0.0)
2055 return ERROR_INT("sum <= 0.0", procName, 1);
2056 norm = 4.0 / ((l_float32)(n - 1) * (n - 1));
2057 ave1prev = 0.0;
2058 numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
2059 num1prev = 0.0;
2060 num2prev = sum;
2061 maxindex = n / 2; /* initialize with something */
2062
2063 /* Split the histogram with [0 ... i] in the lower part
2064 * and [i+1 ... n-1] in upper part. First, compute an otsu
2065 * score for each possible splitting. */
2066 if ((nascore = numaCreate(n)) == NULL)
2067 return ERROR_INT("nascore not made", procName, 1);
2068 naave1 = (pave1) ? numaCreate(n) : NULL;
2069 naave2 = (pave2) ? numaCreate(n) : NULL;
2070 nanum1 = (pnum1) ? numaCreate(n) : NULL;
2071 nanum2 = (pnum2) ? numaCreate(n) : NULL;
2072 maxscore = 0.0;
2073 for (i = 0; i < n; i++) {
2074 numaGetFValue(na, i, &val);
2075 num1 = num1prev + val;
2076 if (num1 == 0)
2077 ave1 = ave1prev;
2078 else
2079 ave1 = (num1prev * ave1prev + i * val) / num1;
2080 num2 = num2prev - val;
2081 if (num2 == 0)
2082 ave2 = ave2prev;
2083 else
2084 ave2 = (num2prev * ave2prev - i * val) / num2;
2085 fract1 = num1 / sum;
2086 score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
2087 numaAddNumber(nascore, score);
2088 if (pave1) numaAddNumber(naave1, ave1);
2089 if (pave2) numaAddNumber(naave2, ave2);
2090 if (pnum1) numaAddNumber(nanum1, num1);
2091 if (pnum2) numaAddNumber(nanum2, num2);
2092 if (score > maxscore) {
2093 maxscore = score;
2094 maxindex = i;
2095 }
2096 num1prev = num1;
2097 num2prev = num2;
2098 ave1prev = ave1;
2099 ave2prev = ave2;
2100 }
2101
2102 /* Next, for all contiguous scores within a specified fraction
2103 * of the max, choose the split point as the value with the
2104 * minimum in the histogram. */
2105 minscore = (1. - scorefract) * maxscore;
2106 for (i = maxindex - 1; i >= 0; i--) {
2107 numaGetFValue(nascore, i, &val);
2108 if (val < minscore)
2109 break;
2110 }
2111 minrange = i + 1;
2112 for (i = maxindex + 1; i < n; i++) {
2113 numaGetFValue(nascore, i, &val);
2114 if (val < minscore)
2115 break;
2116 }
2117 maxrange = i - 1;
2118 numaGetFValue(na, minrange, &minval);
2119 bestsplit = minrange;
2120 for (i = minrange + 1; i <= maxrange; i++) {
2121 numaGetFValue(na, i, &val);
2122 if (val < minval) {
2123 minval = val;
2124 bestsplit = i;
2125 }
2126 }
2127
2128 /* Add one to the bestsplit value to get the threshold value,
2129 * because when we take a threshold, as in pixThresholdToBinary(),
2130 * we always choose the set with values below the threshold. */
2131 bestsplit = L_MIN(255, bestsplit + 1);
2132
2133 if (psplitindex) *psplitindex = bestsplit;
2134 if (pave1) numaGetFValue(naave1, bestsplit, pave1);
2135 if (pave2) numaGetFValue(naave2, bestsplit, pave2);
2136 if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
2137 if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
2138
2139 if (pnascore) { /* debug mode */
2140 lept_stderr("minrange = %d, maxrange = %d\n", minrange, maxrange);
2141 lept_stderr("minval = %10.0f\n", minval);
2142 gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore",
2143 "Score for split distribution");
2144 *pnascore = nascore;
2145 } else {
2146 numaDestroy(&nascore);
2147 }
2148
2149 if (pave1) numaDestroy(&naave1);
2150 if (pave2) numaDestroy(&naave2);
2151 if (pnum1) numaDestroy(&nanum1);
2152 if (pnum2) numaDestroy(&nanum2);
2153 return 0;
2154}
2155
2156
2157/*----------------------------------------------------------------------*
2158 * Comparing histograms *
2159 *----------------------------------------------------------------------*/
2184l_ok
2186 NUMAA *naa2,
2187 NUMA **pnad)
2188{
2189l_int32 i, n, nt;
2190l_float32 dist;
2191NUMA *na1, *na2, *nad;
2192
2193 PROCNAME("grayHistogramsToEMD");
2194
2195 if (!pnad)
2196 return ERROR_INT("&nad not defined", procName, 1);
2197 *pnad = NULL;
2198 if (!naa1 || !naa2)
2199 return ERROR_INT("na1 and na2 not both defined", procName, 1);
2200 n = numaaGetCount(naa1);
2201 if (n != numaaGetCount(naa2))
2202 return ERROR_INT("naa1 and naa2 numa counts differ", procName, 1);
2203 nt = numaaGetNumberCount(naa1);
2204 if (nt != numaaGetNumberCount(naa2))
2205 return ERROR_INT("naa1 and naa2 number counts differ", procName, 1);
2206 if (256 * n != nt) /* good enough check */
2207 return ERROR_INT("na sizes must be 256", procName, 1);
2208
2209 nad = numaCreate(n);
2210 *pnad = nad;
2211 for (i = 0; i < n; i++) {
2212 na1 = numaaGetNuma(naa1, i, L_CLONE);
2213 na2 = numaaGetNuma(naa2, i, L_CLONE);
2214 numaEarthMoverDistance(na1, na2, &dist);
2215 numaAddNumber(nad, dist / 255.); /* normalize to [0.0 - 1.0] */
2216 numaDestroy(&na1);
2217 numaDestroy(&na2);
2218 }
2219 return 0;
2220}
2221
2222
2250l_ok
2252 NUMA *na2,
2253 l_float32 *pdist)
2254{
2255l_int32 n, norm, i;
2256l_float32 sum1, sum2, diff, total;
2257l_float32 *array1, *array3;
2258NUMA *na3;
2259
2260 PROCNAME("numaEarthMoverDistance");
2261
2262 if (!pdist)
2263 return ERROR_INT("&dist not defined", procName, 1);
2264 *pdist = 0.0;
2265 if (!na1 || !na2)
2266 return ERROR_INT("na1 and na2 not both defined", procName, 1);
2267 n = numaGetCount(na1);
2268 if (n != numaGetCount(na2))
2269 return ERROR_INT("na1 and na2 have different size", procName, 1);
2270
2271 /* Generate na3; normalize to na1 if necessary */
2272 numaGetSum(na1, &sum1);
2273 numaGetSum(na2, &sum2);
2274 norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0;
2275 if (!norm)
2276 na3 = numaTransform(na2, 0, sum1 / sum2);
2277 else
2278 na3 = numaCopy(na2);
2279 array1 = numaGetFArray(na1, L_NOCOPY);
2280 array3 = numaGetFArray(na3, L_NOCOPY);
2281
2282 /* Move earth in n3 from array elements, to match n1 */
2283 total = 0;
2284 for (i = 1; i < n; i++) {
2285 diff = array1[i - 1] - array3[i - 1];
2286 array3[i] -= diff;
2287 total += L_ABS(diff);
2288 }
2289 *pdist = total / sum1;
2290
2291 numaDestroy(&na3);
2292 return 0;
2293}
2294
2295
2341l_ok
2343 l_int32 wc,
2344 NUMA **pnam,
2345 NUMA **pnams,
2346 NUMA **pnav,
2347 NUMA **pnarv)
2348{
2349l_int32 i, j, n, nn;
2350l_float32 **arrays;
2351l_float32 mean, var, rvar;
2352NUMA *na1, *na2, *na3, *na4;
2353
2354 PROCNAME("grayInterHistogramStats");
2355
2356 if (pnam) *pnam = NULL;
2357 if (pnams) *pnams = NULL;
2358 if (pnav) *pnav = NULL;
2359 if (pnarv) *pnarv = NULL;
2360 if (!pnam && !pnams && !pnav && !pnarv)
2361 return ERROR_INT("nothing requested", procName, 1);
2362 if (!naa)
2363 return ERROR_INT("naa not defined", procName, 1);
2364 n = numaaGetCount(naa);
2365 for (i = 0; i < n; i++) {
2366 nn = numaaGetNumaCount(naa, i);
2367 if (nn != 256) {
2368 L_ERROR("%d numbers in numa[%d]\n", procName, nn, i);
2369 return 1;
2370 }
2371 }
2372
2373 if (pnam) *pnam = numaCreate(256);
2374 if (pnams) *pnams = numaCreate(256);
2375 if (pnav) *pnav = numaCreate(256);
2376 if (pnarv) *pnarv = numaCreate(256);
2377
2378 /* First, use mean smoothing, normalize each histogram,
2379 * and save all results in a 2D matrix. */
2380 arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *));
2381 for (i = 0; i < n; i++) {
2382 na1 = numaaGetNuma(naa, i, L_CLONE);
2383 na2 = numaWindowedMean(na1, wc);
2384 na3 = numaNormalizeHistogram(na2, 10000.);
2385 arrays[i] = numaGetFArray(na3, L_COPY);
2386 numaDestroy(&na1);
2387 numaDestroy(&na2);
2388 numaDestroy(&na3);
2389 }
2390
2391 /* Get stats between histograms */
2392 for (j = 0; j < 256; j++) {
2393 na4 = numaCreate(n);
2394 for (i = 0; i < n; i++) {
2395 numaAddNumber(na4, arrays[i][j]);
2396 }
2397 numaSimpleStats(na4, 0, -1, &mean, &var, &rvar);
2398 if (pnam) numaAddNumber(*pnam, mean);
2399 if (pnams) numaAddNumber(*pnams, mean * mean);
2400 if (pnav) numaAddNumber(*pnav, var);
2401 if (pnarv) numaAddNumber(*pnarv, rvar);
2402 numaDestroy(&na4);
2403 }
2404
2405 for (i = 0; i < n; i++)
2406 LEPT_FREE(arrays[i]);
2407 LEPT_FREE(arrays);
2408 return 0;
2409}
2410
2411
2412/*----------------------------------------------------------------------*
2413 * Extrema finding *
2414 *----------------------------------------------------------------------*/
2431NUMA *
2433 l_int32 nmax,
2434 l_float32 fract1,
2435 l_float32 fract2)
2436{
2437l_int32 i, k, n, maxloc, lloc, rloc;
2438l_float32 fmaxval, sum, total, newtotal, val, lastval;
2439l_float32 peakfract;
2440NUMA *na, *napeak;
2441
2442 PROCNAME("numaFindPeaks");
2443
2444 if (!nas)
2445 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
2446 n = numaGetCount(nas);
2447 numaGetSum(nas, &total);
2448
2449 /* We munge this copy */
2450 if ((na = numaCopy(nas)) == NULL)
2451 return (NUMA *)ERROR_PTR("na not made", procName, NULL);
2452 if ((napeak = numaCreate(4 * nmax)) == NULL) {
2453 numaDestroy(&na);
2454 return (NUMA *)ERROR_PTR("napeak not made", procName, NULL);
2455 }
2456
2457 for (k = 0; k < nmax; k++) {
2458 numaGetSum(na, &newtotal);
2459 if (newtotal == 0.0) /* sanity check */
2460 break;
2461 numaGetMax(na, &fmaxval, &maxloc);
2462 sum = fmaxval;
2463 lastval = fmaxval;
2464 lloc = 0;
2465 for (i = maxloc - 1; i >= 0; --i) {
2466 numaGetFValue(na, i, &val);
2467 if (val == 0.0) {
2468 lloc = i + 1;
2469 break;
2470 }
2471 if (val > fract1 * fmaxval) {
2472 sum += val;
2473 lastval = val;
2474 continue;
2475 }
2476 if (lastval - val > fract2 * lastval) {
2477 sum += val;
2478 lastval = val;
2479 continue;
2480 }
2481 lloc = i;
2482 break;
2483 }
2484 lastval = fmaxval;
2485 rloc = n - 1;
2486 for (i = maxloc + 1; i < n; ++i) {
2487 numaGetFValue(na, i, &val);
2488 if (val == 0.0) {
2489 rloc = i - 1;
2490 break;
2491 }
2492 if (val > fract1 * fmaxval) {
2493 sum += val;
2494 lastval = val;
2495 continue;
2496 }
2497 if (lastval - val > fract2 * lastval) {
2498 sum += val;
2499 lastval = val;
2500 continue;
2501 }
2502 rloc = i;
2503 break;
2504 }
2505 peakfract = sum / total;
2506 numaAddNumber(napeak, lloc);
2507 numaAddNumber(napeak, maxloc);
2508 numaAddNumber(napeak, rloc);
2509 numaAddNumber(napeak, peakfract);
2510
2511 for (i = lloc; i <= rloc; i++)
2512 numaSetValue(na, i, 0.0);
2513 }
2514
2515 numaDestroy(&na);
2516 return napeak;
2517}
2518
2519
2549NUMA *
2551 l_float32 delta,
2552 NUMA **pnav)
2553{
2554l_int32 i, n, found, loc, direction;
2555l_float32 startval, val, maxval, minval;
2556NUMA *nav, *nad;
2557
2558 PROCNAME("numaFindExtrema");
2559
2560 if (pnav) *pnav = NULL;
2561 if (!nas)
2562 return (NUMA *)ERROR_PTR("nas not defined", procName, NULL);
2563 if (delta < 0.0)
2564 return (NUMA *)ERROR_PTR("delta < 0", procName, NULL);
2565
2566 n = numaGetCount(nas);
2567 nad = numaCreate(0);
2568 nav = NULL;
2569 if (pnav) {
2570 nav = numaCreate(0);
2571 *pnav = nav;
2572 }
2573
2574 /* We don't know if we'll find a peak or valley first,
2575 * but use the first element of nas as the reference point.
2576 * Break when we deviate by 'delta' from the first point. */
2577 numaGetFValue(nas, 0, &startval);
2578 found = FALSE;
2579 for (i = 1; i < n; i++) {
2580 numaGetFValue(nas, i, &val);
2581 if (L_ABS(val - startval) >= delta) {
2582 found = TRUE;
2583 break;
2584 }
2585 }
2586
2587 if (!found)
2588 return nad; /* it's empty */
2589
2590 /* Are we looking for a peak or a valley? */
2591 if (val > startval) { /* peak */
2592 direction = 1;
2593 maxval = val;
2594 } else {
2595 direction = -1;
2596 minval = val;
2597 }
2598 loc = i;
2599
2600 /* Sweep through the rest of the array, recording alternating
2601 * peak/valley extrema. */
2602 for (i = i + 1; i < n; i++) {
2603 numaGetFValue(nas, i, &val);
2604 if (direction == 1 && val > maxval ) { /* new local max */
2605 maxval = val;
2606 loc = i;
2607 } else if (direction == -1 && val < minval ) { /* new local min */
2608 minval = val;
2609 loc = i;
2610 } else if (direction == 1 && (maxval - val >= delta)) {
2611 numaAddNumber(nad, loc); /* save the current max location */
2612 if (nav) numaAddNumber(nav, maxval);
2613 direction = -1; /* reverse: start looking for a min */
2614 minval = val;
2615 loc = i; /* current min location */
2616 } else if (direction == -1 && (val - minval >= delta)) {
2617 numaAddNumber(nad, loc); /* save the current min location */
2618 if (nav) numaAddNumber(nav, minval);
2619 direction = 1; /* reverse: start looking for a max */
2620 maxval = val;
2621 loc = i; /* current max location */
2622 }
2623 }
2624
2625 /* Save the final extremum */
2626/* numaAddNumber(nad, loc); */
2627 return nad;
2628}
2629
2630
2656l_ok
2658 l_int32 skip,
2659 l_int32 *pthresh,
2660 l_float32 *pfract)
2661{
2662l_int32 i, n, start, index, minloc;
2663l_float32 val, pval, jval, minval, sum, partsum;
2664l_float32 *fa;
2665
2666 PROCNAME("numaFindLocForThreshold");
2667
2668 if (pfract) *pfract = 0.0;
2669 if (!pthresh)
2670 return ERROR_INT("&thresh not defined", procName, 1);
2671 *pthresh = 0;
2672 if (!na)
2673 return ERROR_INT("na not defined", procName, 1);
2674 if (skip <= 0) skip = 20;
2675
2676 /* Look for the top of the first peak */
2677 n = numaGetCount(na);
2678 fa = numaGetFArray(na, L_NOCOPY);
2679 pval = fa[0];
2680 for (i = 1; i < n; i++) {
2681 val = fa[i];
2682 index = L_MIN(i + skip, n - 1);
2683 jval = fa[index];
2684 if (val < pval && jval < pval) /* near the top if not there */
2685 break;
2686 pval = val;
2687 }
2688
2689 /* Look for the low point in the valley */
2690 start = i;
2691 pval = fa[start];
2692 for (i = start + 1; i < n; i++) {
2693 val = fa[i];
2694 if (val <= pval) { /* going down */
2695 pval = val;
2696 } else { /* going up */
2697 index = L_MIN(i + skip, n - 1);
2698 jval = fa[index]; /* junp ahead 20 */
2699 if (val > jval) { /* still going down; jump ahead */
2700 pval = jval;
2701 i = index;
2702 } else { /* really going up; passed the min */
2703 break;
2704 }
2705 }
2706 }
2707
2708 /* Find the location of the minimum in the interval */
2709 minloc = index; /* likely passed the min; look backward */
2710 minval = fa[index];
2711 for (i = index - 1; i > index - skip; i--) {
2712 if (fa[i] < minval) {
2713 minval = fa[i];
2714 minloc = i;
2715 }
2716 }
2717 *pthresh = minloc;
2718
2719 /* Find the fraction under the first peak */
2720 if (pfract) {
2721 numaGetSumOnInterval(na, 0, minloc, &partsum);
2722 numaGetSum(na, &sum);
2723 if (sum > 0.0)
2724 *pfract = partsum / sum;
2725 }
2726 return 0;
2727}
2728
2729
2751l_ok
2753 l_float32 minreversal,
2754 l_int32 *pnr,
2755 l_float32 *prd)
2756{
2757l_int32 i, n, nr, ival, binvals;
2758l_int32 *ia;
2759l_float32 fval, delx, len;
2760NUMA *nat;
2761
2762 PROCNAME("numaCountReversals");
2763
2764 if (pnr) *pnr = 0;
2765 if (prd) *prd = 0.0;
2766 if (!pnr && !prd)
2767 return ERROR_INT("neither &nr nor &rd are defined", procName, 1);
2768 if (!nas)
2769 return ERROR_INT("nas not defined", procName, 1);
2770 if ((n = numaGetCount(nas)) == 0) {
2771 L_INFO("nas is empty\n", procName);
2772 return 0;
2773 }
2774 if (minreversal < 0.0)
2775 return ERROR_INT("minreversal < 0", procName, 1);
2776
2777 /* Decide if the only values are 0 and 1 */
2778 binvals = TRUE;
2779 for (i = 0; i < n; i++) {
2780 numaGetFValue(nas, i, &fval);
2781 if (fval != 0.0 && fval != 1.0) {
2782 binvals = FALSE;
2783 break;
2784 }
2785 }
2786
2787 nr = 0;
2788 if (binvals) {
2789 if (minreversal > 1.0) {
2790 L_WARNING("binary values but minreversal > 1\n", procName);
2791 } else {
2792 ia = numaGetIArray(nas);
2793 ival = ia[0];
2794 for (i = 1; i < n; i++) {
2795 if (ia[i] != ival) {
2796 nr++;
2797 ival = ia[i];
2798 }
2799 }
2800 LEPT_FREE(ia);
2801 }
2802 } else {
2803 nat = numaFindExtrema(nas, minreversal, NULL);
2804 nr = numaGetCount(nat);
2805 numaDestroy(&nat);
2806 }
2807 if (pnr) *pnr = nr;
2808 if (prd) {
2809 numaGetParameters(nas, NULL, &delx);
2810 len = delx * n;
2811 *prd = (l_float32)nr / len;
2812 }
2813
2814 return 0;
2815}
2816
2817
2818/*----------------------------------------------------------------------*
2819 * Threshold crossings and frequency analysis *
2820 *----------------------------------------------------------------------*/
2847l_ok
2849 NUMA *nay,
2850 l_float32 estthresh,
2851 l_float32 *pbestthresh)
2852{
2853l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
2854l_int32 val, maxval, nmax, count;
2855l_float32 thresh, fmaxval, fmodeval;
2856NUMA *nat, *nac;
2857
2858 PROCNAME("numaSelectCrossingThreshold");
2859
2860 if (!pbestthresh)
2861 return ERROR_INT("&bestthresh not defined", procName, 1);
2862 *pbestthresh = 0.0;
2863 if (!nay)
2864 return ERROR_INT("nay not defined", procName, 1);
2865 if (numaGetCount(nay) < 2) {
2866 L_WARNING("nay count < 2; no threshold crossing\n", procName);
2867 return 1;
2868 }
2869
2870 /* Compute the number of crossings for different thresholds */
2871 nat = numaCreate(41);
2872 for (i = 0; i < 41; i++) {
2873 thresh = estthresh - 80.0 + 4.0 * i;
2874 nac = numaCrossingsByThreshold(nax, nay, thresh);
2875 numaAddNumber(nat, numaGetCount(nac));
2876 numaDestroy(&nac);
2877 }
2878
2879 /* Find the center of the plateau of max crossings, which
2880 * extends from thresh[istart] to thresh[iend]. */
2881 numaGetMax(nat, &fmaxval, NULL);
2882 maxval = (l_int32)fmaxval;
2883 nmax = 0;
2884 for (i = 0; i < 41; i++) {
2885 numaGetIValue(nat, i, &val);
2886 if (val == maxval)
2887 nmax++;
2888 }
2889 if (nmax < 3) { /* likely accidental max; try the mode */
2890 numaGetMode(nat, &fmodeval, &count);
2891 if (count > nmax && fmodeval > 0.5 * fmaxval)
2892 maxval = (l_int32)fmodeval; /* use the mode */
2893 }
2894
2895 inrun = FALSE;
2896 iend = 40;
2897 maxrunlen = 0, maxstart = 0, maxend = 0;
2898 for (i = 0; i < 41; i++) {
2899 numaGetIValue(nat, i, &val);
2900 if (val == maxval) {
2901 if (!inrun) {
2902 istart = i;
2903 inrun = TRUE;
2904 }
2905 continue;
2906 }
2907 if (inrun && (val != maxval)) {
2908 iend = i - 1;
2909 runlen = iend - istart + 1;
2910 inrun = FALSE;
2911 if (runlen > maxrunlen) {
2912 maxstart = istart;
2913 maxend = iend;
2914 maxrunlen = runlen;
2915 }
2916 }
2917 }
2918 if (inrun) {
2919 runlen = i - istart;
2920 if (runlen > maxrunlen) {
2921 maxstart = istart;
2922 maxend = i - 1;
2923 maxrunlen = runlen;
2924 }
2925 }
2926
2927 *pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
2928
2929#if DEBUG_CROSSINGS
2930 lept_stderr("\nCrossings attain a maximum at %d thresholds, between:\n"
2931 " thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
2932 nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
2933 maxend, estthresh - 80.0 + 4.0 * maxend);
2934 lept_stderr("The best choice: %5.1f\n", *pbestthresh);
2935 lept_stderr("Number of crossings at the 41 thresholds:");
2936 numaWriteStderr(nat);
2937#endif /* DEBUG_CROSSINGS */
2938
2939 numaDestroy(&nat);
2940 return 0;
2941}
2942
2943
2958NUMA *
2960 NUMA *nay,
2961 l_float32 thresh)
2962{
2963l_int32 i, n;
2964l_float32 startx, delx;
2965l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
2966NUMA *nad;
2967
2968 PROCNAME("numaCrossingsByThreshold");
2969
2970 if (!nay)
2971 return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
2972 n = numaGetCount(nay);
2973
2974 if (nax && (numaGetCount(nax) != n))
2975 return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
2976
2977 nad = numaCreate(0);
2978 if (n < 2) return nad;
2979 numaGetFValue(nay, 0, &yval1);
2980 numaGetParameters(nay, &startx, &delx);
2981 if (nax)
2982 numaGetFValue(nax, 0, &xval1);
2983 else
2984 xval1 = startx;
2985 for (i = 1; i < n; i++) {
2986 numaGetFValue(nay, i, &yval2);
2987 if (nax)
2988 numaGetFValue(nax, i, &xval2);
2989 else
2990 xval2 = startx + i * delx;
2991 delta1 = yval1 - thresh;
2992 delta2 = yval2 - thresh;
2993 if (delta1 == 0.0) {
2994 numaAddNumber(nad, xval1);
2995 } else if (delta2 == 0.0) {
2996 numaAddNumber(nad, xval2);
2997 } else if (delta1 * delta2 < 0.0) { /* crossing */
2998 fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
2999 crossval = xval1 + fract * (xval2 - xval1);
3000 numaAddNumber(nad, crossval);
3001 }
3002 xval1 = xval2;
3003 yval1 = yval2;
3004 }
3005
3006 return nad;
3007}
3008
3009
3024NUMA *
3026 NUMA *nay,
3027 l_float32 delta)
3028{
3029l_int32 i, j, n, np, previndex, curindex;
3030l_float32 startx, delx;
3031l_float32 xval1, xval2, yval1, yval2, delta1, delta2;
3032l_float32 prevval, curval, thresh, crossval, fract;
3033NUMA *nap, *nad;
3034
3035 PROCNAME("numaCrossingsByPeaks");
3036
3037 if (!nay)
3038 return (NUMA *)ERROR_PTR("nay not defined", procName, NULL);
3039
3040 n = numaGetCount(nay);
3041 if (nax && (numaGetCount(nax) != n))
3042 return (NUMA *)ERROR_PTR("nax and nay sizes differ", procName, NULL);
3043
3044 /* Find the extrema. Also add last point in nay to get
3045 * the last transition (from the last peak to the end).
3046 * The number of crossings is 1 more than the number of extrema. */
3047 nap = numaFindExtrema(nay, delta, NULL);
3048 numaAddNumber(nap, n - 1);
3049 np = numaGetCount(nap);
3050 L_INFO("Number of crossings: %d\n", procName, np);
3051
3052 /* Do all computation in index units of nax or the delx of nay */
3053 nad = numaCreate(np); /* output crossing locations, in nax units */
3054 previndex = 0; /* prime the search with 1st point */
3055 numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */
3056 numaGetParameters(nay, &startx, &delx);
3057 for (i = 0; i < np; i++) {
3058 numaGetIValue(nap, i, &curindex);
3059 numaGetFValue(nay, curindex, &curval);
3060 thresh = (prevval + curval) / 2.0;
3061 if (nax)
3062 numaGetFValue(nax, previndex, &xval1);
3063 else
3064 xval1 = startx + previndex * delx;
3065 numaGetFValue(nay, previndex, &yval1);
3066 for (j = previndex + 1; j <= curindex; j++) {
3067 if (nax)
3068 numaGetFValue(nax, j, &xval2);
3069 else
3070 xval2 = startx + j * delx;
3071 numaGetFValue(nay, j, &yval2);
3072 delta1 = yval1 - thresh;
3073 delta2 = yval2 - thresh;
3074 if (delta1 == 0.0) {
3075 numaAddNumber(nad, xval1);
3076 break;
3077 } else if (delta2 == 0.0) {
3078 numaAddNumber(nad, xval2);
3079 break;
3080 } else if (delta1 * delta2 < 0.0) { /* crossing */
3081 fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
3082 crossval = xval1 + fract * (xval2 - xval1);
3083 numaAddNumber(nad, crossval);
3084 break;
3085 }
3086 xval1 = xval2;
3087 yval1 = yval2;
3088 }
3089 previndex = curindex;
3090 prevval = curval;
3091 }
3092
3093 numaDestroy(&nap);
3094 return nad;
3095}
3096
3097
3135l_ok
3137 l_float32 relweight,
3138 l_int32 nwidth,
3139 l_int32 nshift,
3140 l_float32 minwidth,
3141 l_float32 maxwidth,
3142 l_float32 *pbestwidth,
3143 l_float32 *pbestshift,
3144 l_float32 *pbestscore)
3145{
3146l_int32 i, j;
3147l_float32 delwidth, delshift, width, shift, score;
3148l_float32 bestwidth, bestshift, bestscore;
3149
3150 PROCNAME("numaEvalBestHaarParameters");
3151
3152 if (pbestscore) *pbestscore = 0.0;
3153 if (pbestwidth) *pbestwidth = 0.0;
3154 if (pbestshift) *pbestshift = 0.0;
3155 if (!pbestwidth || !pbestshift)
3156 return ERROR_INT("&bestwidth and &bestshift not defined", procName, 1);
3157 if (!nas)
3158 return ERROR_INT("nas not defined", procName, 1);
3159
3160 bestscore = bestwidth = bestshift = 0.0;
3161 delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
3162 for (i = 0; i < nwidth; i++) {
3163 width = minwidth + delwidth * i;
3164 delshift = width / (l_float32)(nshift);
3165 for (j = 0; j < nshift; j++) {
3166 shift = j * delshift;
3167 numaEvalHaarSum(nas, width, shift, relweight, &score);
3168 if (score > bestscore) {
3169 bestscore = score;
3170 bestwidth = width;
3171 bestshift = shift;
3172#if DEBUG_FREQUENCY
3173 lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n",
3174 width, shift, score);
3175#endif /* DEBUG_FREQUENCY */
3176 }
3177 }
3178 }
3179
3180 *pbestwidth = bestwidth;
3181 *pbestshift = bestshift;
3182 if (pbestscore)
3183 *pbestscore = bestscore;
3184 return 0;
3185}
3186
3187
3220l_ok
3222 l_float32 width,
3223 l_float32 shift,
3224 l_float32 relweight,
3225 l_float32 *pscore)
3226{
3227l_int32 i, n, nsamp, index;
3228l_float32 score, weight, val;
3229
3230 PROCNAME("numaEvalHaarSum");
3231
3232 if (!pscore)
3233 return ERROR_INT("&score not defined", procName, 1);
3234 *pscore = 0.0;
3235 if (!nas)
3236 return ERROR_INT("nas not defined", procName, 1);
3237 if ((n = numaGetCount(nas)) < 2 * width)
3238 return ERROR_INT("nas size too small", procName, 1);
3239
3240 score = 0.0;
3241 nsamp = (l_int32)((n - shift) / width);
3242 for (i = 0; i < nsamp; i++) {
3243 index = (l_int32)(shift + i * width);
3244 weight = (i % 2) ? 1.0 : -1.0 * relweight;
3245 numaGetFValue(nas, index, &val);
3246 score += weight * val;
3247 }
3248
3249 *pscore = 2.0 * width * score / (l_float32)n;
3250 return 0;
3251}
3252
3253
3254/*----------------------------------------------------------------------*
3255 * Generating numbers in a range under constraints *
3256 *----------------------------------------------------------------------*/
3277NUMA *
3279 l_int32 last,
3280 l_int32 nmax,
3281 l_int32 use_pairs)
3282{
3283l_int32 i, nsets, val;
3284l_float32 delta;
3285NUMA *na;
3286
3287 PROCNAME("genConstrainedNumaInRange");
3288
3289 first = L_MAX(0, first);
3290 if (last < first)
3291 return (NUMA *)ERROR_PTR("last < first!", procName, NULL);
3292 if (nmax < 1)
3293 return (NUMA *)ERROR_PTR("nmax < 1!", procName, NULL);
3294
3295 nsets = L_MIN(nmax, last - first + 1);
3296 if (use_pairs == 1)
3297 nsets = nsets / 2;
3298 if (nsets == 0)
3299 return (NUMA *)ERROR_PTR("nsets == 0", procName, NULL);
3300
3301 /* Select delta so that selection covers the full range if possible */
3302 if (nsets == 1) {
3303 delta = 0.0;
3304 } else {
3305 if (use_pairs == 0)
3306 delta = (l_float32)(last - first) / (nsets - 1);
3307 else
3308 delta = (l_float32)(last - first - 1) / (nsets - 1);
3309 }
3310
3311 na = numaCreate(nsets);
3312 for (i = 0; i < nsets; i++) {
3313 val = (l_int32)(first + i * delta + 0.5);
3314 numaAddNumber(na, val);
3315 if (use_pairs == 1)
3316 numaAddNumber(na, val + 1);
3317 }
3318
3319 return na;
3320}
@ L_LINEAR_INTERP
Definition: array.h:151
@ L_MIRRORED_BORDER
Definition: array.h:159
l_ok gplotSimple1(NUMA *na, l_int32 outformat, const char *outroot, const char *title)
gplotSimple1()
Definition: gplot.c:665
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
Definition: numabasic.c:478
l_int32 numaaGetNumberCount(NUMAA *naa)
numaaGetNumberCount()
Definition: numabasic.c:1670
NUMA * numaCopy(NUMA *na)
numaCopy()
Definition: numabasic.c:399
l_ok numaGetFValue(NUMA *na, l_int32 index, l_float32 *pval)
numaGetFValue()
Definition: numabasic.c:719
l_ok numaWriteStderr(NUMA *na)
numaWriteStderr()
Definition: numabasic.c:1313
NUMA * numaaGetNuma(NUMAA *naa, l_int32 index, l_int32 accessflag)
numaaGetNuma()
Definition: numabasic.c:1740
l_int32 numaaGetCount(NUMAA *naa)
numaaGetCount()
Definition: numabasic.c:1631
l_int32 * numaGetIArray(NUMA *na)
numaGetIArray()
Definition: numabasic.c:847
l_int32 numaaGetNumaCount(NUMAA *naa, l_int32 index)
numaaGetNumaCount()
Definition: numabasic.c:1649
l_ok numaSetCount(NUMA *na, l_int32 newcount)
numaSetCount()
Definition: numabasic.c:685
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:366
l_ok numaSetValue(NUMA *na, l_int32 index, l_float32 val)
numaSetValue()
Definition: numabasic.c:786
l_float32 * numaGetFArray(NUMA *na, l_int32 copyflag)
numaGetFArray()
Definition: numabasic.c:892
l_int32 numaGetCount(NUMA *na)
numaGetCount()
Definition: numabasic.c:658
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:754
NUMA * numaCreate(l_int32 n)
numaCreate()
Definition: numabasic.c:194
l_ok numaGetParameters(NUMA *na, l_float32 *pstartx, l_float32 *pdelx)
numaGetParameters()
Definition: numabasic.c:963
l_ok numaCopyParameters(NUMA *nad, NUMA *nas)
numaCopyParameters()
Definition: numabasic.c:1016
l_ok numaSetParameters(NUMA *na, l_float32 startx, l_float32 delx)
numaSetParameters()
Definition: numabasic.c:993
l_ok numaGetMode(NUMA *na, l_float32 *pval, l_int32 *pcount)
numaGetMode()
Definition: numafunc1.c:3560
NUMA * numaSort(NUMA *naout, NUMA *nain, l_int32 sortorder)
numaSort()
Definition: numafunc1.c:2650
NUMA * numaRemoveBorder(NUMA *nas, l_int32 left, l_int32 right)
numaRemoveBorder()
Definition: numafunc1.c:996
l_ok numaHasOnlyIntegers(NUMA *na, l_int32 *pallints)
numaHasOnlyIntegers()
Definition: numafunc1.c:656
l_ok numaGetMedian(NUMA *na, l_float32 *pval)
numaGetMedian()
Definition: numafunc1.c:3405
NUMA * numaAddBorder(NUMA *nas, l_int32 left, l_int32 right, l_float32 val)
numaAddBorder()
Definition: numafunc1.c:902
NUMA * numaClipToInterval(NUMA *nas, l_int32 first, l_int32 last)
numaClipToInterval()
Definition: numafunc1.c:1182
l_ok numaGetSumOnInterval(NUMA *na, l_int32 first, l_int32 last, l_float32 *psum)
numaGetSumOnInterval()
Definition: numafunc1.c:613
l_ok numaInterpolateEqxInterval(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 type, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaInterpolateEqxInterval()
Definition: numafunc1.c:1912
l_ok numaGetMin(NUMA *na, l_float32 *pminval, l_int32 *piminloc)
numaGetMin()
Definition: numafunc1.c:453
l_ok numaGetMax(NUMA *na, l_float32 *pmaxval, l_int32 *pimaxloc)
numaGetMax()
Definition: numafunc1.c:496
l_int32 numaChooseSortType(NUMA *nas)
numaChooseSortType()
Definition: numafunc1.c:2602
NUMA * numaAddSpecifiedBorder(NUMA *nas, l_int32 left, l_int32 right, l_int32 type)
numaAddSpecifiedBorder()
Definition: numafunc1.c:945
NUMA * numaMakeConstant(l_float32 val, l_int32 size)
numaMakeConstant()
Definition: numafunc1.c:851
NUMA * numaGetPartialSums(NUMA *na)
numaGetPartialSums()
Definition: numafunc1.c:579
l_ok numaGetSum(NUMA *na, l_float32 *psum)
numaGetSum()
Definition: numafunc1.c:538
NUMA * numaErode(NUMA *nas, l_int32 size)
numaErode()
Definition: numafunc2.c:183
l_ok numaGetRankBinValues(NUMA *na, l_int32 nbins, NUMA **pnam)
numaGetRankBinValues()
Definition: numafunc2.c:1874
NUMA * genConstrainedNumaInRange(l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs)
genConstrainedNumaInRange()
Definition: numafunc2.c:3278
l_ok numaCountReversals(NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd)
numaCountReversals()
Definition: numafunc2.c:2752
NUMA * numaWindowedMeanSquare(NUMA *nas, l_int32 wc)
numaWindowedMeanSquare()
Definition: numafunc2.c:648
NUMA * numaClose(NUMA *nas, l_int32 size)
numaClose()
Definition: numafunc2.c:368
NUMA * numaNormalizeHistogram(NUMA *nas, l_float32 tsum)
numaNormalizeHistogram()
Definition: numafunc2.c:1179
NUMA * numaWindowedMedian(NUMA *nas, l_int32 halfwin)
numaWindowedMedian()
Definition: numafunc2.c:784
l_ok numaEvalHaarSum(NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore)
numaEvalHaarSum()
Definition: numafunc2.c:3221
NUMA * numaDilate(NUMA *nas, l_int32 size)
numaDilate()
Definition: numafunc2.c:252
l_ok numaEarthMoverDistance(NUMA *na1, NUMA *na2, l_float32 *pdist)
numaEarthMoverDistance()
Definition: numafunc2.c:2251
l_ok numaSelectCrossingThreshold(NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh)
numaSelectCrossingThreshold()
Definition: numafunc2.c:2848
NUMA * numaTransform(NUMA *nas, l_float32 shift, l_float32 scale)
numaTransform()
Definition: numafunc2.c:415
l_ok numaWindowedStats(NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
numaWindowedStats()
Definition: numafunc2.c:537
NUMA * numaGetUniformBinSizes(l_int32 ntotal, l_int32 nbins)
numaGetUniformBinSizes()
Definition: numafunc2.c:1945
l_ok numaWindowedVariance(NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv)
numaWindowedVariance()
Definition: numafunc2.c:716
l_ok numaEvalBestHaarParameters(NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore)
numaEvalBestHaarParameters()
Definition: numafunc2.c:3136
l_ok numaDiscretizeHistoInBins(NUMA *na, l_int32 nbins, NUMA **pnabinval, NUMA **pnarank)
numaDiscretizeHistoInBins()
Definition: numafunc2.c:1783
l_ok numaGetHistogramStatsOnInterval(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStatsOnInterval()
Definition: numafunc2.c:1401
NUMA * numaWindowedMean(NUMA *nas, l_int32 wc)
numaWindowedMean()
Definition: numafunc2.c:588
l_ok numaGetHistogramStats(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStats()
Definition: numafunc2.c:1352
l_ok grayHistogramsToEMD(NUMAA *naa1, NUMAA *naa2, NUMA **pnad)
grayHistogramsToEMD()
Definition: numafunc2.c:2185
NUMA * numaCrossingsByPeaks(NUMA *nax, NUMA *nay, l_float32 delta)
numaCrossingsByPeaks()
Definition: numafunc2.c:3025
NUMA * numaOpen(NUMA *nas, l_int32 size)
numaOpen()
Definition: numafunc2.c:321
NUMA * numaCrossingsByThreshold(NUMA *nax, NUMA *nay, l_float32 thresh)
numaCrossingsByThreshold()
Definition: numafunc2.c:2959
l_ok numaMakeRankFromHistogram(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaMakeRankFromHistogram()
Definition: numafunc2.c:1495
NUMA * numaMakeHistogramAuto(NUMA *na, l_int32 maxbins)
numaMakeHistogramAuto()
Definition: numafunc2.c:998
l_ok numaSplitDistribution(NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore)
numaSplitDistribution()
Definition: numafunc2.c:2023
l_ok numaHistogramGetRankFromVal(NUMA *na, l_float32 rval, l_float32 *prank)
numaHistogramGetRankFromVal()
Definition: numafunc2.c:1563
NUMA * numaRebinHistogram(NUMA *nas, l_int32 newsize)
numaRebinHistogram()
Definition: numafunc2.c:1131
l_ok numaHistogramGetValFromRank(NUMA *na, l_float32 rank, l_float32 *prval)
numaHistogramGetValFromRank()
Definition: numafunc2.c:1634
NUMA * numaConvertToInt(NUMA *nas)
numaConvertToInt()
Definition: numafunc2.c:833
NUMA * numaFindExtrema(NUMA *nas, l_float32 delta, NUMA **pnav)
numaFindExtrema()
Definition: numafunc2.c:2550
l_ok numaFindLocForThreshold(NUMA *na, l_int32 skip, l_int32 *pthresh, l_float32 *pfract)
numaFindLocForThreshold()
Definition: numafunc2.c:2657
l_ok grayInterHistogramStats(NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
grayInterHistogramStats()
Definition: numafunc2.c:2342
NUMA * numaFindPeaks(NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2)
numaFindPeaks()
Definition: numafunc2.c:2432
l_ok numaSimpleStats(NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar)
numaSimpleStats()
Definition: numafunc2.c:452
NUMA * numaMakeHistogramClipped(NUMA *na, l_float32 binsize, l_float32 maxsize)
numaMakeHistogramClipped()
Definition: numafunc2.c:1082
NUMA * numaMakeHistogram(NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart)
numaMakeHistogram()
Definition: numafunc2.c:885
l_ok numaDiscretizeSortedInBins(NUMA *na, l_int32 nbins, NUMA **pnabinval)
numaDiscretizeSortedInBins()
Definition: numafunc2.c:1706
l_ok numaGetStatsUsingHistogram(NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto)
numaGetStatsUsingHistogram()
Definition: numafunc2.c:1261
@ L_COPY
Definition: pix.h:712
@ L_CLONE
Definition: pix.h:713
@ L_NOCOPY
Definition: pix.h:710
@ L_SHELL_SORT
Definition: pix.h:723
@ L_SORT_INCREASING
Definition: pix.h:729
Definition: array.h:71
Definition: array.h:83
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306