/src/leptonica/src/numafunc2.c
Line | Count | Source |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file numafunc2.c |
29 | | * <pre> |
30 | | * |
31 | | * -------------------------------------- |
32 | | * This file has these Numa utilities: |
33 | | * - morphological operations |
34 | | * - arithmetic transforms |
35 | | * - windowed statistical operations |
36 | | * - histogram extraction |
37 | | * - histogram comparison |
38 | | * - extrema finding |
39 | | * - frequency and crossing analysis |
40 | | * -------------------------------------- |
41 | | |
42 | | * Morphological (min/max) operations |
43 | | * NUMA *numaErode() |
44 | | * NUMA *numaDilate() |
45 | | * NUMA *numaOpen() |
46 | | * NUMA *numaClose() |
47 | | * |
48 | | * Other transforms |
49 | | * NUMA *numaTransform() |
50 | | * l_int32 numaSimpleStats() |
51 | | * l_int32 numaWindowedStats() |
52 | | * NUMA *numaWindowedMean() |
53 | | * NUMA *numaWindowedMeanSquare() |
54 | | * l_int32 numaWindowedVariance() |
55 | | * NUMA *numaWindowedMedian() |
56 | | * NUMA *numaConvertToInt() |
57 | | * |
58 | | * Histogram generation and statistics |
59 | | * NUMA *numaMakeHistogram() |
60 | | * NUMA *numaMakeHistogramAuto() |
61 | | * NUMA *numaMakeHistogramClipped() |
62 | | * NUMA *numaRebinHistogram() |
63 | | * NUMA *numaNormalizeHistogram() |
64 | | * l_int32 numaGetStatsUsingHistogram() |
65 | | * l_int32 numaGetHistogramStats() |
66 | | * l_int32 numaGetHistogramStatsOnInterval() |
67 | | * l_int32 numaMakeRankFromHistogram() |
68 | | * l_int32 numaHistogramGetRankFromVal() |
69 | | * l_int32 numaHistogramGetValFromRank() |
70 | | * l_int32 numaDiscretizeSortedInBins() |
71 | | * l_int32 numaDiscretizeHistoInBins() |
72 | | * l_int32 numaGetRankBinValues() |
73 | | * NUMA *numaGetUniformBinSizes() |
74 | | * |
75 | | * Splitting a distribution |
76 | | * l_int32 numaSplitDistribution() |
77 | | * |
78 | | * Comparing histograms |
79 | | * l_int32 grayHistogramsToEMD() |
80 | | * l_int32 numaEarthMoverDistance() |
81 | | * l_int32 grayInterHistogramStats() |
82 | | * |
83 | | * Extrema finding |
84 | | * NUMA *numaFindPeaks() |
85 | | * NUMA *numaFindExtrema() |
86 | | * NUMA *numaFindLocForThreshold() |
87 | | * l_int32 numaCountReversals() |
88 | | * |
89 | | * Threshold crossings and frequency analysis |
90 | | * l_int32 numaSelectCrossingThreshold() |
91 | | * NUMA *numaCrossingsByThreshold() |
92 | | * NUMA *numaCrossingsByPeaks() |
93 | | * NUMA *numaEvalBestHaarParameters() |
94 | | * l_int32 numaEvalHaarSum() |
95 | | * |
96 | | * Generating numbers in a range under constraints |
97 | | * NUMA *genConstrainedNumaInRange() |
98 | | * |
99 | | * Things to remember when using the Numa: |
100 | | * |
101 | | * (1) The numa is a struct, not an array. Always use accessors |
102 | | * (see numabasic.c), never the fields directly. |
103 | | * |
104 | | * (2) The number array holds l_float32 values. It can also |
105 | | * be used to store l_int32 values. See numabasic.c for |
106 | | * details on using the accessors. Integers larger than |
107 | | * about 10M will lose accuracy on retrieval due to round-off. |
108 | | * For large integers, use the dna (array of l_float64) instead. |
109 | | * |
110 | | * (3) Occasionally, in the comments we denote the i-th element of a |
111 | | * numa by na[i]. This is conceptual only -- the numa is not an array! |
112 | | * |
113 | | * Some general comments on histograms: |
114 | | * |
115 | | * (1) Histograms are the generic statistical representation of |
116 | | * the data about some attribute. Typically they're not |
117 | | * normalized -- they simply give the number of occurrences |
118 | | * within each range of values of the attribute. This range |
119 | | * of values is referred to as a 'bucket'. For example, |
120 | | * the histogram could specify how many connected components |
121 | | * are found for each value of their width; in that case, |
122 | | * the bucket size is 1. |
123 | | * |
124 | | * (2) In leptonica, all buckets have the same size. Histograms |
125 | | * are therefore specified by a numa of occurrences, along |
126 | | * with two other numbers: the 'value' associated with the |
127 | | * occupants of the first bucket and the size (i.e., 'width') |
128 | | * of each bucket. These two numbers then allow us to calculate |
129 | | * the value associated with the occupants of each bucket. |
130 | | * These numbers are fields in the numa, initialized to |
131 | | * a startx value of 0.0 and a binsize of 1.0. Accessors for |
132 | | * these fields are functions numa*Parameters(). All histograms |
133 | | * must have these two numbers properly set. |
134 | | * </pre> |
135 | | */ |
136 | | |
137 | | #ifdef HAVE_CONFIG_H |
138 | | #include <config_auto.h> |
139 | | #endif /* HAVE_CONFIG_H */ |
140 | | |
141 | | #include <math.h> |
142 | | #include "allheaders.h" |
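/* A minimal sketch of the histogram convention described in the file
 * comment above: the occurrence counts live in the numa itself, and
 * numaSetParameters() / numaGetParameters() carry the startx and
 * binsize fields.  The function name and the values are hypothetical,
 * for illustration only; it is not called anywhere in this file. */
static void
example_histo_params(void)
{
l_float32  startx, binsize;
NUMA      *nah;

    nah = numaCreate(10);               /* histogram with 10 buckets */
    numaSetCount(nah, 10);              /* interpret zeroes as counts */
    numaSetParameters(nah, 50.0, 2.0);  /* first bucket at x = 50, width 2 */
    numaGetParameters(nah, &startx, &binsize);
    lept_stderr("startx = %5.1f, binsize = %5.1f\n", startx, binsize);
    numaDestroy(&nah);
}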
143 | | |
144 | | /* bin sizes in numaMakeHistogram() */ |
145 | | static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\ |
146 | | 2000, 5000, 10000, 20000, 50000, 100000, 200000,\ |
147 | | 500000, 1000000, 2000000, 5000000, 10000000,\ |
148 | | 20000000, 50000000, 100000000}; |
149 | | static const l_int32 NBinSizes = 24; |
150 | | |
151 | | |
152 | | #ifndef NO_CONSOLE_IO |
153 | | #define DEBUG_HISTO 0 |
154 | | #define DEBUG_CROSSINGS 0 |
155 | | #define DEBUG_FREQUENCY 0 |
156 | | #endif /* ~NO_CONSOLE_IO */ |
157 | | |
158 | | /*----------------------------------------------------------------------* |
159 | | * Morphological operations * |
160 | | *----------------------------------------------------------------------*/ |
161 | | /*! |
162 | | * \brief numaErode() |
163 | | * |
164 | | * \param[in] nas |
165 | | * \param[in] size of sel; greater than 0, odd. The origin |
166 | | * is implicitly in the center. |
167 | | * \return nad eroded, or NULL on error |
168 | | * |
169 | | * <pre> |
170 | | * Notes: |
171 | | * (1) The structuring element (sel) is linear, all "hits" |
172 | | * (2) If size == 1, this returns a copy |
173 | | * (3) General comment. The morphological operations are equivalent |
174 | | * to those that would be performed on a 1-dimensional fpix. |
175 | | * However, because we have not implemented morphological |
176 | | * operations on fpix, we do this here. Because it is only |
177 | | * 1 dimensional, there is no reason to use the more |
178 | | * complicated van Herk/Gil-Werman algorithm, and we do it |
179 | | * by brute force. |
180 | | * </pre> |
181 | | */ |
182 | | NUMA * |
183 | | numaErode(NUMA *nas, |
184 | | l_int32 size) |
185 | 0 | { |
186 | 0 | l_int32 i, j, n, hsize, len; |
187 | 0 | l_float32 minval; |
188 | 0 | l_float32 *fa, *fas, *fad; |
189 | 0 | NUMA *nad; |
190 | |
|
191 | 0 | if (!nas) |
192 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
193 | 0 | if (size <= 0) |
194 | 0 | return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); |
195 | 0 | if ((size & 1) == 0 ) { |
196 | 0 | L_WARNING("sel size must be odd; increasing by 1\n", __func__); |
197 | 0 | size++; |
198 | 0 | } |
199 | |
|
200 | 0 | if (size == 1) |
201 | 0 | return numaCopy(nas); |
202 | | |
203 | | /* Make a source fa (fas) that has an added (size / 2) boundary |
204 | | * on left and right, contains a copy of nas in the interior region |
205 | | * (between 'hsize' and 'hsize + n'), and has large values |
206 | | * inserted in the boundary (because it is an erosion). */ |
207 | 0 | n = numaGetCount(nas); |
208 | 0 | hsize = size / 2; |
209 | 0 | len = n + 2 * hsize; |
210 | 0 | if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL) |
211 | 0 | return (NUMA *)ERROR_PTR("fas not made", __func__, NULL); |
212 | 0 | for (i = 0; i < hsize; i++) |
213 | 0 | fas[i] = 1.0e37f; |
214 | 0 | for (i = hsize + n; i < len; i++) |
215 | 0 | fas[i] = 1.0e37f; |
216 | 0 | fa = numaGetFArray(nas, L_NOCOPY); |
217 | 0 | for (i = 0; i < n; i++) |
218 | 0 | fas[hsize + i] = fa[i]; |
219 | |
|
220 | 0 | nad = numaMakeConstant(0, n); |
221 | 0 | numaCopyParameters(nad, nas); |
222 | 0 | fad = numaGetFArray(nad, L_NOCOPY); |
223 | 0 | for (i = 0; i < n; i++) { |
224 | 0 | minval = 1.0e37f; /* start big */ |
225 | 0 | for (j = 0; j < size; j++) |
226 | 0 | minval = L_MIN(minval, fas[i + j]); |
227 | 0 | fad[i] = minval; |
228 | 0 | } |
229 | |
|
230 | 0 | LEPT_FREE(fas); |
231 | 0 | return nad; |
232 | 0 | } |
233 | | |
234 | | |
235 | | /*! |
236 | | * \brief numaDilate() |
237 | | * |
238 | | * \param[in] nas |
239 | | * \param[in] size of sel; greater than 0, odd. The origin |
240 | | * is implicitly in the center. |
241 | | * \return nad dilated, or NULL on error |
242 | | * |
243 | | * <pre> |
244 | | * Notes: |
245 | | * (1) The structuring element (sel) is linear, all "hits" |
246 | | * (2) If size == 1, this returns a copy |
247 | | * </pre> |
248 | | */ |
249 | | NUMA * |
250 | | numaDilate(NUMA *nas, |
251 | | l_int32 size) |
252 | 0 | { |
253 | 0 | l_int32 i, j, n, hsize, len; |
254 | 0 | l_float32 maxval; |
255 | 0 | l_float32 *fa, *fas, *fad; |
256 | 0 | NUMA *nad; |
257 | |
|
258 | 0 | if (!nas) |
259 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
260 | 0 | if (size <= 0) |
261 | 0 | return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); |
262 | 0 | if ((size & 1) == 0 ) { |
263 | 0 | L_WARNING("sel size must be odd; increasing by 1\n", __func__); |
264 | 0 | size++; |
265 | 0 | } |
266 | |
|
267 | 0 | if (size == 1) |
268 | 0 | return numaCopy(nas); |
269 | | |
270 | | /* Make a source fa (fas) that has an added (size / 2) boundary |
271 | | * on left and right, contains a copy of nas in the interior region |
272 | | * (between 'hsize' and 'hsize + n'), and has small values |
273 | | * inserted in the boundary (because it is a dilation). */ |
274 | 0 | n = numaGetCount(nas); |
275 | 0 | hsize = size / 2; |
276 | 0 | len = n + 2 * hsize; |
277 | 0 | if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL) |
278 | 0 | return (NUMA *)ERROR_PTR("fas not made", __func__, NULL); |
279 | 0 | for (i = 0; i < hsize; i++) |
280 | 0 | fas[i] = -1.0e37f; |
281 | 0 | for (i = hsize + n; i < len; i++) |
282 | 0 | fas[i] = -1.0e37f; |
283 | 0 | fa = numaGetFArray(nas, L_NOCOPY); |
284 | 0 | for (i = 0; i < n; i++) |
285 | 0 | fas[hsize + i] = fa[i]; |
286 | |
|
287 | 0 | nad = numaMakeConstant(0, n); |
288 | 0 | numaCopyParameters(nad, nas); |
289 | 0 | fad = numaGetFArray(nad, L_NOCOPY); |
290 | 0 | for (i = 0; i < n; i++) { |
291 | 0 | maxval = -1.0e37f; /* start small */ |
292 | 0 | for (j = 0; j < size; j++) |
293 | 0 | maxval = L_MAX(maxval, fas[i + j]); |
294 | 0 | fad[i] = maxval; |
295 | 0 | } |
296 | |
|
297 | 0 | LEPT_FREE(fas); |
298 | 0 | return nad; |
299 | 0 | } |
300 | | |
301 | | |
302 | | /*! |
303 | | * \brief numaOpen() |
304 | | * |
305 | | * \param[in] nas |
306 | | * \param[in] size of sel; greater than 0, odd. The origin |
307 | | * is implicitly in the center. |
308 | | * \return nad opened, or NULL on error |
309 | | * |
310 | | * <pre> |
311 | | * Notes: |
312 | | * (1) The structuring element (sel) is linear, all "hits" |
313 | | * (2) If size == 1, this returns a copy |
314 | | * </pre> |
315 | | */ |
316 | | NUMA * |
317 | | numaOpen(NUMA *nas, |
318 | | l_int32 size) |
319 | 0 | { |
320 | 0 | NUMA *nat, *nad; |
321 | |
|
322 | 0 | if (!nas) |
323 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
324 | 0 | if (size <= 0) |
325 | 0 | return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); |
326 | 0 | if ((size & 1) == 0 ) { |
327 | 0 | L_WARNING("sel size must be odd; increasing by 1\n", __func__); |
328 | 0 | size++; |
329 | 0 | } |
330 | |
|
331 | 0 | if (size == 1) |
332 | 0 | return numaCopy(nas); |
333 | | |
334 | 0 | nat = numaErode(nas, size); |
335 | 0 | nad = numaDilate(nat, size); |
336 | 0 | numaDestroy(&nat); |
337 | 0 | return nad; |
338 | 0 | } |
339 | | |
340 | | |
341 | | /*! |
342 | | * \brief numaClose() |
343 | | * |
344 | | * \param[in] nas |
345 | | * \param[in] size of sel; greater than 0, odd. The origin |
346 | | * is implicitly in the center. |
347 | | * \return nad closed, or NULL on error |
348 | | * |
349 | | * <pre> |
350 | | * Notes: |
351 | | * (1) The structuring element (sel) is linear, all "hits" |
352 | | * (2) If size == 1, this returns a copy |
353 | | * (3) We add a border before doing this operation, for the same |
354 | | * reason that we add a border to a pix before doing a safe closing. |
355 | | * Without the border, a small component near the border gets |
356 | | * clipped at the border on dilation, and can be entirely removed |
357 | | * by the following erosion, violating the basic extensivity |
358 | | * property of closing. |
359 | | * </pre> |
360 | | */ |
361 | | NUMA * |
362 | | numaClose(NUMA *nas, |
363 | | l_int32 size) |
364 | 0 | { |
365 | 0 | NUMA *nab, *nat1, *nat2, *nad; |
366 | |
|
367 | 0 | if (!nas) |
368 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
369 | 0 | if (size <= 0) |
370 | 0 | return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL); |
371 | 0 | if ((size & 1) == 0 ) { |
372 | 0 | L_WARNING("sel size must be odd; increasing by 1\n", __func__); |
373 | 0 | size++; |
374 | 0 | } |
375 | |
|
376 | 0 | if (size == 1) |
377 | 0 | return numaCopy(nas); |
378 | | |
379 | 0 | nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */ |
380 | 0 | nat1 = numaDilate(nab, size); |
381 | 0 | nat2 = numaErode(nat1, size); |
382 | 0 | nad = numaRemoveBorder(nat2, size, size); |
383 | 0 | numaDestroy(&nab); |
384 | 0 | numaDestroy(&nat1); |
385 | 0 | numaDestroy(&nat2); |
386 | 0 | return nad; |
387 | 0 | } |
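/* A minimal usage sketch for the four 1-D morphological operations above.
 * The data values are hypothetical; the only requirement imposed by the
 * code above is that %size be odd (an even size is bumped up by 1). */
static void
example_numa_morphology(void)
{
l_int32  i;
NUMA    *nas, *na1, *na2, *na3, *na4;

    nas = numaCreate(10);
    for (i = 0; i < 10; i++)              /* a single spike at i = 5 */
        numaAddNumber(nas, (i == 5) ? 10.0 : 0.0);
    na1 = numaErode(nas, 3);    /* min over each window: spike removed */
    na2 = numaDilate(nas, 3);   /* max over each window: spike widened */
    na3 = numaOpen(nas, 3);     /* erode then dilate: spike stays removed */
    na4 = numaClose(nas, 3);    /* dilate then erode: spike preserved */
    numaDestroy(&nas);
    numaDestroy(&na1);
    numaDestroy(&na2);
    numaDestroy(&na3);
    numaDestroy(&na4);
}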
388 | | |
389 | | |
390 | | /*----------------------------------------------------------------------* |
391 | | * Other transforms * |
392 | | *----------------------------------------------------------------------*/ |
393 | | /*! |
394 | | * \brief numaTransform() |
395 | | * |
396 | | * \param[in] nas |
397 | | * \param[in] shift add this to each number |
398 | | * \param[in] scale multiply each number by this |
399 | | * \return nad with all values shifted and scaled, or NULL on error |
400 | | * |
401 | | * <pre> |
402 | | * Notes: |
403 | | * (1) Each number is shifted before scaling. |
404 | | * </pre> |
405 | | */ |
406 | | NUMA * |
407 | | numaTransform(NUMA *nas, |
408 | | l_float32 shift, |
409 | | l_float32 scale) |
410 | 0 | { |
411 | 0 | l_int32 i, n; |
412 | 0 | l_float32 val; |
413 | 0 | NUMA *nad; |
414 | |
|
415 | 0 | if (!nas) |
416 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
417 | 0 | n = numaGetCount(nas); |
418 | 0 | if ((nad = numaCreate(n)) == NULL) |
419 | 0 | return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); |
420 | 0 | numaCopyParameters(nad, nas); |
421 | 0 | for (i = 0; i < n; i++) { |
422 | 0 | numaGetFValue(nas, i, &val); |
423 | 0 | val = scale * (val + shift); |
424 | 0 | numaAddNumber(nad, val); |
425 | 0 | } |
426 | 0 | return nad; |
427 | 0 | } |
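/* A small sketch of numaTransform()'s ordering: each output value is
 * scale * (input + shift), so the shift is applied before the scale.
 * The numbers here are hypothetical. */
static void
example_numa_transform(void)
{
l_float32  val;
NUMA      *nas, *nad;

    nas = numaCreate(1);
    numaAddNumber(nas, 4.0);
    nad = numaTransform(nas, 1.0, 10.0);   /* (4 + 1) * 10 = 50 */
    numaGetFValue(nad, 0, &val);
    lept_stderr("transformed value = %5.1f\n", val);   /* 50.0 */
    numaDestroy(&nas);
    numaDestroy(&nad);
}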
428 | | |
429 | | |
430 | | /*! |
431 | | * \brief numaSimpleStats() |
432 | | * |
433 | | * \param[in] na input numa |
434 | | * \param[in] first first element to use |
435 | | * \param[in] last last element to use; -1 to go to the end |
436 | | * \param[out] pmean [optional] mean value |
437 | | * \param[out] pvar [optional] variance |
438 | | * \param[out] prvar [optional] rms deviation from the mean |
439 | | * \return 0 if OK, 1 on error |
440 | | */ |
441 | | l_ok |
442 | | numaSimpleStats(NUMA *na, |
443 | | l_int32 first, |
444 | | l_int32 last, |
445 | | l_float32 *pmean, |
446 | | l_float32 *pvar, |
447 | | l_float32 *prvar) |
448 | 0 | { |
449 | 0 | l_int32 i, n, ni; |
450 | 0 | l_float32 sum, sumsq, val, mean, var; |
451 | |
|
452 | 0 | if (pmean) *pmean = 0.0; |
453 | 0 | if (pvar) *pvar = 0.0; |
454 | 0 | if (prvar) *prvar = 0.0; |
455 | 0 | if (!pmean && !pvar && !prvar) |
456 | 0 | return ERROR_INT("nothing requested", __func__, 1); |
457 | 0 | if (!na) |
458 | 0 | return ERROR_INT("na not defined", __func__, 1); |
459 | 0 | if ((n = numaGetCount(na)) == 0) |
460 | 0 | return ERROR_INT("na is empty", __func__, 1); |
461 | 0 | first = L_MAX(0, first); |
462 | 0 | if (last < 0) last = n - 1; |
463 | 0 | if (first >= n) |
464 | 0 | return ERROR_INT("invalid first", __func__, 1); |
465 | 0 | if (last >= n) { |
466 | 0 | L_WARNING("last = %d is beyond max index = %d; adjusting\n", |
467 | 0 | __func__, last, n - 1); |
468 | 0 | last = n - 1; |
469 | 0 | } |
470 | 0 | if (first > last) |
471 | 0 | return ERROR_INT("first > last\n", __func__, 1); |
472 | 0 | ni = last - first + 1; |
473 | 0 | sum = sumsq = 0.0; |
474 | 0 | for (i = first; i <= last; i++) { |
475 | 0 | numaGetFValue(na, i, &val); |
476 | 0 | sum += val; |
477 | 0 | sumsq += val * val; |
478 | 0 | } |
479 | |
|
480 | 0 | mean = sum / ni; |
481 | 0 | if (pmean) |
482 | 0 | *pmean = mean; |
483 | 0 | if (pvar || prvar) { |
484 | 0 | var = sumsq / ni - mean * mean; |
485 | 0 | if (pvar) *pvar = var; |
486 | 0 | if (prvar) *prvar = sqrtf(var); |
487 | 0 | } |
488 | |
|
489 | 0 | return 0; |
490 | 0 | } |
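/* A minimal sketch of numaSimpleStats() on a subrange.  All output
 * pointers are optional; here only the mean and the rms deviation are
 * requested, and ilast = -1 runs to the end.  The input is hypothetical. */
static void
example_numa_simple_stats(void)
{
l_int32    i;
l_float32  mean, rvar;
NUMA      *na;

    na = numaCreate(8);
    for (i = 0; i < 8; i++)
        numaAddNumber(na, (l_float32)i);
    numaSimpleStats(na, 2, -1, &mean, NULL, &rvar);  /* elements 2 .. 7 */
    lept_stderr("mean = %6.3f, rms dev = %6.3f\n", mean, rvar);
    numaDestroy(&na);
}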
491 | | |
492 | | |
493 | | /*! |
494 | | * \brief numaWindowedStats() |
495 | | * |
496 | | * \param[in] nas input numa |
497 | | * \param[in] wc half width of the window |
498 | | * \param[out] pnam [optional] mean value in window |
499 | | * \param[out] pnams [optional] mean square value in window |
500 | | * \param[out] pnav [optional] variance in window |
501 | | * \param[out] pnarv [optional] rms deviation from the mean |
502 | | * \return 0 if OK, 1 on error |
503 | | * |
504 | | * <pre> |
505 | | * Notes: |
506 | | * (1) This is a high-level convenience function for calculating |
507 | | * any or all of these derived arrays. |
508 | | * (2) These statistical measures over the values in the |
509 | | * rectangular window are: |
510 | | * ~ average value: [x] (nam) |
511 | | * ~ average squared value: [x*x] (nams) |
512 | | * ~ variance: [(x - [x])*(x - [x])] = [x*x] - [x]*[x] (nav) |
513 | | * ~ square-root of variance: (narv) |
514 | | * where the brackets [ .. ] indicate that the average value is |
515 | | * to be taken over the window. |
516 | | * (3) Note that the variance is just the mean square difference from |
517 | | * the mean value; and the square root of the variance is the |
518 | | * root mean square difference from the mean, sometimes also |
519 | | * called the 'standard deviation'. |
520 | | * (4) Internally, use mirrored borders to handle values near the |
521 | | * end of each array. |
522 | | * </pre> |
523 | | */ |
524 | | l_ok |
525 | | numaWindowedStats(NUMA *nas, |
526 | | l_int32 wc, |
527 | | NUMA **pnam, |
528 | | NUMA **pnams, |
529 | | NUMA **pnav, |
530 | | NUMA **pnarv) |
531 | 0 | { |
532 | 0 | NUMA *nam, *nams; |
533 | |
|
534 | 0 | if (!nas) |
535 | 0 | return ERROR_INT("nas not defined", __func__, 1); |
536 | 0 | if (2 * wc + 1 > numaGetCount(nas)) |
537 | 0 | L_WARNING("filter wider than input array!\n", __func__); |
538 | |
|
539 | 0 | if (!pnav && !pnarv) { |
540 | 0 | if (pnam) *pnam = numaWindowedMean(nas, wc); |
541 | 0 | if (pnams) *pnams = numaWindowedMeanSquare(nas, wc); |
542 | 0 | return 0; |
543 | 0 | } |
544 | | |
545 | 0 | nam = numaWindowedMean(nas, wc); |
546 | 0 | nams = numaWindowedMeanSquare(nas, wc); |
547 | 0 | numaWindowedVariance(nam, nams, pnav, pnarv); |
548 | 0 | if (pnam) |
549 | 0 | *pnam = nam; |
550 | 0 | else |
551 | 0 | numaDestroy(&nam); |
552 | 0 | if (pnams) |
553 | 0 | *pnams = nams; |
554 | 0 | else |
555 | 0 | numaDestroy(&nams); |
556 | 0 | return 0; |
557 | 0 | } |
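/* A usage sketch for numaWindowedStats().  With wc = 2 the window width
 * is 5; any of the four derived arrays may be requested independently by
 * passing NULL for the others.  The input data is hypothetical. */
static void
example_numa_windowed_stats(void)
{
l_int32  i;
NUMA    *nas, *nam, *narv;

    nas = numaCreate(20);
    for (i = 0; i < 20; i++)
        numaAddNumber(nas, (l_float32)(i % 5));   /* a sawtooth signal */
    numaWindowedStats(nas, 2, &nam, NULL, NULL, &narv);
    numaDestroy(&nas);
    numaDestroy(&nam);
    numaDestroy(&narv);
}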
558 | | |
559 | | |
560 | | /*! |
561 | | * \brief numaWindowedMean() |
562 | | * |
563 | | * \param[in] nas |
564 | | * \param[in] wc half width of the convolution window |
565 | | * \return nad after low-pass filtering, or NULL on error |
566 | | * |
567 | | * <pre> |
568 | | * Notes: |
569 | | * (1) This is a convolution. The window has width = 2 * %wc + 1. |
570 | | * (2) We add a mirrored border of size %wc to each end of the array. |
571 | | * </pre> |
572 | | */ |
573 | | NUMA * |
574 | | numaWindowedMean(NUMA *nas, |
575 | | l_int32 wc) |
576 | 0 | { |
577 | 0 | l_int32 i, n, n1, width; |
578 | 0 | l_float32 sum, norm; |
579 | 0 | l_float32 *fa1, *fad, *suma; |
580 | 0 | NUMA *na1, *nad; |
581 | |
|
582 | 0 | if (!nas) |
583 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
584 | 0 | n = numaGetCount(nas); |
585 | 0 | width = 2 * wc + 1; /* filter width */ |
586 | 0 | if (width > n) |
587 | 0 | L_WARNING("filter wider than input array!\n", __func__); |
588 | |
|
589 | 0 | na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER); |
590 | 0 | n1 = n + 2 * wc; |
591 | 0 | fa1 = numaGetFArray(na1, L_NOCOPY); |
592 | 0 | nad = numaMakeConstant(0, n); |
593 | 0 | fad = numaGetFArray(nad, L_NOCOPY); |
594 | | |
595 | | /* Make sum array; note the indexing */ |
596 | 0 | if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) { |
597 | 0 | numaDestroy(&na1); |
598 | 0 | numaDestroy(&nad); |
599 | 0 | return (NUMA *)ERROR_PTR("suma not made", __func__, NULL); |
600 | 0 | } |
601 | 0 | sum = 0.0; |
602 | 0 | suma[0] = 0.0; |
603 | 0 | for (i = 0; i < n1; i++) { |
604 | 0 | sum += fa1[i]; |
605 | 0 | suma[i + 1] = sum; |
606 | 0 | } |
607 | |
|
608 | 0 | norm = 1.f / (2 * wc + 1); |
609 | 0 | for (i = 0; i < n; i++) |
610 | 0 | fad[i] = norm * (suma[width + i] - suma[i]); |
611 | |
|
612 | 0 | LEPT_FREE(suma); |
613 | 0 | numaDestroy(&na1); |
614 | 0 | return nad; |
615 | 0 | } |
616 | | |
617 | | |
618 | | /*! |
619 | | * \brief numaWindowedMeanSquare() |
620 | | * |
621 | | * \param[in] nas |
622 | | * \param[in] wc half width of the window |
623 | | * \return nad containing windowed mean square values, or NULL on error |
624 | | * |
625 | | * <pre> |
626 | | * Notes: |
627 | | * (1) The window has width = 2 * %wc + 1. |
628 | | * (2) We add a mirrored border of size %wc to each end of the array. |
629 | | * </pre> |
630 | | */ |
631 | | NUMA * |
632 | | numaWindowedMeanSquare(NUMA *nas, |
633 | | l_int32 wc) |
634 | 0 | { |
635 | 0 | l_int32 i, n, n1, width; |
636 | 0 | l_float32 sum, norm; |
637 | 0 | l_float32 *fa1, *fad, *suma; |
638 | 0 | NUMA *na1, *nad; |
639 | |
|
640 | 0 | if (!nas) |
641 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
642 | 0 | n = numaGetCount(nas); |
643 | 0 | width = 2 * wc + 1; /* filter width */ |
644 | 0 | if (width > n) |
645 | 0 | L_WARNING("filter wider than input array!\n", __func__); |
646 | |
|
647 | 0 | na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER); |
648 | 0 | n1 = n + 2 * wc; |
649 | 0 | fa1 = numaGetFArray(na1, L_NOCOPY); |
650 | 0 | nad = numaMakeConstant(0, n); |
651 | 0 | fad = numaGetFArray(nad, L_NOCOPY); |
652 | | |
653 | | /* Make sum array; note the indexing */ |
654 | 0 | if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) { |
655 | 0 | numaDestroy(&na1); |
656 | 0 | numaDestroy(&nad); |
657 | 0 | return (NUMA *)ERROR_PTR("suma not made", __func__, NULL); |
658 | 0 | } |
659 | 0 | sum = 0.0; |
660 | 0 | suma[0] = 0.0; |
661 | 0 | for (i = 0; i < n1; i++) { |
662 | 0 | sum += fa1[i] * fa1[i]; |
663 | 0 | suma[i + 1] = sum; |
664 | 0 | } |
665 | |
|
666 | 0 | norm = 1.f / (2 * wc + 1); |
667 | 0 | for (i = 0; i < n; i++) |
668 | 0 | fad[i] = norm * (suma[width + i] - suma[i]); |
669 | |
|
670 | 0 | LEPT_FREE(suma); |
671 | 0 | numaDestroy(&na1); |
672 | 0 | return nad; |
673 | 0 | } |
674 | | |
675 | | |
676 | | /*! |
677 | | * \brief numaWindowedVariance() |
678 | | * |
679 | | * \param[in] nam windowed mean values |
680 | | * \param[in] nams windowed mean square values |
681 | | * \param[out] pnav [optional] numa of variance -- the ms deviation |
682 | | * from the mean |
683 | | * \param[out] pnarv [optional] numa of rms deviation from the mean |
684 | | * \return 0 if OK, 1 on error |
685 | | * |
686 | | * <pre> |
687 | | * Notes: |
688 | | * (1) The numas of windowed mean and mean square are precomputed, |
689 | | * using numaWindowedMean() and numaWindowedMeanSquare(). |
690 | | * (2) Either or both of the variance and square-root of variance |
691 | | * are returned, where the variance is the average over the |
692 | | * window of the mean square difference of the pixel value |
693 | | * from the mean: |
694 | | * [(x - [x])*(x - [x])] = [x*x] - [x]*[x] |
695 | | * </pre> |
696 | | */ |
697 | | l_ok |
698 | | numaWindowedVariance(NUMA *nam, |
699 | | NUMA *nams, |
700 | | NUMA **pnav, |
701 | | NUMA **pnarv) |
702 | 0 | { |
703 | 0 | l_int32 i, nm, nms; |
704 | 0 | l_float32 var; |
705 | 0 | l_float32 *fam, *fams, *fav = NULL, *farv = NULL; |
706 | 0 | NUMA *nav, *narv; /* variance and square root of variance */ |
707 | |
|
708 | 0 | if (pnav) *pnav = NULL; |
709 | 0 | if (pnarv) *pnarv = NULL; |
710 | 0 | if (!pnav && !pnarv) |
711 | 0 | return ERROR_INT("neither &nav nor &narv are defined", __func__, 1); |
712 | 0 | if (!nam) |
713 | 0 | return ERROR_INT("nam not defined", __func__, 1); |
714 | 0 | if (!nams) |
715 | 0 | return ERROR_INT("nams not defined", __func__, 1); |
716 | 0 | nm = numaGetCount(nam); |
717 | 0 | nms = numaGetCount(nams); |
718 | 0 | if (nm != nms) |
719 | 0 | return ERROR_INT("sizes of nam and nams differ", __func__, 1); |
720 | | |
721 | 0 | if (pnav) { |
722 | 0 | nav = numaMakeConstant(0, nm); |
723 | 0 | *pnav = nav; |
724 | 0 | fav = numaGetFArray(nav, L_NOCOPY); |
725 | 0 | } |
726 | 0 | if (pnarv) { |
727 | 0 | narv = numaMakeConstant(0, nm); |
728 | 0 | *pnarv = narv; |
729 | 0 | farv = numaGetFArray(narv, L_NOCOPY); |
730 | 0 | } |
731 | 0 | fam = numaGetFArray(nam, L_NOCOPY); |
732 | 0 | fams = numaGetFArray(nams, L_NOCOPY); |
733 | |
|
734 | 0 | for (i = 0; i < nm; i++) { |
735 | 0 | var = fams[i] - fam[i] * fam[i]; |
736 | 0 | if (pnav) |
737 | 0 | fav[i] = var; |
738 | 0 | if (pnarv) |
739 | 0 | farv[i] = sqrtf(var); |
740 | 0 | } |
741 | |
|
742 | 0 | return 0; |
743 | 0 | } |
744 | | |
745 | | |
746 | | /*! |
747 | | * \brief numaWindowedMedian() |
748 | | * |
749 | | * \param[in] nas |
750 | | * \param[in] halfwin half width of window over which the median is found |
751 | | * \return nad after windowed median filtering, or NULL on error |
752 | | * |
753 | | * <pre> |
754 | | * Notes: |
755 | | * (1) The requested window has width = 2 * %halfwin + 1. |
756 | | * (2) If the input nas has fewer than 3 elements, return a copy. |
757 | | * (3) If the filter is too small (%halfwin <= 0), return a copy. |
758 | | * (4) If the filter is too large, it is reduced in size. |
759 | | * (5) We add a mirrored border of size %halfwin to each end of |
760 | | * the array to simplify the calculation by avoiding end-effects. |
761 | | * </pre> |
762 | | */ |
763 | | NUMA * |
764 | | numaWindowedMedian(NUMA *nas, |
765 | | l_int32 halfwin) |
766 | 0 | { |
767 | 0 | l_int32 i, n; |
768 | 0 | l_float32 medval; |
769 | 0 | NUMA *na1, *na2, *nad; |
770 | |
|
771 | 0 | if (!nas) |
772 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
773 | 0 | if ((n = numaGetCount(nas)) < 3) |
774 | 0 | return numaCopy(nas); |
775 | 0 | if (halfwin <= 0) { |
776 | 0 | L_ERROR("filter too small; returning a copy\n", __func__); |
777 | 0 | return numaCopy(nas); |
778 | 0 | } |
779 | | |
780 | 0 | if (halfwin > (n - 1) / 2) { |
781 | 0 | halfwin = (n - 1) / 2; |
782 | 0 | L_INFO("reducing filter to halfwin = %d\n", __func__, halfwin); |
783 | 0 | } |
784 | | |
785 | | /* Add a border to both ends */ |
786 | 0 | na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER); |
787 | | |
788 | | /* Get the median value at the center of each window, corresponding |
789 | | * to locations in the input nas. */ |
790 | 0 | nad = numaCreate(n); |
791 | 0 | for (i = 0; i < n; i++) { |
792 | 0 | na2 = numaClipToInterval(na1, i, i + 2 * halfwin); |
793 | 0 | numaGetMedian(na2, &medval); |
794 | 0 | numaAddNumber(nad, medval); |
795 | 0 | numaDestroy(&na2); |
796 | 0 | } |
797 | |
|
798 | 0 | numaDestroy(&na1); |
799 | 0 | return nad; |
800 | 0 | } |
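/* A sketch of windowed median filtering, which removes an isolated spike
 * that a windowed mean would only smear out.  The data is hypothetical. */
static void
example_numa_windowed_median(void)
{
l_int32  i;
NUMA    *nas, *nad;

    nas = numaCreate(15);
    for (i = 0; i < 15; i++)
        numaAddNumber(nas, (i == 7) ? 100.0 : 1.0);  /* one outlier */
    nad = numaWindowedMedian(nas, 2);   /* window width 5; outlier removed */
    numaDestroy(&nas);
    numaDestroy(&nad);
}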
801 | | |
802 | | |
803 | | /*! |
804 | | * \brief numaConvertToInt() |
805 | | * |
806 | | * \param[in] nas source numa |
807 | | * \return na with all values rounded to nearest integer, or |
808 | | * NULL on error |
809 | | */ |
810 | | NUMA * |
811 | | numaConvertToInt(NUMA *nas) |
812 | 0 | { |
813 | 0 | l_int32 i, n, ival; |
814 | 0 | NUMA *nad; |
815 | |
|
816 | 0 | if (!nas) |
817 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
818 | | |
819 | 0 | n = numaGetCount(nas); |
820 | 0 | if ((nad = numaCreate(n)) == NULL) |
821 | 0 | return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); |
822 | 0 | numaCopyParameters(nad, nas); |
823 | 0 | for (i = 0; i < n; i++) { |
824 | 0 | numaGetIValue(nas, i, &ival); |
825 | 0 | numaAddNumber(nad, ival); |
826 | 0 | } |
827 | 0 | return nad; |
828 | 0 | } |
829 | | |
830 | | |
831 | | /*----------------------------------------------------------------------* |
832 | | * Histogram generation and statistics * |
833 | | *----------------------------------------------------------------------*/ |
834 | | /*! |
835 | | * \brief numaMakeHistogram() |
836 | | * |
837 | | * \param[in] na |
838 | | * \param[in] maxbins max number of histogram bins |
839 | | * \param[out] pbinsize [optional] size of histogram bins |
840 | | * \param[out] pbinstart [optional] start val of minimum bin; |
841 | | * input NULL to force start at 0 |
842 | | * \return na consisting of a histogram of integerized values, |
843 | | * or NULL on error. |
844 | | * |
845 | | * <pre> |
846 | | * Notes: |
847 | | * (1) This simple interface is designed for integer data. |
848 | | * The bins are of integer width and start on integer boundaries, |
849 | | * so the results on float data will not have high precision. |
850 | | * (2) Specify the max number of input bins. Then %binsize, |
851 | | * the size of bins necessary to accommodate the input data, |
852 | | * is optionally returned. It is one of the sequence: |
853 | | * {1, 2, 5, 10, 20, 50, ...}. |
854 | | * (3) If &binstart is given, all values are accommodated, |
855 | | * and the min value of the starting bin is returned. |
856 | | * Otherwise, all negative values are discarded and |
857 | | * the histogram bins start at 0. |
858 | | * </pre> |
859 | | */ |
860 | | NUMA * |
861 | | numaMakeHistogram(NUMA *na, |
862 | | l_int32 maxbins, |
863 | | l_int32 *pbinsize, |
864 | | l_int32 *pbinstart) |
865 | 0 | { |
866 | 0 | l_int32 i, n, ival, hval; |
867 | 0 | l_int32 iminval, imaxval, range, binsize, nbins, ibin; |
868 | 0 | l_float32 val, ratio; |
869 | 0 | NUMA *nai, *nahist; |
870 | |
|
871 | 0 | if (pbinsize) *pbinsize = 0; |
872 | 0 | if (pbinstart) *pbinstart = 0; |
873 | 0 | if (!na) |
874 | 0 | return (NUMA *)ERROR_PTR("na not defined", __func__, NULL); |
875 | 0 | if (maxbins < 1) |
876 | 0 | return (NUMA *)ERROR_PTR("maxbins < 1", __func__, NULL); |
877 | | |
878 | | /* Determine input range */ |
879 | 0 | numaGetMin(na, &val, NULL); |
880 | 0 | iminval = (l_int32)(val + 0.5); |
881 | 0 | numaGetMax(na, &val, NULL); |
882 | 0 | imaxval = (l_int32)(val + 0.5); |
883 | 0 | if (pbinstart == NULL) { /* clip negative vals; start from 0 */ |
884 | 0 | iminval = 0; |
885 | 0 | if (imaxval < 0) |
886 | 0 | return (NUMA *)ERROR_PTR("all values < 0", __func__, NULL); |
887 | 0 | } |
888 | | |
889 | | /* Determine binsize */ |
890 | 0 | range = imaxval - iminval + 1; |
891 | 0 | if (range > maxbins - 1) { |
892 | 0 | ratio = (l_float32)range / (l_float32)maxbins; |
893 | 0 | binsize = 0; |
894 | 0 | for (i = 0; i < NBinSizes; i++) { |
895 | 0 | if (ratio < BinSizeArray[i]) { |
896 | 0 | binsize = BinSizeArray[i]; |
897 | 0 | break; |
898 | 0 | } |
899 | 0 | } |
900 | 0 | if (binsize == 0) |
901 | 0 | return (NUMA *)ERROR_PTR("numbers too large", __func__, NULL); |
902 | 0 | } else { |
903 | 0 | binsize = 1; |
904 | 0 | } |
905 | 0 | if (pbinsize) *pbinsize = binsize; |
906 | 0 | nbins = 1 + range / binsize; /* +1 seems to be sufficient */ |
907 | | |
908 | | /* Redetermine iminval */ |
909 | 0 | if (pbinstart && binsize > 1) { |
910 | 0 | if (iminval >= 0) |
911 | 0 | iminval = binsize * (iminval / binsize); |
912 | 0 | else |
913 | 0 | iminval = binsize * ((iminval - binsize + 1) / binsize); |
914 | 0 | } |
915 | 0 | if (pbinstart) *pbinstart = iminval; |
916 | |
|
917 | | #if DEBUG_HISTO |
918 | | lept_stderr(" imaxval = %d, range = %d, nbins = %d\n", |
919 | | imaxval, range, nbins); |
920 | | #endif /* DEBUG_HISTO */ |
921 | | |
922 | | /* Use integerized data for input */ |
923 | 0 | if ((nai = numaConvertToInt(na)) == NULL) |
924 | 0 | return (NUMA *)ERROR_PTR("nai not made", __func__, NULL); |
925 | 0 | n = numaGetCount(nai); |
926 | | |
927 | | /* Make histogram, converting value in input array |
928 | | * into a bin number for this histogram array. */ |
929 | 0 | if ((nahist = numaCreate(nbins)) == NULL) { |
930 | 0 | numaDestroy(&nai); |
931 | 0 | return (NUMA *)ERROR_PTR("nahist not made", __func__, NULL); |
932 | 0 | } |
933 | 0 | numaSetCount(nahist, nbins); |
934 | 0 | numaSetParameters(nahist, iminval, binsize); |
935 | 0 | for (i = 0; i < n; i++) { |
936 | 0 | numaGetIValue(nai, i, &ival); |
937 | 0 | ibin = (ival - iminval) / binsize; |
938 | 0 | if (ibin >= 0 && ibin < nbins) { |
939 | 0 | numaGetIValue(nahist, ibin, &hval); |
940 | 0 | numaSetValue(nahist, ibin, hval + 1.0f); |
941 | 0 | } |
942 | 0 | } |
943 | |
|
944 | 0 | numaDestroy(&nai); |
945 | 0 | return nahist; |
946 | 0 | } |
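/* A usage sketch for numaMakeHistogram() on integer data.  Passing
 * &binstart accommodates all values and returns the start of the first
 * bin; the returned binsize is drawn from the {1, 2, 5, 10, ...}
 * sequence.  The input values are hypothetical. */
static void
example_numa_make_histogram(void)
{
l_int32  i, binsize, binstart;
NUMA    *na, *nahist;

    na = numaCreate(1000);
    for (i = 0; i < 1000; i++)
        numaAddNumber(na, (l_float32)(i % 300));
    nahist = numaMakeHistogram(na, 100, &binsize, &binstart);
    lept_stderr("binsize = %d, binstart = %d\n", binsize, binstart);
    numaDestroy(&na);
    numaDestroy(&nahist);
}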
947 | | |
948 | | |
949 | | /*! |
950 | | * \brief numaMakeHistogramAuto() |
951 | | * |
952 | | * \param[in] na numa of floats; these may be integers |
953 | | * \param[in] maxbins max number of histogram bins; >= 1 |
954 | | * \return na consisting of a histogram of quantized float values, |
955 | | * or NULL on error. |
956 | | * |
957 | | * <pre> |
958 | | * Notes: |
959 | | * (1) This simple interface is designed for accurate binning |
960 | | * of both integer and float data. |
961 | | * (2) If the array data is integers, and the range of integers |
962 | | * is smaller than %maxbins, they are binned as they fall, |
963 | | * with binsize = 1. |
964 | | * (3) If the range of data, (maxval - minval), is larger than |
965 | | * %maxbins, or if the data is floats, they are binned into |
966 | | * exactly %maxbins bins. |
967 | | * (4) Unlike numaMakeHistogram(), these bins in general have |
968 | | * non-integer location and width, even for integer data. |
969 | | * </pre> |
970 | | */ |
971 | | NUMA * |
972 | | numaMakeHistogramAuto(NUMA *na, |
973 | | l_int32 maxbins) |
974 | 0 | { |
975 | 0 | l_int32 i, n, imin, imax, irange, ibin, ival, allints; |
976 | 0 | l_float32 minval, maxval, range, binsize, fval; |
977 | 0 | NUMA *nah; |
978 | |
|
979 | 0 | if (!na) |
980 | 0 | return (NUMA *)ERROR_PTR("na not defined", __func__, NULL); |
981 | 0 | maxbins = L_MAX(1, maxbins); |
982 | | |
983 | | /* Determine input range */ |
984 | 0 | numaGetMin(na, &minval, NULL); |
985 | 0 | numaGetMax(na, &maxval, NULL); |
986 | | |
987 | | /* Determine if values are all integers */ |
988 | 0 | n = numaGetCount(na); |
989 | 0 | numaHasOnlyIntegers(na, &allints); |
990 | | |
991 | | /* Do simple integer binning if possible */ |
992 | 0 | if (allints && (maxval - minval < maxbins)) { |
993 | 0 | imin = (l_int32)minval; |
994 | 0 | imax = (l_int32)maxval; |
995 | 0 | irange = imax - imin + 1; |
996 | 0 | nah = numaCreate(irange); |
997 | 0 | numaSetCount(nah, irange); /* init */ |
998 | 0 | numaSetParameters(nah, minval, 1.0); |
999 | 0 | for (i = 0; i < n; i++) { |
1000 | 0 | numaGetIValue(na, i, &ival); |
1001 | 0 | ibin = ival - imin; |
1002 | 0 | numaGetIValue(nah, ibin, &ival); |
1003 | 0 | numaSetValue(nah, ibin, ival + 1.0f); |
1004 | 0 | } |
1005 | |
|
1006 | 0 | return nah; |
1007 | 0 | } |
1008 | | |
1009 | | /* Do float binning, even if the data is integers. */ |
1010 | 0 | range = maxval - minval; |
1011 | 0 | binsize = range / (l_float32)maxbins; |
1012 | 0 | if (range == 0.0) { |
1013 | 0 | nah = numaCreate(1); |
1014 | 0 | numaSetParameters(nah, minval, binsize); |
1015 | 0 | numaAddNumber(nah, n); |
1016 | 0 | return nah; |
1017 | 0 | } |
1018 | 0 | nah = numaCreate(maxbins); |
1019 | 0 | numaSetCount(nah, maxbins); |
1020 | 0 | numaSetParameters(nah, minval, binsize); |
1021 | 0 | for (i = 0; i < n; i++) { |
1022 | 0 | numaGetFValue(na, i, &fval); |
1023 | 0 | ibin = (l_int32)((fval - minval) / binsize); |
1024 | 0 | ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */ |
1025 | 0 | numaGetIValue(nah, ibin, &ival); |
1026 | 0 | numaSetValue(nah, ibin, ival + 1.0f); |
1027 | 0 | } |
1028 | |
|
1029 | 0 | return nah; |
1030 | 0 | } |
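/* A sketch of automatic binning on float data: the range is split into
 * exactly %maxbins equal-width bins, and the bin start and width are
 * returned through the numa parameters.  The data is hypothetical. */
static void
example_numa_histogram_auto(void)
{
l_int32    i;
l_float32  startx, binsize;
NUMA      *na, *nah;

    na = numaCreate(100);
    for (i = 0; i < 100; i++)
        numaAddNumber(na, 0.37f * i);       /* floats in [0.0, 36.63] */
    nah = numaMakeHistogramAuto(na, 20);    /* 20 equal-width bins */
    numaGetParameters(nah, &startx, &binsize);
    lept_stderr("startx = %7.3f, binsize = %7.3f\n", startx, binsize);
    numaDestroy(&na);
    numaDestroy(&nah);
}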
1031 | | |
1032 | | |
1033 | | /*! |
1034 | | * \brief numaMakeHistogramClipped() |
1035 | | * |
1036 | | * \param[in] na |
1037 | | * \param[in] binsize typically 1.0 |
1038 | | * \param[in] maxsize of histogram ordinate |
1039 | | * \return na histogram of bins of size %binsize, starting with |
1040 | | * the first bin at x = 0.0 and going up to a maximum of |
1041 | | * x = %maxsize, in increments of %binsize; or NULL on error |
1042 | | * |
1043 | | * <pre> |
1044 | | * Notes: |
1045 | | * (1) This simple function generates a histogram of values |
1046 | | * from na, discarding all values < 0.0 or greater than |
1047 | | * min(%maxsize, maxval), where maxval is the maximum value in na. |
1048 | | * The histogram data is put in bins of size delx = %binsize, |
1049 | | * starting at x = 0.0. We use as many bins as are |
1050 | | * needed to hold the data. |
1051 | | * </pre> |
1052 | | */ |
1053 | | NUMA * |
1054 | | numaMakeHistogramClipped(NUMA *na, |
1055 | | l_float32 binsize, |
1056 | | l_float32 maxsize) |
1057 | 0 | { |
1058 | 0 | l_int32 i, n, nbins, ival, ibin; |
1059 | 0 | l_float32 val, maxval; |
1060 | 0 | NUMA *nad; |
1061 | |
|
1062 | 0 | if (!na) |
1063 | 0 | return (NUMA *)ERROR_PTR("na not defined", __func__, NULL); |
1064 | 0 | if (binsize <= 0.0) |
1065 | 0 | return (NUMA *)ERROR_PTR("binsize must be > 0.0", __func__, NULL); |
1066 | 0 | if (binsize > maxsize) |
1067 | 0 | binsize = maxsize; /* just one bin */ |
1068 | |
|
1069 | 0 | numaGetMax(na, &maxval, NULL); |
1070 | 0 | n = numaGetCount(na); |
1071 | 0 | maxsize = L_MIN(maxsize, maxval); |
1072 | 0 | nbins = (l_int32)(maxsize / binsize) + 1; |
1073 | | |
1074 | | /* lept_stderr("maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */ |
1075 | |
|
1076 | 0 | if ((nad = numaCreate(nbins)) == NULL) |
1077 | 0 | return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); |
1078 | 0 | numaSetParameters(nad, 0.0, binsize); |
1079 | 0 | numaSetCount(nad, nbins); /* interpret zeroes in bins as data */ |
1080 | 0 | for (i = 0; i < n; i++) { |
1081 | 0 | numaGetFValue(na, i, &val); |
1082 | 0 | ibin = (l_int32)(val / binsize); |
1083 | 0 | if (ibin >= 0 && ibin < nbins) { |
1084 | 0 | numaGetIValue(nad, ibin, &ival); |
1085 | 0 | numaSetValue(nad, ibin, ival + 1.0f); |
1086 | 0 | } |
1087 | 0 | } |
1088 | |
|
1089 | 0 | return nad; |
1090 | 0 | } |
1091 | | |
1092 | | |
1093 | | /*! |
1094 | | * \brief numaRebinHistogram() |
1095 | | * |
1096 | | * \param[in] nas input histogram |
1097 | | * \param[in] newsize number of old bins contained in each new bin |
1098 | | * \return nad more coarsely re-binned histogram, or NULL on error |
1099 | | */ |
1100 | | NUMA * |
1101 | | numaRebinHistogram(NUMA *nas, |
1102 | | l_int32 newsize) |
1103 | 0 | { |
1104 | 0 | l_int32 i, j, ns, nd, index, count, val; |
1105 | 0 | l_float32 start, oldsize; |
1106 | 0 | NUMA *nad; |
1107 | |
|
1108 | 0 | if (!nas) |
1109 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
1110 | 0 | if (newsize <= 1) |
1111 | 0 | return (NUMA *)ERROR_PTR("newsize must be > 1", __func__, NULL); |
1112 | 0 | if ((ns = numaGetCount(nas)) == 0) |
1113 | 0 | return (NUMA *)ERROR_PTR("no bins in nas", __func__, NULL); |
1114 | | |
1115 | 0 | nd = (ns + newsize - 1) / newsize; |
1116 | 0 | if ((nad = numaCreate(nd)) == NULL) |
1117 | 0 | return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); |
1118 | 0 | numaGetParameters(nad, &start, &oldsize); |
1119 | 0 | numaSetParameters(nad, start, oldsize * newsize); |
1120 | |
|
1121 | 0 | for (i = 0; i < nd; i++) { /* new bins */ |
1122 | 0 | count = 0; |
1123 | 0 | index = i * newsize; |
1124 | 0 | for (j = 0; j < newsize; j++) { |
1125 | 0 | if (index < ns) { |
1126 | 0 | numaGetIValue(nas, index, &val); |
1127 | 0 | count += val; |
1128 | 0 | index++; |
1129 | 0 | } |
1130 | 0 | } |
1131 | 0 | numaAddNumber(nad, count); |
1132 | 0 | } |
1133 | |
|
1134 | 0 | return nad; |
1135 | 0 | } |
1136 | | |
1137 | | |
1138 | | /*! |
1139 | | * \brief numaNormalizeHistogram() |
1140 | | * |
1141 | | * \param[in] nas input histogram |
1142 | | * \param[in] tsum target sum of all numbers in dest histogram; e.g., use |
1143 | | * %tsum = 1.0 if this represents a probability distribution |
1144 | | * \return nad normalized histogram, or NULL on error |
1145 | | */ |
1146 | | NUMA * |
1147 | | numaNormalizeHistogram(NUMA *nas, |
1148 | | l_float32 tsum) |
1149 | 0 | { |
1150 | 0 | l_int32 i, ns; |
1151 | 0 | l_float32 sum, factor, fval; |
1152 | 0 | NUMA *nad; |
1153 | |
|
1154 | 0 | if (!nas) |
1155 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
1156 | 0 | if (tsum <= 0.0) |
1157 | 0 | return (NUMA *)ERROR_PTR("tsum must be > 0.0", __func__, NULL); |
1158 | 0 | if ((ns = numaGetCount(nas)) == 0) |
1159 | 0 | return (NUMA *)ERROR_PTR("no bins in nas", __func__, NULL); |
1160 | | |
1161 | 0 | numaGetSum(nas, &sum); |
1162 | 0 | factor = tsum / sum; |
1163 | |
|
1164 | 0 | if ((nad = numaCreate(ns)) == NULL) |
1165 | 0 | return (NUMA *)ERROR_PTR("nad not made", __func__, NULL); |
1166 | 0 | numaCopyParameters(nad, nas); |
1167 | |
|
1168 | 0 | for (i = 0; i < ns; i++) { |
1169 | 0 | numaGetFValue(nas, i, &fval); |
1170 | 0 | fval *= factor; |
1171 | 0 | numaAddNumber(nad, fval); |
1172 | 0 | } |
1173 | |
|
1174 | 0 | return nad; |
1175 | 0 | } |
1176 | | |
1177 | | |
1178 | | /*! |
1179 | | * \brief numaGetStatsUsingHistogram() |
1180 | | * |
1181 | | * \param[in] na an arbitrary set of numbers; not ordered and not |
1182 | | * a histogram |
1183 | | * \param[in] maxbins the maximum number of bins to be allowed in |
1184 | | * the histogram; use an integer larger than the |
1185 | | * largest number in %na for consecutive integer bins |
1186 | | * \param[out] pmin [optional] min value of set |
1187 | | * \param[out] pmax [optional] max value of set |
1188 | | * \param[out] pmean [optional] mean value of set |
1189 | | * \param[out] pvariance [optional] variance |
1190 | | * \param[out] pmedian [optional] median value of set |
1191 | | * \param[in] rank in [0.0 ... 1.0]; median has a rank 0.5; |
1192 | | * ignored if &rval == NULL |
1193 | | * \param[out] prval [optional] value in na corresponding to %rank |
1194 | | * \param[out] phisto [optional] Numa histogram; use NULL to prevent |
1195 | | * \return 0 if OK, 1 on error |
1196 | | * |
1197 | | * <pre> |
1198 | | * Notes: |
1199 | | * (1) This is a simple interface for gathering statistics |
1200 | | * from a numa, where a histogram is used 'under the covers' |
1201 | | * to avoid sorting if a rank value is requested. In that case, |
1202 | | * by using a histogram we are trading accuracy for speed, because |
1203 | | * the values in %na are quantized to the center of a set of bins. |
1204 | | * (2) If the median, other rank value, or histogram are not requested, |
1205 | | * the calculation is all performed on the input Numa. |
1206 | | * (3) The variance is the average of the square of the |
1207 | | * difference from the mean. The median is the value in na |
1208 | | * with rank 0.5. |
1209 | | * (4) There are two situations where this gives rank results with |
1210 | | * accuracy comparable to computing statistics directly on the input |
1211 | | * data, without binning into a histogram: |
1212 | | * (a) the data is integers and the range of data is less than |
1213 | | * %maxbins, and |
1214 | | * (b) the data is floats and the range is small compared to |
1215 | | * %maxbins, so that the binsize is much less than 1. |
1216 | | * (5) If a histogram is used and the numbers in the Numa extend |
1217 | | * over a large range, you can limit the required storage by |
1218 | | * specifying the maximum number of bins in the histogram. |
1219 | | * Use %maxbins == 0 to force the bin size to be 1. |
1220 | | * (6) This optionally returns the median and one arbitrary rank value. |
1221 | | * If you need several rank values, return the histogram and use |
1222 | | * numaHistogramGetValFromRank(nah, rank, &rval) |
1223 | | * multiple times. |
1224 | | * </pre> |
1225 | | */ |
1226 | | l_ok |
1227 | | numaGetStatsUsingHistogram(NUMA *na, |
1228 | | l_int32 maxbins, |
1229 | | l_float32 *pmin, |
1230 | | l_float32 *pmax, |
1231 | | l_float32 *pmean, |
1232 | | l_float32 *pvariance, |
1233 | | l_float32 *pmedian, |
1234 | | l_float32 rank, |
1235 | | l_float32 *prval, |
1236 | | NUMA **phisto) |
1237 | 0 | { |
1238 | 0 | l_int32 i, n; |
1239 | 0 | l_float32 minval, maxval, fval, mean, sum; |
1240 | 0 | NUMA *nah; |
1241 | |
|
1242 | 0 | if (pmin) *pmin = 0.0; |
1243 | 0 | if (pmax) *pmax = 0.0; |
1244 | 0 | if (pmean) *pmean = 0.0; |
1245 | 0 | if (pvariance) *pvariance = 0.0; |
1246 | 0 | if (pmedian) *pmedian = 0.0; |
1247 | 0 | if (prval) *prval = 0.0; |
1248 | 0 | if (phisto) *phisto = NULL; |
1249 | 0 | if (!na) |
1250 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1251 | 0 | if ((n = numaGetCount(na)) == 0) |
1252 | 0 | return ERROR_INT("numa is empty", __func__, 1); |
1253 | | |
1254 | 0 | numaGetMin(na, &minval, NULL); |
1255 | 0 | numaGetMax(na, &maxval, NULL); |
1256 | 0 | if (pmin) *pmin = minval; |
1257 | 0 | if (pmax) *pmax = maxval; |
1258 | 0 | if (pmean || pvariance) { |
1259 | 0 | sum = 0.0; |
1260 | 0 | for (i = 0; i < n; i++) { |
1261 | 0 | numaGetFValue(na, i, &fval); |
1262 | 0 | sum += fval; |
1263 | 0 | } |
1264 | 0 | mean = sum / (l_float32)n; |
1265 | 0 | if (pmean) *pmean = mean; |
1266 | 0 | } |
1267 | 0 | if (pvariance) { |
1268 | 0 | sum = 0.0; |
1269 | 0 | for (i = 0; i < n; i++) { |
1270 | 0 | numaGetFValue(na, i, &fval); |
1271 | 0 | sum += fval * fval; |
1272 | 0 | } |
1273 | 0 | *pvariance = sum / (l_float32)n - mean * mean; |
1274 | 0 | } |
1275 | |
|
1276 | 0 | if (!pmedian && !prval && !phisto) |
1277 | 0 | return 0; |
1278 | | |
1279 | 0 | nah = numaMakeHistogramAuto(na, maxbins); |
1280 | 0 | if (pmedian) |
1281 | 0 | numaHistogramGetValFromRank(nah, 0.5, pmedian); |
1282 | 0 | if (prval) |
1283 | 0 | numaHistogramGetValFromRank(nah, rank, prval); |
1284 | 0 | if (phisto) |
1285 | 0 | *phisto = nah; |
1286 | 0 | else |
1287 | 0 | numaDestroy(&nah); |
1288 | 0 | return 0; |
1289 | 0 | } |
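/* A sketch of the high-level stats interface: several statistics and one
 * rank value are pulled from a single call, with the histogram built
 * internally.  The data and the 0.9 rank are hypothetical. */
static void
example_numa_stats_using_histogram(void)
{
l_int32    i;
l_float32  minval, maxval, mean, median, rval;
NUMA      *na;

    na = numaCreate(500);
    for (i = 0; i < 500; i++)
        numaAddNumber(na, (l_float32)(i * i % 97));
    numaGetStatsUsingHistogram(na, 100, &minval, &maxval, &mean,
                               NULL, &median, 0.9, &rval, NULL);
    lept_stderr("min = %5.1f, max = %5.1f, mean = %6.2f, median = %6.2f\n",
                minval, maxval, mean, median);
    numaDestroy(&na);
}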
1290 | | |
1291 | | |
1292 | | /*! |
1293 | | * \brief numaGetHistogramStats() |
1294 | | * |
1295 | | * \param[in] nahisto histogram: y(x(i)), i = 0 ... nbins - 1 |
1296 | | * \param[in] startx x value of first bin: x(0) |
1297 | | * \param[in] deltax x increment between bins; the bin size; x(1) - x(0) |
1298 | | * \param[out] pxmean [optional] mean value of histogram |
1299 | | * \param[out] pxmedian [optional] median value of histogram |
1300 | | * \param[out] pxmode [optional] mode value of histogram: |
1301 | | * xmode = x(imode), where y(xmode) >= y(x(i)) for |
1302 | | * all i != imode |
1303 | | * \param[out] pxvariance [optional] variance of x |
1304 | | * \return 0 if OK, 1 on error |
1305 | | * |
1306 | | * <pre> |
1307 | | * Notes: |
1308 | | * (1) If the histogram represents the relation y(x), the |
1309 | | * computed values that are returned are the x values. |
1310 | | * These are NOT the bucket indices i; they are related to the |
1311 | | * bucket indices by |
1312 | | * x(i) = startx + i * deltax |
1313 | | * </pre> |
1314 | | */ |
1315 | | l_ok |
1316 | | numaGetHistogramStats(NUMA *nahisto, |
1317 | | l_float32 startx, |
1318 | | l_float32 deltax, |
1319 | | l_float32 *pxmean, |
1320 | | l_float32 *pxmedian, |
1321 | | l_float32 *pxmode, |
1322 | | l_float32 *pxvariance) |
1323 | 0 | { |
1324 | 0 | if (pxmean) *pxmean = 0.0; |
1325 | 0 | if (pxmedian) *pxmedian = 0.0; |
1326 | 0 | if (pxmode) *pxmode = 0.0; |
1327 | 0 | if (pxvariance) *pxvariance = 0.0; |
1328 | 0 | if (!nahisto) |
1329 | 0 | return ERROR_INT("nahisto not defined", __func__, 1); |
1330 | | |
1331 | 0 | return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1, |
1332 | 0 | pxmean, pxmedian, pxmode, |
1333 | 0 | pxvariance); |
1334 | 0 | } |
1335 | | |
1336 | | |
1337 | | /*! |
1338 | | * \brief numaGetHistogramStatsOnInterval() |
1339 | | * |
1340 | | * \param[in] nahisto histogram: y(x(i)), i = 0 ... nbins - 1 |
1341 | | * \param[in] startx x value of first bin: x(0) |
1342 | | * \param[in] deltax x increment between bins; the bin size; x(1) - x(0) |
1343 | | * \param[in] ifirst first bin to use for collecting stats |
1344 | | * \param[in] ilast last bin for collecting stats; -1 to go to the end |
1345 | | * \param[out] pxmean [optional] mean value of histogram |
1346 | | * \param[out] pxmedian [optional] median value of histogram |
1347 | | * \param[out] pxmode [optional] mode value of histogram: |
1348 | | * xmode = x(imode), where y(xmode) >= y(x(i)) for |
1349 | | * all i != imode |
1350 | | * \param[out] pxvariance [optional] variance of x |
1351 | | * \return 0 if OK, 1 on error |
1352 | | * |
1353 | | * <pre> |
1354 | | * Notes: |
1355 | | * (1) If the histogram represents the relation y(x), the |
1356 | | * computed values that are returned are the x values. |
1357 | | * These are NOT the bucket indices i; they are related to the |
1358 | | * bucket indices by |
1359 | | * x(i) = startx + i * deltax |
1360 | | * </pre> |
1361 | | */ |
1362 | | l_ok |
1363 | | numaGetHistogramStatsOnInterval(NUMA *nahisto, |
1364 | | l_float32 startx, |
1365 | | l_float32 deltax, |
1366 | | l_int32 ifirst, |
1367 | | l_int32 ilast, |
1368 | | l_float32 *pxmean, |
1369 | | l_float32 *pxmedian, |
1370 | | l_float32 *pxmode, |
1371 | | l_float32 *pxvariance) |
1372 | 0 | { |
1373 | 0 | l_int32 i, n, imax; |
1374 | 0 | l_float32 sum, sumval, halfsum, moment, var, x, y, ymax; |
1375 | |
|
1376 | 0 | if (pxmean) *pxmean = 0.0; |
1377 | 0 | if (pxmedian) *pxmedian = 0.0; |
1378 | 0 | if (pxmode) *pxmode = 0.0; |
1379 | 0 | if (pxvariance) *pxvariance = 0.0; |
1380 | 0 | if (!nahisto) |
1381 | 0 | return ERROR_INT("nahisto not defined", __func__, 1); |
1382 | 0 | if (!pxmean && !pxmedian && !pxmode && !pxvariance) |
1383 | 0 | return ERROR_INT("nothing to compute", __func__, 1); |
1384 | | |
1385 | 0 | n = numaGetCount(nahisto); |
1386 | 0 | ifirst = L_MAX(0, ifirst); |
1387 | 0 | if (ilast < 0) ilast = n - 1; |
1388 | 0 | if (ifirst >= n) |
1389 | 0 | return ERROR_INT("invalid ifirst", __func__, 1); |
1390 | 0 | if (ilast >= n) { |
1391 | 0 | L_WARNING("ilast = %d is beyond max index = %d; adjusting\n", |
1392 | 0 | __func__, ilast, n - 1); |
1393 | 0 | ilast = n - 1; |
1394 | 0 | } |
1395 | 0 | if (ifirst > ilast) |
1396 | 0 | return ERROR_INT("ifirst > ilast", __func__, 1); |
1397 | 0 | for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) { |
1398 | 0 | x = startx + i * deltax; |
1399 | 0 | numaGetFValue(nahisto, i, &y); |
1400 | 0 | sum += y; |
1401 | 0 | moment += x * y; |
1402 | 0 | var += x * x * y; |
1403 | 0 | } |
1404 | 0 | if (sum == 0.0) { |
1405 | 0 | L_INFO("sum is 0\n", __func__); |
1406 | 0 | return 0; |
1407 | 0 | } |
1408 | | |
1409 | 0 | if (pxmean) |
1410 | 0 | *pxmean = moment / sum; |
1411 | 0 | if (pxvariance) |
1412 | 0 | *pxvariance = var / sum - moment * moment / (sum * sum); |
1413 | |
|
1414 | 0 | if (pxmedian) { |
1415 | 0 | halfsum = sum / 2.0f; |
1416 | 0 | for (sumval = 0.0, i = ifirst; i <= ilast; i++) { |
1417 | 0 | numaGetFValue(nahisto, i, &y); |
1418 | 0 | sumval += y; |
1419 | 0 | if (sumval >= halfsum) { |
1420 | 0 | *pxmedian = startx + i * deltax; |
1421 | 0 | break; |
1422 | 0 | } |
1423 | 0 | } |
1424 | 0 | } |
1425 | |
|
1426 | 0 | if (pxmode) { |
1427 | 0 | imax = -1; |
1428 | 0 | ymax = -1.0e10; |
1429 | 0 | for (i = ifirst; i <= ilast; i++) { |
1430 | 0 | numaGetFValue(nahisto, i, &y); |
1431 | 0 | if (y > ymax) { |
1432 | 0 | ymax = y; |
1433 | 0 | imax = i; |
1434 | 0 | } |
1435 | 0 | } |
1436 | 0 | *pxmode = startx + imax * deltax; |
1437 | 0 | } |
1438 | |
|
1439 | 0 | return 0; |
1440 | 0 | } |
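/* A sketch of reading the mean, median and mode back from a histogram,
 * using the x(i) = startx + i * deltax mapping described above.  The
 * histogram counts here are hypothetical. */
static void
example_numa_histogram_stats(void)
{
l_int32    i;
l_float32  xmean, xmedian, xmode;
NUMA      *nahisto;

    nahisto = numaCreate(10);
    for (i = 0; i < 10; i++)
        numaAddNumber(nahisto, (l_float32)(10 - i));  /* decreasing counts */
        /* bins start at x = 100.0 and are 0.5 wide */
    numaGetHistogramStats(nahisto, 100.0, 0.5, &xmean, &xmedian,
                          &xmode, NULL);
    lept_stderr("mean = %6.2f, median = %6.2f, mode = %6.2f\n",
                xmean, xmedian, xmode);
    numaDestroy(&nahisto);
}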
1441 | | |
1442 | | |
1443 | | /*! |
1444 | | * \brief numaMakeRankFromHistogram() |
1445 | | * |
1446 | | * \param[in] startx xval corresponding to first element in nay |
1447 | | * \param[in] deltax x increment between array elements in nay |
1448 | | * \param[in] nasy input histogram, assumed equally spaced |
1449 | | * \param[in] npts number of points to evaluate rank function |
1450 | | * \param[out] pnax [optional] array of x values in range |
1451 | | * \param[out] pnay rank array of specified npts |
1452 | | * \return 0 if OK, 1 on error |
1453 | | */ |
1454 | | l_ok |
1455 | | numaMakeRankFromHistogram(l_float32 startx, |
1456 | | l_float32 deltax, |
1457 | | NUMA *nasy, |
1458 | | l_int32 npts, |
1459 | | NUMA **pnax, |
1460 | | NUMA **pnay) |
1461 | 0 | { |
1462 | 0 | l_int32 i, n; |
1463 | 0 | l_float32 sum, fval; |
1464 | 0 | NUMA *nan, *nar; |
1465 | |
|
1466 | 0 | if (pnax) *pnax = NULL; |
1467 | 0 | if (!pnay) |
1468 | 0 | return ERROR_INT("&nay not defined", __func__, 1); |
1469 | 0 | *pnay = NULL; |
1470 | 0 | if (!nasy) |
1471 | 0 | return ERROR_INT("nasy not defined", __func__, 1); |
1472 | 0 | if ((n = numaGetCount(nasy)) == 0) |
1473 | 0 | return ERROR_INT("no bins in nas", __func__, 1); |
1474 | | |
1475 | | /* Normalize and generate the rank array corresponding to |
1476 | | * the binned histogram. */ |
1477 | 0 | nan = numaNormalizeHistogram(nasy, 1.0); |
1478 | 0 | nar = numaCreate(n + 1); /* rank numa corresponding to nan */ |
1479 | 0 | sum = 0.0; |
1480 | 0 | numaAddNumber(nar, sum); /* first element is 0.0 */ |
1481 | 0 | for (i = 0; i < n; i++) { |
1482 | 0 | numaGetFValue(nan, i, &fval); |
1483 | 0 | sum += fval; |
1484 | 0 | numaAddNumber(nar, sum); |
1485 | 0 | } |
1486 | | |
1487 | | /* Compute rank array on full range with specified |
1488 | | * number of points and correspondence to x-values. */ |
1489 | 0 | numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP, |
1490 | 0 | startx, startx + n * deltax, npts, |
1491 | 0 | pnax, pnay); |
1492 | 0 | numaDestroy(&nan); |
1493 | 0 | numaDestroy(&nar); |
1494 | 0 | return 0; |
1495 | 0 | } |
1496 | | |
1497 | | |
1498 | | /*! |
1499 | | * \brief numaHistogramGetRankFromVal() |
1500 | | * |
1501 | | * \param[in] na histogram |
1502 | | * \param[in] rval value of input sample for which we want the rank |
1503 | | * \param[out] prank fraction of total samples below rval |
1504 | | * \return 0 if OK, 1 on error |
1505 | | * |
1506 | | * <pre> |
1507 | | * Notes: |
1508 | | * (1) If we think of the histogram as a function y(x), normalized |
1509 | | * to 1, for a given input value of x, this computes the |
1510 | | * rank of x, which is the integral of y(x) from the start |
1511 | | * value of x to the input value. |
1512 | | * (2) This function only makes sense when applied to a Numa that |
1513 | | * is a histogram. The values in the histogram can be ints and |
1514 | | * floats, and are computed as floats. The rank is returned |
1515 | | * as a float between 0.0 and 1.0. |
1516 | | * (3) The numa parameters startx and binsize are used to |
1517 | | * compute x from the Numa index i. |
1518 | | * </pre> |
1519 | | */ |
1520 | | l_ok |
1521 | | numaHistogramGetRankFromVal(NUMA *na, |
1522 | | l_float32 rval, |
1523 | | l_float32 *prank) |
1524 | 0 | { |
1525 | 0 | l_int32 i, ibinval, n; |
1526 | 0 | l_float32 startval, binsize, binval, maxval, fractval, total, sum, val; |
1527 | |
1528 | 0 | if (!prank) |
1529 | 0 | return ERROR_INT("prank not defined", __func__, 1); |
1530 | 0 | *prank = 0.0; |
1531 | 0 | if (!na) |
1532 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1533 | 0 | numaGetParameters(na, &startval, &binsize); |
1534 | 0 | n = numaGetCount(na); |
1535 | 0 | if (rval < startval) |
1536 | 0 | return 0; |
1537 | 0 | maxval = startval + n * binsize; |
1538 | 0 | if (rval > maxval) { |
1539 | 0 | *prank = 1.0; |
1540 | 0 | return 0; |
1541 | 0 | } |
1542 | | |
1543 | 0 | binval = (rval - startval) / binsize; |
1544 | 0 | ibinval = (l_int32)binval; |
1545 | 0 | if (ibinval >= n) { |
1546 | 0 | *prank = 1.0; |
1547 | 0 | return 0; |
1548 | 0 | } |
1549 | 0 | fractval = binval - (l_float32)ibinval; |
1550 | |
1551 | 0 | sum = 0.0; |
1552 | 0 | for (i = 0; i < ibinval; i++) { |
1553 | 0 | numaGetFValue(na, i, &val); |
1554 | 0 | sum += val; |
1555 | 0 | } |
1556 | 0 | numaGetFValue(na, ibinval, &val); |
1557 | 0 | sum += fractval * val; |
1558 | 0 | numaGetSum(na, &total); |
1559 | 0 | *prank = sum / total; |
1560 | | |
1561 | | /* lept_stderr("binval = %7.3f, rank = %7.3f\n", binval, *prank); */ |
1562 | |
1563 | 0 | return 0; |
1564 | 0 | } |
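A small worked example of numaHistogramGetRankFromVal() (hypothetical counts, not from the library): for a histogram {2, 6, 1, 1} with startx = 0 and binsize = 1, the rank of rval = 2.0 is (2 + 6) / 10 = 0.8.

#include "allheaders.h"

static void
DemoRankFromVal(void)
{
NUMA      *nah;
l_float32  rank;

    nah = numaCreate(4);        /* counts: 2, 6, 1, 1; total = 10 */
    numaAddNumber(nah, 2.0);
    numaAddNumber(nah, 6.0);
    numaAddNumber(nah, 1.0);
    numaAddNumber(nah, 1.0);
    numaHistogramGetRankFromVal(nah, 2.0, &rank);   /* expect 0.8 */
    lept_stderr("rank = %5.3f\n", rank);
    numaDestroy(&nah);
}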
1565 | | |
1566 | | |
1567 | | /*! |
1568 | | * \brief numaHistogramGetValFromRank() |
1569 | | * |
1570 | | * \param[in] na histogram |
1571 | | * \param[in] rank fraction of total samples |
1572 | | * \param[out] prval approx. to the bin value |
1573 | | * \return 0 if OK, 1 on error |
1574 | | * |
1575 | | * <pre> |
1576 | | * Notes: |
1577 | | * (1) If we think of the histogram as a function y(x), this returns |
1578 | | * the value x such that the integral of y(x) from the start |
1579 | | * value to x gives the fraction 'rank' of the integral |
1580 | | * of y(x) over all bins. |
1581 | | * (2) This function only makes sense when applied to a Numa that |
1582 | | * is a histogram. The values in the histogram can be ints and |
1583 | | * floats, and are computed as floats. The val is returned |
1584 | | * as a float, even though the buckets are of integer width. |
1585 | | * (3) The numa parameters startx and binsize are used to |
1586 | | * compute x from the Numa index i. |
1587 | | * </pre> |
1588 | | */ |
1589 | | l_ok |
1590 | | numaHistogramGetValFromRank(NUMA *na, |
1591 | | l_float32 rank, |
1592 | | l_float32 *prval) |
1593 | 0 | { |
1594 | 0 | l_int32 i, n; |
1595 | 0 | l_float32 startval, binsize, rankcount, total, sum, fract, val; |
1596 | |
1597 | 0 | if (!prval) |
1598 | 0 | return ERROR_INT("prval not defined", __func__, 1); |
1599 | 0 | *prval = 0.0; |
1600 | 0 | if (!na) |
1601 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1602 | 0 | if (rank < 0.0) { |
1603 | 0 | L_WARNING("rank < 0; setting to 0.0\n", __func__); |
1604 | 0 | rank = 0.0; |
1605 | 0 | } |
1606 | 0 | if (rank > 1.0) { |
1607 | 0 | L_WARNING("rank > 1.0; setting to 1.0\n", __func__); |
1608 | 0 | rank = 1.0; |
1609 | 0 | } |
1610 | |
1611 | 0 | n = numaGetCount(na); |
1612 | 0 | numaGetParameters(na, &startval, &binsize); |
1613 | 0 | numaGetSum(na, &total); |
1614 | 0 | rankcount = rank * total; /* count that corresponds to rank */ |
1615 | 0 | sum = 0.0; |
1616 | 0 | val = 0.0; |
1617 | 0 | for (i = 0; i < n; i++) { |
1618 | 0 | numaGetFValue(na, i, &val); |
1619 | 0 | if (sum + val >= rankcount) |
1620 | 0 | break; |
1621 | 0 | sum += val; |
1622 | 0 | } |
1623 | 0 | if (val <= 0.0) /* can == 0 if rank == 0.0 */ |
1624 | 0 | fract = 0.0; |
1625 | 0 | else /* sum + fract * val = rankcount */ |
1626 | 0 | fract = (rankcount - sum) / val; |
1627 | | |
1628 | | /* The use of the fraction of a bin allows a simple calculation |
1629 | | * for the histogram value at the given rank. */ |
1630 | 0 | *prval = startval + binsize * ((l_float32)i + fract); |
1631 | | |
1632 | | /* lept_stderr("rank = %7.3f, val = %7.3f\n", rank, *prval); */ |
1633 | |
1634 | 0 | return 0; |
1635 | 0 | } |
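The inverse direction, as a sketch with the same hypothetical histogram: a rank of 0.8 maps back to the value 2.0, because the first two bins hold 8 of the 10 counts.

#include "allheaders.h"

static void
DemoValFromRank(void)
{
NUMA      *nah;
l_float32  val;

    nah = numaCreate(4);        /* counts: 2, 6, 1, 1; total = 10 */
    numaAddNumber(nah, 2.0);
    numaAddNumber(nah, 6.0);
    numaAddNumber(nah, 1.0);
    numaAddNumber(nah, 1.0);
    numaHistogramGetValFromRank(nah, 0.8, &val);    /* expect 2.0 */
    lept_stderr("val = %5.3f\n", val);
    numaDestroy(&nah);
}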
1636 | | |
1637 | | |
1638 | | /*! |
1639 | | * \brief numaDiscretizeSortedInBins() |
1640 | | * |
1641 | | * \param[in] na sorted |
1642 | | * \param[in] nbins number of equal population bins (> 1) |
1643 | | * \param[out] pnabinval average "gray" values in each bin |
1644 | | * \return 0 if OK, 1 on error |
1645 | | * |
1646 | | * <pre> |
1647 | | * Notes: |
1648 | | * (1) The input %na is sorted in increasing value. |
1649 | | * (2) The output array has the following mapping: |
1650 | | * bin number --> average array value in bin (nabinval) |
1651 | | * (3) With %nbins == 100, nabinval is the average gray value in |
1652 | | * each of the 100 equally populated bins. It is the function |
1653 | | * gray[100 * rank]. |
1654 | | * Thus it is the inverse of |
1655 | | * rank[gray] |
1656 | | * (4) Contrast with numaDiscretizeHistoInBins(), where the input %na
1657 | | * is a histogram. |
1658 | | * </pre> |
1659 | | */ |
1660 | | l_ok |
1661 | | numaDiscretizeSortedInBins(NUMA *na, |
1662 | | l_int32 nbins, |
1663 | | NUMA **pnabinval) |
1664 | 0 | { |
1665 | 0 | NUMA *nabinval; /* average gray value in the bins */ |
1666 | 0 | NUMA *naeach; |
1667 | 0 | l_int32 i, ntot, bincount, binindex, binsize; |
1668 | 0 | l_float32 sum, val, ave; |
1669 | |
1670 | 0 | if (!pnabinval) |
1671 | 0 | return ERROR_INT("&nabinval not defined", __func__, 1); |
1672 | 0 | *pnabinval = NULL; |
1673 | 0 | if (!na) |
1674 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1675 | 0 | if (nbins < 2) |
1676 | 0 | return ERROR_INT("nbins must be > 1", __func__, 1); |
1677 | | |
1678 | | /* Get the number of items in each bin */ |
1679 | 0 | ntot = numaGetCount(na); |
1680 | 0 | if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL) |
1681 | 0 | return ERROR_INT("naeach not made", __func__, 1); |
1682 | | |
1683 | | /* Get the average value in each bin */ |
1684 | 0 | sum = 0.0; |
1685 | 0 | bincount = 0; |
1686 | 0 | binindex = 0; |
1687 | 0 | numaGetIValue(naeach, 0, &binsize); |
1688 | 0 | nabinval = numaCreate(nbins); |
1689 | 0 | for (i = 0; i < ntot; i++) { |
1690 | 0 | numaGetFValue(na, i, &val); |
1691 | 0 | bincount++; |
1692 | 0 | sum += val; |
1693 | 0 | if (bincount == binsize) { /* add bin entry */ |
1694 | 0 | ave = sum / binsize; |
1695 | 0 | numaAddNumber(nabinval, ave); |
1696 | 0 | sum = 0.0; |
1697 | 0 | bincount = 0; |
1698 | 0 | binindex++; |
1699 | 0 | if (binindex == nbins) break; |
1700 | 0 | numaGetIValue(naeach, binindex, &binsize); |
1701 | 0 | } |
1702 | 0 | } |
1703 | 0 | *pnabinval = nabinval; |
1704 | |
1705 | 0 | numaDestroy(&naeach); |
1706 | 0 | return 0; |
1707 | 0 | } |
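A usage sketch with hypothetical data: six sorted values split into three equally populated bins, giving the bin averages 1.5, 6.0 and 24.0.

#include "allheaders.h"

static void
DemoSortedInBins(void)
{
NUMA      *nas, *nabinval;
l_int32    i;
l_float32  ave;

    nas = numaCreate(6);        /* sorted values: 1, 2, 4, 8, 16, 32 */
    for (i = 0; i < 6; i++)
        numaAddNumber(nas, (l_float32)(1 << i));
    numaDiscretizeSortedInBins(nas, 3, &nabinval);
    for (i = 0; i < 3; i++) {   /* expect 1.5, 6.0, 24.0 */
        numaGetFValue(nabinval, i, &ave);
        lept_stderr("bin %d: ave = %6.1f\n", i, ave);
    }
    numaDestroy(&nas);
    numaDestroy(&nabinval);
}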
1708 | | |
1709 | | |
1710 | | /*! |
1711 | | * \brief numaDiscretizeHistoInBins() |
1712 | | * |
1713 | | * \param[in] na histogram |
1714 | | * \param[in] nbins number of equal population bins (> 1) |
1715 | | * \param[out] pnabinval average "gray" values in each bin |
1716 | | * \param[out] pnarank [optional] rank value of input histogram; |
1717 | | * this is a cumulative norm histogram. |
1718 | | * \return 0 if OK, 1 on error |
1719 | | * |
1720 | | * <pre> |
1721 | | * Notes: |
1722 | | * (1) With %nbins == 100, nabinval is the average gray value in |
1723 | | * each of the 100 equally populated bins. It is the function |
1724 | | * gray[100 * rank]. |
1725 | | * Thus it is the inverse of |
1726 | | * rank[gray] |
1727 | | * which is optionally returned in narank. |
1728 | | * (2) The "gray value" is the index into the input histogram. |
1729 | | * (3) The two output arrays give the following mappings, where the |
1730 | | * input is an un-normalized histogram of array values: |
1731 | | * bin number --> average array value in bin (nabinval) |
1732 | | * array values --> cumulative normalized histogram (narank) |
1733 | | * </pre> |
1734 | | */ |
1735 | | l_ok |
1736 | | numaDiscretizeHistoInBins(NUMA *na, |
1737 | | l_int32 nbins, |
1738 | | NUMA **pnabinval, |
1739 | | NUMA **pnarank) |
1740 | 0 | { |
1741 | 0 | NUMA *nabinval; /* average gray value in the bins */ |
1742 | 0 | NUMA *naeach, *nan; |
1743 | 0 | l_int32 i, j, nxvals, occup, count, bincount, binindex, binsize; |
1744 | 0 | l_float32 sum, ave, ntot; |
1745 | |
1746 | 0 | if (pnarank) *pnarank = NULL; |
1747 | 0 | if (!pnabinval) |
1748 | 0 | return ERROR_INT("&nabinval not defined", __func__, 1); |
1749 | 0 | *pnabinval = NULL; |
1750 | 0 | if (!na) |
1751 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1752 | 0 | if (nbins < 2) |
1753 | 0 | return ERROR_INT("nbins must be > 1", __func__, 1); |
1754 | | |
1755 | 0 | nxvals = numaGetCount(na); |
1756 | 0 | numaGetSum(na, &ntot); |
1757 | 0 | occup = ntot / nxvals; |
1758 | 0 | if (occup < 1) L_INFO("average occupancy %d < 1\n", __func__, occup); |
1759 | | |
1760 | | /* Get the number of items in each bin */ |
1761 | 0 | if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL) |
1762 | 0 | return ERROR_INT("naeach not made", __func__, 1); |
1763 | | |
1764 | | /* Get the average value in each bin */ |
1765 | 0 | sum = 0.0; |
1766 | 0 | bincount = 0; |
1767 | 0 | binindex = 0; |
1768 | 0 | numaGetIValue(naeach, 0, &binsize); |
1769 | 0 | nabinval = numaCreate(nbins); |
1770 | 0 | for (i = 0; i < nxvals; i++) { |
1771 | 0 | numaGetIValue(na, i, &count); |
1772 | 0 | for (j = 0; j < count; j++) { |
1773 | 0 | bincount++; |
1774 | 0 | sum += i; |
1775 | 0 | if (bincount == binsize) { /* add bin entry */ |
1776 | 0 | ave = sum / binsize; |
1777 | 0 | numaAddNumber(nabinval, ave); |
1778 | 0 | sum = 0.0; |
1779 | 0 | bincount = 0; |
1780 | 0 | binindex++; |
1781 | 0 | if (binindex == nbins) break; |
1782 | 0 | numaGetIValue(naeach, binindex, &binsize); |
1783 | 0 | } |
1784 | 0 | } |
1785 | 0 | if (binindex == nbins) break; |
1786 | 0 | } |
1787 | 0 | *pnabinval = nabinval; |
1788 | 0 | if (binindex != nbins) |
1789 | 0 | L_ERROR("binindex = %d != nbins = %d\n", __func__, binindex, nbins); |
1790 | | |
1791 | | /* Get cumulative normalized histogram (rank[gray value]). |
1792 | | * This is the partial sum operating on the normalized histogram. */ |
1793 | 0 | if (pnarank) { |
1794 | 0 | nan = numaNormalizeHistogram(na, 1.0); |
1795 | 0 | *pnarank = numaGetPartialSums(nan); |
1796 | 0 | numaDestroy(&nan); |
1797 | 0 | } |
1798 | 0 | numaDestroy(&naeach); |
1799 | 0 | return 0; |
1800 | 0 | } |
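The same mapping starting from a histogram, as a sketch with hypothetical counts: gray values 0..3 with counts {2, 2, 1, 1}, split into two equal-population bins.

#include "allheaders.h"

static void
DemoHistoInBins(void)
{
NUMA      *nah, *nabinval, *narank;
l_float32  ave0, ave1;

    nah = numaCreate(4);        /* counts for gray values 0..3: 2, 2, 1, 1 */
    numaAddNumber(nah, 2.0);
    numaAddNumber(nah, 2.0);
    numaAddNumber(nah, 1.0);
    numaAddNumber(nah, 1.0);
    numaDiscretizeHistoInBins(nah, 2, &nabinval, &narank);
    numaGetFValue(nabinval, 0, &ave0);   /* lower bin: values 0,0,1 -> 0.33 */
    numaGetFValue(nabinval, 1, &ave1);   /* upper bin: values 1,2,3 -> 2.0  */
    lept_stderr("bin averages: %5.2f, %5.2f\n", ave0, ave1);
    numaDestroy(&nah);
    numaDestroy(&nabinval);
    numaDestroy(&narank);
}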
1801 | | |
1802 | | |
1803 | | /*! |
1804 | | * \brief numaGetRankBinValues() |
1805 | | * |
1806 | | * \param[in] na an array of values |
1807 | | * \param[in] nbins number of bins at which the rank is divided |
1808 | | * \param[out] pnam mean intensity in a bin vs rank bin value, |
1809 | | * with %nbins of discretized rank values |
1810 | | * \return 0 if OK, 1 on error |
1811 | | * |
1812 | | * <pre> |
1813 | | * Notes: |
1814 | | * (1) Simple interface for getting a binned rank representation |
1815 | | * of an input array of values. This returns: |
1816 | | * rank bin number --> average array value in each rank bin (nam) |
1817 | | * (2) Uses either a sorted array or a histogram, depending on
1818 | | * the values in the array and the size of the array. |
1819 | | * </pre> |
1820 | | */ |
1821 | | l_ok |
1822 | | numaGetRankBinValues(NUMA *na, |
1823 | | l_int32 nbins, |
1824 | | NUMA **pnam) |
1825 | 0 | { |
1826 | 0 | NUMA *na1; |
1827 | 0 | l_int32 maxbins, type; |
1828 | 0 | l_float32 maxval, delx; |
1829 | |
1830 | 0 | if (!pnam) |
1831 | 0 | return ERROR_INT("&pnam not defined", __func__, 1); |
1832 | 0 | *pnam = NULL; |
1833 | 0 | if (!na) |
1834 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1835 | 0 | if (numaGetCount(na) == 0) |
1836 | 0 | return ERROR_INT("na is empty", __func__, 1); |
1837 | 0 | if (nbins < 2) |
1838 | 0 | return ERROR_INT("nbins must be > 1", __func__, 1); |
1839 | | |
1840 | | /* Choose between sorted array and a histogram. |
1841 | | * If the input array has a small number of values with
1842 | | * a large maximum, we will sort it. At the other extreme, if |
1843 | | * the array has many numbers with a small maximum, such as the |
1844 | | * values of pixels in an 8 bpp grayscale image, generate a histogram. |
1845 | | * If type comes back as L_BIN_SORT, use a histogram. */ |
1846 | 0 | type = numaChooseSortType(na); |
1847 | 0 | if (type == L_SHELL_SORT) { /* sort the array */ |
1848 | 0 | L_INFO("sort the array: input size = %d\n", __func__, numaGetCount(na)); |
1849 | 0 | na1 = numaSort(NULL, na, L_SORT_INCREASING); |
1850 | 0 | numaDiscretizeSortedInBins(na1, nbins, pnam); |
1851 | 0 | numaDestroy(&na1); |
1852 | 0 | return 0; |
1853 | 0 | } |
1854 | | |
1855 | | /* Make the histogram. Assuming there are no negative values |
1856 | | * in the array, if the max value in the array does not exceed |
1857 | | * about 100000, the bin size for generating the histogram will |
1858 | | * be 1; maxbins refers to the number of entries in the histogram. */ |
1859 | 0 | L_INFO("use a histogram: input size = %d\n", __func__, numaGetCount(na)); |
1860 | 0 | numaGetMax(na, &maxval, NULL); |
1861 | 0 | maxbins = L_MIN(100002, (l_int32)maxval + 2); |
1862 | 0 | na1 = numaMakeHistogram(na, maxbins, NULL, NULL); |
1863 | | |
1864 | | /* Warn if there is a scale change. This shouldn't happen |
1865 | | * unless the max value is above 100000. */ |
1866 | 0 | numaGetParameters(na1, NULL, &delx); |
1867 | 0 | if (delx > 1.0) |
1868 | 0 | L_WARNING("scale change: delx = %6.2f\n", __func__, delx); |
1869 | | |
1870 | | /* Rank bin the results */ |
1871 | 0 | numaDiscretizeHistoInBins(na1, nbins, pnam, NULL); |
1872 | 0 | numaDestroy(&na1); |
1873 | 0 | return 0; |
1874 | 0 | } |
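A sketch of the simple interface, given any array %na of non-negative values (the choice of 10 bins is arbitrary):

#include "allheaders.h"

static void
DemoRankBinValues(NUMA *na)
{
NUMA      *nam;
l_int32    i, n;
l_float32  mean;

    if (numaGetRankBinValues(na, 10, &nam))
        return;
    n = numaGetCount(nam);
    for (i = 0; i < n; i++) {
        numaGetFValue(nam, i, &mean);
        lept_stderr("rank bin %d: mean = %10.2f\n", i, mean);
    }
    numaDestroy(&nam);
}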
1875 | | |
1876 | | |
1877 | | /*! |
1878 | | * \brief numaGetUniformBinSizes() |
1879 | | * |
1880 | | * \param[in] ntotal number of values to be split up |
1881 | | * \param[in] nbins number of bins |
1882 | | * \return naeach number of values to go in each bin, or NULL on error |
1883 | | * |
1884 | | * <pre> |
1885 | | * Notes: |
1886 | | * (1) The bin sizes can differ by at most 1. The sum of the
1887 | | * bin sizes in %naeach is %ntotal.
1888 | | * </pre> |
1889 | | */ |
1890 | | NUMA * |
1891 | | numaGetUniformBinSizes(l_int32 ntotal, |
1892 | | l_int32 nbins) |
1893 | 0 | { |
1894 | 0 | l_int32 i, start, end; |
1895 | 0 | NUMA *naeach; |
1896 | |
1897 | 0 | if (ntotal <= 0) |
1898 | 0 | return (NUMA *)ERROR_PTR("ntotal <= 0", __func__, NULL); |
1899 | 0 | if (nbins <= 0) |
1900 | 0 | return (NUMA *)ERROR_PTR("nbins <= 0", __func__, NULL); |
1901 | | |
1902 | 0 | if ((naeach = numaCreate(nbins)) == NULL) |
1903 | 0 | return (NUMA *)ERROR_PTR("naeach not made", __func__, NULL); |
1904 | | |
1905 | 0 | if (ntotal < nbins) { /* put 1 in each of %ntotal bins */ |
1906 | 0 | for (i = 0; i < ntotal; i++) |
1907 | 0 | numaAddNumber(naeach, 1); |
1908 | 0 | return naeach; |
1909 | 0 | } |
1910 | | |
1911 | 0 | start = 0; |
1912 | 0 | for (i = 0; i < nbins; i++) { |
1913 | 0 | end = ntotal * (i + 1) / nbins; |
1914 | 0 | numaAddNumber(naeach, end - start); |
1915 | 0 | start = end; |
1916 | 0 | } |
1917 | 0 | return naeach; |
1918 | 0 | } |
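A worked example: ntotal = 10 and nbins = 4 give the bin sizes 2, 3, 2, 3, which sum to 10.

#include "allheaders.h"

static void
DemoUniformBinSizes(void)
{
NUMA    *naeach;
l_int32  i, size;

    naeach = numaGetUniformBinSizes(10, 4);
    for (i = 0; i < 4; i++) {   /* expect 2, 3, 2, 3 */
        numaGetIValue(naeach, i, &size);
        lept_stderr("bin %d: %d values\n", i, size);
    }
    numaDestroy(&naeach);
}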
1919 | | |
1920 | | |
1921 | | /*----------------------------------------------------------------------* |
1922 | | * Splitting a distribution * |
1923 | | *----------------------------------------------------------------------*/ |
1924 | | /*! |
1925 | | * \brief numaSplitDistribution() |
1926 | | * |
1927 | | * \param[in] na histogram |
1928 | | * \param[in] scorefract fraction of the max score, used to determine |
1929 | | * range over which the histogram min is searched |
1930 | | * \param[out] psplitindex [optional] index for splitting |
1931 | | * \param[out] pave1 [optional] average of lower distribution |
1932 | | * \param[out] pave2 [optional] average of upper distribution |
1933 | | * \param[out] pnum1 [optional] population of lower distribution |
1934 | | * \param[out] pnum2 [optional] population of upper distribution |
1935 | | * \param[out] pnascore [optional] for debugging; otherwise use NULL |
1936 | | * \return 0 if OK, 1 on error |
1937 | | * |
1938 | | * <pre> |
1939 | | * Notes: |
1940 | | * (1) This function is intended to be used on a distribution of |
1941 | | * values that represent two sets, such as a histogram of |
1942 | | * pixel values for an image with a fg and bg, and the goal |
1943 | | * is to determine the averages of the two sets and the |
1944 | | * best splitting point. |
1945 | | * (2) The Otsu method finds a split point that divides the distribution |
1946 | | * into two parts by maximizing a score function that is the |
1947 | | * product of two terms: |
1948 | | * (a) the square of the difference of centroids, (ave1 - ave2)^2 |
1949 | | * (b) fract1 * (1 - fract1) |
1950 | | * where fract1 is the fraction in the lower distribution. |
1951 | | * (3) This works well for images where the fg and bg are |
1952 | | * each relatively homogeneous and well-separated in color. |
1953 | | * However, if the actual fg and bg sets are very different |
1954 | | * in size, and the bg is highly varied, as can occur in some |
1955 | | * scanned document images, this will bias the split point |
1956 | | * into the larger "bump" (i.e., toward the point where the |
1957 | | * (b) term reaches its maximum of 0.25 at fract1 = 0.5).
1958 | | * To avoid this, we define a range of values near the |
1959 | | * maximum of the score function, and choose the value within |
1960 | | * this range such that the histogram itself has a minimum value. |
1961 | | * The range is determined by scorefract: we include all abscissa |
1962 | | * values to the left and right of the value that maximizes the |
1963 | | * score, such that the score stays above (1 - scorefract) * maxscore. |
1964 | | * The intuition behind this modification is to try to find |
1965 | | * a split point that both has a high variance score and is |
1966 | | * at or near a minimum in the histogram, so that the histogram |
1967 | | * slope is small at the split point. |
1968 | | * (4) We normalize the score so that if the two distributions |
1969 | | * were of equal size and at opposite ends of the numa, the |
1970 | | * score would be 1.0. |
1971 | | * </pre> |
1972 | | */ |
1973 | | l_ok |
1974 | | numaSplitDistribution(NUMA *na, |
1975 | | l_float32 scorefract, |
1976 | | l_int32 *psplitindex, |
1977 | | l_float32 *pave1, |
1978 | | l_float32 *pave2, |
1979 | | l_float32 *pnum1, |
1980 | | l_float32 *pnum2, |
1981 | | NUMA **pnascore) |
1982 | 0 | { |
1983 | 0 | l_int32 i, n, bestsplit, minrange, maxrange, maxindex; |
1984 | 0 | l_float32 ave1, ave2, ave1prev, ave2prev; |
1985 | 0 | l_float32 num1, num2, num1prev, num2prev; |
1986 | 0 | l_float32 val, minval, sum, fract1; |
1987 | 0 | l_float32 norm, score, minscore, maxscore; |
1988 | 0 | NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2; |
1989 | |
1990 | 0 | if (psplitindex) *psplitindex = 0; |
1991 | 0 | if (pave1) *pave1 = 0.0; |
1992 | 0 | if (pave2) *pave2 = 0.0; |
1993 | 0 | if (pnum1) *pnum1 = 0.0; |
1994 | 0 | if (pnum2) *pnum2 = 0.0; |
1995 | 0 | if (pnascore) *pnascore = NULL; |
1996 | 0 | if (!na) |
1997 | 0 | return ERROR_INT("na not defined", __func__, 1); |
1998 | | |
1999 | 0 | n = numaGetCount(na); |
2000 | 0 | if (n <= 1) |
2001 | 0 | return ERROR_INT("n < 2 in histogram", __func__, 1);
2002 | 0 | numaGetSum(na, &sum); |
2003 | 0 | if (sum <= 0.0) |
2004 | 0 | return ERROR_INT("sum <= 0.0", __func__, 1); |
2005 | 0 | norm = 4.0f / ((l_float32)(n - 1) * (n - 1)); |
2006 | 0 | ave1prev = 0.0; |
2007 | 0 | numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL); |
2008 | 0 | num1prev = 0.0; |
2009 | 0 | num2prev = sum; |
2010 | 0 | maxindex = n / 2; /* initialize with something */ |
2011 | | |
2012 | | /* Split the histogram with [0 ... i] in the lower part |
2013 | | * and [i+1 ... n-1] in upper part. First, compute an otsu |
2014 | | * score for each possible splitting. */ |
2015 | 0 | if ((nascore = numaCreate(n)) == NULL) |
2016 | 0 | return ERROR_INT("nascore not made", __func__, 1); |
2017 | 0 | naave1 = (pave1) ? numaCreate(n) : NULL; |
2018 | 0 | naave2 = (pave2) ? numaCreate(n) : NULL; |
2019 | 0 | nanum1 = (pnum1) ? numaCreate(n) : NULL; |
2020 | 0 | nanum2 = (pnum2) ? numaCreate(n) : NULL; |
2021 | 0 | maxscore = 0.0; |
2022 | 0 | for (i = 0; i < n; i++) { |
2023 | 0 | numaGetFValue(na, i, &val); |
2024 | 0 | num1 = num1prev + val; |
2025 | 0 | if (num1 == 0) |
2026 | 0 | ave1 = ave1prev; |
2027 | 0 | else |
2028 | 0 | ave1 = (num1prev * ave1prev + i * val) / num1; |
2029 | 0 | num2 = num2prev - val; |
2030 | 0 | if (num2 == 0) |
2031 | 0 | ave2 = ave2prev; |
2032 | 0 | else |
2033 | 0 | ave2 = (num2prev * ave2prev - i * val) / num2; |
2034 | 0 | fract1 = num1 / sum; |
2035 | 0 | score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1); |
2036 | 0 | numaAddNumber(nascore, score); |
2037 | 0 | if (pave1) numaAddNumber(naave1, ave1); |
2038 | 0 | if (pave2) numaAddNumber(naave2, ave2); |
2039 | 0 | if (pnum1) numaAddNumber(nanum1, num1); |
2040 | 0 | if (pnum2) numaAddNumber(nanum2, num2); |
2041 | 0 | if (score > maxscore) { |
2042 | 0 | maxscore = score; |
2043 | 0 | maxindex = i; |
2044 | 0 | } |
2045 | 0 | num1prev = num1; |
2046 | 0 | num2prev = num2; |
2047 | 0 | ave1prev = ave1; |
2048 | 0 | ave2prev = ave2; |
2049 | 0 | } |
2050 | | |
2051 | | /* Next, for all contiguous scores within a specified fraction |
2052 | | * of the max, choose the split point as the value with the |
2053 | | * minimum in the histogram. */ |
2054 | 0 | minscore = (1.f - scorefract) * maxscore; |
2055 | 0 | for (i = maxindex - 1; i >= 0; i--) { |
2056 | 0 | numaGetFValue(nascore, i, &val); |
2057 | 0 | if (val < minscore) |
2058 | 0 | break; |
2059 | 0 | } |
2060 | 0 | minrange = i + 1; |
2061 | 0 | for (i = maxindex + 1; i < n; i++) { |
2062 | 0 | numaGetFValue(nascore, i, &val); |
2063 | 0 | if (val < minscore) |
2064 | 0 | break; |
2065 | 0 | } |
2066 | 0 | maxrange = i - 1; |
2067 | 0 | numaGetFValue(na, minrange, &minval); |
2068 | 0 | bestsplit = minrange; |
2069 | 0 | for (i = minrange + 1; i <= maxrange; i++) { |
2070 | 0 | numaGetFValue(na, i, &val); |
2071 | 0 | if (val < minval) { |
2072 | 0 | minval = val; |
2073 | 0 | bestsplit = i; |
2074 | 0 | } |
2075 | 0 | } |
2076 | | |
2077 | | /* Add one to the bestsplit value to get the threshold value, |
2078 | | * because when we take a threshold, as in pixThresholdToBinary(), |
2079 | | * we always choose the set with values below the threshold. */ |
2080 | 0 | bestsplit = L_MIN(255, bestsplit + 1); |
2081 | |
2082 | 0 | if (psplitindex) *psplitindex = bestsplit; |
2083 | 0 | if (pave1) numaGetFValue(naave1, bestsplit, pave1); |
2084 | 0 | if (pave2) numaGetFValue(naave2, bestsplit, pave2); |
2085 | 0 | if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1); |
2086 | 0 | if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2); |
2087 | |
2088 | 0 | if (pnascore) { /* debug mode */ |
2089 | 0 | lept_stderr("minrange = %d, maxrange = %d\n", minrange, maxrange); |
2090 | 0 | lept_stderr("minval = %10.0f\n", minval); |
2091 | 0 | gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore", |
2092 | 0 | "Score for split distribution"); |
2093 | 0 | *pnascore = nascore; |
2094 | 0 | } else { |
2095 | 0 | numaDestroy(&nascore); |
2096 | 0 | } |
2097 | |
2098 | 0 | if (pave1) numaDestroy(&naave1); |
2099 | 0 | if (pave2) numaDestroy(&naave2); |
2100 | 0 | if (pnum1) numaDestroy(&nanum1); |
2101 | 0 | if (pnum2) numaDestroy(&nanum2); |
2102 | 0 | return 0; |
2103 | 0 | } |
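A usage sketch: split a bimodal histogram (e.g., a 256-bin grayscale histogram built elsewhere); the scorefract of 0.1 is a typical but hypothetical choice.

#include "allheaders.h"

static void
DemoSplitDistribution(NUMA *nahisto)
{
l_int32    splitindex;
l_float32  ave1, ave2, num1, num2;

    if (numaSplitDistribution(nahisto, 0.1, &splitindex, &ave1, &ave2,
                              &num1, &num2, NULL))
        return;
    lept_stderr("split at %d: lower (ave %6.1f, pop %8.0f); "
                "upper (ave %6.1f, pop %8.0f)\n",
                splitindex, ave1, num1, ave2, num2);
}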
2104 | | |
2105 | | |
2106 | | /*----------------------------------------------------------------------* |
2107 | | * Comparing histograms * |
2108 | | *----------------------------------------------------------------------*/ |
2109 | | /*! |
2110 | | * \brief grayHistogramsToEMD() |
2111 | | * |
2112 | | * \param[in] naa1, naa2 two numaa, each with one or more 256-element |
2113 | | * histograms |
2114 | | * \param[out] pnad nad of EM distances for each histogram |
2115 | | * \return 0 if OK, 1 on error |
2116 | | * |
2117 | | * <pre> |
2118 | | * Notes: |
2119 | | * (1) The two numaas must be the same size and have corresponding |
2120 | | * 256-element histograms. Pairs do not need to be normalized |
2121 | | * to the same sum. |
2122 | | * (2) This is typically used on two sets of histograms from |
2123 | | * corresponding tiles of two images. The similarity of two |
2124 | | * images can be found with the scoring function used in |
2125 | | * pixCompareGrayByHisto(): |
2126 | | * score S = 1.0 - k * D, where |
2127 | | * k is a constant, say in the range 5-10 |
2128 | | * D = EMD |
2129 | | * for each tile; for multiple tiles, take the Min(S) over |
2130 | | * the set of tiles to be the final score. |
2131 | | * </pre> |
2132 | | */ |
2133 | | l_ok |
2134 | | grayHistogramsToEMD(NUMAA *naa1, |
2135 | | NUMAA *naa2, |
2136 | | NUMA **pnad) |
2137 | 0 | { |
2138 | 0 | l_int32 i, n, nt; |
2139 | 0 | l_float32 dist; |
2140 | 0 | NUMA *na1, *na2, *nad; |
2141 | |
2142 | 0 | if (!pnad) |
2143 | 0 | return ERROR_INT("&nad not defined", __func__, 1); |
2144 | 0 | *pnad = NULL; |
2145 | 0 | if (!naa1 || !naa2) |
2146 | 0 | return ERROR_INT("naa1 and naa2 not both defined", __func__, 1);
2147 | 0 | n = numaaGetCount(naa1); |
2148 | 0 | if (n != numaaGetCount(naa2)) |
2149 | 0 | return ERROR_INT("naa1 and naa2 numa counts differ", __func__, 1); |
2150 | 0 | nt = numaaGetNumberCount(naa1); |
2151 | 0 | if (nt != numaaGetNumberCount(naa2)) |
2152 | 0 | return ERROR_INT("naa1 and naa2 number counts differ", __func__, 1); |
2153 | 0 | if (256 * n != nt) /* good enough check */ |
2154 | 0 | return ERROR_INT("na sizes must be 256", __func__, 1); |
2155 | | |
2156 | 0 | nad = numaCreate(n); |
2157 | 0 | *pnad = nad; |
2158 | 0 | for (i = 0; i < n; i++) { |
2159 | 0 | na1 = numaaGetNuma(naa1, i, L_CLONE); |
2160 | 0 | na2 = numaaGetNuma(naa2, i, L_CLONE); |
2161 | 0 | numaEarthMoverDistance(na1, na2, &dist); |
2162 | 0 | numaAddNumber(nad, dist / 255.f); /* normalize to [0.0 - 1.0] */ |
2163 | 0 | numaDestroy(&na1); |
2164 | 0 | numaDestroy(&na2); |
2165 | 0 | } |
2166 | 0 | return 0; |
2167 | 0 | } |
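A sketch of the scoring described in note (2); the constant k = 7.0 is a hypothetical value in the suggested 5-10 range.

#include "allheaders.h"

static l_float32
DemoHistoSetSimilarity(NUMAA *naa1, NUMAA *naa2)
{
l_int32    i, n;
l_float32  dist, score, minscore;
NUMA      *nad;

    if (grayHistogramsToEMD(naa1, naa2, &nad))
        return 0.0;
    n = numaGetCount(nad);
    minscore = 1.0;
    for (i = 0; i < n; i++) {
        numaGetFValue(nad, i, &dist);
        score = 1.0f - 7.0f * dist;          /* S = 1 - k * D */
        minscore = L_MIN(minscore, score);
    }
    numaDestroy(&nad);
    return minscore;   /* final score is Min(S) over the tiles */
}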
2168 | | |
2169 | | |
2170 | | /*! |
2171 | | * \brief numaEarthMoverDistance() |
2172 | | * |
2173 | | * \param[in] na1, na2 two numas of the same size, typically histograms |
2174 | | * \param[out] pdist earthmover distance |
2175 | | * \return 0 if OK, 1 on error |
2176 | | * |
2177 | | * <pre> |
2178 | | * Notes: |
2179 | | * (1) The two numas must have the same size. They do not need to be |
2180 | | * normalized to the same sum before applying the function. |
2181 | | * (2) For a 1D discrete function, the implementation of the EMD |
2182 | | * is trivial. Just keep filling or emptying buckets in one numa |
2183 | | * to match the amount in the other, moving sequentially along |
2184 | | * both arrays. |
2185 | | * (3) We divide the sum of the absolute value of everything moved |
2186 | | * (by 1 unit at a time) by the sum of the numa (amount of "earth") |
2187 | | * to get the average distance that the "earth" was moved. |
2188 | | * This is the value returned here. |
2189 | | * (4) The caller can do a further normalization, by the number of |
2190 | | * buckets (minus 1), to get the EM distance as a fraction of |
2191 | | * the maximum possible distance, which is n-1. This fraction |
2192 | | * is 1.0 for the situation where all the 'earth' in the first |
2193 | | * array is at one end, and all in the second array is at the |
2194 | | * other end. |
2195 | | * </pre> |
2196 | | */ |
2197 | | l_ok |
2198 | | numaEarthMoverDistance(NUMA *na1, |
2199 | | NUMA *na2, |
2200 | | l_float32 *pdist) |
2201 | 0 | { |
2202 | 0 | l_int32 n, norm, i; |
2203 | 0 | l_float32 sum1, sum2, diff, total; |
2204 | 0 | l_float32 *array1, *array3; |
2205 | 0 | NUMA *na3; |
2206 | |
2207 | 0 | if (!pdist) |
2208 | 0 | return ERROR_INT("&dist not defined", __func__, 1); |
2209 | 0 | *pdist = 0.0; |
2210 | 0 | if (!na1 || !na2) |
2211 | 0 | return ERROR_INT("na1 and na2 not both defined", __func__, 1); |
2212 | 0 | n = numaGetCount(na1); |
2213 | 0 | if (n != numaGetCount(na2)) |
2214 | 0 | return ERROR_INT("na1 and na2 have different size", __func__, 1); |
2215 | | |
2216 | | /* Generate na3; normalize to na1 if necessary */ |
2217 | 0 | numaGetSum(na1, &sum1); |
2218 | 0 | numaGetSum(na2, &sum2); |
2219 | 0 | norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0; |
2220 | 0 | if (!norm) |
2221 | 0 | na3 = numaTransform(na2, 0, sum1 / sum2); |
2222 | 0 | else |
2223 | 0 | na3 = numaCopy(na2); |
2224 | 0 | array1 = numaGetFArray(na1, L_NOCOPY); |
2225 | 0 | array3 = numaGetFArray(na3, L_NOCOPY); |
2226 | | |
2227 | | /* Move earth between buckets in na3 to match na1 */
2228 | 0 | total = 0; |
2229 | 0 | for (i = 1; i < n; i++) { |
2230 | 0 | diff = array1[i - 1] - array3[i - 1]; |
2231 | 0 | array3[i] -= diff; |
2232 | 0 | total += L_ABS(diff); |
2233 | 0 | } |
2234 | 0 | *pdist = total / sum1; |
2235 | |
2236 | 0 | numaDestroy(&na3); |
2237 | 0 | return 0; |
2238 | 0 | } |
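A tiny worked case: all of the "earth" sits in bin 0 of one array and in bin 3 of the other. Each of the 10 units moves 3 bins, so the returned distance is 3.0; dividing by n - 1 = 3 gives the maximal fraction 1.0, as in note (4).

#include "allheaders.h"

static void
DemoEMD(void)
{
NUMA      *na1, *na2;
l_int32    i;
l_float32  dist;

    na1 = numaCreate(4);
    na2 = numaCreate(4);
    for (i = 0; i < 4; i++) {
        numaAddNumber(na1, (i == 0) ? 10.0 : 0.0);   /* earth in bin 0 */
        numaAddNumber(na2, (i == 3) ? 10.0 : 0.0);   /* earth in bin 3 */
    }
    numaEarthMoverDistance(na1, na2, &dist);
    lept_stderr("dist = %4.1f\n", dist);   /* expect 3.0 */
    numaDestroy(&na1);
    numaDestroy(&na2);
}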
2239 | | |
2240 | | |
2241 | | /*! |
2242 | | * \brief grayInterHistogramStats() |
2243 | | * |
2244 | | * \param[in] naa numaa with two or more 256-element histograms |
2245 | | * \param[in] wc half-width of the smoothing window |
2246 | | * \param[out] pnam [optional] mean values |
2247 | | * \param[out] pnams [optional] mean square values |
2248 | | * \param[out] pnav [optional] variances |
2249 | | * \param[out] pnarv [optional] rms deviations from the mean |
2250 | | * \return 0 if OK, 1 on error |
2251 | | * |
2252 | | * <pre> |
2253 | | * Notes: |
2254 | | * (1) The %naa has two or more 256-element numa histograms, which |
2255 | | * are to be compared value-wise at each of the 256 gray levels. |
2256 | | * The results are stats (mean, mean square, variance, root variance)
2257 | | * aggregated across the set of histograms, and each is output |
2258 | | * as a 256 entry numa. Think of these histograms as a matrix, |
2259 | | * where each histogram is one row of the array. The stats are |
2260 | | * then aggregated column-wise, between the histograms. |
2261 | | * (2) These stats are: |
2262 | | * ~ average value: <v> (nam) |
2263 | | * ~ average squared value: <v*v> (nams) |
2264 | | * ~ variance: <(v - <v>)*(v - <v>)> = <v*v> - <v>*<v> (nav) |
2265 | | * ~ square-root of variance: (narv) |
2266 | | * where the brackets < .. > indicate that the average value is |
2267 | | * to be taken over each column of the array. |
2268 | | * (3) The input histograms are optionally smoothed before these |
2269 | | * statistical operations. |
2270 | | * (4) The input histograms are normalized to a sum of 10000. By |
2271 | | * doing this, the resulting numbers are independent of the |
2272 | | * number of samples used in building the individual histograms. |
2273 | | * (5) A typical application is on a set of histograms from tiles |
2274 | | * of an image, to distinguish between text/tables and photo |
2275 | | * regions. If the tiles are much larger than the text line |
2276 | | * spacing, text/table regions typically have smaller variance |
2277 | | * across tiles than photo regions. For this application, it |
2278 | | * may be useful to ignore values near white, which are large for |
2279 | | * text and would magnify the variance due to variations in |
2280 | | * illumination. However, because the variance of a drawing or |
2281 | | * a light photo can be similar to that of grayscale text, this |
2282 | | * function is only a discriminator between darker photos/drawings |
2283 | | * and light photos/text/line-graphics. |
2284 | | * </pre> |
2285 | | */ |
2286 | | l_ok |
2287 | | grayInterHistogramStats(NUMAA *naa, |
2288 | | l_int32 wc, |
2289 | | NUMA **pnam, |
2290 | | NUMA **pnams, |
2291 | | NUMA **pnav, |
2292 | | NUMA **pnarv) |
2293 | 0 | { |
2294 | 0 | l_int32 i, j, n, nn; |
2295 | 0 | l_float32 **arrays; |
2296 | 0 | l_float32 mean, var, rvar; |
2297 | 0 | NUMA *na1, *na2, *na3, *na4; |
2298 | |
2299 | 0 | if (pnam) *pnam = NULL; |
2300 | 0 | if (pnams) *pnams = NULL; |
2301 | 0 | if (pnav) *pnav = NULL; |
2302 | 0 | if (pnarv) *pnarv = NULL; |
2303 | 0 | if (!pnam && !pnams && !pnav && !pnarv) |
2304 | 0 | return ERROR_INT("nothing requested", __func__, 1); |
2305 | 0 | if (!naa) |
2306 | 0 | return ERROR_INT("naa not defined", __func__, 1); |
2307 | 0 | n = numaaGetCount(naa); |
2308 | 0 | for (i = 0; i < n; i++) { |
2309 | 0 | nn = numaaGetNumaCount(naa, i); |
2310 | 0 | if (nn != 256) { |
2311 | 0 | L_ERROR("%d numbers in numa[%d]\n", __func__, nn, i); |
2312 | 0 | return 1; |
2313 | 0 | } |
2314 | 0 | } |
2315 | | |
2316 | 0 | if (pnam) *pnam = numaCreate(256); |
2317 | 0 | if (pnams) *pnams = numaCreate(256); |
2318 | 0 | if (pnav) *pnav = numaCreate(256); |
2319 | 0 | if (pnarv) *pnarv = numaCreate(256); |
2320 | | |
2321 | | /* First, use mean smoothing, normalize each histogram, |
2322 | | * and save all results in a 2D matrix. */ |
2323 | 0 | arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *)); |
2324 | 0 | for (i = 0; i < n; i++) { |
2325 | 0 | na1 = numaaGetNuma(naa, i, L_CLONE); |
2326 | 0 | na2 = numaWindowedMean(na1, wc); |
2327 | 0 | na3 = numaNormalizeHistogram(na2, 10000.); |
2328 | 0 | arrays[i] = numaGetFArray(na3, L_COPY); |
2329 | 0 | numaDestroy(&na1); |
2330 | 0 | numaDestroy(&na2); |
2331 | 0 | numaDestroy(&na3); |
2332 | 0 | } |
2333 | | |
2334 | | /* Get stats between histograms */ |
2335 | 0 | for (j = 0; j < 256; j++) { |
2336 | 0 | na4 = numaCreate(n); |
2337 | 0 | for (i = 0; i < n; i++) { |
2338 | 0 | numaAddNumber(na4, arrays[i][j]); |
2339 | 0 | } |
2340 | 0 | numaSimpleStats(na4, 0, -1, &mean, &var, &rvar); |
2341 | 0 | if (pnam) numaAddNumber(*pnam, mean); |
2342 | 0 | if (pnams) numaAddNumber(*pnams, mean * mean); |
2343 | 0 | if (pnav) numaAddNumber(*pnav, var); |
2344 | 0 | if (pnarv) numaAddNumber(*pnarv, rvar); |
2345 | 0 | numaDestroy(&na4); |
2346 | 0 | } |
2347 | |
2348 | 0 | for (i = 0; i < n; i++) |
2349 | 0 | LEPT_FREE(arrays[i]); |
2350 | 0 | LEPT_FREE(arrays); |
2351 | 0 | return 0; |
2352 | 0 | } |
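A sketch of typical use on a set of 256-bin tile histograms; the smoothing half-width of 5 is arbitrary, and only the root-variance output is requested.

#include "allheaders.h"

static void
DemoInterHistoStats(NUMAA *naa)
{
NUMA      *narv;
l_float32  maxrv;

    if (grayInterHistogramStats(naa, 5, NULL, NULL, NULL, &narv))
        return;
    numaGetMax(narv, &maxrv, NULL);
    lept_stderr("max rms deviation between histograms: %8.2f\n", maxrv);
    numaDestroy(&narv);
}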
2353 | | |
2354 | | |
2355 | | /*----------------------------------------------------------------------* |
2356 | | * Extrema finding * |
2357 | | *----------------------------------------------------------------------*/ |
2358 | | /*! |
2359 | | * \brief numaFindPeaks() |
2360 | | * |
2361 | | * \param[in] nas source numa |
2362 | | * \param[in] nmax max number of peaks to be found |
2363 | | * \param[in] fract1 min fraction of peak value |
2364 | | * \param[in] fract2 min slope |
2365 | | * \return peak na, or NULL on error. |
2366 | | * |
2367 | | * <pre> |
2368 | | * Notes: |
2369 | | * (1) The returned na consists of sets of four numbers representing |
2370 | | * the peak, in the following order: |
2371 | | * left edge; peak center; right edge; normalized peak area |
2372 | | * </pre> |
2373 | | */ |
2374 | | NUMA * |
2375 | | numaFindPeaks(NUMA *nas, |
2376 | | l_int32 nmax, |
2377 | | l_float32 fract1, |
2378 | | l_float32 fract2) |
2379 | 0 | { |
2380 | 0 | l_int32 i, k, n, maxloc, lloc, rloc; |
2381 | 0 | l_float32 fmaxval, sum, total, newtotal, val, lastval; |
2382 | 0 | l_float32 peakfract; |
2383 | 0 | NUMA *na, *napeak; |
2384 | |
2385 | 0 | if (!nas) |
2386 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
2387 | 0 | n = numaGetCount(nas); |
2388 | 0 | numaGetSum(nas, &total); |
2389 | | |
2390 | | /* We munge this copy */ |
2391 | 0 | if ((na = numaCopy(nas)) == NULL) |
2392 | 0 | return (NUMA *)ERROR_PTR("na not made", __func__, NULL); |
2393 | 0 | if ((napeak = numaCreate(4 * nmax)) == NULL) { |
2394 | 0 | numaDestroy(&na); |
2395 | 0 | return (NUMA *)ERROR_PTR("napeak not made", __func__, NULL); |
2396 | 0 | } |
2397 | | |
2398 | 0 | for (k = 0; k < nmax; k++) { |
2399 | 0 | numaGetSum(na, &newtotal); |
2400 | 0 | if (newtotal == 0.0) /* sanity check */ |
2401 | 0 | break; |
2402 | 0 | numaGetMax(na, &fmaxval, &maxloc); |
2403 | 0 | sum = fmaxval; |
2404 | 0 | lastval = fmaxval; |
2405 | 0 | lloc = 0; |
2406 | 0 | for (i = maxloc - 1; i >= 0; --i) { |
2407 | 0 | numaGetFValue(na, i, &val); |
2408 | 0 | if (val == 0.0) { |
2409 | 0 | lloc = i + 1; |
2410 | 0 | break; |
2411 | 0 | } |
2412 | 0 | if (val > fract1 * fmaxval) { |
2413 | 0 | sum += val; |
2414 | 0 | lastval = val; |
2415 | 0 | continue; |
2416 | 0 | } |
2417 | 0 | if (lastval - val > fract2 * lastval) { |
2418 | 0 | sum += val; |
2419 | 0 | lastval = val; |
2420 | 0 | continue; |
2421 | 0 | } |
2422 | 0 | lloc = i; |
2423 | 0 | break; |
2424 | 0 | } |
2425 | 0 | lastval = fmaxval; |
2426 | 0 | rloc = n - 1; |
2427 | 0 | for (i = maxloc + 1; i < n; ++i) { |
2428 | 0 | numaGetFValue(na, i, &val); |
2429 | 0 | if (val == 0.0) { |
2430 | 0 | rloc = i - 1; |
2431 | 0 | break; |
2432 | 0 | } |
2433 | 0 | if (val > fract1 * fmaxval) { |
2434 | 0 | sum += val; |
2435 | 0 | lastval = val; |
2436 | 0 | continue; |
2437 | 0 | } |
2438 | 0 | if (lastval - val > fract2 * lastval) { |
2439 | 0 | sum += val; |
2440 | 0 | lastval = val; |
2441 | 0 | continue; |
2442 | 0 | } |
2443 | 0 | rloc = i; |
2444 | 0 | break; |
2445 | 0 | } |
2446 | 0 | peakfract = sum / total; |
2447 | 0 | numaAddNumber(napeak, lloc); |
2448 | 0 | numaAddNumber(napeak, maxloc); |
2449 | 0 | numaAddNumber(napeak, rloc); |
2450 | 0 | numaAddNumber(napeak, peakfract); |
2451 | |
2452 | 0 | for (i = lloc; i <= rloc; i++) |
2453 | 0 | numaSetValue(na, i, 0.0); |
2454 | 0 | } |
2455 | |
2456 | 0 | numaDestroy(&na); |
2457 | 0 | return napeak; |
2458 | 0 | } |
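A sketch that unpacks the quads returned by numaFindPeaks(); the parameters (5 peaks, fract1 = 0.3, fract2 = 0.05) are hypothetical.

#include "allheaders.h"

static void
DemoFindPeaks(NUMA *nas)
{
NUMA      *napeak;
l_int32    i, npeaks;
l_float32  left, center, right, area;

    if ((napeak = numaFindPeaks(nas, 5, 0.3f, 0.05f)) == NULL)
        return;
    npeaks = numaGetCount(napeak) / 4;
    for (i = 0; i < npeaks; i++) {
        numaGetFValue(napeak, 4 * i, &left);
        numaGetFValue(napeak, 4 * i + 1, &center);
        numaGetFValue(napeak, 4 * i + 2, &right);
        numaGetFValue(napeak, 4 * i + 3, &area);
        lept_stderr("peak %d: [%5.0f, %5.0f, %5.0f], area fraction %5.3f\n",
                    i, left, center, right, area);
    }
    numaDestroy(&napeak);
}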
2459 | | |
2460 | | |
2461 | | /*! |
2462 | | * \brief numaFindExtrema() |
2463 | | * |
2464 | | * \param[in] nas input values |
2465 | | * \param[in] delta relative amount to resolve peaks and valleys |
2466 | | * \param[out] pnav [optional] values of extrema |
2467 | | * \return nad locations of extrema, or NULL on error
2468 | | * |
2469 | | * <pre> |
2470 | | * Notes: |
2471 | | * (1) This returns a sequence of extrema (peaks and valleys). |
2472 | | * (2) The algorithm is analogous to that for determining |
2473 | | * mountain peaks. Suppose we have a local peak, with |
2474 | | * bumps on the side. Under what conditions can we consider |
2475 | | * those 'bumps' to be actual peaks? The answer: if the |
2476 | | * bump is separated from the peak by a saddle that is at |
2477 | | * least 500 feet below the bump. |
2478 | | * (3) Operationally, suppose we are trying to identify a peak. |
2479 | | * We have a previous valley, and also the largest value that |
2480 | | * we have seen since that valley. We can identify this as |
2481 | | * a peak if we find a value that is delta BELOW it. When |
2482 | | * we find such a value, label the peak, use the current |
2483 | | * value to label the starting point for the search for |
2484 | | * a valley, and do the same operation in reverse. Namely, |
2485 | | * keep track of the lowest point seen, and look for a value |
2486 | | * that is delta ABOVE it. Once found, the lowest point is |
2487 | | * labeled the valley, and continue, looking for the next peak. |
2488 | | * </pre> |
2489 | | */ |
2490 | | NUMA * |
2491 | | numaFindExtrema(NUMA *nas, |
2492 | | l_float32 delta, |
2493 | | NUMA **pnav) |
2494 | 0 | { |
2495 | 0 | l_int32 i, n, found, loc, direction; |
2496 | 0 | l_float32 startval, val, maxval, minval; |
2497 | 0 | NUMA *nav, *nad; |
2498 | |
2499 | 0 | if (pnav) *pnav = NULL; |
2500 | 0 | if (!nas) |
2501 | 0 | return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL); |
2502 | 0 | if (delta < 0.0) |
2503 | 0 | return (NUMA *)ERROR_PTR("delta < 0", __func__, NULL); |
2504 | | |
2505 | 0 | n = numaGetCount(nas); |
2506 | 0 | nad = numaCreate(0); |
2507 | 0 | nav = NULL; |
2508 | 0 | if (pnav) { |
2509 | 0 | nav = numaCreate(0); |
2510 | 0 | *pnav = nav; |
2511 | 0 | } |
2512 | | |
2513 | | /* We don't know if we'll find a peak or valley first, |
2514 | | * but use the first element of nas as the reference point. |
2515 | | * Break when we deviate by 'delta' from the first point. */ |
2516 | 0 | numaGetFValue(nas, 0, &startval); |
2517 | 0 | found = FALSE; |
2518 | 0 | for (i = 1; i < n; i++) { |
2519 | 0 | numaGetFValue(nas, i, &val); |
2520 | 0 | if (L_ABS(val - startval) >= delta) { |
2521 | 0 | found = TRUE; |
2522 | 0 | break; |
2523 | 0 | } |
2524 | 0 | } |
2525 | |
2526 | 0 | if (!found) |
2527 | 0 | return nad; /* it's empty */ |
2528 | | |
2529 | | /* Are we looking for a peak or a valley? */ |
2530 | 0 | if (val > startval) { /* peak */ |
2531 | 0 | direction = 1; |
2532 | 0 | maxval = val; |
2533 | 0 | } else { |
2534 | 0 | direction = -1; |
2535 | 0 | minval = val; |
2536 | 0 | } |
2537 | 0 | loc = i; |
2538 | | |
2539 | | /* Sweep through the rest of the array, recording alternating |
2540 | | * peak/valley extrema. */ |
2541 | 0 | for (i = i + 1; i < n; i++) { |
2542 | 0 | numaGetFValue(nas, i, &val); |
2543 | 0 | if (direction == 1 && val > maxval ) { /* new local max */ |
2544 | 0 | maxval = val; |
2545 | 0 | loc = i; |
2546 | 0 | } else if (direction == -1 && val < minval ) { /* new local min */ |
2547 | 0 | minval = val; |
2548 | 0 | loc = i; |
2549 | 0 | } else if (direction == 1 && (maxval - val >= delta)) { |
2550 | 0 | numaAddNumber(nad, loc); /* save the current max location */ |
2551 | 0 | if (nav) numaAddNumber(nav, maxval); |
2552 | 0 | direction = -1; /* reverse: start looking for a min */ |
2553 | 0 | minval = val; |
2554 | 0 | loc = i; /* current min location */ |
2555 | 0 | } else if (direction == -1 && (val - minval >= delta)) { |
2556 | 0 | numaAddNumber(nad, loc); /* save the current min location */ |
2557 | 0 | if (nav) numaAddNumber(nav, minval); |
2558 | 0 | direction = 1; /* reverse: start looking for a max */ |
2559 | 0 | maxval = val; |
2560 | 0 | loc = i; /* current max location */ |
2561 | 0 | } |
2562 | 0 | } |
2563 | | |
2564 | | /* Save the final extremum */ |
2565 | | /* numaAddNumber(nad, loc); */ |
2566 | 0 | return nad; |
2567 | 0 | } |
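A sketch listing the alternating peak/valley locations and values; %delta sets how large an excursion must be before it counts as a new extremum.

#include "allheaders.h"

static void
DemoFindExtrema(NUMA *nas, l_float32 delta)
{
NUMA      *nad, *nav;
l_int32    i, n, loc;
l_float32  val;

    if ((nad = numaFindExtrema(nas, delta, &nav)) == NULL)
        return;
    n = numaGetCount(nad);
    for (i = 0; i < n; i++) {
        numaGetIValue(nad, i, &loc);
        numaGetFValue(nav, i, &val);
        lept_stderr("extremum %d at index %d, value %8.2f\n", i, loc, val);
    }
    numaDestroy(&nad);
    numaDestroy(&nav);
}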
2568 | | |
2569 | | |
2570 | | /*! |
2571 | | * \brief numaFindLocForThreshold() |
2572 | | * |
2573 | | * \param[in] na input histogram
2574 | | * \param[in] skip look-ahead distance to avoid false minima;
2575 | | * use 0 for default |
2576 | | * \param[out] pthresh threshold value |
2577 | | * \param[out] pfract [optional] fraction below or at threshold |
2578 | | * \return 0 if OK, 1 on error or if no threshold can be found |
2579 | | * |
2580 | | * <pre> |
2581 | | * Notes: |
2582 | | * (1) This finds a good place to set a threshold for a histogram |
2583 | | * of values that has two peaks. The peaks can differ greatly |
2584 | | * in area underneath them. The number of buckets in the |
2585 | | * histogram is expected to be 256 (e.g., from an 8 bpp gray image).
2586 | | * (2) The input histogram should have been smoothed with a window |
2587 | | * to avoid false peak and valley detection due to noise. For |
2588 | | * example, see pixThresholdByHisto(). |
2589 | | * (3) A skip value can be input to determine the look-ahead distance |
2590 | | * to ignore a false peak on the rise or descent from the first peak. |
2591 | | * Input 0 to use the default value (it assumes a histo size of 256). |
2592 | | * (4) Optionally, the fractional area under the first peak can |
2593 | | * be returned. |
2594 | | * </pre> |
2595 | | */ |
2596 | | l_ok |
2597 | | numaFindLocForThreshold(NUMA *na, |
2598 | | l_int32 skip, |
2599 | | l_int32 *pthresh, |
2600 | | l_float32 *pfract) |
2601 | 0 | { |
2602 | 0 | l_int32 i, n, start, index, minloc, found; |
2603 | 0 | l_float32 val, pval, jval, minval, maxval, sum, partsum; |
2604 | 0 | l_float32 *fa; |
2605 | |
2606 | 0 | if (pfract) *pfract = 0.0; |
2607 | 0 | if (!pthresh) |
2608 | 0 | return ERROR_INT("&thresh not defined", __func__, 1); |
2609 | 0 | *pthresh = 0; |
2610 | 0 | if (!na) |
2611 | 0 | return ERROR_INT("na not defined", __func__, 1); |
2612 | 0 | if (skip <= 0) skip = 20; |
2613 | | |
2614 | | /* Test for constant value */ |
2615 | 0 | numaGetMin(na, &minval, NULL); |
2616 | 0 | numaGetMax(na, &maxval, NULL); |
2617 | 0 | if (minval == maxval) |
2618 | 0 | return ERROR_INT("all array values are the same", __func__, 1); |
2619 | | |
2620 | | /* Look for the top of the first peak */ |
2621 | 0 | n = numaGetCount(na); |
2622 | 0 | if (n < 256) |
2623 | 0 | L_WARNING("array size %d < 256\n", __func__, n); |
2624 | 0 | fa = numaGetFArray(na, L_NOCOPY); |
2625 | 0 | pval = fa[0]; |
2626 | 0 | for (i = 1; i < n; i++) { |
2627 | 0 | val = fa[i]; |
2628 | 0 | index = L_MIN(i + skip, n - 1); |
2629 | 0 | jval = fa[index]; |
2630 | 0 | if (val < pval && jval < pval) /* near the top if not there */ |
2631 | 0 | break; |
2632 | 0 | pval = val; |
2633 | 0 | } |
2634 | |
2635 | 0 | if (i > n - 5) /* just an increasing function */ |
2636 | 0 | return ERROR_INT("top of first peak not found", __func__, 1); |
2637 | | |
2638 | | /* Look for the low point in the valley */ |
2639 | 0 | found = FALSE; |
2640 | 0 | start = i; |
2641 | 0 | pval = fa[start]; |
2642 | 0 | for (i = start + 1; i < n; i++) { |
2643 | 0 | val = fa[i]; |
2644 | 0 | if (val <= pval) { /* appears to be going down */ |
2645 | 0 | pval = val; |
2646 | 0 | } else { /* appears to be going up */ |
2647 | 0 | index = L_MIN(i + skip, n - 1); |
2648 | 0 | jval = fa[index]; /* jump ahead by 'skip' */
2649 | 0 | if (val > jval) { /* still going down; jump ahead */ |
2650 | 0 | pval = jval; |
2651 | 0 | i = index; |
2652 | 0 | } else { /* really going up; passed the min */ |
2653 | 0 | found = TRUE; |
2654 | 0 | break; |
2655 | 0 | } |
2656 | 0 | } |
2657 | 0 | } |
2658 | 0 | if (!found) |
2659 | 0 | return ERROR_INT("no minimum found", __func__, 1); |
2660 | | |
2661 | | /* Find the location of the minimum in the interval */ |
2662 | 0 | minloc = index; /* likely passed the min; look backward */ |
2663 | 0 | minval = fa[index]; |
2664 | 0 | for (i = index - 1; i > index - skip; i--) { |
2665 | 0 | if (fa[i] < minval) { |
2666 | 0 | minval = fa[i]; |
2667 | 0 | minloc = i; |
2668 | 0 | } |
2669 | 0 | } |
2670 | | |
2671 | | /* Is the minimum very near the end of the array? */ |
2672 | 0 | if (minloc > n - 10) |
2673 | 0 | return ERROR_INT("minimum at end of array; invalid", __func__, 1); |
2674 | 0 | *pthresh = minloc; |
2675 | | |
2676 | | /* Find the fraction under the first peak */ |
2677 | 0 | if (pfract) { |
2678 | 0 | numaGetSumOnInterval(na, 0, minloc, &partsum); |
2679 | 0 | numaGetSum(na, &sum); |
2680 | 0 | if (sum > 0.0) |
2681 | 0 | *pfract = partsum / sum; |
2682 | 0 | } |
2683 | 0 | return 0; |
2684 | 0 | } |
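A sketch of the flow suggested in note (2): smooth the raw 256-bin histogram first, then look for the valley; the smoothing half-width of 5 is arbitrary.

#include "allheaders.h"

static void
DemoLocForThreshold(NUMA *nahisto)
{
l_int32    thresh;
l_float32  fract;
NUMA      *nas;

    if ((nas = numaWindowedMean(nahisto, 5)) == NULL)
        return;
    if (numaFindLocForThreshold(nas, 0, &thresh, &fract) == 0)
        lept_stderr("threshold = %d, fraction below = %5.3f\n", thresh, fract);
    numaDestroy(&nas);
}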
2685 | | |
2686 | | |
2687 | | /*! |
2688 | | * \brief numaCountReversals() |
2689 | | * |
2690 | | * \param[in] nas input values |
2691 | | * \param[in] minreversal relative amount to resolve peaks and valleys |
2692 | | * \param[out] pnr [optional] number of reversals |
2693 | | * \param[out] prd [optional] reversal density: reversals/length |
2694 | | * \return 0 if OK, 1 on error |
2695 | | * |
2696 | | * <pre> |
2697 | | * Notes: |
2698 | | * (1) The input numa can be generated from pixExtractAlongLine().
2699 | | * If so, the x parameters can be used to find the reversal |
2700 | | * frequency along a line. |
2701 | | * (2) If the input numa was generated from a 1 bpp pix, the |
2702 | | * values will be 0 and 1. Use %minreversal == 1 to get |
2703 | | * the number of pixel flips. If the only values are 0 and 1, |
2704 | | * but %minreversal > 1, set the reversal count to 0 and |
2705 | | * issue a warning. |
2706 | | * </pre> |
2707 | | */ |
2708 | | l_ok |
2709 | | numaCountReversals(NUMA *nas, |
2710 | | l_float32 minreversal, |
2711 | | l_int32 *pnr, |
2712 | | l_float32 *prd) |
2713 | 0 | { |
2714 | 0 | l_int32 i, n, nr, ival, binvals; |
2715 | 0 | l_int32 *ia; |
2716 | 0 | l_float32 fval, delx, len; |
2717 | 0 | NUMA *nat; |
2718 | |
2719 | 0 | if (pnr) *pnr = 0; |
2720 | 0 | if (prd) *prd = 0.0; |
2721 | 0 | if (!pnr && !prd) |
2722 | 0 | return ERROR_INT("neither &nr nor &rd are defined", __func__, 1); |
2723 | 0 | if (!nas) |
2724 | 0 | return ERROR_INT("nas not defined", __func__, 1); |
2725 | 0 | if ((n = numaGetCount(nas)) == 0) { |
2726 | 0 | L_INFO("nas is empty\n", __func__); |
2727 | 0 | return 0; |
2728 | 0 | } |
2729 | 0 | if (minreversal < 0.0) |
2730 | 0 | return ERROR_INT("minreversal < 0", __func__, 1); |
2731 | | |
2732 | | /* Decide if the only values are 0 and 1 */ |
2733 | 0 | binvals = TRUE; |
2734 | 0 | for (i = 0; i < n; i++) { |
2735 | 0 | numaGetFValue(nas, i, &fval); |
2736 | 0 | if (fval != 0.0 && fval != 1.0) { |
2737 | 0 | binvals = FALSE; |
2738 | 0 | break; |
2739 | 0 | } |
2740 | 0 | } |
2741 | |
2742 | 0 | nr = 0; |
2743 | 0 | if (binvals) { |
2744 | 0 | if (minreversal > 1.0) { |
2745 | 0 | L_WARNING("binary values but minreversal > 1\n", __func__); |
2746 | 0 | } else { |
2747 | 0 | ia = numaGetIArray(nas); |
2748 | 0 | ival = ia[0]; |
2749 | 0 | for (i = 1; i < n; i++) { |
2750 | 0 | if (ia[i] != ival) { |
2751 | 0 | nr++; |
2752 | 0 | ival = ia[i]; |
2753 | 0 | } |
2754 | 0 | } |
2755 | 0 | LEPT_FREE(ia); |
2756 | 0 | } |
2757 | 0 | } else { |
2758 | 0 | nat = numaFindExtrema(nas, minreversal, NULL); |
2759 | 0 | nr = numaGetCount(nat); |
2760 | 0 | numaDestroy(&nat); |
2761 | 0 | } |
2762 | 0 | if (pnr) *pnr = nr; |
2763 | 0 | if (prd) { |
2764 | 0 | numaGetParameters(nas, NULL, &delx); |
2765 | 0 | len = delx * n; |
2766 | 0 | *prd = (l_float32)nr / len; |
2767 | 0 | } |
2768 | |
2769 | 0 | return 0; |
2770 | 0 | } |
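A sketch for the binary case in note (2): with a 0/1 signal and %minreversal = 1.0, this counts the value flips and their density per unit length.

#include "allheaders.h"

static void
DemoCountReversals(NUMA *nas)
{
l_int32    nr;
l_float32  rd;

    if (numaCountReversals(nas, 1.0, &nr, &rd) == 0)
        lept_stderr("%d reversals, density %7.4f per unit length\n", nr, rd);
}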
2771 | | |
2772 | | |
2773 | | /*----------------------------------------------------------------------* |
2774 | | * Threshold crossings and frequency analysis * |
2775 | | *----------------------------------------------------------------------*/ |
2776 | | /*! |
2777 | | * \brief numaSelectCrossingThreshold() |
2778 | | * |
2779 | | * \param[in] nax [optional] numa of abscissa values; can be NULL |
2780 | | * \param[in] nay signal |
2781 | | * \param[in] estthresh estimated pixel threshold for crossing: |
2782 | | * e.g., for images, white <--> black; typ. ~120 |
2783 | | * \param[out] pbestthresh robust estimate of threshold to use |
2784 | | * \return 0 if OK, 1 on error or warning |
2785 | | * |
2786 | | * <pre> |
2787 | | * Notes: |
2788 | | * (1) When a valid threshold is used, the number of crossings is |
2789 | | * a maximum, because none are missed. If no threshold intersects |
2790 | | * all the crossings, the crossings must be determined with |
2791 | | * numaCrossingsByPeaks(). |
2792 | | * (2) %estthresh is an input estimate of the threshold that should |
2793 | | * be used. We compute the crossings with 41 thresholds |
2794 | | * (20 below and 20 above). There is a range in which the |
2795 | | * number of crossings is a maximum. Return a threshold |
2796 | | * in the center of this stable plateau of crossings. |
2797 | | * This can then be used with numaCrossingsByThreshold() |
2798 | | * to get a good estimate of crossing locations. |
2799 | | * (3) If the count of nay is less than 2, a warning is issued. |
2800 | | * </pre> |
2801 | | */ |
2802 | | l_ok |
2803 | | numaSelectCrossingThreshold(NUMA *nax, |
2804 | | NUMA *nay, |
2805 | | l_float32 estthresh, |
2806 | | l_float32 *pbestthresh) |
2807 | 0 | { |
2808 | 0 | l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen; |
2809 | 0 | l_int32 val, maxval, nmax, count; |
2810 | 0 | l_float32 thresh, fmaxval, fmodeval; |
2811 | 0 | NUMA *nat, *nac; |
2812 | |
2813 | 0 | if (!pbestthresh) |
2814 | 0 | return ERROR_INT("&bestthresh not defined", __func__, 1); |
2815 | 0 | *pbestthresh = 0.0; |
2816 | 0 | if (!nay) |
2817 | 0 | return ERROR_INT("nay not defined", __func__, 1); |
2818 | 0 | if (numaGetCount(nay) < 2) { |
2819 | 0 | L_WARNING("nay count < 2; no threshold crossing\n", __func__); |
2820 | 0 | return 1; |
2821 | 0 | } |
2822 | | |
2823 | | /* Compute the number of crossings for different thresholds */ |
2824 | 0 | nat = numaCreate(41); |
2825 | 0 | for (i = 0; i < 41; i++) { |
2826 | 0 | thresh = estthresh - 80.0f + 4.0f * i; |
2827 | 0 | nac = numaCrossingsByThreshold(nax, nay, thresh); |
2828 | 0 | numaAddNumber(nat, numaGetCount(nac)); |
2829 | 0 | numaDestroy(&nac); |
2830 | 0 | } |
2831 | | |
2832 | | /* Find the center of the plateau of max crossings, which |
2833 | | * extends from thresh[istart] to thresh[iend]. */ |
2834 | 0 | numaGetMax(nat, &fmaxval, NULL); |
2835 | 0 | maxval = (l_int32)fmaxval; |
2836 | 0 | nmax = 0; |
2837 | 0 | for (i = 0; i < 41; i++) { |
2838 | 0 | numaGetIValue(nat, i, &val); |
2839 | 0 | if (val == maxval) |
2840 | 0 | nmax++; |
2841 | 0 | } |
2842 | 0 | if (nmax < 3) { /* likely accidental max; try the mode */ |
2843 | 0 | numaGetMode(nat, &fmodeval, &count); |
2844 | 0 | if (count > nmax && fmodeval > 0.5 * fmaxval) |
2845 | 0 | maxval = (l_int32)fmodeval; /* use the mode */ |
2846 | 0 | } |
2847 | |
2848 | 0 | inrun = FALSE; |
2849 | 0 | iend = 40; |
2850 | 0 | maxrunlen = 0, maxstart = 0, maxend = 0; |
2851 | 0 | for (i = 0; i < 41; i++) { |
2852 | 0 | numaGetIValue(nat, i, &val); |
2853 | 0 | if (val == maxval) { |
2854 | 0 | if (!inrun) { |
2855 | 0 | istart = i; |
2856 | 0 | inrun = TRUE; |
2857 | 0 | } |
2858 | 0 | continue; |
2859 | 0 | } |
2860 | 0 | if (inrun && (val != maxval)) { |
2861 | 0 | iend = i - 1; |
2862 | 0 | runlen = iend - istart + 1; |
2863 | 0 | inrun = FALSE; |
2864 | 0 | if (runlen > maxrunlen) { |
2865 | 0 | maxstart = istart; |
2866 | 0 | maxend = iend; |
2867 | 0 | maxrunlen = runlen; |
2868 | 0 | } |
2869 | 0 | } |
2870 | 0 | } |
2871 | 0 | if (inrun) { |
2872 | 0 | runlen = i - istart; |
2873 | 0 | if (runlen > maxrunlen) { |
2874 | 0 | maxstart = istart; |
2875 | 0 | maxend = i - 1; |
2876 | 0 | maxrunlen = runlen; |
2877 | 0 | } |
2878 | 0 | } |
2879 | |
2880 | 0 | *pbestthresh = estthresh - 80.0f + 2.0f * (l_float32)(maxstart + maxend); |
2881 | |
2882 | | #if DEBUG_CROSSINGS |
2883 | | lept_stderr("\nCrossings attain a maximum at %d thresholds, between:\n" |
2884 | | " thresh[%d] = %5.1f and thresh[%d] = %5.1f\n", |
2885 | | nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart, |
2886 | | maxend, estthresh - 80.0 + 4.0 * maxend); |
2887 | | lept_stderr("The best choice: %5.1f\n", *pbestthresh); |
2888 | | lept_stderr("Number of crossings at the 41 thresholds:"); |
2889 | | numaWriteStderr(nat); |
2890 | | #endif /* DEBUG_CROSSINGS */ |
2891 | |
2892 | 0 | numaDestroy(&nat); |
2893 | 0 | return 0; |
2894 | 0 | } |
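A sketch of the pairing described in note (2): pick a robust threshold around an initial estimate of 120, then locate the crossings with it.

#include "allheaders.h"

static NUMA *
DemoFindCrossings(NUMA *nax, NUMA *nay)
{
l_float32  bestthresh;

    if (numaSelectCrossingThreshold(nax, nay, 120.0, &bestthresh))
        return NULL;
    return numaCrossingsByThreshold(nax, nay, bestthresh);
}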
2895 | | |
2896 | | |
2897 | | /*! |
2898 | | * \brief numaCrossingsByThreshold() |
2899 | | * |
2900 | | * \param[in] nax [optional] numa of abscissa values; can be NULL |
2901 | | * \param[in] nay numa of ordinate values, corresponding to nax |
2902 | | * \param[in] thresh threshold value for nay |
2903 | | * \return nad abscissa pts at threshold, or NULL on error |
2904 | | * |
2905 | | * <pre> |
2906 | | * Notes: |
2907 | | * (1) If nax == NULL, we use startx and delx from nay to compute |
2908 | | * the crossing values in nad. |
2909 | | * </pre> |
2910 | | */ |
2911 | | NUMA * |
2912 | | numaCrossingsByThreshold(NUMA *nax, |
2913 | | NUMA *nay, |
2914 | | l_float32 thresh) |
2915 | 0 | { |
2916 | 0 | l_int32 i, n; |
2917 | 0 | l_float32 startx, delx; |
2918 | 0 | l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract; |
2919 | 0 | NUMA *nad; |
2920 | |
2921 | 0 | if (!nay) |
2922 | 0 | return (NUMA *)ERROR_PTR("nay not defined", __func__, NULL); |
2923 | 0 | n = numaGetCount(nay); |
2924 | |
2925 | 0 | if (nax && (numaGetCount(nax) != n)) |
2926 | 0 | return (NUMA *)ERROR_PTR("nax and nay sizes differ", __func__, NULL); |
2927 | | |
2928 | 0 | nad = numaCreate(0); |
2929 | 0 | if (n < 2) return nad; |
2930 | 0 | numaGetFValue(nay, 0, &yval1); |
2931 | 0 | numaGetParameters(nay, &startx, &delx); |
2932 | 0 | if (nax) |
2933 | 0 | numaGetFValue(nax, 0, &xval1); |
2934 | 0 | else |
2935 | 0 | xval1 = startx; |
2936 | 0 | for (i = 1; i < n; i++) { |
2937 | 0 | numaGetFValue(nay, i, &yval2); |
2938 | 0 | if (nax) |
2939 | 0 | numaGetFValue(nax, i, &xval2); |
2940 | 0 | else |
2941 | 0 | xval2 = startx + i * delx; |
2942 | 0 | delta1 = yval1 - thresh; |
2943 | 0 | delta2 = yval2 - thresh; |
2944 | 0 | if (delta1 == 0.0) { |
2945 | 0 | numaAddNumber(nad, xval1); |
2946 | 0 | } else if (delta2 == 0.0) { |
2947 | 0 | numaAddNumber(nad, xval2); |
2948 | 0 | } else if (delta1 * delta2 < 0.0) { /* crossing */ |
2949 | 0 | fract = L_ABS(delta1) / L_ABS(yval1 - yval2); |
2950 | 0 | crossval = xval1 + fract * (xval2 - xval1); |
2951 | 0 | numaAddNumber(nad, crossval); |
2952 | 0 | } |
2953 | 0 | xval1 = xval2; |
2954 | 0 | yval1 = yval2; |
2955 | 0 | } |
2956 | |
2957 | 0 | return nad; |
2958 | 0 | } |
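
A minimal usage sketch for numaCrossingsByThreshold() above; the sampled sine signal is invented for illustration, and with nax = NULL the abscissa values come from the default startx/delx of nay:

#include <math.h>
#include "allheaders.h"

int main(void)
{
    l_int32    i, n;
    l_float32  x;
    NUMA      *nay, *nad;

    nay = numaCreate(100);
    for (i = 0; i < 100; i++)   /* slow sine wave, values in [-1, 1] */
        numaAddNumber(nay, (l_float32)sin(0.2 * i));

    nad = numaCrossingsByThreshold(NULL, nay, 0.0f);
    n = numaGetCount(nad);
    for (i = 0; i < n; i++) {
        numaGetFValue(nad, i, &x);
        lept_stderr("crossing %d at x = %7.3f\n", i, x);
    }
    numaDestroy(&nay);
    numaDestroy(&nad);
    return 0;
}
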
2959 | | |
2960 | | |
2961 | | /*! |
2962 | | * \brief numaCrossingsByPeaks() |
2963 | | * |
2964 | | * \param[in] nax [optional] numa of abscissa values |
2965 | | * \param[in] nay numa of ordinate values, corresponding to nax |
2966 | | * \param[in] delta minimum excursion from the previous extremum required to identify a new peak or valley
2967 | | * \return nad abscissa pts at the crossings, or NULL on error
2968 | | * |
2969 | | * <pre> |
2970 | | * Notes: |
2971 | | * (1) If nax == NULL, we use startx and delx from nay to compute |
2972 | | * the crossing values in nad. |
2973 | | * </pre> |
2974 | | */ |
2975 | | NUMA * |
2976 | | numaCrossingsByPeaks(NUMA *nax, |
2977 | | NUMA *nay, |
2978 | | l_float32 delta) |
2979 | 0 | { |
2980 | 0 | l_int32 i, j, n, np, previndex, curindex; |
2981 | 0 | l_float32 startx, delx; |
2982 | 0 | l_float32 xval1, xval2, yval1, yval2, delta1, delta2; |
2983 | 0 | l_float32 prevval, curval, thresh, crossval, fract; |
2984 | 0 | NUMA *nap, *nad; |
2985 | |
2986 | 0 | if (!nay) |
2987 | 0 | return (NUMA *)ERROR_PTR("nay not defined", __func__, NULL); |
2988 | | |
2989 | 0 | n = numaGetCount(nay); |
2990 | 0 | if (nax && (numaGetCount(nax) != n)) |
2991 | 0 | return (NUMA *)ERROR_PTR("nax and nay sizes differ", __func__, NULL); |
2992 | | |
2993 | | /* Find the extrema. Also add last point in nay to get |
2994 | | * the last transition (from the last peak to the end). |
2995 | | * The number of crossings is 1 more than the number of extrema. */ |
2996 | 0 | nap = numaFindExtrema(nay, delta, NULL); |
2997 | 0 | numaAddNumber(nap, n - 1); |
2998 | 0 | np = numaGetCount(nap); |
2999 | 0 | L_INFO("Number of crossings: %d\n", __func__, np); |
3000 | | |
3001 | | /* Do all computation in index units of nax or the delx of nay */ |
3002 | 0 | nad = numaCreate(np); /* output crossing locations, in nax units */ |
3003 | 0 | previndex = 0; /* prime the search with 1st point */ |
3004 | 0 | numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */ |
3005 | 0 | numaGetParameters(nay, &startx, &delx); |
3006 | 0 | for (i = 0; i < np; i++) { |
3007 | 0 | numaGetIValue(nap, i, &curindex); |
3008 | 0 | numaGetFValue(nay, curindex, &curval); |
3009 | 0 | thresh = (prevval + curval) / 2.0f; |
3010 | 0 | if (nax) |
3011 | 0 | numaGetFValue(nax, previndex, &xval1); |
3012 | 0 | else |
3013 | 0 | xval1 = startx + previndex * delx; |
3014 | 0 | numaGetFValue(nay, previndex, &yval1); |
3015 | 0 | for (j = previndex + 1; j <= curindex; j++) { |
3016 | 0 | if (nax) |
3017 | 0 | numaGetFValue(nax, j, &xval2); |
3018 | 0 | else |
3019 | 0 | xval2 = startx + j * delx; |
3020 | 0 | numaGetFValue(nay, j, &yval2); |
3021 | 0 | delta1 = yval1 - thresh; |
3022 | 0 | delta2 = yval2 - thresh; |
3023 | 0 | if (delta1 == 0.0) { |
3024 | 0 | numaAddNumber(nad, xval1); |
3025 | 0 | break; |
3026 | 0 | } else if (delta2 == 0.0) { |
3027 | 0 | numaAddNumber(nad, xval2); |
3028 | 0 | break; |
3029 | 0 | } else if (delta1 * delta2 < 0.0) { /* crossing */ |
3030 | 0 | fract = L_ABS(delta1) / L_ABS(yval1 - yval2); |
3031 | 0 | crossval = xval1 + fract * (xval2 - xval1); |
3032 | 0 | numaAddNumber(nad, crossval); |
3033 | 0 | break; |
3034 | 0 | } |
3035 | 0 | xval1 = xval2; |
3036 | 0 | yval1 = yval2; |
3037 | 0 | } |
3038 | 0 | previndex = curindex; |
3039 | 0 | prevval = curval; |
3040 | 0 | } |
3041 | |
3042 | 0 | numaDestroy(&nap); |
3043 | 0 | return nad; |
3044 | 0 | } |
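
A minimal usage sketch for numaCrossingsByPeaks() above; the signal and the delta value are assumptions chosen only to exercise the function:

#include <math.h>
#include "allheaders.h"

int main(void)
{
    l_int32    i, n;
    l_float32  x;
    NUMA      *nay, *nad;

        /* Periodic signal oscillating between about 10 and 90 */
    nay = numaCreate(200);
    for (i = 0; i < 200; i++)
        numaAddNumber(nay, 50.0f + 40.0f * (l_float32)sin(0.1 * i));

        /* delta = 10: the signal must move by at least 10 away from the
         * previous extremum for a new peak or valley to be accepted */
    nad = numaCrossingsByPeaks(NULL, nay, 10.0f);
    n = numaGetCount(nad);
    for (i = 0; i < n; i++) {
        numaGetFValue(nad, i, &x);
        lept_stderr("crossing %d at x = %7.3f\n", i, x);
    }
    numaDestroy(&nay);
    numaDestroy(&nad);
    return 0;
}
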
3045 | | |
3046 | | |
3047 | | /*! |
3048 | | * \brief numaEvalBestHaarParameters() |
3049 | | * |
3050 | | * \param[in] nas numa of non-negative signal values |
3051 | | * \param[in] relweight relative weight of (-1 comb) / (+1 comb) |
3052 | | * contributions to the 'convolution'. In effect, |
3053 | | * the convolution kernel is a comb consisting of |
3054 | | * alternating +1 and -weight. |
3055 | | * alternating +1 and -relweight.
3056 | | * \param[in] nshift number of shifts to consider for each width |
3057 | | * \param[in] minwidth smallest width to consider |
3058 | | * \param[in] maxwidth largest width to consider |
3059 | | * \param[out] pbestwidth width giving largest score |
3060 | | * \param[out] pbestshift shift giving largest score |
3061 | | * \param[out] pbestscore [optional] convolution with "Haar"-like comb |
3062 | | * \return 0 if OK, 1 on error |
3063 | | * |
3064 | | * <pre> |
3065 | | * Notes: |
3066 | | * (1) This does a linear sweep of widths, evaluating at %nshift |
3067 | | * shifts for each width, computing the score from a convolution |
3068 | | * with a long comb, and finding the (width, shift) pair that |
3069 | | * gives the maximum score. The best width is the "half-wavelength" |
3070 | | * of the signal. |
3071 | | * (2) The convolving function is a comb of alternating values |
3072 | | * +1 and -1 * relweight, separated by the width and phased by |
3073 | | * the shift. This is similar to a Haar transform, except |
3074 | | * there the convolution is performed with a square wave. |
3075 | | * (3) The function is useful for finding the line spacing |
3076 | | * and strength of line signal from pixel sum projections. |
3077 | | * (4) The score is normalized to the size of nas divided by |
3078 | | * the number of half-widths. For image applications, the input is |
3079 | | * typically an array of pixel projections, so one should |
3080 | | * normalize by dividing the score by the image width in the |
3081 | | * pixel projection direction. |
3082 | | * </pre> |
3083 | | */ |
3084 | | l_ok |
3085 | | numaEvalBestHaarParameters(NUMA *nas, |
3086 | | l_float32 relweight, |
3087 | | l_int32 nwidth, |
3088 | | l_int32 nshift, |
3089 | | l_float32 minwidth, |
3090 | | l_float32 maxwidth, |
3091 | | l_float32 *pbestwidth, |
3092 | | l_float32 *pbestshift, |
3093 | | l_float32 *pbestscore) |
3094 | 0 | { |
3095 | 0 | l_int32 i, j; |
3096 | 0 | l_float32 delwidth, delshift, width, shift, score; |
3097 | 0 | l_float32 bestwidth, bestshift, bestscore; |
3098 | |
3099 | 0 | if (pbestscore) *pbestscore = 0.0; |
3100 | 0 | if (pbestwidth) *pbestwidth = 0.0; |
3101 | 0 | if (pbestshift) *pbestshift = 0.0; |
3102 | 0 | if (!pbestwidth || !pbestshift) |
3103 | 0 | return ERROR_INT("&bestwidth and &bestshift not defined", __func__, 1); |
3104 | 0 | if (!nas) |
3105 | 0 | return ERROR_INT("nas not defined", __func__, 1); |
3106 | | |
3107 | 0 | bestscore = bestwidth = bestshift = 0.0; |
3108 | 0 | delwidth = (maxwidth - minwidth) / (nwidth - 1.0f); |
3109 | 0 | for (i = 0; i < nwidth; i++) { |
3110 | 0 | width = minwidth + delwidth * i; |
3111 | 0 | delshift = width / (l_float32)(nshift); |
3112 | 0 | for (j = 0; j < nshift; j++) { |
3113 | 0 | shift = j * delshift; |
3114 | 0 | numaEvalHaarSum(nas, width, shift, relweight, &score); |
3115 | 0 | if (score > bestscore) { |
3116 | 0 | bestscore = score; |
3117 | 0 | bestwidth = width; |
3118 | 0 | bestshift = shift; |
3119 | | #if DEBUG_FREQUENCY |
3120 | | lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n", |
3121 | | width, shift, score); |
3122 | | #endif /* DEBUG_FREQUENCY */ |
3123 | 0 | } |
3124 | 0 | } |
3125 | 0 | } |
3126 | |
3127 | 0 | *pbestwidth = bestwidth; |
3128 | 0 | *pbestshift = bestshift; |
3129 | 0 | if (pbestscore) |
3130 | 0 | *pbestscore = bestscore; |
3131 | 0 | return 0; |
3132 | 0 | } |
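
A minimal usage sketch for numaEvalBestHaarParameters() above. The synthetic projection profile and sweep parameters are assumptions for illustration; for a profile with a strong response every 8 samples one would expect a best width near 4 (the half-period), though no particular output is asserted here:

#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  bestwidth, bestshift, bestscore;
    NUMA      *nas;

        /* Synthetic projection: a spike every 8 samples */
    nas = numaCreate(256);
    for (i = 0; i < 256; i++)
        numaAddNumber(nas, (i % 8 == 0) ? 100.0f : 5.0f);

        /* Sweep 50 widths in [2.0, 20.0], 10 shifts per width;
         * relweight = 1.1 weights the negative comb teeth slightly more */
    numaEvalBestHaarParameters(nas, 1.1f, 50, 10, 2.0f, 20.0f,
                               &bestwidth, &bestshift, &bestscore);
    lept_stderr("width = %5.2f, shift = %5.2f, score = %7.2f\n",
                bestwidth, bestshift, bestscore);
    numaDestroy(&nas);
    return 0;
}
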
3133 | | |
3134 | | |
3135 | | /*! |
3136 | | * \brief numaEvalHaarSum() |
3137 | | * |
3138 | | * \param[in] nas numa of non-negative signal values |
3139 | | * \param[in] width distance between +1 and -1 in convolution comb |
3140 | | * \param[in] shift phase of the comb: location of first +1 |
3141 | | * \param[in] relweight relative weight of (-1 comb) / (+1 comb) |
3142 | | * contributions to the 'convolution'. In effect, |
3143 | | * the convolution kernel is a comb consisting of |
3144 | | * alternating +1 and -relweight.
3145 | | * \param[out] pscore convolution with "Haar"-like comb |
3146 | | * \return 0 if OK, 1 on error |
3147 | | * |
3148 | | * <pre> |
3149 | | * Notes: |
3150 | | * (1) This does a convolution with a comb of alternating values |
3151 | | * +1 and -relweight, separated by the width and phased by the shift. |
3152 | | * This is similar to a Haar transform, except that for Haar, |
3153 | | * (1) the convolution kernel is symmetric about 0, so the |
3154 | | * relweight is 1.0, and |
3155 | | * (2) the convolution is performed with a square wave. |
3156 | | * (2) The score is normalized to the size of nas divided by |
3157 | | * twice the "width". For image applications, the input is |
3158 | | * typically an array of pixel projections, so one should |
3159 | | * normalize by dividing the score by the image width in the |
3160 | | * pixel projection direction. |
3161 | | * (3) To get a Haar-like result, use relweight = 1.0. For detecting |
3162 | | * signals where you expect every other sample to be close to |
3163 | | * zero, as with barcodes or filtered text lines, you can |
3164 | | * use relweight > 1.0. |
3165 | | * </pre> |
3166 | | */ |
3167 | | l_ok |
3168 | | numaEvalHaarSum(NUMA *nas, |
3169 | | l_float32 width, |
3170 | | l_float32 shift, |
3171 | | l_float32 relweight, |
3172 | | l_float32 *pscore) |
3173 | 0 | { |
3174 | 0 | l_int32 i, n, nsamp, index; |
3175 | 0 | l_float32 score, weight, val; |
3176 | |
3177 | 0 | if (!pscore) |
3178 | 0 | return ERROR_INT("&score not defined", __func__, 1); |
3179 | 0 | *pscore = 0.0; |
3180 | 0 | if (!nas) |
3181 | 0 | return ERROR_INT("nas not defined", __func__, 1); |
3182 | 0 | if ((n = numaGetCount(nas)) < 2 * width) |
3183 | 0 | return ERROR_INT("nas size too small", __func__, 1); |
3184 | | |
3185 | 0 | score = 0.0; |
3186 | 0 | nsamp = (l_int32)((n - shift) / width); |
3187 | 0 | for (i = 0; i < nsamp; i++) { |
3188 | 0 | index = (l_int32)(shift + i * width); |
3189 | 0 | weight = (i % 2) ? 1.0f : -1.0f * relweight; |
3190 | 0 | numaGetFValue(nas, index, &val); |
3191 | 0 | score += weight * val; |
3192 | 0 | } |
3193 | |
3194 | 0 | *pscore = 2.0f * width * score / (l_float32)n; |
3195 | 0 | return 0; |
3196 | 0 | } |
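
A minimal sketch evaluating numaEvalHaarSum() above for a single (width, shift) pair on the same kind of synthetic profile; all input values are assumptions:

#include "allheaders.h"

int main(void)
{
    l_int32    i;
    l_float32  score;
    NUMA      *nas;

    nas = numaCreate(256);
    for (i = 0; i < 256; i++)   /* spike every 8 samples */
        numaAddNumber(nas, (i % 8 == 0) ? 100.0f : 5.0f);

        /* width = 4.0: comb teeth 4 samples apart (comb period 8);
         * shift = 0.0: first tooth at sample 0; relweight = 1.0 */
    numaEvalHaarSum(nas, 4.0f, 0.0f, 1.0f, &score);
    lept_stderr("score = %7.2f\n", score);

    numaDestroy(&nas);
    return 0;
}
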
3197 | | |
3198 | | |
3199 | | /*----------------------------------------------------------------------* |
3200 | | * Generating numbers in a range under constraints * |
3201 | | *----------------------------------------------------------------------*/ |
3202 | | /*! |
3203 | | * \brief genConstrainedNumaInRange() |
3204 | | * |
3205 | | * \param[in] first first number to choose; >= 0 |
3206 | | * \param[in] last biggest possible number to reach; >= first |
3207 | | * \param[in] nmax maximum number of numbers to select; > 0 |
3208 | | * \param[in] use_pairs 1 = select pairs of adjacent numbers; |
3209 | | * 0 = select individual numbers |
3210 | | * \return na of selected numbers, or NULL on error
3211 | | * |
3212 | | * <pre> |
3213 | | * Notes: |
3214 | | * (1) Selection is made uniformly in the range. This can be used |
3215 | | * to select pages distributed as uniformly as possible |
3216 | | * through a book, where you are constrained to: |
3217 | | * ~ choose within [first, ... last],
3218 | | * ~ choose no more than nmax numbers,
3219 | | * and you have the option of requiring pairs of adjacent numbers.
3220 | | * </pre> |
3221 | | */ |
3222 | | NUMA * |
3223 | | genConstrainedNumaInRange(l_int32 first, |
3224 | | l_int32 last, |
3225 | | l_int32 nmax, |
3226 | | l_int32 use_pairs) |
3227 | 0 | { |
3228 | 0 | l_int32 i, nsets, val; |
3229 | 0 | l_float32 delta; |
3230 | 0 | NUMA *na; |
3231 | |
3232 | 0 | first = L_MAX(0, first); |
3233 | 0 | if (last < first) |
3234 | 0 | return (NUMA *)ERROR_PTR("last < first!", __func__, NULL); |
3235 | 0 | if (nmax < 1) |
3236 | 0 | return (NUMA *)ERROR_PTR("nmax < 1!", __func__, NULL); |
3237 | | |
3238 | 0 | nsets = L_MIN(nmax, last - first + 1); |
3239 | 0 | if (use_pairs == 1) |
3240 | 0 | nsets = nsets / 2; |
3241 | 0 | if (nsets == 0) |
3242 | 0 | return (NUMA *)ERROR_PTR("nsets == 0", __func__, NULL); |
3243 | | |
3244 | | /* Select delta so that selection covers the full range if possible */ |
3245 | 0 | if (nsets == 1) { |
3246 | 0 | delta = 0.0; |
3247 | 0 | } else { |
3248 | 0 | if (use_pairs == 0) |
3249 | 0 | delta = (l_float32)(last - first) / (nsets - 1); |
3250 | 0 | else |
3251 | 0 | delta = (l_float32)(last - first - 1) / (nsets - 1); |
3252 | 0 | } |
3253 | |
3254 | 0 | na = numaCreate(nsets); |
3255 | 0 | for (i = 0; i < nsets; i++) { |
3256 | 0 | val = (l_int32)(first + i * delta + 0.5); |
3257 | 0 | numaAddNumber(na, val); |
3258 | 0 | if (use_pairs == 1) |
3259 | 0 | numaAddNumber(na, val + 1); |
3260 | 0 | } |
3261 | |
3262 | 0 | return na; |
3263 | 0 | } |
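
A minimal usage sketch for genConstrainedNumaInRange() above; the page range and limits are assumptions. With use_pairs = 1 and nmax = 20, at most 10 pairs of adjacent page numbers are selected, spread uniformly over [0, 299]:

#include "allheaders.h"

int main(void)
{
    l_int32  i, n, val;
    NUMA    *na;

    na = genConstrainedNumaInRange(0, 299, 20, 1);
    n = numaGetCount(na);
    for (i = 0; i < n; i++) {
        numaGetIValue(na, i, &val);
        lept_stderr("page %d\n", val);
    }
    numaDestroy(&na);
    return 0;
}
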