Coverage Report

Created: 2024-07-27 06:27

/src/leptonica/src/boxfunc5.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  boxfunc5.c
29
 * <pre>
30
 *
31
 *      Boxa sequence fitting
32
 *           BOXA     *boxaSmoothSequenceMedian()
33
 *           BOXA     *boxaWindowedMedian()
34
 *           BOXA     *boxaModifyWithBoxa()
35
 *           BOXA     *boxaReconcilePairWidth()
36
 *           l_int32   boxaSizeConsistency()
37
 *           BOXA     *boxaReconcileAllByMedian()
38
 *           BOXA     *boxaReconcileSidesByMedian()
39
 *    static void      adjustSidePlotName()  -- debug
40
 *           BOXA     *boxaReconcileSizeByMedian()
41
 *           l_int32   boxaPlotSides()   [for debugging]
42
 *           l_int32   boxaPlotSizes()   [for debugging]
43
 *           BOXA     *boxaFillSequence()
44
 *    static l_int32   boxaFillAll()
45
 *           l_int32   boxaSizeVariation()
46
 *           l_int32   boxaMedianDimensions()
47
 * </pre>
48
 */
49
50
#ifdef HAVE_CONFIG_H
51
#include <config_auto.h>
52
#endif  /* HAVE_CONFIG_H */
53
54
#include <math.h>
55
#include "allheaders.h"
56
57
static l_int32 boxaFillAll(BOXA *boxa);
58
static void adjustSidePlotName(char *buf, size_t size, const char *preface,
59
                               l_int32 select);
60
61
/*---------------------------------------------------------------------*
62
 *                        Boxa sequence fitting                        *
63
 *---------------------------------------------------------------------*/
64
/*!
65
 * \brief   boxaSmoothSequenceMedian()
66
 *
67
 * \param[in]    boxas        source boxa
68
 * \param[in]    halfwin      half-width of sliding window; used to find median
69
 * \param[in]    subflag      L_USE_MINSIZE, L_USE_MAXSIZE,
70
 *                            L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF,
71
 *                            L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
72
 * \param[in]    maxdiff      parameter used with L_SUB_ON_LOC_DIFF,
73
 *                            L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN,
74
 *                            L_USE_CAPPED_MAX
75
 * \param[in]    extrapixels  pixels added on all sides (or subtracted
76
 *                            if %extrapixels < 0) when using
77
 *                            L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
78
 * \param[in]    debug        1 for debug output
79
 * \return  boxad fitted boxa, or NULL on error
80
 *
81
 * <pre>
82
 * Notes:
83
 *      (1) The target width of the sliding window is 2 * %halfwin + 1.
84
 *          If necessary, this will be reduced by boxaWindowedMedian().
85
 *      (2) This returns a modified version of %boxas by constructing
86
 *          for each input box a box that has been smoothed with windowed
87
 *          median filtering.  The filtering is done to each of the
88
 *          box sides independently, and it is computed separately for
89
 *          sequences of even and odd boxes.  The output %boxad is
90
 *          constructed from the input boxa and the filtered boxa,
91
 *          depending on %subflag.  See boxaModifyWithBoxa() for
92
 *          details on the use of %subflag, %maxdiff and %extrapixels.
93
 *      (3) This is useful for removing noise separately in the even
94
 *          and odd sets, where the box edge locations can have
95
 *          discontinuities but otherwise vary roughly linearly within
96
 *          intervals of size %halfwin or larger.
97
 *      (4) If you don't need to handle even and odd sets separately,
98
 *          just do this:
99
 *              boxam = boxaWindowedMedian(boxas, halfwin, debug);
100
 *              boxad = boxaModifyWithBoxa(boxas, boxam, subflag, maxdiff,
101
 *                                         extrapixels);
102
 *              boxaDestroy(&boxam);
103
 * </pre>
104
 */
105
BOXA *
106
boxaSmoothSequenceMedian(BOXA    *boxas,
107
                         l_int32  halfwin,
108
                         l_int32  subflag,
109
                         l_int32  maxdiff,
110
                         l_int32  extrapixels,
111
                         l_int32  debug)
112
0
{
113
0
l_int32  n;
114
0
BOXA    *boxae, *boxao, *boxamede, *boxamedo, *boxame, *boxamo, *boxad;
115
0
PIX     *pix1;
116
117
0
    if (!boxas)
118
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
119
0
    if (halfwin <= 0) {
120
0
        L_WARNING("halfwin must be > 0; returning copy\n", __func__);
121
0
        return boxaCopy(boxas, L_COPY);
122
0
    }
123
0
    if (maxdiff < 0) {
124
0
        L_WARNING("maxdiff must be >= 0; returning copy\n", __func__);
125
0
        return boxaCopy(boxas, L_COPY);
126
0
    }
127
0
    if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE &&
128
0
        subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF &&
129
0
        subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) {
130
0
        L_WARNING("invalid subflag; returning copy\n", __func__);
131
0
        return boxaCopy(boxas, L_COPY);
132
0
    }
133
0
    if ((n = boxaGetCount(boxas)) < 6) {
134
0
        L_WARNING("need at least 6 boxes; returning copy\n", __func__);
135
0
        return boxaCopy(boxas, L_COPY);
136
0
    }
137
138
0
    boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
139
0
    if (debug) {
140
0
        lept_mkdir("lept/smooth");
141
0
        boxaWriteDebug("/tmp/lept/smooth/boxae.ba", boxae);
142
0
        boxaWriteDebug("/tmp/lept/smooth/boxao.ba", boxao);
143
0
    }
144
145
0
    boxamede = boxaWindowedMedian(boxae, halfwin, debug);
146
0
    boxamedo = boxaWindowedMedian(boxao, halfwin, debug);
147
0
    if (debug) {
148
0
        boxaWriteDebug("/tmp/lept/smooth/boxamede.ba", boxamede);
149
0
        boxaWriteDebug("/tmp/lept/smooth/boxamedo.ba", boxamedo);
150
0
    }
151
152
0
    boxame = boxaModifyWithBoxa(boxae, boxamede, subflag, maxdiff, extrapixels);
153
0
    boxamo = boxaModifyWithBoxa(boxao, boxamedo, subflag, maxdiff, extrapixels);
154
0
    if (debug) {
155
0
        boxaWriteDebug("/tmp/lept/smooth/boxame.ba", boxame);
156
0
        boxaWriteDebug("/tmp/lept/smooth/boxamo.ba", boxamo);
157
0
    }
158
159
0
    boxad = boxaMergeEvenOdd(boxame, boxamo, 0);
160
0
    if (debug) {
161
0
        boxaPlotSides(boxas, NULL, NULL, NULL, NULL, NULL, &pix1);
162
0
        pixWrite("/tmp/lept/smooth/plotsides1.png", pix1, IFF_PNG);
163
0
        pixDestroy(&pix1);
164
0
        boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1);
165
0
        pixWrite("/tmp/lept/smooth/plotsides2.png", pix1, IFF_PNG);
166
0
        pixDestroy(&pix1);
167
0
        boxaPlotSizes(boxas, NULL, NULL, NULL, &pix1);
168
0
        pixWrite("/tmp/lept/smooth/plotsizes1.png", pix1, IFF_PNG);
169
0
        pixDestroy(&pix1);
170
0
        boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1);
171
0
        pixWrite("/tmp/lept/smooth/plotsizes2.png", pix1, IFF_PNG);
172
0
        pixDestroy(&pix1);
173
0
    }
174
175
0
    boxaDestroy(&boxae);
176
0
    boxaDestroy(&boxao);
177
0
    boxaDestroy(&boxamede);
178
0
    boxaDestroy(&boxamedo);
179
0
    boxaDestroy(&boxame);
180
0
    boxaDestroy(&boxamo);
181
0
    return boxad;
182
0
}
183
184
185
/*!
186
 * \brief   boxaWindowedMedian()
187
 *
188
 * \param[in]    boxas     source boxa
189
 * \param[in]    halfwin   half width of window over which the median is found
190
 * \param[in]    debug     1 for debug output
191
 * \return  boxad smoothed boxa, or NULL on error
192
 *
193
 * <pre>
194
 * Notes:
195
 *      (1) This finds a set of boxes (boxad) where each edge of each box is
196
 *          a windowed median smoothed value to the edges of the
197
 *          input set of boxes (boxas).
198
 *      (2) Invalid input boxes are filled from nearby ones.
199
 *      (3) The returned boxad can then be used in boxaModifyWithBoxa()
200
 *          to selectively change the boxes in the source boxa.
201
 * </pre>
202
 */
203
BOXA *
204
boxaWindowedMedian(BOXA    *boxas,
205
                   l_int32  halfwin,
206
                   l_int32  debug)
207
0
{
208
0
l_int32  n, i, left, top, right, bot;
209
0
BOX     *box;
210
0
BOXA    *boxaf, *boxad;
211
0
NUMA    *nal, *nat, *nar, *nab, *naml, *namt, *namr, *namb;
212
0
PIX     *pix1;
213
214
0
    if (!boxas)
215
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
216
0
    if ((n = boxaGetCount(boxas)) < 3) {
217
0
        L_WARNING("less than 3 boxes; returning a copy\n", __func__);
218
0
        return boxaCopy(boxas, L_COPY);
219
0
    }
220
0
    if (halfwin <= 0) {
221
0
        L_WARNING("halfwin must be > 0; returning copy\n", __func__);
222
0
        return boxaCopy(boxas, L_COPY);
223
0
    }
224
225
        /* Fill invalid boxes in the input sequence */
226
0
    if ((boxaf = boxaFillSequence(boxas, L_USE_ALL_BOXES, debug)) == NULL)
227
0
        return (BOXA *)ERROR_PTR("filled boxa not made", __func__, NULL);
228
229
        /* Get the windowed median output from each of the sides */
230
0
    boxaExtractAsNuma(boxaf, &nal, &nat, &nar, &nab, NULL, NULL, 0);
231
0
    naml = numaWindowedMedian(nal, halfwin);
232
0
    namt = numaWindowedMedian(nat, halfwin);
233
0
    namr = numaWindowedMedian(nar, halfwin);
234
0
    namb = numaWindowedMedian(nab, halfwin);
235
236
0
    n = boxaGetCount(boxaf);
237
0
    boxad = boxaCreate(n);
238
0
    for (i = 0; i < n; i++) {
239
0
        numaGetIValue(naml, i, &left);
240
0
        numaGetIValue(namt, i, &top);
241
0
        numaGetIValue(namr, i, &right);
242
0
        numaGetIValue(namb, i, &bot);
243
0
        box = boxCreate(left, top, right - left + 1, bot - top + 1);
244
0
        boxaAddBox(boxad, box, L_INSERT);
245
0
    }
246
247
0
    if (debug) {
248
0
        lept_mkdir("lept/windowed");
249
0
        boxaPlotSides(boxaf, NULL, NULL, NULL, NULL, NULL, &pix1);
250
0
        pixWrite("/tmp/lept/windowed/plotsides1.png", pix1, IFF_PNG);
251
0
        pixDestroy(&pix1);
252
0
        boxaPlotSides(boxad, NULL, NULL, NULL, NULL, NULL, &pix1);
253
0
        pixWrite("/tmp/lept/windowed/plotsides2.png", pix1, IFF_PNG);
254
0
        pixDestroy(&pix1);
255
0
        boxaPlotSizes(boxaf, NULL, NULL, NULL, &pix1);
256
0
        pixWrite("/tmp/lept/windowed/plotsizes1.png", pix1, IFF_PNG);
257
0
        pixDestroy(&pix1);
258
0
        boxaPlotSizes(boxad, NULL, NULL, NULL, &pix1);
259
0
        pixWrite("/tmp/lept/windowed/plotsizes2.png", pix1, IFF_PNG);
260
0
        pixDestroy(&pix1);
261
0
    }
262
263
0
    boxaDestroy(&boxaf);
264
0
    numaDestroy(&nal);
265
0
    numaDestroy(&nat);
266
0
    numaDestroy(&nar);
267
0
    numaDestroy(&nab);
268
0
    numaDestroy(&naml);
269
0
    numaDestroy(&namt);
270
0
    numaDestroy(&namr);
271
0
    numaDestroy(&namb);
272
0
    return boxad;
273
0
}
274
275
276
/*!
277
 * \brief   boxaModifyWithBoxa()
278
 *
279
 * \param[in]    boxas
280
 * \param[in]    boxam        boxa with boxes used to modify those in boxas
281
 * \param[in]    subflag      L_USE_MINSIZE, L_USE_MAXSIZE,
282
 *                            L_SUB_ON_LOC_DIFF, L_SUB_ON_SIZE_DIFF,
283
 *                            L_USE_CAPPED_MIN, L_USE_CAPPED_MAX
284
 * \param[in]    maxdiff      parameter used with L_SUB_ON_LOC_DIFF,
285
 *                            L_SUB_ON_SIZE_DIFF, L_USE_CAPPED_MIN,
286
 *                            L_USE_CAPPED_MAX
287
 * \param[in]    extrapixels  pixels added on all sides (or subtracted
288
 *                            if %extrapixels < 0) when using
289
 *                            L_SUB_ON_LOC_DIFF and L_SUB_ON_SIZE_DIFF
290
 * \return  boxad  result after adjusting boxes in boxas, or NULL on error.
291
 *
292
 * <pre>
293
 * Notes:
294
 *      (1) This takes two input boxa (boxas, boxam) and constructs boxad,
295
 *          where each box in boxad is generated from the corresponding
296
 *          boxes in boxas and boxam.  The rule for constructing each
297
 *          output box depends on %subflag and %maxdiff.  Let boxs be
298
 *          a box from %boxas and boxm be a box from %boxam.
299
 *          * If %subflag == L_USE_MINSIZE: the output box is the intersection
300
 *            of the two input boxes.
301
 *          * If %subflag == L_USE_MAXSIZE: the output box is the union of the
302
 *            two input boxes; i.e., the minimum bounding rectangle for the
303
 *            two input boxes.
304
 *          * If %subflag == L_SUB_ON_LOC_DIFF: each side of the output box
305
 *            is found separately from the corresponding side of boxs and boxm.
306
 *            Use the boxm side, expanded by %extrapixels, if greater than
307
 *            %maxdiff pixels from the boxs side.
308
 *          * If %subflag == L_SUB_ON_SIZE_DIFF: the sides of the output box
309
 *            are determined in pairs from the width and height of boxs
310
 *            and boxm.  If the boxm width differs by more than %maxdiff
311
 *            pixels from boxs, use the boxm left and right sides,
312
 *            expanded by %extrapixels.  Ditto for the height difference.
313
 *          For the last two flags, each side of the output box is found
314
 *          separately from the corresponding side of boxs and boxm,
315
 *          according to these rules, where "smaller"("bigger") mean in a
316
 *          direction that decreases(increases) the size of the output box:
317
 *          * If %subflag == L_USE_CAPPED_MIN: use the Min of boxm
318
 *            with the Max of (boxs, boxm +- %maxdiff), where the sign
319
 *            is adjusted to make the box smaller (e.g., use "+" on left side).
320
 *          * If %subflag == L_USE_CAPPED_MAX: use the Max of boxm
321
 *            with the Min of (boxs, boxm +- %maxdiff), where the sign
322
 *            is adjusted to make the box bigger (e.g., use "-" on left side).
323
 *          Use of the last 2 flags is further explained in (3) and (4).
324
 *      (2) boxas and boxam must be the same size.  If boxam == NULL,
325
 *          this returns a copy of boxas with a warning.
326
 *      (3) If %subflag == L_SUB_ON_LOC_DIFF, use boxm for each side
327
 *          where the corresponding sides differ by more than %maxdiff.
328
 *          Two extreme cases:
329
 *          (a) set %maxdiff == 0 to use only values from boxam in boxad.
330
 *          (b) set %maxdiff == 10000 to ignore all values from boxam;
331
 *              then boxad will be the same as boxas.
332
 *      (4) If %subflag == L_USE_CAPPED_MAX: use boxm if boxs is smaller;
333
 *          use boxs if boxs is bigger than boxm by an amount up to %maxdiff;
334
 *          and use boxm +- %maxdiff (the 'capped' value) if boxs is
335
 *          bigger than boxm by an amount larger than %maxdiff.
336
 *          Similarly, with interchange of Min/Max and sign of %maxdiff,
337
 *          for %subflag == L_USE_CAPPED_MIN.
338
 *      (5) If either of corresponding boxes in boxas and boxam is invalid,
339
 *          an invalid box is copied to the result.
340
 *      (6) Typical input for boxam may be the output of boxaLinearFit(),
341
 *          where outliers have been removed and each side is LS fit to a line.
342
 *      (7) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(),
343
 *          this uses two boxes and does not specify target dimensions.
344
 * </pre>
345
 */
346
BOXA *
347
boxaModifyWithBoxa(BOXA    *boxas,
348
                   BOXA    *boxam,
349
                   l_int32  subflag,
350
                   l_int32  maxdiff,
351
                   l_int32  extrapixels)
352
0
{
353
0
l_int32  n, i, ls, ts, rs, bs, ws, hs, lm, tm, rm, bm, wm, hm, ld, td, rd, bd;
354
0
BOX     *boxs, *boxm, *boxd, *boxempty;
355
0
BOXA    *boxad;
356
357
0
    if (!boxas)
358
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
359
0
    if (!boxam) {
360
0
        L_WARNING("boxam not defined; returning copy", __func__);
361
0
        return boxaCopy(boxas, L_COPY);
362
0
    }
363
0
    if (subflag != L_USE_MINSIZE && subflag != L_USE_MAXSIZE &&
364
0
        subflag != L_SUB_ON_LOC_DIFF && subflag != L_SUB_ON_SIZE_DIFF &&
365
0
        subflag != L_USE_CAPPED_MIN && subflag != L_USE_CAPPED_MAX) {
366
0
        L_WARNING("invalid subflag; returning copy", __func__);
367
0
        return boxaCopy(boxas, L_COPY);
368
0
    }
369
0
    n = boxaGetCount(boxas);
370
0
    if (n != boxaGetCount(boxam)) {
371
0
        L_WARNING("boxas and boxam sizes differ; returning copy", __func__);
372
0
        return boxaCopy(boxas, L_COPY);
373
0
    }
374
375
0
    boxad = boxaCreate(n);
376
0
    boxempty = boxCreate(0, 0, 0, 0);  /* placeholders */
377
0
    for (i = 0; i < n; i++) {
378
0
        boxs = boxaGetValidBox(boxas, i, L_CLONE);
379
0
        boxm = boxaGetValidBox(boxam, i, L_CLONE);
380
0
        if (!boxs || !boxm) {
381
0
            boxaAddBox(boxad, boxempty, L_COPY);
382
0
        } else {
383
0
            boxGetGeometry(boxs, &ls, &ts, &ws, &hs);
384
0
            boxGetGeometry(boxm, &lm, &tm, &wm, &hm);
385
0
            rs = ls + ws - 1;
386
0
            bs = ts + hs - 1;
387
0
            rm = lm + wm - 1;
388
0
            bm = tm + hm - 1;
389
0
            if (subflag == L_USE_MINSIZE) {
390
0
                ld = L_MAX(ls, lm);
391
0
                rd = L_MIN(rs, rm);
392
0
                td = L_MAX(ts, tm);
393
0
                bd = L_MIN(bs, bm);
394
0
            } else if (subflag == L_USE_MAXSIZE) {
395
0
                ld = L_MIN(ls, lm);
396
0
                rd = L_MAX(rs, rm);
397
0
                td = L_MIN(ts, tm);
398
0
                bd = L_MAX(bs, bm);
399
0
            } else if (subflag == L_SUB_ON_LOC_DIFF) {
400
0
                ld = (L_ABS(lm - ls) <= maxdiff) ? ls : lm - extrapixels;
401
0
                td = (L_ABS(tm - ts) <= maxdiff) ? ts : tm - extrapixels;
402
0
                rd = (L_ABS(rm - rs) <= maxdiff) ? rs : rm + extrapixels;
403
0
                bd = (L_ABS(bm - bs) <= maxdiff) ? bs : bm + extrapixels;
404
0
            } else if (subflag == L_SUB_ON_SIZE_DIFF) {
405
0
                ld = (L_ABS(wm - ws) <= maxdiff) ? ls : lm - extrapixels;
406
0
                td = (L_ABS(hm - hs) <= maxdiff) ? ts : tm - extrapixels;
407
0
                rd = (L_ABS(wm - ws) <= maxdiff) ? rs : rm + extrapixels;
408
0
                bd = (L_ABS(hm - hs) <= maxdiff) ? bs : bm + extrapixels;
409
0
            } else if (subflag == L_USE_CAPPED_MIN) {
410
0
                ld = L_MAX(lm, L_MIN(ls, lm + maxdiff));
411
0
                td = L_MAX(tm, L_MIN(ts, tm + maxdiff));
412
0
                rd = L_MIN(rm, L_MAX(rs, rm - maxdiff));
413
0
                bd = L_MIN(bm, L_MAX(bs, bm - maxdiff));
414
0
            } else {  /* subflag == L_USE_CAPPED_MAX */
415
0
                ld = L_MIN(lm, L_MAX(ls, lm - maxdiff));
416
0
                td = L_MIN(tm, L_MAX(ts, tm - maxdiff));
417
0
                rd = L_MAX(rm, L_MIN(rs, rm + maxdiff));
418
0
                bd = L_MAX(bm, L_MIN(bs, bm + maxdiff));
419
0
            }
420
0
            boxd = boxCreate(ld, td, rd - ld + 1, bd - td + 1);
421
0
            boxaAddBox(boxad, boxd, L_INSERT);
422
0
        }
423
0
        boxDestroy(&boxs);
424
0
        boxDestroy(&boxm);
425
0
    }
426
0
    boxDestroy(&boxempty);
427
428
0
    return boxad;
429
0
}
430
431
432
/*!
433
 * \brief   boxaReconcilePairWidth()
434
 *
435
 * \param[in]    boxas
436
 * \param[in]    delw      threshold on adjacent width difference
437
 * \param[in]    op        L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX
438
 * \param[in]    factor    > 0.0, typically near 1.0
439
 * \param[in]    na        [optional] indicator array allowing change
440
 * \return  boxad adjusted, or NULL on error
441
 *
442
 * <pre>
443
 * Notes:
444
 *      (1) This reconciles differences in the width of adjacent boxes,
445
 *          by moving one side of one of the boxes in each pair.
446
 *          If the widths in the pair differ by more than some
447
 *          threshold, move either the left side for even boxes or
448
 *          the right side for odd boxes, depending on if we're choosing
449
 *          the min or max.  If choosing min, the width of the max is
450
 *          set to factor * (width of min).  If choosing max, the width
451
 *          of the min is set to factor * (width of max).
452
 *      (2) If %na exists, it is an indicator array corresponding to the
453
 *          boxes in %boxas.  If %na != NULL, only boxes with an
454
 *          indicator value of 1 are allowed to adjust; otherwise,
455
 *          all boxes can adjust.
456
 *      (3) Typical input might be the output of boxaSmoothSequenceMedian(),
457
 *          where even and odd boxa have been independently regulated.
458
 * </pre>
459
 */
460
BOXA *
461
boxaReconcilePairWidth(BOXA      *boxas,
462
                       l_int32    delw,
463
                       l_int32    op,
464
                       l_float32  factor,
465
                       NUMA      *na)
466
0
{
467
0
l_int32  i, ne, no, nmin, xe, we, xo, wo, inde, indo, x, w;
468
0
BOX     *boxe, *boxo;
469
0
BOXA    *boxae, *boxao, *boxad;
470
471
0
    if (!boxas)
472
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
473
0
    if (factor <= 0.0) {
474
0
        L_WARNING("invalid factor; setting to 1.0\n", __func__);
475
0
        factor = 1.0;
476
0
    }
477
478
        /* Taking the boxes in pairs, if the difference in width reaches
479
         * the threshold %delw, adjust the left or right side of one
480
         * of the pair. */
481
0
    boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
482
0
    ne = boxaGetCount(boxae);
483
0
    no = boxaGetCount(boxao);
484
0
    nmin = L_MIN(ne, no);
485
0
    for (i = 0; i < nmin; i++) {
486
            /* Set indicator values */
487
0
        if (na) {
488
0
            numaGetIValue(na, 2 * i, &inde);
489
0
            numaGetIValue(na, 2 * i + 1, &indo);
490
0
        } else {
491
0
            inde = indo = 1;
492
0
        }
493
0
        if (inde == 0 && indo == 0) continue;
494
495
0
        boxe = boxaGetBox(boxae, i, L_CLONE);
496
0
        boxo = boxaGetBox(boxao, i, L_CLONE);
497
0
        boxGetGeometry(boxe, &xe, NULL, &we, NULL);
498
0
        boxGetGeometry(boxo, &xo, NULL, &wo, NULL);
499
0
        if (we == 0 || wo == 0) {  /* if either is invalid; skip */
500
0
            boxDestroy(&boxe);
501
0
            boxDestroy(&boxo);
502
0
            continue;
503
0
        } else if (L_ABS(we - wo) > delw) {
504
0
            if (op == L_ADJUST_CHOOSE_MIN) {
505
0
                if (we > wo && inde == 1) {
506
                        /* move left side of even to the right */
507
0
                    w = factor * wo;
508
0
                    x = xe + (we - w);
509
0
                    boxSetGeometry(boxe, x, -1, w, -1);
510
0
                } else if (we < wo && indo == 1) {
511
                        /* move right side of odd to the left */
512
0
                    w = factor * we;
513
0
                    boxSetGeometry(boxo, -1, -1, w, -1);
514
0
                }
515
0
            } else {  /* maximize width */
516
0
                if (we < wo && inde == 1) {
517
                        /* move left side of even to the left */
518
0
                    w = factor * wo;
519
0
                    x = L_MAX(0, xe + (we - w));
520
0
                    w = we + (xe - x);  /* covers both cases for the max */
521
0
                    boxSetGeometry(boxe, x, -1, w, -1);
522
0
                } else if (we > wo && indo == 1) {
523
                        /* move right side of odd to the right */
524
0
                    w = factor * we;
525
0
                    boxSetGeometry(boxo, -1, -1, w, -1);
526
0
                }
527
0
            }
528
0
        }
529
0
        boxDestroy(&boxe);
530
0
        boxDestroy(&boxo);
531
0
    }
532
533
0
    boxad = boxaMergeEvenOdd(boxae, boxao, 0);
534
0
    boxaDestroy(&boxae);
535
0
    boxaDestroy(&boxao);
536
0
    return boxad;
537
0
}
538
539
540
/*!
541
 * \brief   boxaSizeConsistency()
542
 *
543
 * \param[in]    boxas     containing at least 6 valid boxes
544
 * \param[in]    type      L_CHECK_WIDTH, L_CHECK_HEIGHT
545
 * \param[in]    threshp   threshold for pairwise fractional variation
546
 * \param[in]    threshm   threshold for fractional variation from median
547
 * \param[out]   pfvarp    [optional] average fractional pairwise variation
548
 * \param[out]   pfvarm    [optional] average fractional median variation
549
 * \param[out]   psame     decision for uniformity of page size (1, 0, -1)
 * \return  0 if OK, 1 on error
550
 *
551
 * <pre>
552
 * Notes:
553
 *      (1) This evaluates a boxa for particular types of dimensional
554
 *          variation.  Select either width or height variation.  Then
555
 *          it returns two numbers: one is based on pairwise (even/odd)
556
 *          variation; the other is based on the average variation
557
 *          from the boxa median.
558
 *      (2) For the pairwise variation, get the fraction of the absolute
559
 *          difference in dimension of each pair of boxes, and take
560
 *          the average value.  The median variation is simply the
561
 *          average of the fractional deviation from the median
562
 *          of all the boxes.
563
 *      (3) Use 0 for default values of %threshp and %threshm.  They are
564
 *            threshp:  0.02
565
 *            threshm:  0.015
566
 *      (4) The intended application is that the boxes are a sequence of
567
 *          page regions in a book scan, and we calculate two numbers
568
 *          that can give an indication if the pages are approximately
569
 *          the same size.  The pairwise variation should be small if
570
 *          the boxes are correctly calculated.  If there are a
571
 *          significant number of random or systematic outliers, the
572
 *          pairwise variation will be large, and no decision will be made
573
 *          (i.e., return same == -1).  Here are the possible outcomes:
574
 *            Pairwise Var    Median Var    Decision
575
 *            ------------    ----------    --------
576
 *            small           small         same size  (1)
577
 *            small           large         different size  (0)
578
 *            large           small/large   unknown   (-1)
579
 * </pre>
580
 */
581
l_ok
582
boxaSizeConsistency(BOXA       *boxas,
583
                    l_int32     type,
584
                    l_float32   threshp,
585
                    l_float32   threshm,
586
                    l_float32  *pfvarp,
587
                    l_float32  *pfvarm,
588
                    l_int32    *psame)
589
0
{
590
0
l_int32    i, n, bw1, bh1, bw2, bh2, npairs;
591
0
l_float32  ave, fdiff, sumdiff, med, fvarp, fvarm;
592
0
NUMA      *na1;
593
594
0
    if (pfvarp) *pfvarp = 0.0;
595
0
    if (pfvarm) *pfvarm = 0.0;
596
0
    if (!psame)
597
0
        return ERROR_INT("&same not defined", __func__, 1);
598
0
    *psame = -1;
599
0
    if (!boxas)
600
0
        return ERROR_INT("boxas not defined", __func__, 1);
601
0
    if (boxaGetValidCount(boxas) < 6)
602
0
        return ERROR_INT("need a least 6 valid boxes", __func__, 1);
603
0
    if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT)
604
0
        return ERROR_INT("invalid type", __func__, 1);
605
0
    if (threshp < 0.0 || threshp >= 0.5)
606
0
        return ERROR_INT("invalid threshp", __func__, 1);
607
0
    if (threshm < 0.0 || threshm >= 0.5)
608
0
        return ERROR_INT("invalid threshm", __func__, 1);
609
0
    if (threshp == 0.0) threshp = 0.02f;
610
0
    if (threshm == 0.0) threshm = 0.015f;
611
612
        /* Evaluate pairwise variation */
613
0
    n = boxaGetCount(boxas);
614
0
    na1 = numaCreate(0);
615
0
    for (i = 0, npairs = 0, sumdiff = 0; i < n - 1; i += 2) {
616
0
        boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw1, &bh1);
617
0
        boxaGetBoxGeometry(boxas, i + 1, NULL, NULL, &bw2, &bh2);
618
0
        if (bw1 == 0 || bh1 == 0 || bw2 == 0 || bh2 == 0)
619
0
            continue;
620
0
        npairs++;
621
0
        if (type == L_CHECK_WIDTH) {
622
0
            ave = (bw1 + bw2) / 2.0;
623
0
            fdiff = L_ABS(bw1 - bw2) / ave;
624
0
            numaAddNumber(na1, bw1);
625
0
            numaAddNumber(na1, bw2);
626
0
        } else {  /* type == L_CHECK_HEIGHT) */
627
0
            ave = (bh1 + bh2) / 2.0;
628
0
            fdiff = L_ABS(bh1 - bh2) / ave;
629
0
            numaAddNumber(na1, bh1);
630
0
            numaAddNumber(na1, bh2);
631
0
        }
632
0
        sumdiff += fdiff;
633
0
    }
634
0
    fvarp = sumdiff / npairs;
635
0
    if (pfvarp) *pfvarp = fvarp;
636
637
        /* Evaluate the average abs fractional deviation from the median */
638
0
    numaGetMedian(na1, &med);
639
0
    if (med == 0.0) {
640
0
        L_WARNING("median value is 0\n", __func__);
641
0
    } else {
642
0
        numaGetMeanDevFromMedian(na1, med, &fvarm);
643
0
        fvarm /= med;
644
0
        if (pfvarm) *pfvarm = fvarm;
645
0
    }
646
0
    numaDestroy(&na1);
647
648
        /* Make decision */
649
0
    if (fvarp < threshp && fvarm < threshm)
650
0
        *psame = 1;
651
0
    else if (fvarp < threshp && fvarm > threshm)
652
0
        *psame = 0;
653
0
    else
654
0
        *psame = -1;  /* unknown */
655
0
    return 0;
656
0
}
657
658
659
/*!
660
 * \brief   boxaReconcileAllByMedian()
661
 *
662
 * \param[in]    boxas    containing at least 6 valid boxes
663
 * \param[in]    select1  L_ADJUST_LEFT_AND_RIGHT or L_ADJUST_SKIP
664
 * \param[in]    select2  L_ADJUST_TOP_AND_BOT or L_ADJUST_SKIP
665
 * \param[in]    thresh   threshold number of pixels to make adjustment
666
 * \param[in]    extra    extra pixels to add beyond median value
667
 * \param[in]    pixadb   use NULL to skip debug output
668
 * \return  boxad  possibly adjusted from boxas; a copy of boxas on error
669
 *
670
 * <pre>
671
 * Notes:
672
 *      (1) This uses boxaReconcileSidesByMedian() to reconcile
673
 *          the left-and-right and/or top-and-bottom sides of the
674
 *          even and odd boxes, separately.
675
 *      (2) See boxaReconcileSidesByMedian() for use of %thresh and %extra.
676
 *      (3) If all box sides are within %thresh of the median value,
677
 *          the returned box will be identical to %boxas.
678
 * </pre>
679
 */
680
BOXA *
681
boxaReconcileAllByMedian(BOXA    *boxas,
682
                         l_int32  select1,
683
                         l_int32  select2,
684
                         l_int32  thresh,
685
                         l_int32  extra,
686
                         PIXA    *pixadb)
687
0
 {
688
0
l_int32  ncols;
689
0
BOXA    *boxa1e, *boxa1o, *boxa2e, *boxa2o, *boxa3e, *boxa3o, *boxad;
690
0
PIX     *pix1;
691
692
0
    if (!boxas)
693
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
694
0
    if (select1 != L_ADJUST_LEFT_AND_RIGHT && select1 != L_ADJUST_SKIP) {
695
0
        L_WARNING("invalid select1; returning copy\n", __func__);
696
0
        return boxaCopy(boxas, L_COPY);
697
0
    }
698
0
    if (select2 != L_ADJUST_TOP_AND_BOT && select2 != L_ADJUST_SKIP) {
699
0
        L_WARNING("invalid select2; returning copy\n", __func__);
700
0
        return boxaCopy(boxas, L_COPY);
701
0
    }
702
0
    if (thresh < 0) {
703
0
        L_WARNING("thresh must be >= 0; returning copy\n", __func__);
704
0
        return boxaCopy(boxas, L_COPY);
705
0
    }
706
0
    if (boxaGetValidCount(boxas) < 3) {
707
0
        L_WARNING("need at least 3 valid boxes; returning copy\n", __func__);
708
0
        return boxaCopy(boxas, L_COPY);
709
0
    }
710
711
        /* Adjust even and odd box sides separately */
712
0
    boxaSplitEvenOdd(boxas, 0, &boxa1e, &boxa1o);
713
0
    ncols = 1;
714
0
    if (select1 == L_ADJUST_LEFT_AND_RIGHT) {
715
0
        ncols += 2;
716
0
        boxa2e = boxaReconcileSidesByMedian(boxa1e, select1, thresh,
717
0
                                            extra, pixadb);
718
0
    } else {
719
0
        boxa2e = boxaCopy(boxa1e, L_COPY);
720
0
    }
721
0
    if (select2 == L_ADJUST_TOP_AND_BOT) {
722
0
        ncols += 2;
723
0
        boxa3e = boxaReconcileSidesByMedian(boxa2e, select2, thresh,
724
0
                                            extra, pixadb);
725
0
    } else {
726
0
        boxa3e = boxaCopy(boxa2e, L_COPY);
727
0
    }
728
0
    if (select1 == L_ADJUST_LEFT_AND_RIGHT)
729
0
        boxa2o = boxaReconcileSidesByMedian(boxa1o, select1, thresh,
730
0
                                            extra, pixadb);
731
0
    else
732
0
        boxa2o = boxaCopy(boxa1o, L_COPY);
733
0
    if (select2 == L_ADJUST_TOP_AND_BOT)
734
0
        boxa3o = boxaReconcileSidesByMedian(boxa2o, select2, thresh,
735
0
                                            extra, pixadb);
736
0
    else
737
0
        boxa3o = boxaCopy(boxa2o, L_COPY);
738
0
    boxad = boxaMergeEvenOdd(boxa3e, boxa3o, 0);
739
740
        /* This generates 2 sets of 3 or 5 plots in a row, depending
741
         * on whether select1 and select2 are true (not skipping).
742
         * The top row is for even boxes; the bottom row is for odd boxes. */
743
0
    if (pixadb) {
744
0
        lept_mkdir("lept/boxa");
745
0
        pix1 = pixaDisplayTiledInColumns(pixadb, ncols, 1.0, 30, 2);
746
0
        pixWrite("/tmp/lept/boxa/recon_sides.png", pix1, IFF_PNG);
747
0
        pixDestroy(&pix1);
748
0
    }
749
750
0
    boxaDestroy(&boxa1e);
751
0
    boxaDestroy(&boxa1o);
752
0
    boxaDestroy(&boxa2e);
753
0
    boxaDestroy(&boxa2o);
754
0
    boxaDestroy(&boxa3e);
755
0
    boxaDestroy(&boxa3o);
756
0
    return boxad;
757
0
}
758
759
760
/*!
761
 * \brief   boxaReconcileSidesByMedian()
762
 *
763
 * \param[in]    boxas    containing at least 3 valid boxes
764
 * \param[in]    select   L_ADJUST_LEFT, L_ADJUST_RIGHT, etc.
765
 * \param[in]    thresh   threshold number of pixels to make adjustment
766
 * \param[in]    extra    extra pixels to add beyond median value
767
 * \param[in]    pixadb   use NULL to skip debug output
768
 * \return  boxad  possibly adjusted from boxas; a copy of boxas on error
769
 *
770
 * <pre>
771
 * Notes:
772
 *      (1) This modifies individual box sides if their location differs
773
 *          significantly (>= %thresh) from the median value.
774
 *      (2) %select specifies which sides are to be checked.
775
 *      (3) %thresh specifies the tolerance for different side locations.
776
 *          Any box side that differs from the median by this much will
777
 *          be set to the median value, plus the %extra amount.
778
 *      (4) If %extra is positive, the box dimensions are expanded.
779
 *          For example, for the left side, a positive %extra results in
780
 *          moving the left side farther to the left (i.e., in a negative
781
 *          direction).
782
 *      (5) If all box sides are within %thresh - 1 of the median value,
783
 *          the returned box will be identical to %boxas.
784
 *      (6) N.B. If you expect that even and odd box sides should be
785
 *          significantly different, this function must be called separately
786
 *          on the even and odd boxes in %boxas.  Note also that the
787
 *          higher level function boxaReconcileAllByMedian() handles the
788
 *          even and odd box sides separately.
789
 * </pre>
790
 */
791
BOXA *
792
boxaReconcileSidesByMedian(BOXA    *boxas,
793
                           l_int32  select,
794
                           l_int32  thresh,
795
                           l_int32  extra,
796
                           PIXA    *pixadb)
797
0
 {
798
0
char     buf[128];
799
0
l_int32  i, n, diff;
800
0
l_int32  left, right, top, bot, medleft, medright, medtop, medbot;
801
0
BOX     *box;
802
0
BOXA    *boxa1, *boxad;
803
0
PIX     *pix;
804
805
0
    if (!boxas)
806
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
807
0
    if (select != L_ADJUST_LEFT && select != L_ADJUST_RIGHT &&
808
0
        select != L_ADJUST_TOP && select != L_ADJUST_BOT &&
809
0
        select != L_ADJUST_LEFT_AND_RIGHT && select != L_ADJUST_TOP_AND_BOT) {
810
0
        L_WARNING("invalid select; returning copy\n", __func__);
811
0
        return boxaCopy(boxas, L_COPY);
812
0
    }
813
0
    if (thresh < 0) {
814
0
        L_WARNING("thresh must be >= 0; returning copy\n", __func__);
815
0
        return boxaCopy(boxas, L_COPY);
816
0
    }
817
0
    if (boxaGetValidCount(boxas) < 3) {
818
0
        L_WARNING("need at least 3 valid boxes; returning copy\n", __func__);
819
0
        return boxaCopy(boxas, L_COPY);
820
0
    }
821
822
0
    if (select == L_ADJUST_LEFT_AND_RIGHT) {
823
0
        boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_LEFT, thresh, extra,
824
0
                                           pixadb);
825
0
        boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_RIGHT, thresh, extra,
826
0
                                           pixadb);
827
0
        boxaDestroy(&boxa1);
828
0
        return boxad;
829
0
    }
830
0
    if (select == L_ADJUST_TOP_AND_BOT) {
831
0
        boxa1 = boxaReconcileSidesByMedian(boxas, L_ADJUST_TOP, thresh, extra,
832
0
                                           pixadb);
833
0
        boxad = boxaReconcileSidesByMedian(boxa1, L_ADJUST_BOT, thresh, extra,
834
0
                                           pixadb);
835
0
        boxaDestroy(&boxa1);
836
0
        return boxad;
837
0
    }
838
839
0
    if (pixadb) {
840
0
        l_int32 ndb = pixaGetCount(pixadb);
841
0
        if (ndb == 0 || ndb == 5) {  /* first of even and odd box sets */
842
0
            adjustSidePlotName(buf, sizeof(buf), "init", select);
843
0
            boxaPlotSides(boxas, buf, NULL, NULL, NULL, NULL, &pix);
844
0
            pixaAddPix(pixadb, pix, L_INSERT);
845
0
        }
846
0
    }
847
848
0
    n = boxaGetCount(boxas);
849
0
    boxad = boxaCreate(n);
850
0
    if (select == L_ADJUST_LEFT) {
851
0
        boxaGetMedianVals(boxas, &medleft, NULL, NULL, NULL, NULL, NULL);
852
0
        for (i = 0; i < n; i++) {
853
0
            box = boxaGetBox(boxas, i, L_COPY);
854
0
            boxGetSideLocations(box, &left, NULL, NULL, NULL);
855
0
            diff = medleft - left;
856
0
            if (L_ABS(diff) >= thresh)
857
0
                boxAdjustSides(box, box, diff - extra, 0, 0, 0);
858
0
            boxaAddBox(boxad, box, L_INSERT);
859
0
        }
860
0
    } else if (select == L_ADJUST_RIGHT) {
861
0
        boxaGetMedianVals(boxas, NULL, NULL, &medright, NULL, NULL, NULL);
862
0
        for (i = 0; i < n; i++) {
863
0
            box = boxaGetBox(boxas, i, L_COPY);
864
0
            boxGetSideLocations(box, NULL, &right, NULL, NULL);
865
0
            diff = medright - right;
866
0
            if (L_ABS(diff) >= thresh)
867
0
                boxAdjustSides(box, box, 0, diff + extra, 0, 0);
868
0
            boxaAddBox(boxad, box, L_INSERT);
869
0
        }
870
0
    } else if (select == L_ADJUST_TOP) {
871
0
        boxaGetMedianVals(boxas, NULL, &medtop, NULL, NULL, NULL, NULL);
872
0
        for (i = 0; i < n; i++) {
873
0
            box = boxaGetBox(boxas, i, L_COPY);
874
0
            boxGetSideLocations(box, NULL, NULL, &top, NULL);
875
0
            diff = medtop - top;
876
0
            if (L_ABS(diff) >= thresh)
877
0
                boxAdjustSides(box, box, 0, 0, diff - extra, 0);
878
0
            boxaAddBox(boxad, box, L_INSERT);
879
0
        }
880
0
    } else {  /* select == L_ADJUST_BOT */
881
0
        boxaGetMedianVals(boxas, NULL, NULL, NULL, &medbot, NULL, NULL);
882
0
        for (i = 0; i < n; i++) {
883
0
            box = boxaGetBox(boxas, i, L_COPY);
884
0
            boxGetSideLocations(box, NULL, NULL, NULL, &bot);
885
0
            diff = medbot - bot;
886
0
            if (L_ABS(diff) >= thresh)
887
0
                boxAdjustSides(box, box, 0, 0, 0, diff + extra);
888
0
            boxaAddBox(boxad, box, L_INSERT);
889
0
        }
890
0
    }
891
892
0
    if (pixadb) {
893
0
        adjustSidePlotName(buf, sizeof(buf), "final", select);
894
0
        boxaPlotSides(boxad, buf, NULL, NULL, NULL, NULL, &pix);
895
0
        pixaAddPix(pixadb, pix, L_INSERT);
896
0
    }
897
0
    return boxad;
898
0
}
899
900
901
static void
902
adjustSidePlotName(char        *buf,
903
                   size_t       size,
904
                   const char  *preface,
905
                   l_int32      select)
906
0
{
907
0
    stringCopy(buf, preface, size - 8);
908
0
    if (select == L_ADJUST_LEFT)
909
0
        stringCat(buf, size, "-left");
910
0
    else if (select == L_ADJUST_RIGHT)
911
0
        stringCat(buf, size, "-right");
912
0
    else if (select == L_ADJUST_TOP)
913
0
        stringCat(buf, size, "-top");
914
0
    else if (select == L_ADJUST_BOT)
915
0
        stringCat(buf, size, "-bot");
916
0
}
917
918
919
/*!
920
 * \brief   boxaReconcileSizeByMedian()
921
 *
922
 * \param[in]    boxas    containing at least 6 valid boxes
923
 * \param[in]    type     L_CHECK_WIDTH, L_CHECK_HEIGHT, L_CHECK_BOTH
924
 * \param[in]    dfract   threshold fraction of dimensional variation from
925
 *                        median; in range (0 ... 1); typ. about 0.05.
926
 * \param[in]    sfract   threshold fraction of side variation from median;
927
 *                        in range (0 ... 1); typ. about 0.04.
928
 * \param[in]    factor   expansion for fixed box beyond median width;
929
 *                        should be near 1.0.
930
 * \param[out]   pnadelw  [optional] diff from median width for boxes
931
 *                        above threshold
932
 * \param[out]   pnadelh  [optional] diff from median height for boxes
933
 *                        above threshold
934
 * \param[out]   pratiowh [optional] ratio of median width/height of boxas
935
 * \return  boxad  possibly adjusted from boxas; a copy of boxas on error
936
 *
937
 * <pre>
938
 * Notes:
939
 *      (1) The basic idea is to identify significant differences in box
940
 *          dimension (either width or height) and modify the outlier boxes.
941
 *      (2) %type specifies if we are reconciling the width, height or both.
942
 *      (3) %dfract specifies the tolerance for different dimensions. Any
943
 *          box with a fractional difference from the median size that
944
 *          exceeds %dfract will be altered.
945
 *      (4) %sfract specifies the tolerance for different side locations.
946
 *          If a box has been marked by (3) for alteration, any side
947
 *          location that differs from the median side location by
948
 *          more than %sfract of the median dimension (medw or medh)
949
 *          will be moved.
950
 *      (5) Median width and height are found for all valid boxes (i.e.,
951
 *          for all boxes with width and height > 0.
952
 *          Median side locations are found separately for even and odd boxes,
953
 *          using only boxes that are "inliers"; i.e., that have been
954
 *          found by (3) to be within tolerance for width or height.
955
 *      (6) If all box dimensions are within threshold of the median size,
956
 *          just return a copy.  Otherwise, box sides of the outliers
957
 *          will be adjusted.
958
 *      (7) Using %sfract, sides that are sufficiently far from the median
959
 *          are first moved to the median value.  Then they are moved
960
 *          together (in or out) so that the final box dimension
961
 *          is %factor times the median dimension.
962
 *      (8) The arrays that are the initial deviation from median size
963
 *          (width and height) are optionally returned.  Also optionally
964
 *          returned is the median w/h asperity ratio of the input %boxas.
965
 * </pre>
966
 */
967
BOXA *
968
boxaReconcileSizeByMedian(BOXA       *boxas,
969
                          l_int32     type,
970
                          l_float32   dfract,
971
                          l_float32   sfract,
972
                          l_float32   factor,
973
                          NUMA      **pnadelw,
974
                          NUMA      **pnadelh,
975
                          l_float32  *pratiowh)
976
0
{
977
0
l_int32    i, n, ne, no, outfound, isvalid, ind, del, maxdel;
978
0
l_int32    medw, medh, bw, bh, left, right, top, bot;
979
0
l_int32    medleft, medlefte, medlefto, medright, medrighte, medrighto;
980
0
l_int32    medtop, medtope, medtopo, medbot, medbote, medboto;
981
0
l_float32  brat;
982
0
BOX       *box;
983
0
BOXA      *boxa1, *boxae, *boxao, *boxad;
984
0
NUMA      *naind, *nadelw, *nadelh;
985
986
0
    if (pnadelw) *pnadelw = NULL;
987
0
    if (pnadelh) *pnadelh = NULL;
988
0
    if (pratiowh) *pratiowh = 0.0;
989
0
    if (!boxas)
990
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
991
0
    if (type != L_CHECK_WIDTH && type != L_CHECK_HEIGHT &&
992
0
        type != L_CHECK_BOTH) {
993
0
        L_WARNING("invalid type; returning copy\n", __func__);
994
0
        return boxaCopy(boxas, L_COPY);
995
0
    }
996
0
    if (dfract <= 0.0 || dfract >= 0.5) {
997
0
        L_WARNING("invalid dimensional fract; returning copy\n", __func__);
998
0
        return boxaCopy(boxas, L_COPY);
999
0
    }
1000
0
    if (sfract <= 0.0 || sfract >= 0.5) {
1001
0
        L_WARNING("invalid side fract; returning copy\n", __func__);
1002
0
        return boxaCopy(boxas, L_COPY);
1003
0
    }
1004
0
    if (factor < 0.8 || factor > 1.25)
1005
0
        L_WARNING("factor %5.3f is typ. closer to 1.0\n", __func__, factor);
1006
0
    if (boxaGetValidCount(boxas) < 6) {
1007
0
        L_WARNING("need at least 6 valid boxes; returning copy\n", __func__);
1008
0
        return boxaCopy(boxas, L_COPY);
1009
0
    }
1010
1011
        /* If reconciling both width and height, optionally return array of
1012
         * median deviations and even/odd ratio for width measurements */
1013
0
    if (type == L_CHECK_BOTH) {
1014
0
        boxa1 = boxaReconcileSizeByMedian(boxas, L_CHECK_WIDTH, dfract, sfract,
1015
0
                                          factor, pnadelw, NULL, pratiowh);
1016
0
        boxad = boxaReconcileSizeByMedian(boxa1, L_CHECK_HEIGHT, dfract, sfract,
1017
0
                                          factor, NULL, pnadelh, NULL);
1018
0
        boxaDestroy(&boxa1);
1019
0
        return boxad;
1020
0
    }
1021
1022
0
    n = boxaGetCount(boxas);
1023
0
    naind = numaCreate(n);  /* outlier indicator array */
1024
0
    boxae = boxaCreate(0);  /* even inliers */
1025
0
    boxao = boxaCreate(0);  /* odd inliers */
1026
0
    outfound = FALSE;
1027
0
    if (type == L_CHECK_WIDTH) {
1028
0
        boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL,
1029
0
                             &nadelw, NULL);
1030
0
        if (pratiowh) {
1031
0
            *pratiowh = (l_float32)medw / (l_float32)medh;
1032
0
            L_INFO("median ratio w/h = %5.3f\n", __func__, *pratiowh);
1033
0
        }
1034
0
        if (pnadelw)
1035
0
            *pnadelw = nadelw;
1036
0
        else
1037
0
            numaDestroy(&nadelw);
1038
1039
            /* Check for outliers; assemble inliers */
1040
0
        for (i = 0; i < n; i++) {
1041
0
            if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) {
1042
0
                numaAddNumber(naind, 0);
1043
0
                continue;
1044
0
            }
1045
0
            boxGetGeometry(box, NULL, NULL, &bw, NULL);
1046
0
            brat = (l_float32)bw / (l_float32)medw;
1047
0
            if (brat < 1.0 - dfract || brat > 1.0 + dfract) {
1048
0
                outfound = TRUE;
1049
0
                numaAddNumber(naind, 1);
1050
0
                boxDestroy(&box);
1051
0
            } else {  /* add to inliers */
1052
0
                numaAddNumber(naind, 0);
1053
0
                if (i % 2 == 0)
1054
0
                    boxaAddBox(boxae, box, L_INSERT);
1055
0
                else
1056
0
                    boxaAddBox(boxao, box, L_INSERT);
1057
0
            }
1058
0
        }
1059
0
        if (!outfound) {  /* nothing to do */
1060
0
            numaDestroy(&naind);
1061
0
            boxaDestroy(&boxae);
1062
0
            boxaDestroy(&boxao);
1063
0
            L_INFO("no width outlier boxes found\n", __func__);
1064
0
            return boxaCopy(boxas, L_COPY);
1065
0
        }
1066
1067
            /* Get left/right parameters from inliers.  Handle the case
1068
             * where there are no inliers for one of the sets.  For example,
1069
             * when all the even boxes have a different dimension from
1070
             * the odd boxes, and the median arbitrarily gets assigned
1071
             * to the even boxes, there are no odd inliers; in that case,
1072
             * use the even inliers sides to decide whether to adjust
1073
             * the left or the right sides of individual outliers. */
1074
0
        L_INFO("fixing width of outlier boxes\n", __func__);
1075
0
        medlefte = medrighte = medlefto = medrighto = 0;
1076
0
        if ((ne = boxaGetValidCount(boxae)) > 0)
1077
0
            boxaGetMedianVals(boxae, &medlefte, NULL, &medrighte, NULL,
1078
0
                              NULL, NULL);
1079
0
        if ((no = boxaGetValidCount(boxao)) > 0)
1080
0
            boxaGetMedianVals(boxao, &medlefto, NULL, &medrighto, NULL,
1081
0
                              NULL, NULL);
1082
0
        if (ne == 0) {  /* use odd inliers values for both */
1083
0
            medlefte = medlefto;
1084
0
            medrighte = medrighto;
1085
0
        } else if (no == 0) {  /* use even inliers values for both */
1086
0
            medlefto = medlefte;
1087
0
            medrighto = medrighte;
1088
0
        }
1089
1090
            /* Adjust the left and/or right sides of outliers.
1091
             * For each box that is a dimensional outlier, consider each side.
1092
             * Any side that differs fractionally from the median value
1093
             * by more than %sfract times the median width (medw) is set to
1094
             * the median value for that side.  Then both sides are moved
1095
             * an equal distance in or out to make w = %factor * medw. */
1096
0
        boxad = boxaCreate(n);
1097
0
        maxdel = (l_int32)(sfract * medw + 0.5);
1098
0
        for (i = 0; i < n; i++) {
1099
0
            box = boxaGetBox(boxas, i, L_COPY);
1100
0
            boxIsValid(box, &isvalid);
1101
0
            numaGetIValue(naind, i, &ind);
1102
0
            medleft = (i % 2 == 0) ? medlefte : medlefto;
1103
0
            medright = (i % 2 == 0) ? medrighte : medrighto;
1104
0
            if (ind == 1 && isvalid) {  /* adjust sides */
1105
0
                boxGetSideLocations(box, &left, &right, NULL, NULL);
1106
0
                if (L_ABS(left - medleft) > maxdel) left = medleft;
1107
0
                if (L_ABS(right - medright) > maxdel) right = medright;
1108
0
                del = (l_int32)(factor * medw - (right - left)) / 2;
1109
0
                boxSetSide(box, L_SET_LEFT, left - del, 0);
1110
0
                boxSetSide(box, L_SET_RIGHT, right + del, 0);
1111
0
            }
1112
0
            boxaAddBox(boxad, box, L_INSERT);
1113
0
        }
1114
0
    } else {  /* L_CHECK_HEIGHT */
1115
0
        boxaMedianDimensions(boxas, &medw, &medh, NULL, NULL, NULL, NULL,
1116
0
                             NULL, &nadelh);
1117
0
        if (pratiowh) {
1118
0
            *pratiowh = (l_float32)medw / (l_float32)medh;
1119
0
            L_INFO("median ratio w/h = %5.3f\n", __func__, *pratiowh);
1120
0
        }
1121
0
        if (pnadelh)
1122
0
            *pnadelh = nadelh;
1123
0
        else
1124
0
            numaDestroy(&nadelh);
1125
1126
            /* Check for outliers; assemble inliers */
1127
0
        for (i = 0; i < n; i++) {
1128
0
            if ((box = boxaGetValidBox(boxas, i, L_COPY)) == NULL) {
1129
0
                numaAddNumber(naind, 0);
1130
0
                continue;
1131
0
            }
1132
0
            boxGetGeometry(box, NULL, NULL, NULL, &bh);
1133
0
            brat = (l_float32)bh / (l_float32)medh;
1134
0
            if (brat < 1.0 - dfract || brat > 1.0 + dfract) {
1135
0
                outfound = TRUE;
1136
0
                numaAddNumber(naind, 1);
1137
0
                boxDestroy(&box);
1138
0
            } else {  /* add to inliers */
1139
0
                numaAddNumber(naind, 0);
1140
0
                if (i % 2 == 0)
1141
0
                    boxaAddBox(boxae, box, L_INSERT);
1142
0
                else
1143
0
                    boxaAddBox(boxao, box, L_INSERT);
1144
0
            }
1145
0
        }
1146
0
        if (!outfound) {  /* nothing to do */
1147
0
            numaDestroy(&naind);
1148
0
            boxaDestroy(&boxae);
1149
0
            boxaDestroy(&boxao);
1150
0
            L_INFO("no height outlier boxes found\n", __func__);
1151
0
            return boxaCopy(boxas, L_COPY);
1152
0
        }
1153
1154
            /* Get top/bot parameters from inliers.  Handle the case
1155
             * where there are no inliers for one of the sets.  For example,
1156
             * when all the even boxes have a different dimension from
1157
             * the odd boxes, and the median arbitrarily gets assigned
1158
             * to the even boxes, there are no odd inliers; in that case,
1159
             * use the even inlier sides to decide whether to adjust
1160
             * the top or the bottom sides of individual outliers. */
1161
0
        L_INFO("fixing height of outlier boxes\n", __func__);
1162
0
        medlefte = medtope = medbote = medtopo = medboto = 0;
1163
0
        if ((ne = boxaGetValidCount(boxae)) > 0)
1164
0
            boxaGetMedianVals(boxae, NULL, &medtope, NULL, &medbote,
1165
0
                              NULL, NULL);
1166
0
        if ((no = boxaGetValidCount(boxao)) > 0)
1167
0
            boxaGetMedianVals(boxao, NULL, &medtopo, NULL, &medboto,
1168
0
                              NULL, NULL);
1169
0
        if (ne == 0) {  /* use odd inliers values for both */
1170
0
            medtope = medtopo;
1171
0
            medbote = medboto;
1172
0
        } else if (no == 0) {  /* use even inliers values for both */
1173
0
            medtopo = medtope;
1174
0
            medboto = medbote;
1175
0
        }
1176
1177
            /* Adjust the top and/or bottom sides of outliers.
1178
             * For each box that is a dimensional outlier, consider each side.
1179
             * Any side that differs fractionally from the median value
1180
             * by more than %sfract times the median height (medh) is
1181
             * set to the median value for that that side.  Then both
1182
             * sides are moved an equal distance in or out to make
1183
             * h = %factor * medh). */
1184
0
        boxad = boxaCreate(n);
1185
0
        maxdel = (l_int32)(sfract * medh + 0.5);
1186
0
        for (i = 0; i < n; i++) {
1187
0
            box = boxaGetBox(boxas, i, L_COPY);
1188
0
            boxIsValid(box, &isvalid);
1189
0
            numaGetIValue(naind, i, &ind);
1190
0
            medtop = (i % 2 == 0) ? medtope : medtopo;
1191
0
            medbot = (i % 2 == 0) ? medbote : medboto;
1192
0
            if (ind == 1 && isvalid) {  /* adjust sides */
1193
0
                boxGetSideLocations(box, NULL, NULL, &top, &bot);
1194
0
                if (L_ABS(top - medtop) > maxdel) top = medtop;
1195
0
                if (L_ABS(bot - medbot) > maxdel) bot = medbot;
1196
0
                del = (l_int32)(factor * medh - (bot - top)) / 2;  /* typ > 0 */
1197
0
                boxSetSide(box, L_SET_TOP, L_MAX(0, top - del), 0);
1198
0
                boxSetSide(box, L_SET_BOT, bot + del, 0);
1199
0
            }
1200
0
            boxaAddBox(boxad, box, L_INSERT);
1201
0
        }
1202
0
    }
1203
0
    numaDestroy(&naind);
1204
0
    boxaDestroy(&boxae);
1205
0
    boxaDestroy(&boxao);
1206
0
    return boxad;
1207
0
}
1208
1209
1210
/*!
1211
 * \brief   boxaPlotSides()
1212
 *
1213
 * \param[in]    boxa       source boxa
1214
 * \param[in]    plotname   [optional], can be NULL
1215
 * \param[out]   pnal       [optional] na of left sides
1216
 * \param[out]   pnat       [optional] na of top sides
1217
 * \param[out]   pnar       [optional] na of right sides
1218
 * \param[out]   pnab       [optional] na of bottom sides
1219
 * \param[out]   ppixd      pix of the output plot
1220
 * \return  0 if OK, 1 on error
1221
 *
1222
 * <pre>
1223
 * Notes:
1224
 *      (1) This debugging function shows the progression of the four
1225
 *          sides in the boxa.  There must be at least 2 boxes.
1226
 *      (2) If there are invalid boxes (e.g., if only even or odd
1227
 *          indices have valid boxes), this will fill them with the
1228
 *          nearest valid box before plotting.
1229
 *      (3) The plotfiles are put in /tmp/lept/plots/, and are named
1230
 *          either with %plotname or, if NULL, a default name.  If
1231
 *          %plotname is used, make sure it has no whitespace characters.
1232
 * </pre>
1233
 */
1234
l_ok
1235
boxaPlotSides(BOXA        *boxa,
1236
              const char  *plotname,
1237
              NUMA       **pnal,
1238
              NUMA       **pnat,
1239
              NUMA       **pnar,
1240
              NUMA       **pnab,
1241
              PIX        **ppixd)
1242
0
{
1243
0
char            buf[128], titlebuf[128];
1244
0
char           *dataname;
1245
0
static l_int32  plotid = 0;
1246
0
l_int32         n, i, w, h, left, top, right, bot;
1247
0
l_int32         debugprint = FALSE;  /* change to TRUE to spam stderr */
1248
0
l_float32       med, dev;
1249
0
BOXA           *boxat;
1250
0
GPLOT          *gplot;
1251
0
NUMA           *nal, *nat, *nar, *nab;
1252
1253
0
    if (pnal) *pnal = NULL;
1254
0
    if (pnat) *pnat = NULL;
1255
0
    if (pnar) *pnar = NULL;
1256
0
    if (pnab) *pnab = NULL;
1257
0
    if (ppixd) *ppixd = NULL;
1258
0
    if (!boxa)
1259
0
        return ERROR_INT("boxa not defined", __func__, 1);
1260
0
    if ((n = boxaGetCount(boxa)) < 2)
1261
0
        return ERROR_INT("less than 2 boxes", __func__, 1);
1262
0
    if (!ppixd)
1263
0
        return ERROR_INT("&pixd not defined", __func__, 1);
1264
1265
0
    boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0);
1266
1267
        /* Build the numas for each side */
1268
0
    nal = numaCreate(n);
1269
0
    nat = numaCreate(n);
1270
0
    nar = numaCreate(n);
1271
0
    nab = numaCreate(n);
1272
1273
0
    for (i = 0; i < n; i++) {
1274
0
        boxaGetBoxGeometry(boxat, i, &left, &top, &w, &h);
1275
0
        right = left + w - 1;
1276
0
        bot = top + h - 1;
1277
0
        numaAddNumber(nal, left);
1278
0
        numaAddNumber(nat, top);
1279
0
        numaAddNumber(nar, right);
1280
0
        numaAddNumber(nab, bot);
1281
0
    }
1282
0
    boxaDestroy(&boxat);
1283
1284
0
    lept_mkdir("lept/plots");
1285
0
    if (plotname) {
1286
0
        snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%s", plotname);
1287
0
        snprintf(titlebuf, sizeof(titlebuf), "%s: Box sides vs. box index",
1288
0
                 plotname);
1289
0
    } else {
1290
0
        snprintf(buf, sizeof(buf), "/tmp/lept/plots/sides.%d", plotid++);
1291
0
        snprintf(titlebuf, sizeof(titlebuf), "Box sides vs. box index");
1292
0
    }
1293
0
    gplot = gplotCreate(buf, GPLOT_PNG, titlebuf,
1294
0
                        "box index", "side location");
1295
0
    gplotAddPlot(gplot, NULL, nal, GPLOT_LINES, "left side");
1296
0
    gplotAddPlot(gplot, NULL, nat, GPLOT_LINES, "top side");
1297
0
    gplotAddPlot(gplot, NULL, nar, GPLOT_LINES, "right side");
1298
0
    gplotAddPlot(gplot, NULL, nab, GPLOT_LINES, "bottom side");
1299
0
    *ppixd = gplotMakeOutputPix(gplot);
1300
0
    gplotDestroy(&gplot);
1301
1302
0
    if (debugprint) {
1303
0
        dataname = (plotname) ? stringNew(plotname) : stringNew("no_name");
1304
0
        numaGetMedian(nal, &med);
1305
0
        numaGetMeanDevFromMedian(nal, med, &dev);
1306
0
        lept_stderr("%s left: med = %7.3f, meandev = %7.3f\n",
1307
0
                    dataname, med, dev);
1308
0
        numaGetMedian(nat, &med);
1309
0
        numaGetMeanDevFromMedian(nat, med, &dev);
1310
0
        lept_stderr("%s top: med = %7.3f, meandev = %7.3f\n",
1311
0
                    dataname, med, dev);
1312
0
        numaGetMedian(nar, &med);
1313
0
        numaGetMeanDevFromMedian(nar, med, &dev);
1314
0
        lept_stderr("%s right: med = %7.3f, meandev = %7.3f\n",
1315
0
                    dataname, med, dev);
1316
0
        numaGetMedian(nab, &med);
1317
0
        numaGetMeanDevFromMedian(nab, med, &dev);
1318
0
        lept_stderr("%s bot: med = %7.3f, meandev = %7.3f\n",
1319
0
                    dataname, med, dev);
1320
0
        LEPT_FREE(dataname);
1321
0
    }
1322
1323
0
    if (pnal)
1324
0
        *pnal = nal;
1325
0
    else
1326
0
        numaDestroy(&nal);
1327
0
    if (pnat)
1328
0
        *pnat = nat;
1329
0
    else
1330
0
        numaDestroy(&nat);
1331
0
    if (pnar)
1332
0
        *pnar = nar;
1333
0
    else
1334
0
        numaDestroy(&nar);
1335
0
    if (pnab)
1336
0
        *pnab = nab;
1337
0
    else
1338
0
        numaDestroy(&nab);
1339
0
    return 0;
1340
0
}
1341
1342
1343
/*!
1344
 * \brief   boxaPlotSizes()
1345
 *
1346
 * \param[in]    boxa       source boxa
1347
 * \param[in]    plotname   [optional], can be NULL
1348
 * \param[out]   pnaw       [optional] na of widths
1349
 * \param[out]   pnah       [optional] na of heights
1350
 * \param[out]   ppixd      pix of the output plot
1351
 * \return  0 if OK, 1 on error
1352
 *
1353
 * <pre>
1354
 * Notes:
1355
 *      (1) This debugging function shows the progression of box width
1356
 *          and height in the boxa.  There must be at least 2 boxes.
1357
 *      (2) If there are invalid boxes (e.g., if only even or odd
1358
 *          indices have valid boxes), this will fill them with the
1359
 *          nearest valid box before plotting.
1360
 *      (3) The plotfiles are put in /tmp/lept/plots/, and are named
1361
 *          either with %plotname or, if NULL, a default name.  If
1362
 *          %plotname is used, make sure it has no whitespace characters.
1363
 * </pre>
1364
 */
1365
l_ok
1366
boxaPlotSizes(BOXA        *boxa,
1367
              const char  *plotname,
1368
              NUMA       **pnaw,
1369
              NUMA       **pnah,
1370
              PIX        **ppixd)
1371
0
{
1372
0
char            buf[128], titlebuf[128];
1373
0
static l_int32  plotid = 0;
1374
0
l_int32         n, i, w, h;
1375
0
BOXA           *boxat;
1376
0
GPLOT          *gplot;
1377
0
NUMA           *naw, *nah;
1378
1379
0
    if (pnaw) *pnaw = NULL;
1380
0
    if (pnah) *pnah = NULL;
1381
0
    if (ppixd) *ppixd = NULL;
1382
0
    if (!boxa)
1383
0
        return ERROR_INT("boxa not defined", __func__, 1);
1384
0
    if ((n = boxaGetCount(boxa)) < 2)
1385
0
        return ERROR_INT("less than 2 boxes", __func__, 1);
1386
0
    if (!ppixd)
1387
0
        return ERROR_INT("&pixd not defined", __func__, 1);
1388
1389
0
    boxat = boxaFillSequence(boxa, L_USE_ALL_BOXES, 0);
1390
1391
        /* Build the numas for the width and height */
1392
0
    naw = numaCreate(n);
1393
0
    nah = numaCreate(n);
1394
0
    for (i = 0; i < n; i++) {
1395
0
        boxaGetBoxGeometry(boxat, i, NULL, NULL, &w, &h);
1396
0
        numaAddNumber(naw, w);
1397
0
        numaAddNumber(nah, h);
1398
0
    }
1399
0
    boxaDestroy(&boxat);
1400
1401
0
    lept_mkdir("lept/plots");
1402
0
    if (plotname) {
1403
0
        snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%s", plotname);
1404
0
        snprintf(titlebuf, sizeof(titlebuf), "%s: Box size vs. box index",
1405
0
                 plotname);
1406
0
    } else {
1407
0
        snprintf(buf, sizeof(buf), "/tmp/lept/plots/size.%d", plotid++);
1408
0
        snprintf(titlebuf, sizeof(titlebuf), "Box size vs. box index");
1409
0
    }
1410
0
    gplot = gplotCreate(buf, GPLOT_PNG, titlebuf,
1411
0
                        "box index", "box dimension");
1412
0
    gplotAddPlot(gplot, NULL, naw, GPLOT_LINES, "width");
1413
0
    gplotAddPlot(gplot, NULL, nah, GPLOT_LINES, "height");
1414
0
    *ppixd = gplotMakeOutputPix(gplot);
1415
0
    gplotDestroy(&gplot);
1416
1417
0
    if (pnaw)
1418
0
        *pnaw = naw;
1419
0
    else
1420
0
        numaDestroy(&naw);
1421
0
    if (pnah)
1422
0
        *pnah = nah;
1423
0
    else
1424
0
        numaDestroy(&nah);
1425
0
    return 0;
1426
0
}
1427
1428
1429
/*!
1430
 * \brief   boxaFillSequence()
1431
 *
1432
 * \param[in]    boxas      with at least 3 boxes
1433
 * \param[in]    useflag    L_USE_ALL_BOXES, L_USE_SAME_PARITY_BOXES
1434
 * \param[in]    debug      1 for debug output
1435
 * \return  boxad filled boxa, or NULL on error
1436
 *
1437
 * <pre>
1438
 * Notes:
1439
 *      (1) This simple function replaces invalid boxes with a copy of
1440
 *          the nearest valid box, selected from either the entire
1441
 *          sequence (L_USE_ALL_BOXES) or from the boxes with the
1442
 *          same parity (L_USE_SAME_PARITY_BOXES).  It returns a new boxa.
1443
 *      (2) This is useful if you expect boxes in the sequence to
1444
 *          vary slowly with index.
1445
 * </pre>
1446
 */
1447
BOXA *
1448
boxaFillSequence(BOXA    *boxas,
1449
                 l_int32  useflag,
1450
                 l_int32  debug)
1451
0
{
1452
0
l_int32  n, nv;
1453
0
BOXA    *boxae, *boxao, *boxad;
1454
1455
0
    if (!boxas)
1456
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
1457
0
    if (useflag != L_USE_ALL_BOXES && useflag != L_USE_SAME_PARITY_BOXES)
1458
0
        return (BOXA *)ERROR_PTR("invalid useflag", __func__, NULL);
1459
1460
0
    n = boxaGetCount(boxas);
1461
0
    nv = boxaGetValidCount(boxas);
1462
0
    if (n == nv)
1463
0
        return boxaCopy(boxas, L_COPY);  /* all valid */
1464
0
    if (debug)
1465
0
        L_INFO("%d valid boxes, %d invalid boxes\n", __func__, nv, n - nv);
1466
0
    if (useflag == L_USE_SAME_PARITY_BOXES && n < 3) {
1467
0
        L_WARNING("n < 3; some invalid\n", __func__);
1468
0
        return boxaCopy(boxas, L_COPY);
1469
0
    }
1470
1471
0
    if (useflag == L_USE_ALL_BOXES) {
1472
0
        boxad = boxaCopy(boxas, L_COPY);
1473
0
        boxaFillAll(boxad);
1474
0
    } else {
1475
0
        boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
1476
0
        boxaFillAll(boxae);
1477
0
        boxaFillAll(boxao);
1478
0
        boxad = boxaMergeEvenOdd(boxae, boxao, 0);
1479
0
        boxaDestroy(&boxae);
1480
0
        boxaDestroy(&boxao);
1481
0
    }
1482
1483
0
    nv = boxaGetValidCount(boxad);
1484
0
    if (n != nv)
1485
0
        L_WARNING("there are still %d invalid boxes\n", __func__, n - nv);
1486
1487
0
    return boxad;
1488
0
}
1489
1490
1491
/*!
1492
 * \brief   boxaFillAll()
1493
 *
1494
 * \param[in]    boxa
1495
 * \return  0 if OK, 1 on error
1496
 *
1497
 * <pre>
1498
 * Notes:
1499
 *      (1) This static function replaces every invalid box with the
1500
 *          nearest valid box.  If there are no valid boxes, it
1501
 *          issues a warning.
1502
 * </pre>
1503
 */
1504
static l_int32
1505
boxaFillAll(BOXA  *boxa)
1506
0
{
1507
0
l_int32   n, nv, i, j, spandown, spanup;
1508
0
l_int32  *indic;
1509
0
BOX      *box, *boxt;
1510
1511
0
    if (!boxa)
1512
0
        return ERROR_INT("boxa not defined", __func__, 1);
1513
0
    n = boxaGetCount(boxa);
1514
0
    nv = boxaGetValidCount(boxa);
1515
0
    if (n == nv) return 0;
1516
0
    if (nv == 0) {
1517
0
        L_WARNING("no valid boxes out of %d boxes\n", __func__, n);
1518
0
        return 0;
1519
0
    }
1520
1521
        /* Make indicator array for valid boxes */
1522
0
    if ((indic = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32))) == NULL)
1523
0
        return ERROR_INT("indic not made", __func__, 1);
1524
0
    for (i = 0; i < n; i++) {
1525
0
        box = boxaGetValidBox(boxa, i, L_CLONE);
1526
0
        if (box)
1527
0
            indic[i] = 1;
1528
0
        boxDestroy(&box);
1529
0
    }
1530
1531
        /* Replace invalid boxes with the nearest valid one */
1532
0
    for (i = 0; i < n; i++) {
1533
0
        box = boxaGetValidBox(boxa, i, L_CLONE);
1534
0
        if (!box) {
1535
0
            spandown = spanup = 10000000;
1536
0
            for (j = i - 1; j >= 0; j--) {
1537
0
                if (indic[j] == 1) {
1538
0
                    spandown = i - j;
1539
0
                    break;
1540
0
                }
1541
0
            }
1542
0
            for (j = i + 1; j < n; j++) {
1543
0
                if (indic[j] == 1) {
1544
0
                    spanup = j - i;
1545
0
                    break;
1546
0
                }
1547
0
            }
1548
0
            if (spandown < spanup)
1549
0
                boxt = boxaGetBox(boxa, i - spandown, L_COPY);
1550
0
            else
1551
0
                boxt = boxaGetBox(boxa, i + spanup, L_COPY);
1552
0
            boxaReplaceBox(boxa, i, boxt);
1553
0
        }
1554
0
        boxDestroy(&box);
1555
0
    }
1556
1557
0
    LEPT_FREE(indic);
1558
0
    return 0;
1559
0
}
1560
1561
1562
/*!
1563
 * \brief   boxaSizeVariation()
1564
 *
1565
 * \param[in]    boxa           at least 4 boxes
1566
 * \param[in]    type           L_SELECT_WIDTH, L_SELECT_HEIGHT
1567
 * \param[out]   pdel_evenodd   [optional] average absolute value of
1568
 *                              (even - odd) size pairs
1569
 * \param[out]   prms_even      [optional] rms deviation of even boxes
1570
 * \param[out]   prms_odd       [optional] rms deviation of odd boxes
1571
 * \param[out]   prms_all       [optional] rms deviation of all boxes
1572
 * \return  0 if OK, 1 on error
1573
 *
1574
 * <pre>
1575
 * Notes:
1576
 *      (1) This gives several measures of the smoothness of either the
1577
 *          width or height of a sequence of boxes.
1578
 *          See boxaMedianDimensions() for some other measures.
1579
 *      (2) Statistics can be found separately for even and odd boxes.
1580
 *          Additionally, the average pair-wise difference between
1581
 *          adjacent even and odd boxes can be returned.
1582
 *      (3) The use case is bounding boxes for scanned page images,
1583
 *          where ideally the sizes should have little variance.
1584
 * </pre>
1585
 */
1586
l_ok
1587
boxaSizeVariation(BOXA       *boxa,
1588
                  l_int32     type,
1589
                  l_float32  *pdel_evenodd,
1590
                  l_float32  *prms_even,
1591
                  l_float32  *prms_odd,
1592
                  l_float32  *prms_all)
1593
0
{
1594
0
l_int32    n, ne, no, nmin, vale, valo, i;
1595
0
l_float32  sum;
1596
0
BOXA      *boxae, *boxao;
1597
0
NUMA      *nae, *nao, *na_all;
1598
1599
0
    if (pdel_evenodd) *pdel_evenodd = 0.0;
1600
0
    if (prms_even) *prms_even = 0.0;
1601
0
    if (prms_odd) *prms_odd = 0.0;
1602
0
    if (prms_all) *prms_all = 0.0;
1603
0
    if (!boxa)
1604
0
        return ERROR_INT("boxa not defined", __func__, 1);
1605
0
    if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT)
1606
0
        return ERROR_INT("invalid type", __func__, 1);
1607
0
    if (!pdel_evenodd && !prms_even && !prms_odd && !prms_all)
1608
0
        return ERROR_INT("nothing to do", __func__, 1);
1609
0
    n = boxaGetCount(boxa);
1610
0
    if (n < 4)
1611
0
        return ERROR_INT("too few boxes", __func__, 1);
1612
1613
0
    boxaSplitEvenOdd(boxa, 0, &boxae, &boxao);
1614
0
    ne = boxaGetCount(boxae);
1615
0
    no = boxaGetCount(boxao);
1616
0
    nmin = L_MIN(ne, no);
1617
0
    if (nmin == 0) {
1618
0
        boxaDestroy(&boxae);
1619
0
        boxaDestroy(&boxao);
1620
0
        return ERROR_INT("either no even or no odd boxes", __func__, 1);
1621
0
    }
1622
1623
0
    if (type == L_SELECT_WIDTH) {
1624
0
        boxaGetSizes(boxae, &nae, NULL);
1625
0
        boxaGetSizes(boxao, &nao, NULL);
1626
0
        boxaGetSizes(boxa, &na_all, NULL);
1627
0
    } else {   /* L_SELECT_HEIGHT) */
1628
0
        boxaGetSizes(boxae, NULL, &nae);
1629
0
        boxaGetSizes(boxao, NULL, &nao);
1630
0
        boxaGetSizes(boxa, NULL, &na_all);
1631
0
    }
1632
1633
0
    if (pdel_evenodd) {
1634
0
        sum = 0.0;
1635
0
        for (i = 0; i < nmin; i++) {
1636
0
            numaGetIValue(nae, i, &vale);
1637
0
            numaGetIValue(nao, i, &valo);
1638
0
            sum += L_ABS(vale - valo);
1639
0
        }
1640
0
        *pdel_evenodd = sum / nmin;
1641
0
    }
1642
0
    if (prms_even)
1643
0
        numaSimpleStats(nae, 0, -1, NULL, NULL, prms_even);
1644
0
    if (prms_odd)
1645
0
        numaSimpleStats(nao, 0, -1, NULL, NULL, prms_odd);
1646
0
    if (prms_all)
1647
0
        numaSimpleStats(na_all, 0, -1, NULL, NULL, prms_all);
1648
1649
0
    boxaDestroy(&boxae);
1650
0
    boxaDestroy(&boxao);
1651
0
    numaDestroy(&nae);
1652
0
    numaDestroy(&nao);
1653
0
    numaDestroy(&na_all);
1654
0
    return 0;
1655
0
}
1656
1657
1658
/*!
1659
 * \brief   boxaMedianDimensions()
1660
 *
1661
 * \param[in]    boxas    containing at least 3 valid boxes in even and odd
1662
 * \param[out]   pmedw    [optional] median width of all boxes
1663
 * \param[out]   pmedh    [optional] median height of all boxes
1664
 * \param[out]   pmedwe   [optional] median width of even boxes
1665
 * \param[out]   pmedwo   [optional] median width of odd boxes
1666
 * \param[out]   pmedhe   [optional] median height of even boxes
1667
 * \param[out]   pmedho   [optional] median height of odd boxes
1668
 * \param[out]   pnadelw  [optional] width diff of each box from median
1669
 * \param[out]   pnadelh  [optional] height diff of each box from median
1670
 * \return  0 if OK, 1 on error
1671
 *
1672
 * <pre>
1673
 * Notes:
1674
 *      (1) This provides information that (1) allows identification of
1675
 *          boxes that have unusual (outlier) width or height, and (2) can
1676
 *          be used to regularize the sizes of the outlier boxes, assuming
1677
 *          that the boxes satisfy a fairly regular sequence and should
1678
 *          mostly have the same width and height.
1679
 *      (2) This finds the median width and height, as well as separate
1680
 *          median widths and heights of even and odd boxes.  It also
1681
 *          generates arrays that give the difference in width and height
1682
 *          of each box from the median, which can be used to correct
1683
 *          individual boxes.
1684
 *      (3) All return values are optional.
1685
 * </pre>
1686
 */
1687
l_ok
1688
boxaMedianDimensions(BOXA     *boxas,
1689
                     l_int32  *pmedw,
1690
                     l_int32  *pmedh,
1691
                     l_int32  *pmedwe,
1692
                     l_int32  *pmedwo,
1693
                     l_int32  *pmedhe,
1694
                     l_int32  *pmedho,
1695
                     NUMA    **pnadelw,
1696
                     NUMA    **pnadelh)
1697
0
{
1698
0
l_int32  i, n, bw, bh, medw, medh, medwe, medwo, medhe, medho;
1699
0
BOXA    *boxae, *boxao;
1700
0
NUMA    *nadelw, *nadelh;
1701
1702
0
    if (pmedw) *pmedw = 0;
1703
0
    if (pmedh) *pmedh = 0;
1704
0
    if (pmedwe) *pmedwe= 0;
1705
0
    if (pmedwo) *pmedwo= 0;
1706
0
    if (pmedhe) *pmedhe= 0;
1707
0
    if (pmedho) *pmedho= 0;
1708
0
    if (pnadelw) *pnadelw = NULL;
1709
0
    if (pnadelh) *pnadelh = NULL;
1710
0
    if (!boxas)
1711
0
        return ERROR_INT("boxas not defined", __func__, 1);
1712
0
    if (boxaGetValidCount(boxas) < 6)
1713
0
        return ERROR_INT("need at least 6 valid boxes", __func__, 1);
1714
1715
        /* Require at least 3 valid boxes of both types */
1716
0
    boxaSplitEvenOdd(boxas, 0, &boxae, &boxao);
1717
0
    if (boxaGetValidCount(boxae) < 3 || boxaGetValidCount(boxao) < 3) {
1718
0
        boxaDestroy(&boxae);
1719
0
        boxaDestroy(&boxao);
1720
0
        return ERROR_INT("don't have 3+ valid boxes of each type", __func__, 1);
1721
0
    }
1722
1723
        /* Get the relevant median widths and heights */
1724
0
    boxaGetMedianVals(boxas, NULL, NULL, NULL, NULL, &medw, &medh);
1725
0
    boxaGetMedianVals(boxae, NULL, NULL, NULL, NULL, &medwe, &medhe);
1726
0
    boxaGetMedianVals(boxao, NULL, NULL, NULL, NULL, &medwo, &medho);
1727
0
    if (pmedw) *pmedw = medw;
1728
0
    if (pmedh) *pmedh = medh;
1729
0
    if (pmedwe) *pmedwe = medwe;
1730
0
    if (pmedwo) *pmedwo = medwo;
1731
0
    if (pmedhe) *pmedhe = medhe;
1732
0
    if (pmedho) *pmedho = medho;
1733
1734
        /* Find the variation from median dimension for each box */
1735
0
    n = boxaGetCount(boxas);
1736
0
    nadelw = numaCreate(n);
1737
0
    nadelh = numaCreate(n);
1738
0
    for (i = 0; i < n; i++) {
1739
0
        boxaGetBoxGeometry(boxas, i, NULL, NULL, &bw, &bh);
1740
0
        if (bw == 0 || bh == 0) {  /* invalid box */
1741
0
            numaAddNumber(nadelw, 0);
1742
0
            numaAddNumber(nadelh, 0);
1743
0
        } else {
1744
0
            numaAddNumber(nadelw, bw - medw);
1745
0
            numaAddNumber(nadelh, bh - medh);
1746
0
        }
1747
0
    }
1748
0
    if (pnadelw)
1749
0
        *pnadelw = nadelw;
1750
0
    else
1751
0
        numaDestroy(&nadelw);
1752
0
    if (pnadelh)
1753
0
        *pnadelh = nadelh;
1754
0
    else
1755
0
        numaDestroy(&nadelh);
1756
1757
0
    boxaDestroy(&boxae);
1758
0
    boxaDestroy(&boxao);
1759
0
    return 0;
1760
0
}
1761