Coverage Report

Created: 2025-06-13 07:15

/src/leptonica/src/boxfunc2.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  boxfunc2.c
29
 * <pre>
30
 *
31
 *      Boxa/Box transform (shift, scale) and orthogonal rotation
32
 *           BOXA            *boxaTransform()
33
 *           BOX             *boxTransform()
34
 *           BOXA            *boxaTransformOrdered()
35
 *           BOX             *boxTransformOrdered()
36
 *           BOXA            *boxaRotateOrth()
37
 *           BOX             *boxRotateOrth()
38
 *           BOXA            *boxaShiftWithPta()
39
 *
40
 *      Boxa sort
41
 *           BOXA            *boxaSort()
42
 *           BOXA            *boxaBinSort()
43
 *           BOXA            *boxaSortByIndex()
44
 *           BOXAA           *boxaSort2d()
45
 *           BOXAA           *boxaSort2dByIndex()
46
 *
47
 *      Boxa statistics
48
 *           l_int32          boxaGetRankVals()
49
 *           l_int32          boxaGetMedianVals()
50
 *           l_int32          boxaGetAverageSize()
51
 *
52
 *      Boxa array extraction
53
 *           l_int32          boxaExtractAsNuma()
54
 *           l_int32          boxaExtractAsPta()
55
 *           PTA             *boxaExtractCorners()
56
 *
57
 *      Other Boxaa functions
58
 *           l_int32          boxaaGetExtent()
59
 *           BOXA            *boxaaFlattenToBoxa()
60
 *           BOXA            *boxaaFlattenAligned()
61
 *           BOXAA           *boxaEncapsulateAligned()
62
 *           BOXAA           *boxaaTranspose()
63
 *           l_int32          boxaaAlignBox()
64
 * </pre>
65
 */
66
67
#ifdef HAVE_CONFIG_H
68
#include <config_auto.h>
69
#endif  /* HAVE_CONFIG_H */
70
71
#include <math.h>
72
#include "allheaders.h"
73
#include "pix_internal.h"
74
75
    /* For more than this number of c.c. in a binarized image of
76
     * semi-perimeter (w + h) about 5000 or less, the O(n) binsort
77
     * is faster than the O(nlogn) shellsort.  */
78
static const l_int32   MinCompsForBinSort = 200;
79
80
/*---------------------------------------------------------------------*
81
 *      Boxa/Box transform (shift, scale) and orthogonal rotation      *
82
 *---------------------------------------------------------------------*/
83
/*!
84
 * \brief   boxaTransform()
85
 *
86
 * \param[in]    boxas
87
 * \param[in]    shiftx
88
 * \param[in]    shifty
89
 * \param[in]    scalex
90
 * \param[in]    scaley
91
 * \return  boxad, or NULL on error
92
 *
93
 * <pre>
94
 * Notes:
95
 *      (1) This is a very simple function that first shifts, then scales.
96
 *      (2) The UL corner coordinates of all boxes in the output %boxad
97
 *      (3) For the boxes in the output %boxad, the UL corner coordinates
98
 *          must be non-negative, and the width and height of valid
99
 *          boxes must be at least 1.
100
 * </pre>
101
 */
102
BOXA *
103
boxaTransform(BOXA      *boxas,
104
              l_int32    shiftx,
105
              l_int32    shifty,
106
              l_float32  scalex,
107
              l_float32  scaley)
108
0
{
109
0
l_int32  i, n;
110
0
BOX     *boxs, *boxd;
111
0
BOXA    *boxad;
112
113
0
    if (!boxas)
114
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
115
0
    n = boxaGetCount(boxas);
116
0
    if ((boxad = boxaCreate(n)) == NULL)
117
0
        return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
118
0
    for (i = 0; i < n; i++) {
119
0
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
120
0
            boxaDestroy(&boxad);
121
0
            return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL);
122
0
        }
123
0
        boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
124
0
        boxDestroy(&boxs);
125
0
        boxaAddBox(boxad, boxd, L_INSERT);
126
0
    }
127
128
0
    return boxad;
129
0
}
130
131
132
/*!
133
 * \brief   boxTransform()
134
 *
135
 * \param[in]    box
136
 * \param[in]    shiftx
137
 * \param[in]    shifty
138
 * \param[in]    scalex
139
 * \param[in]    scaley
140
 * \return  boxd, or NULL on error
141
 *
142
 * <pre>
143
 * Notes:
144
 *      (1) This is a very simple function that first shifts, then scales.
145
 *      (2) If the box is invalid, a new invalid box is returned.
146
 *      (3) The UL corner coordinates must be non-negative, and the
147
 *          width and height of valid boxes must be at least 1.
148
 * </pre>
149
 */
150
BOX *
151
boxTransform(BOX       *box,
152
             l_int32    shiftx,
153
             l_int32    shifty,
154
             l_float32  scalex,
155
             l_float32  scaley)
156
0
{
157
0
    if (!box)
158
0
        return (BOX *)ERROR_PTR("box not defined", __func__, NULL);
159
0
    if (box->w <= 0 || box->h <= 0)
160
0
        return boxCreate(0, 0, 0, 0);
161
0
    else
162
0
        return boxCreate((l_int32)(L_MAX(0, scalex * (box->x + shiftx) + 0.5)),
163
0
                         (l_int32)(L_MAX(0, scaley * (box->y + shifty) + 0.5)),
164
0
                         (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)),
165
0
                         (l_int32)(L_MAX(1.0, scaley * box->h + 0.5)));
166
0
}
167
168
169
/*!
170
 * \brief   boxaTransformOrdered()
171
 *
172
 * \param[in]    boxas
173
 * \param[in]    shiftx
174
 * \param[in]    shifty
175
 * \param[in]    scalex
176
 * \param[in]    scaley
177
 * \param[in]    xcen, ycen    center of rotation
178
 * \param[in]    angle         in radians; clockwise is positive
179
 * \param[in]    order         one of 6 combinations: L_TR_SC_RO, ...
180
 * \return  boxd, or NULL on error
181
 *
182
 * <pre>
183
 *          shift, scaling and rotation, and the order of the
184
 *          transforms is specified.
185
 *      (2) Although these operations appear to be on an infinite
186
 *          2D plane, in practice the region of interest is clipped
187
 *          to a finite image.  The center of rotation is usually taken
188
 *          with respect to the image (either the UL corner or the
189
 *          center).  A translation can have two very different effects:
190
 *            (a) Moves the boxes across the fixed image region.
191
 *            (b) Moves the image origin, causing a change in the image
192
 *                region and an opposite effective translation of the boxes.
193
 *          This function should only be used for (a), where the image
194
 *          region is fixed on translation.  If the image region is
195
 *          changed by the translation, use instead the functions
196
 *          in affinecompose.c, where the image region and rotation
197
 *          center can be computed from the actual clipping due to
198
 *          translation of the image origin.
199
 *      (3) See boxTransformOrdered() for usage and implementation details.
200
 * </pre>
201
 */
202
BOXA *
203
boxaTransformOrdered(BOXA      *boxas,
204
                     l_int32    shiftx,
205
                     l_int32    shifty,
206
                     l_float32  scalex,
207
                     l_float32  scaley,
208
                     l_int32    xcen,
209
                     l_int32    ycen,
210
                     l_float32  angle,
211
                     l_int32    order)
212
0
{
213
0
l_int32  i, n;
214
0
BOX     *boxs, *boxd;
215
0
BOXA    *boxad;
216
217
0
    if (!boxas)
218
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
219
0
    n = boxaGetCount(boxas);
220
0
    if ((boxad = boxaCreate(n)) == NULL)
221
0
        return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
222
0
    for (i = 0; i < n; i++) {
223
0
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
224
0
            boxaDestroy(&boxad);
225
0
            return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL);
226
0
        }
227
0
        boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley,
228
0
                                   xcen, ycen, angle, order);
229
0
        boxDestroy(&boxs);
230
0
        boxaAddBox(boxad, boxd, L_INSERT);
231
0
    }
232
233
0
    return boxad;
234
0
}
235
236
237
/*!
238
 * \brief   boxTransformOrdered()
239
 *
240
 * \param[in]    boxs
241
 * \param[in]    shiftx
242
 * \param[in]    shifty
243
 * \param[in]    scalex
244
 * \param[in]    scaley
245
 * \param[in]    xcen, ycen   center of rotation
246
 * \param[in]    angle        in radians; clockwise is positive
247
 * \param[in]    order        one of 6 combinations: L_TR_SC_RO, ...
248
 * \return  boxd, or NULL on error
249
 *
250
 * <pre>
251
 * Notes:
252
 *      (1) This allows a sequence of linear transforms, composed of
253
 *          shift, scaling and rotation, where the order of the
254
 *          transforms is specified.
255
 *      (2) The rotation is taken about a point specified by (xcen, ycen).
256
 *          Let the components of the vector from the center of rotation
257
 *          to the box center be (xdif, ydif):
258
 *            xdif = (bx + 0.5 * bw) - xcen
259
 *            ydif = (by + 0.5 * bh) - ycen
260
 *          Then the box center after rotation has new components:
261
 *            bxcen = xcen + xdif * cosa + ydif * sina
262
 *            bycen = ycen + ydif * cosa - xdif * sina
263
 *          where cosa and sina are the cos and sin of the angle,
264
 *          and the enclosing box for the rotated box has size:
265
 *            rw = |bw * cosa| + |bh * sina|
266
 *            rh = |bh * cosa| + |bw * sina|
267
 *          where bw and bh are the unrotated width and height.
268
 *          Then the box UL corner (rx, ry) is
269
 *            rx = bxcen - 0.5 * rw
270
 *            ry = bycen - 0.5 * rh
271
 *      (3) The center of rotation specified by args %xcen and %ycen
272
 *          is the point BEFORE any translation or scaling.  If the
273
 *          rotation is not the first operation, this function finds
274
 *          the actual center at the time of rotation.  It does this
275
 *          by making the following assumptions:
276
 *             (1) Any scaling is with respect to the UL corner, so
277
 *                 that the center location scales accordingly.
278
 *             (2) A translation does not affect the center of
279
 *                 the image; it just moves the boxes.
280
 *          We always use assumption (1).  However, assumption (2)
281
 *          will be incorrect if the apparent translation is due
282
 *          to a clipping operation that, in effect, moves the
283
 *          origin of the image.  In that case, you should NOT use
284
 *          these simple functions.  Instead, use the functions
285
 *          in affinecompose.c, where the rotation center can be
286
 *          computed from the actual clipping due to translation
287
 *          of the image origin.
288
 * </pre>
289
 */
290
BOX *
291
boxTransformOrdered(BOX       *boxs,
292
                    l_int32    shiftx,
293
                    l_int32    shifty,
294
                    l_float32  scalex,
295
                    l_float32  scaley,
296
                    l_int32    xcen,
297
                    l_int32    ycen,
298
                    l_float32  angle,
299
                    l_int32    order)
300
0
{
301
0
l_int32    bx, by, bw, bh, tx, ty, tw, th;
302
0
l_int32    xcent, ycent;  /* transformed center of rotation due to scaling */
303
0
l_float32  sina, cosa, xdif, ydif, rx, ry, rw, rh;
304
0
BOX       *boxd;
305
306
0
    if (!boxs)
307
0
        return (BOX *)ERROR_PTR("boxs not defined", __func__, NULL);
308
0
    if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC &&
309
0
        order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO)
310
0
        return (BOX *)ERROR_PTR("order invalid", __func__, NULL);
311
312
0
    boxGetGeometry(boxs, &bx, &by, &bw, &bh);
313
0
    if (bw <= 0 || bh <= 0)  /* invalid */
314
0
        return boxCreate(0, 0, 0, 0);
315
0
    if (angle != 0.0) {
316
0
        sina = sin(angle);
317
0
        cosa = cos(angle);
318
0
    }
319
320
0
    if (order == L_TR_SC_RO) {
321
0
        tx = (l_int32)(scalex * (bx + shiftx) + 0.5);
322
0
        ty = (l_int32)(scaley * (by + shifty) + 0.5);
323
0
        tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
324
0
        th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
325
0
        xcent = (l_int32)(scalex * xcen + 0.5);
326
0
        ycent = (l_int32)(scaley * ycen + 0.5);
327
0
        if (angle == 0.0) {
328
0
            boxd = boxCreate(tx, ty, tw, th);
329
0
        } else {
330
0
            xdif = tx + 0.5 * tw - xcent;
331
0
            ydif = ty + 0.5 * th - ycent;
332
0
            rw = L_ABS(tw * cosa) + L_ABS(th * sina);
333
0
            rh = L_ABS(th * cosa) + L_ABS(tw * sina);
334
0
            rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
335
0
            ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
336
0
            boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
337
0
                             (l_int32)rh);
338
0
        }
339
0
    } else if (order == L_SC_TR_RO) {
340
0
        tx = (l_int32)(scalex * bx + shiftx + 0.5);
341
0
        ty = (l_int32)(scaley * by + shifty + 0.5);
342
0
        tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
343
0
        th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
344
0
        xcent = (l_int32)(scalex * xcen + 0.5);
345
0
        ycent = (l_int32)(scaley * ycen + 0.5);
346
0
        if (angle == 0.0) {
347
0
            boxd = boxCreate(tx, ty, tw, th);
348
0
        } else {
349
0
            xdif = tx + 0.5 * tw - xcent;
350
0
            ydif = ty + 0.5 * th - ycent;
351
0
            rw = L_ABS(tw * cosa) + L_ABS(th * sina);
352
0
            rh = L_ABS(th * cosa) + L_ABS(tw * sina);
353
0
            rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
354
0
            ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
355
0
            boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
356
0
                             (l_int32)rh);
357
0
        }
358
0
    } else if (order == L_RO_TR_SC) {
359
0
        if (angle == 0.0) {
360
0
            rx = bx;
361
0
            ry = by;
362
0
            rw = bw;
363
0
            rh = bh;
364
0
        } else {
365
0
            xdif = bx + 0.5 * bw - xcen;
366
0
            ydif = by + 0.5 * bh - ycen;
367
0
            rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
368
0
            rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
369
0
            rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
370
0
            ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
371
0
        }
372
0
        tx = (l_int32)(scalex * (rx + shiftx) + 0.5);
373
0
        ty = (l_int32)(scaley * (ry + shifty) + 0.5);
374
0
        tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
375
0
        th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
376
0
        boxd = boxCreate(tx, ty, tw, th);
377
0
    } else if (order == L_RO_SC_TR) {
378
0
        if (angle == 0.0) {
379
0
            rx = bx;
380
0
            ry = by;
381
0
            rw = bw;
382
0
            rh = bh;
383
0
        } else {
384
0
            xdif = bx + 0.5 * bw - xcen;
385
0
            ydif = by + 0.5 * bh - ycen;
386
0
            rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
387
0
            rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
388
0
            rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
389
0
            ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
390
0
        }
391
0
        tx = (l_int32)(scalex * rx + shiftx + 0.5);
392
0
        ty = (l_int32)(scaley * ry + shifty + 0.5);
393
0
        tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
394
0
        th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
395
0
        boxd = boxCreate(tx, ty, tw, th);
396
0
    } else if (order == L_TR_RO_SC) {
397
0
        tx = bx + shiftx;
398
0
        ty = by + shifty;
399
0
        if (angle == 0.0) {
400
0
            rx = tx;
401
0
            ry = ty;
402
0
            rw = bw;
403
0
            rh = bh;
404
0
        } else {
405
0
            xdif = tx + 0.5 * bw - xcen;
406
0
            ydif = ty + 0.5 * bh - ycen;
407
0
            rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
408
0
            rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
409
0
            rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
410
0
            ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
411
0
        }
412
0
        tx = (l_int32)(scalex * rx + 0.5);
413
0
        ty = (l_int32)(scaley * ry + 0.5);
414
0
        tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
415
0
        th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
416
0
        boxd = boxCreate(tx, ty, tw, th);
417
0
    } else {  /* order == L_SC_RO_TR) */
418
0
        tx = (l_int32)(scalex * bx + 0.5);
419
0
        ty = (l_int32)(scaley * by + 0.5);
420
0
        tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
421
0
        th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
422
0
        xcent = (l_int32)(scalex * xcen + 0.5);
423
0
        ycent = (l_int32)(scaley * ycen + 0.5);
424
0
        if (angle == 0.0) {
425
0
            rx = tx;
426
0
            ry = ty;
427
0
            rw = tw;
428
0
            rh = th;
429
0
        } else {
430
0
            xdif = tx + 0.5 * tw - xcent;
431
0
            ydif = ty + 0.5 * th - ycent;
432
0
            rw = L_ABS(tw * cosa) + L_ABS(th * sina);
433
0
            rh = L_ABS(th * cosa) + L_ABS(tw * sina);
434
0
            rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
435
0
            ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
436
0
        }
437
0
        tx = (l_int32)(rx + shiftx + 0.5);
438
0
        ty = (l_int32)(ry + shifty + 0.5);
439
0
        tw = (l_int32)(rw + 0.5);
440
0
        th = (l_int32)(rh + 0.5);
441
0
        boxd = boxCreate(tx, ty, tw, th);
442
0
    }
443
444
0
    return boxd;
445
0
}
446
447
448
/*!
449
 * \brief   boxaRotateOrth()
450
 *
451
 * \param[in]    boxas
452
 * \param[in]    w, h       of image in which the boxa is embedded
453
 * \param[in]    rotation   0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
454
 *                          all rotations are clockwise
455
 * \return  boxad, or NULL on error
456
 *
457
 * <pre>
458
 * Notes:
459
 *      (1) See boxRotateOrth() for details.
460
 * </pre>
461
 */
462
BOXA *
463
boxaRotateOrth(BOXA    *boxas,
464
               l_int32  w,
465
               l_int32  h,
466
               l_int32  rotation)
467
0
{
468
0
l_int32  i, n;
469
0
BOX     *boxs, *boxd;
470
0
BOXA    *boxad;
471
472
0
    if (!boxas)
473
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
474
0
    if (rotation < 0 || rotation > 3)
475
0
        return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", __func__, NULL);
476
0
    if (rotation == 0)
477
0
        return boxaCopy(boxas, L_COPY);
478
479
0
    n = boxaGetCount(boxas);
480
0
    if ((boxad = boxaCreate(n)) == NULL)
481
0
        return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
482
0
    for (i = 0; i < n; i++) {
483
0
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
484
0
            boxaDestroy(&boxad);
485
0
            return (BOXA *)ERROR_PTR("boxs not found", __func__, NULL);
486
0
        }
487
0
        boxd = boxRotateOrth(boxs, w, h, rotation);
488
0
        boxDestroy(&boxs);
489
0
        boxaAddBox(boxad, boxd, L_INSERT);
490
0
    }
491
492
0
    return boxad;
493
0
}
494
495
496
/*!
497
 * \brief   boxRotateOrth()
498
 *
499
 * \param[in]    box
500
 * \param[in]    w, h       of image in which the box is embedded
501
 * \param[in]    rotation   0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
502
 *                          all rotations are clockwise
503
 * \return  boxd, or NULL on error
504
 *
505
 * <pre>
506
 * Notes:
507
 *      (1) Rotate the image with the embedded box by the specified amount.
508
 *      (2) After rotation, the rotated box is always measured with
509
 *          respect to the UL corner of the image.
510
 * </pre>
511
 */
512
BOX *
513
boxRotateOrth(BOX     *box,
514
              l_int32  w,
515
              l_int32  h,
516
              l_int32  rotation)
517
0
{
518
0
l_int32  bx, by, bw, bh, xdist, ydist;
519
520
0
    if (!box)
521
0
        return (BOX *)ERROR_PTR("box not defined", __func__, NULL);
522
0
    if (rotation < 0 || rotation > 3)
523
0
        return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", __func__, NULL);
524
0
    if (rotation == 0)
525
0
        return boxCopy(box);
526
527
0
    boxGetGeometry(box, &bx, &by, &bw, &bh);
528
0
    if (bw <= 0 || bh <= 0)  /* invalid */
529
0
        return boxCreate(0, 0, 0, 0);
530
0
    ydist = h - by - bh;  /* below box */
531
0
    xdist = w - bx - bw;  /* to right of box */
532
0
    if (rotation == 1)   /* 90 deg cw */
533
0
        return boxCreate(ydist, bx, bh, bw);
534
0
    else if (rotation == 2)  /* 180 deg cw */
535
0
        return boxCreate(xdist, ydist, bw, bh);
536
0
    else  /*  rotation == 3, 270 deg cw */
537
0
        return boxCreate(by, xdist, bh, bw);
538
0
}
539
540
541
/*!
542
 * \brief   boxaShiftWithPta()
543
 *
544
 * \param[in]    boxas
545
 * \param[in]    pta       aligned with the boxes; determines shift amount
546
 * \param[in]    dir       +1 to shift by the values in pta; -1 to shift
547
 *                         by the negative of the values in the pta.
548
 * \return  boxad, or NULL on error
549
 *
550
 * <pre>
551
 * Notes:
552
 *      (1) In use, %pta may come from the UL corners of of a boxa, each
553
 *          of whose boxes contains the corresponding box of %boxas
554
 *          within it.  The output %boxad is then a boxa in the (global)
555
 *          coordinates of the containing boxa.  So the input %pta
556
 *          could come from boxaExtractCorners().
557
 *      (2) The operations with %dir == 1 and %dir == -1 are inverses if
558
 *          called in order (1, -1).  Starting with an input boxa and
559
 *          calling twice with these values of %dir results in a boxa
560
 *          identical to the input.  However, because box parameters can
561
 *          never be negative, calling in the order (-1, 1) may result
562
 *          in clipping at the left side and the top.
563
 * </pre>
564
 */
565
BOXA *
566
boxaShiftWithPta(BOXA    *boxas,
567
                 PTA     *pta,
568
                 l_int32  dir)
569
0
{
570
0
l_int32  i, n, x, y, full;
571
0
BOX     *box1, *box2;
572
0
BOXA    *boxad;
573
574
0
    if (!boxas)
575
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
576
0
    boxaIsFull(boxas, &full);
577
0
    if (!full)
578
0
        return (BOXA *)ERROR_PTR("boxas not full", __func__, NULL);
579
0
    if (!pta)
580
0
        return (BOXA *)ERROR_PTR("pta not defined", __func__, NULL);
581
0
    if (dir != 1 && dir != -1)
582
0
        return (BOXA *)ERROR_PTR("invalid dir", __func__, NULL);
583
0
    n = boxaGetCount(boxas);
584
0
    if (n != ptaGetCount(pta))
585
0
        return (BOXA *)ERROR_PTR("boxas and pta not same size", __func__, NULL);
586
587
0
    if ((boxad = boxaCreate(n)) == NULL)
588
0
        return (BOXA *)ERROR_PTR("boxad not made", __func__, NULL);
589
0
    for (i = 0; i < n; i++) {
590
0
        box1 = boxaGetBox(boxas, i, L_COPY);
591
0
        ptaGetIPt(pta, i, &x, &y);
592
0
        box2 = boxTransform(box1, dir * x, dir * y, 1.0, 1.0);
593
0
        boxaAddBox(boxad, box2, L_INSERT);
594
0
        boxDestroy(&box1);
595
0
    }
596
0
    return boxad;
597
0
}
598
599
600
/*---------------------------------------------------------------------*
601
 *                              Boxa sort                              *
602
 *---------------------------------------------------------------------*/
603
/*!
604
 * \brief   boxaSort()
605
 *
606
 * \param[in]    boxas
607
 * \param[in]    sorttype   L_SORT_BY_X, L_SORT_BY_Y,
608
 *                          L_SORT_BY_RIGHT, L_SORT_BY_BOT,
609
 *                          L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
610
 *                          L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
611
 *                          L_SORT_BY_PERIMETER, L_SORT_BY_AREA,
612
 *                          L_SORT_BY_ASPECT_RATIO
613
 * \param[in]    sortorder  L_SORT_INCREASING, L_SORT_DECREASING
614
 * \param[out]   pnaindex   [optional] index of sorted order into
615
 *                          original array
616
 * \return  boxad sorted version of boxas, or NULL on error
617
 *
618
 * <pre>
619
 * Notes:
620
 *      (1) An empty boxa returns a copy, with a warning.
621
 * </pre>
622
 */
623
BOXA *
624
boxaSort(BOXA    *boxas,
625
         l_int32  sorttype,
626
         l_int32  sortorder,
627
         NUMA   **pnaindex)
628
6
{
629
6
l_int32    i, n, x, y, w, h, size;
630
6
BOXA      *boxad;
631
6
NUMA      *na, *naindex;
632
633
6
    if (pnaindex) *pnaindex = NULL;
634
6
    if (!boxas)
635
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
636
6
    if ((n = boxaGetCount(boxas)) == 0) {
637
0
        L_WARNING("boxas is empty\n", __func__);
638
0
        return boxaCopy(boxas, L_COPY);
639
0
    }
640
6
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
641
6
        sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT &&
642
6
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
643
6
        sorttype != L_SORT_BY_MIN_DIMENSION &&
644
6
        sorttype != L_SORT_BY_MAX_DIMENSION &&
645
6
        sorttype != L_SORT_BY_PERIMETER &&
646
6
        sorttype != L_SORT_BY_AREA &&
647
6
        sorttype != L_SORT_BY_ASPECT_RATIO)
648
0
        return (BOXA *)ERROR_PTR("invalid sort type", __func__, NULL);
649
6
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
650
0
        return (BOXA *)ERROR_PTR("invalid sort order", __func__, NULL);
651
652
        /* Use O(n) binsort if possible */
653
6
    if (n > MinCompsForBinSort &&
654
6
        ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
655
0
         (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
656
0
         (sorttype == L_SORT_BY_PERIMETER)))
657
0
        return boxaBinSort(boxas, sorttype, sortorder, pnaindex);
658
659
        /* Build up numa of specific data */
660
6
    if ((na = numaCreate(n)) == NULL)
661
0
        return (BOXA *)ERROR_PTR("na not made", __func__, NULL);
662
194
    for (i = 0; i < n; i++) {
663
188
        boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
664
188
        switch (sorttype)
665
188
        {
666
188
        case L_SORT_BY_X:
667
188
            numaAddNumber(na, x);
668
188
            break;
669
0
        case L_SORT_BY_Y:
670
0
            numaAddNumber(na, y);
671
0
            break;
672
0
        case L_SORT_BY_RIGHT:
673
0
            numaAddNumber(na, x + w - 1);
674
0
            break;
675
0
        case L_SORT_BY_BOT:
676
0
            numaAddNumber(na, y + h - 1);
677
0
            break;
678
0
        case L_SORT_BY_WIDTH:
679
0
            numaAddNumber(na, w);
680
0
            break;
681
0
        case L_SORT_BY_HEIGHT:
682
0
            numaAddNumber(na, h);
683
0
            break;
684
0
        case L_SORT_BY_MIN_DIMENSION:
685
0
            size = L_MIN(w, h);
686
0
            numaAddNumber(na, size);
687
0
            break;
688
0
        case L_SORT_BY_MAX_DIMENSION:
689
0
            size = L_MAX(w, h);
690
0
            numaAddNumber(na, size);
691
0
            break;
692
0
        case L_SORT_BY_PERIMETER:
693
0
            size = w + h;
694
0
            numaAddNumber(na, size);
695
0
            break;
696
0
        case L_SORT_BY_AREA:
697
0
            size = w * h;
698
0
            numaAddNumber(na, size);
699
0
            break;
700
0
        case L_SORT_BY_ASPECT_RATIO:
701
0
            numaAddNumber(na, (l_float32)w / (l_float32)h);
702
0
            break;
703
0
        default:
704
0
            L_WARNING("invalid sort type\n", __func__);
705
188
        }
706
188
    }
707
708
        /* Get the sort index for data array */
709
6
    naindex = numaGetSortIndex(na, sortorder);
710
6
    numaDestroy(&na);
711
6
    if (!naindex)
712
0
        return (BOXA *)ERROR_PTR("naindex not made", __func__, NULL);
713
714
        /* Build up sorted boxa using sort index */
715
6
    boxad = boxaSortByIndex(boxas, naindex);
716
717
6
    if (pnaindex)
718
0
        *pnaindex = naindex;
719
6
    else
720
6
        numaDestroy(&naindex);
721
6
    return boxad;
722
6
}
723
724
725
/*!
726
 * \brief   boxaBinSort()
727
 *
728
 * \param[in]    boxas
729
 * \param[in]    sorttype    L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
730
 *                           L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER
731
 * \param[in]    sortorder   L_SORT_INCREASING, L_SORT_DECREASING
732
 * \param[out]   pnaindex    [optional] index of sorted order into
733
 *                           original array
734
 * \return  boxad sorted version of boxas, or NULL on error
735
 *
736
 * <pre>
737
 * Notes:
738
 *      (1) For a large number of boxes (say, greater than 1000), this
739
 *          O(n) binsort is much faster than the O(nlogn) shellsort.
740
 *          For 5000 components, this is over 20x faster than boxaSort().
741
 *      (2) Consequently, boxaSort() calls this function if it will
742
 *          likely go much faster.
743
 * </pre>
744
 */
745
BOXA *
746
boxaBinSort(BOXA    *boxas,
747
            l_int32  sorttype,
748
            l_int32  sortorder,
749
            NUMA   **pnaindex)
750
0
{
751
0
l_int32  i, n, x, y, w, h;
752
0
BOXA    *boxad;
753
0
NUMA    *na, *naindex;
754
755
0
    if (pnaindex) *pnaindex = NULL;
756
0
    if (!boxas)
757
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
758
0
    if ((n = boxaGetCount(boxas)) == 0) {
759
0
        L_WARNING("boxas is empty\n", __func__);
760
0
        return boxaCopy(boxas, L_COPY);
761
0
    }
762
0
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
763
0
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
764
0
        sorttype != L_SORT_BY_PERIMETER)
765
0
        return (BOXA *)ERROR_PTR("invalid sort type", __func__, NULL);
766
0
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
767
0
        return (BOXA *)ERROR_PTR("invalid sort order", __func__, NULL);
768
769
        /* Generate Numa of appropriate box dimensions */
770
0
    if ((na = numaCreate(n)) == NULL)
771
0
        return (BOXA *)ERROR_PTR("na not made", __func__, NULL);
772
0
    for (i = 0; i < n; i++) {
773
0
        boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
774
0
        switch (sorttype)
775
0
        {
776
0
        case L_SORT_BY_X:
777
0
            numaAddNumber(na, x);
778
0
            break;
779
0
        case L_SORT_BY_Y:
780
0
            numaAddNumber(na, y);
781
0
            break;
782
0
        case L_SORT_BY_WIDTH:
783
0
            numaAddNumber(na, w);
784
0
            break;
785
0
        case L_SORT_BY_HEIGHT:
786
0
            numaAddNumber(na, h);
787
0
            break;
788
0
        case L_SORT_BY_PERIMETER:
789
0
            numaAddNumber(na, w + h);
790
0
            break;
791
0
        default:
792
0
            L_WARNING("invalid sort type\n", __func__);
793
0
        }
794
0
    }
795
796
        /* Get the sort index for data array */
797
0
    naindex = numaGetBinSortIndex(na, sortorder);
798
0
    numaDestroy(&na);
799
0
    if (!naindex)
800
0
        return (BOXA *)ERROR_PTR("naindex not made", __func__, NULL);
801
802
        /* Build up sorted boxa using the sort index */
803
0
    boxad = boxaSortByIndex(boxas, naindex);
804
805
0
    if (pnaindex)
806
0
        *pnaindex = naindex;
807
0
    else
808
0
        numaDestroy(&naindex);
809
0
    return boxad;
810
0
}
811
812
813
/*!
814
 * \brief   boxaSortByIndex()
815
 *
816
 * \param[in]    boxas
817
 * \param[in]    naindex    na that maps from the new boxa to the input boxa
818
 * \return  boxad sorted, or NULL on error
819
 */
820
BOXA *
821
boxaSortByIndex(BOXA  *boxas,
822
                NUMA  *naindex)
823
6
{
824
6
l_int32  i, n, index;
825
6
BOX     *box;
826
6
BOXA    *boxad;
827
828
6
    if (!boxas)
829
0
        return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
830
6
    if ((n = boxaGetCount(boxas)) == 0) {
831
0
        L_WARNING("boxas is empty\n", __func__);
832
0
        return boxaCopy(boxas, L_COPY);
833
0
    }
834
6
    if (!naindex)
835
0
        return (BOXA *)ERROR_PTR("naindex not defined", __func__, NULL);
836
837
6
    boxad = boxaCreate(n);
838
194
    for (i = 0; i < n; i++) {
839
188
        numaGetIValue(naindex, i, &index);
840
188
        box = boxaGetBox(boxas, index, L_COPY);
841
188
        boxaAddBox(boxad, box, L_INSERT);
842
188
    }
843
844
6
    return boxad;
845
6
}
846
847
848
/*!
849
 * \brief   boxaSort2d()
850
 *
851
 * \param[in]    boxas
852
 * \param[out]   pnaad    [optional] numaa with sorted indices
853
 *                        whose values are the indices of the input array
854
 * \param[in]    delta1   min separation that permits aggregation of a box
855
 *                        onto a boxa of horizontally-aligned boxes; pass 1
856
 * \param[in]    delta2   min separation that permits aggregation of a box
857
 *                        onto a boxa of horizontally-aligned boxes; pass 2
858
 * \param[in]    minh1    components less than this height either join an
859
 *                        existing boxa or are set aside for pass 2
860
 * \return  baa 2d sorted version of boxa, or NULL on error
861
 *
862
 * <pre>
863
 * Notes:
864
 *      (1) The final result is a sort where the 'fast scan' direction is
865
 *          left to right, and the 'slow scan' direction is from top
866
 *          to bottom.  Each boxa in the baa represents a sorted set
867
 *          of boxes from left to right.
868
 *      (2) Three passes are used to aggregate the boxas, which can correspond
869
 *          to characters or words in a line of text.  In pass 1, only
870
 *          taller components, which correspond to xheight or larger,
871
 *          are permitted to start a new boxa.  In pass 2, the remaining
872
 *          vertically-challenged components are allowed to join an
873
 *          existing boxa or start a new one.  In pass 3, boxa whose extent
874
 *          is overlapping are joined.  After that, the boxes in each
875
 *          boxa are sorted horizontally, and finally the boxa are
876
 *          sorted vertically.
877
 *      (3) If %delta1 > 0, the first pass allows aggregation when
878
 *          boxes in the same boxa do not overlap vertically.  In fact,
879
 *          %delta1 is the max distance by which they can miss and still
880
 *          be aggregated.  If %delta1 < 0, the box must have vertical
881
 *          overlap of at least abs(%delta1) with the boxa before it
882
 *          can be merged.  Similar for delta2 on the second pass.
883
 *      (4) On the first pass, any component of height less than minh1
884
 *          cannot start a new boxa; it's put aside for later insertion.
885
 *      (5) On the second pass, any small component that doesn't align
886
 *          with an existing boxa can start a new one.
887
 *      (6) This can be used to identify lines of text from
888
 *          character or word bounding boxes.
889
 *      (7) Typical values for the input parameters on 300 ppi text are:
890
 *                 delta1 ~ 0
891
 *                 delta2 ~ 0
892
 *                 minh1 ~ 5
893
 * </pre>
894
 */
895
BOXAA *
896
boxaSort2d(BOXA    *boxas,
897
           NUMAA  **pnaad,
898
           l_int32  delta1,
899
           l_int32  delta2,
900
           l_int32  minh1)
901
0
{
902
0
l_int32  i, index, h, nt, ne, n, m, ival;
903
0
BOX     *box;
904
0
BOXA    *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs;
905
0
BOXAA   *baa, *baa1, *baad;
906
0
NUMA    *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap;
907
0
NUMAA   *naa, *naa1, *naad;
908
909
0
    if (pnaad) *pnaad = NULL;
910
0
    if (!boxas)
911
0
        return (BOXAA *)ERROR_PTR("boxas not defined", __func__, NULL);
912
0
    if (boxaGetCount(boxas) == 0)
913
0
        return (BOXAA *)ERROR_PTR("boxas is empty", __func__, NULL);
914
915
        /* Sort from left to right */
916
0
    if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
917
0
                    == NULL)
918
0
        return (BOXAA *)ERROR_PTR("boxa not made", __func__, NULL);
919
920
        /* First pass: assign taller boxes to boxa by row */
921
0
    nt = boxaGetCount(boxa);
922
0
    baa = boxaaCreate(0);
923
0
    naa = numaaCreate(0);
924
0
    boxae = boxaCreate(0);  /* save small height boxes here */
925
0
    nae = numaCreate(0);  /* keep track of small height boxes */
926
0
    for (i = 0; i < nt; i++) {
927
0
        box = boxaGetBox(boxa, i, L_CLONE);
928
0
        boxGetGeometry(box, NULL, NULL, NULL, &h);
929
0
        if (h < minh1) {  /* save for 2nd pass */
930
0
            boxaAddBox(boxae, box, L_INSERT);
931
0
            numaAddNumber(nae, i);
932
0
        } else {
933
0
            n = boxaaGetCount(baa);
934
0
            boxaaAlignBox(baa, box, delta1, &index);
935
0
            if (index < n) {  /* append to an existing boxa */
936
0
                boxaaAddBox(baa, index, box, L_INSERT);
937
0
            } else {  /* doesn't align, need new boxa */
938
0
                boxan = boxaCreate(0);
939
0
                boxaAddBox(boxan, box, L_INSERT);
940
0
                boxaaAddBoxa(baa, boxan, L_INSERT);
941
0
                nan = numaCreate(0);
942
0
                numaaAddNuma(naa, nan, L_INSERT);
943
0
            }
944
0
            numaGetIValue(naindex, i, &ival);
945
0
            numaaAddNumber(naa, index, ival);
946
0
        }
947
0
    }
948
0
    boxaDestroy(&boxa);
949
0
    numaDestroy(&naindex);
950
951
        /* Second pass: feed in small height boxes */
952
0
    ne = boxaGetCount(boxae);
953
0
    for (i = 0; i < ne; i++) {
954
0
        box = boxaGetBox(boxae, i, L_CLONE);
955
0
        n = boxaaGetCount(baa);
956
0
        boxaaAlignBox(baa, box, delta2, &index);
957
0
        if (index < n) {  /* append to an existing boxa */
958
0
            boxaaAddBox(baa, index, box, L_INSERT);
959
0
        } else {  /* doesn't align, need new boxa */
960
0
            boxan = boxaCreate(0);
961
0
            boxaAddBox(boxan, box, L_INSERT);
962
0
            boxaaAddBoxa(baa, boxan, L_INSERT);
963
0
            nan = numaCreate(0);
964
0
            numaaAddNuma(naa, nan, L_INSERT);
965
0
        }
966
0
        numaGetIValue(nae, i, &ival);  /* location in original boxas */
967
0
        numaaAddNumber(naa, index, ival);
968
0
    }
969
970
        /* Third pass: merge some boxa whose extent is overlapping.
971
         * Think of these boxa as text lines, where the bounding boxes
972
         * of the text lines can overlap, but likely won't have
973
         * a huge overlap.
974
         * First do a greedy find of pairs of overlapping boxa, where
975
         * the two boxa overlap by at least 50% of the smaller, and
976
         * the smaller is not more than half the area of the larger.
977
         * For such pairs, call the larger one the primary boxa.  The
978
         * boxes in the smaller one are appended to those in the primary
979
         * in pass 3a, and the primaries are extracted in pass 3b.
980
         * In this way, all boxes in the original baa are saved. */
981
0
    n = boxaaGetCount(baa);
982
0
    boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3);
983
0
    boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap);
984
0
    boxaDestroy(&boxa1);
985
0
    boxaDestroy(&boxa3);
986
0
    for (i = 0; i < n; i++) {  /* Pass 3a: join selected copies of boxa */
987
0
        numaGetIValue(namap, i, &ival);
988
0
        if (ival >= 0) {  /* join current to primary boxa[ival] */
989
0
            boxa1 = boxaaGetBoxa(baa, i, L_COPY);
990
0
            boxa2 = boxaaGetBoxa(baa, ival, L_CLONE);
991
0
            boxaJoin(boxa2, boxa1, 0, -1);
992
0
            boxaDestroy(&boxa2);
993
0
            boxaDestroy(&boxa1);
994
0
            na1 = numaaGetNuma(naa, i, L_COPY);
995
0
            na2 = numaaGetNuma(naa, ival, L_CLONE);
996
0
            numaJoin(na2, na1, 0, -1);
997
0
            numaDestroy(&na1);
998
0
            numaDestroy(&na2);
999
0
        }
1000
0
    }
1001
0
    baa1 = boxaaCreate(n);
1002
0
    naa1 = numaaCreate(n);
1003
0
    for (i = 0; i < n; i++) {  /* Pass 3b: save primary boxa */
1004
0
        numaGetIValue(namap, i, &ival);
1005
0
        if (ival == -1) {
1006
0
            boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1007
0
            boxaaAddBoxa(baa1, boxa1, L_INSERT);
1008
0
            na1 = numaaGetNuma(naa, i, L_CLONE);
1009
0
            numaaAddNuma(naa1, na1, L_INSERT);
1010
0
        }
1011
0
    }
1012
0
    numaDestroy(&namap);
1013
0
    boxaaDestroy(&baa);
1014
0
    baa = baa1;
1015
0
    numaaDestroy(&naa);
1016
0
    naa = naa1;
1017
1018
        /* Sort the boxes in each boxa horizontally */
1019
0
    m = boxaaGetCount(baa);
1020
0
    for (i = 0; i < m; i++) {
1021
0
        boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1022
0
        boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
1023
0
        boxaaReplaceBoxa(baa, i, boxa2);
1024
0
        na1 = numaaGetNuma(naa, i, L_CLONE);
1025
0
        na2 = numaSortByIndex(na1, nah);
1026
0
        numaaReplaceNuma(naa, i, na2);
1027
0
        boxaDestroy(&boxa1);
1028
0
        numaDestroy(&na1);
1029
0
        numaDestroy(&nah);
1030
0
    }
1031
1032
        /* Sort the boxa vertically within boxaa, using the first box
1033
         * in each boxa. */
1034
0
    m = boxaaGetCount(baa);
1035
0
    boxav = boxaCreate(m);  /* holds first box in each boxa in baa */
1036
0
    naad = numaaCreate(m);
1037
0
    if (pnaad)
1038
0
        *pnaad = naad;
1039
0
    baad = boxaaCreate(m);
1040
0
    for (i = 0; i < m; i++) {
1041
0
        boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1042
0
        box = boxaGetBox(boxa1, 0, L_CLONE);
1043
0
        boxaAddBox(boxav, box, L_INSERT);
1044
0
        boxaDestroy(&boxa1);
1045
0
    }
1046
0
    boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
1047
0
    for (i = 0; i < m; i++) {
1048
0
        numaGetIValue(nav, i, &index);
1049
0
        boxa = boxaaGetBoxa(baa, index, L_CLONE);
1050
0
        boxaaAddBoxa(baad, boxa, L_INSERT);
1051
0
        nad = numaaGetNuma(naa, index, L_CLONE);
1052
0
        numaaAddNuma(naad, nad, L_INSERT);
1053
0
    }
1054
1055
1056
/*    lept_stderr("box count = %d, numaa count = %d\n", nt,
1057
            numaaGetNumberCount(naad)); */
1058
1059
0
    boxaaDestroy(&baa);
1060
0
    boxaDestroy(&boxav);
1061
0
    boxaDestroy(&boxavs);
1062
0
    boxaDestroy(&boxae);
1063
0
    numaDestroy(&nav);
1064
0
    numaDestroy(&nae);
1065
0
    numaaDestroy(&naa);
1066
0
    if (!pnaad)
1067
0
        numaaDestroy(&naad);
1068
1069
0
    return baad;
1070
0
}
1071
1072
1073
/*!
1074
 * \brief   boxaSort2dByIndex()
1075
 *
1076
 * \param[in]    boxas
1077
 * \param[in]    naa     numaa that maps from the new baa to the input boxa
1078
 * \return  baa sorted boxaa, or NULL on error
1079
 */
1080
BOXAA *
1081
boxaSort2dByIndex(BOXA   *boxas,
1082
                  NUMAA  *naa)
1083
0
{
1084
0
l_int32  ntot, boxtot, i, j, n, nn, index;
1085
0
BOX     *box;
1086
0
BOXA    *boxa;
1087
0
BOXAA   *baa;
1088
0
NUMA    *na;
1089
1090
0
    if (!boxas)
1091
0
        return (BOXAA *)ERROR_PTR("boxas not defined", __func__, NULL);
1092
0
    if ((boxtot = boxaGetCount(boxas)) == 0)
1093
0
        return (BOXAA *)ERROR_PTR("boxas is empty", __func__, NULL);
1094
0
    if (!naa)
1095
0
        return (BOXAA *)ERROR_PTR("naindex not defined", __func__, NULL);
1096
1097
        /* Check counts */
1098
0
    ntot = numaaGetNumberCount(naa);
1099
0
    if (ntot != boxtot)
1100
0
        return (BOXAA *)ERROR_PTR("element count mismatch", __func__, NULL);
1101
1102
0
    n = numaaGetCount(naa);
1103
0
    baa = boxaaCreate(n);
1104
0
    for (i = 0; i < n; i++) {
1105
0
        na = numaaGetNuma(naa, i, L_CLONE);
1106
0
        nn = numaGetCount(na);
1107
0
        boxa = boxaCreate(nn);
1108
0
        for (j = 0; j < nn; j++) {
1109
0
            numaGetIValue(na, i, &index);
1110
0
            box = boxaGetBox(boxas, index, L_COPY);
1111
0
            boxaAddBox(boxa, box, L_INSERT);
1112
0
        }
1113
0
        boxaaAddBoxa(baa, boxa, L_INSERT);
1114
0
        numaDestroy(&na);
1115
0
    }
1116
1117
0
    return baa;
1118
0
}
1119
1120
1121
/*---------------------------------------------------------------------*
1122
 *                        Boxa array extraction                        *
1123
 *---------------------------------------------------------------------*/
1124
/*!
1125
 * \brief   boxaExtractAsNuma()
1126
 *
1127
 * \param[in]    boxa
1128
 * \param[out]   pnal          [optional] array of left locations
1129
 * \param[out]   pnat          [optional] array of top locations
1130
 * \param[out]   pnar          [optional] array of right locations
1131
 * \param[out]   pnab          [optional] array of bottom locations
1132
 * \param[out]   pnaw          [optional] array of widths
1133
 * \param[out]   pnah          [optional] array of heights
1134
 * \param[in]    keepinvalid   1 to keep invalid boxes; 0 to remove them
1135
 * \return  0 if OK, 1 on error
1136
 *
1137
 * <pre>
1138
 * Notes:
1139
 *      (1) If you are counting or sorting values, such as determining
1140
 *          rank order, you must remove invalid boxes.
1141
 *      (2) If you are parametrizing the values, or doing an evaluation
1142
 *          where the position in the boxa sequence is important, you
1143
 *          must replace the invalid boxes with valid ones before
1144
 *          doing the extraction. This is easily done with boxaFillSequence().
1145
 * </pre>
1146
 */
1147
l_ok
1148
boxaExtractAsNuma(BOXA    *boxa,
1149
                  NUMA   **pnal,
1150
                  NUMA   **pnat,
1151
                  NUMA   **pnar,
1152
                  NUMA   **pnab,
1153
                  NUMA   **pnaw,
1154
                  NUMA   **pnah,
1155
                  l_int32  keepinvalid)
1156
0
{
1157
0
l_int32  i, n, left, top, right, bot, w, h;
1158
1159
0
    if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah)
1160
0
        return ERROR_INT("no output requested", __func__, 1);
1161
0
    if (pnal) *pnal = NULL;
1162
0
    if (pnat) *pnat = NULL;
1163
0
    if (pnar) *pnar = NULL;
1164
0
    if (pnab) *pnab = NULL;
1165
0
    if (pnaw) *pnaw = NULL;
1166
0
    if (pnah) *pnah = NULL;
1167
0
    if (!boxa)
1168
0
        return ERROR_INT("boxa not defined", __func__, 1);
1169
0
    if (!keepinvalid && boxaGetValidCount(boxa) == 0)
1170
0
        return ERROR_INT("no valid boxes", __func__, 1);
1171
1172
0
    n = boxaGetCount(boxa);
1173
0
    if (pnal) *pnal = numaCreate(n);
1174
0
    if (pnat) *pnat = numaCreate(n);
1175
0
    if (pnar) *pnar = numaCreate(n);
1176
0
    if (pnab) *pnab = numaCreate(n);
1177
0
    if (pnaw) *pnaw = numaCreate(n);
1178
0
    if (pnah) *pnah = numaCreate(n);
1179
0
    for (i = 0; i < n; i++) {
1180
0
        boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1181
0
        if (!keepinvalid && (w <= 0 || h <= 0))
1182
0
            continue;
1183
0
        right = left + w - 1;
1184
0
        bot = top + h - 1;
1185
0
        if (pnal) numaAddNumber(*pnal, left);
1186
0
        if (pnat) numaAddNumber(*pnat, top);
1187
0
        if (pnar) numaAddNumber(*pnar, right);
1188
0
        if (pnab) numaAddNumber(*pnab, bot);
1189
0
        if (pnaw) numaAddNumber(*pnaw, w);
1190
0
        if (pnah) numaAddNumber(*pnah, h);
1191
0
    }
1192
1193
0
    return 0;
1194
0
}
1195
1196
1197
/*!
1198
 * \brief   boxaExtractAsPta()
1199
 *
1200
 * \param[in]    boxa
1201
 * \param[out]   pptal         [optional] array of left locations vs. index
1202
 * \param[out]   pptat         [optional] array of top locations vs. index
1203
 * \param[out]   pptar         [optional] array of right locations vs. index
1204
 * \param[out]   pptab         [optional] array of bottom locations vs. index
1205
 * \param[out]   pptaw         [optional] array of widths vs. index
1206
 * \param[out]   pptah         [optional] array of heights vs. index
1207
 * \param[in]    keepinvalid   1 to keep invalid boxes; 0 to remove them
1208
 * \return  0 if OK, 1 on error
1209
 *
1210
 * <pre>
1211
 * Notes:
1212
 *      (1) For most applications, such as counting, sorting, fitting
1213
 *          to some parametrized form, plotting or filtering in general,
1214
 *          you should remove the invalid boxes.  Each pta saves the
1215
 *          box index in the x array, so replacing invalid boxes by
1216
 *          filling with boxaFillSequence(), which is required for
1217
 *          boxaExtractAsNuma(), is not necessary.
1218
 *      (2) If invalid boxes are retained, each one will result in
1219
 *          entries (typically 0) in all selected output pta.
1220
 *      (3) Other boxa --> pta functions are:
1221
 *          * boxaExtractCorners(): extracts any of the four corners as a pta.
1222
 *          * boxaConvertToPta(): extracts sufficient number of corners
1223
 *            to allow reconstruction of the original boxa from the pta.
1224
 * </pre>
1225
 */
1226
l_ok
1227
boxaExtractAsPta(BOXA    *boxa,
1228
                 PTA    **pptal,
1229
                 PTA    **pptat,
1230
                 PTA    **pptar,
1231
                 PTA    **pptab,
1232
                 PTA    **pptaw,
1233
                 PTA    **pptah,
1234
                 l_int32  keepinvalid)
1235
0
{
1236
0
l_int32  i, n, left, top, right, bot, w, h;
1237
1238
0
    if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah)
1239
0
        return ERROR_INT("no output requested", __func__, 1);
1240
0
    if (pptal) *pptal = NULL;
1241
0
    if (pptat) *pptat = NULL;
1242
0
    if (pptar) *pptar = NULL;
1243
0
    if (pptab) *pptab = NULL;
1244
0
    if (pptaw) *pptaw = NULL;
1245
0
    if (pptah) *pptah = NULL;
1246
0
    if (!boxa)
1247
0
        return ERROR_INT("boxa not defined", __func__, 1);
1248
0
    if (!keepinvalid && boxaGetValidCount(boxa) == 0)
1249
0
        return ERROR_INT("no valid boxes", __func__, 1);
1250
1251
0
    n = boxaGetCount(boxa);
1252
0
    if (pptal) *pptal = ptaCreate(n);
1253
0
    if (pptat) *pptat = ptaCreate(n);
1254
0
    if (pptar) *pptar = ptaCreate(n);
1255
0
    if (pptab) *pptab = ptaCreate(n);
1256
0
    if (pptaw) *pptaw = ptaCreate(n);
1257
0
    if (pptah) *pptah = ptaCreate(n);
1258
0
    for (i = 0; i < n; i++) {
1259
0
        boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1260
0
        if (!keepinvalid && (w <= 0 || h <= 0))
1261
0
            continue;
1262
0
        right = left + w - 1;
1263
0
        bot = top + h - 1;
1264
0
        if (pptal) ptaAddPt(*pptal, i, left);
1265
0
        if (pptat) ptaAddPt(*pptat, i, top);
1266
0
        if (pptar) ptaAddPt(*pptar, i, right);
1267
0
        if (pptab) ptaAddPt(*pptab, i, bot);
1268
0
        if (pptaw) ptaAddPt(*pptaw, i, w);
1269
0
        if (pptah) ptaAddPt(*pptah, i, h);
1270
0
    }
1271
1272
0
    return 0;
1273
0
}
1274
1275
1276
/*!
1277
 * \brief   boxaExtractCorners()
1278
 *
1279
 * \param[in]    boxa
1280
 * \param[in]    loc       L_UPPER_LEFT, L_UPPER_RIGHT, L_LOWER_LEFT,
1281
 *                         L_LOWER_RIGHT, L_BOX_CENTER
1282
 * \return  pta of requested coordinates, or NULL on error
1283
 *
1284
 * <pre>
1285
 * Notes:
1286
 *      (1) Extracts (0,0) for invalid boxes.
1287
 *      (2) Other boxa --> pta functions are:
1288
 *          * boxaExtractAsPta(): allows extraction of any dimension
1289
 *            and/or side location, with each in a separate pta.
1290
 *          * boxaConvertToPta(): extracts sufficient number of corners
1291
 *            to allow reconstruction of the original boxa from the pta.
1292
 * </pre>
1293
 */
1294
PTA *
1295
boxaExtractCorners(BOXA    *boxa,
1296
                   l_int32  loc)
1297
0
{
1298
0
l_int32  i, n, left, top, right, bot, w, h;
1299
0
PTA     *pta;
1300
1301
0
    if (!boxa)
1302
0
        return (PTA *)ERROR_PTR("boxa not defined", __func__, NULL);
1303
0
    if (loc != L_UPPER_LEFT && loc != L_UPPER_RIGHT && loc != L_LOWER_LEFT &&
1304
0
        loc != L_LOWER_RIGHT && loc != L_BOX_CENTER)
1305
0
        return (PTA *)ERROR_PTR("invalid location", __func__, NULL);
1306
1307
0
    n = boxaGetCount(boxa);
1308
0
    if ((pta = ptaCreate(n)) == NULL)
1309
0
        return (PTA *)ERROR_PTR("pta not made", __func__, NULL);
1310
1311
0
    for (i = 0; i < n; i++) {
1312
0
        boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
1313
0
        right = left + w - 1;
1314
0
        bot = top + h - 1;
1315
0
        if (w == 0 || h == 0) {  /* invalid */
1316
0
            left = 0;
1317
0
            top = 0;
1318
0
            right = 0;
1319
0
            bot = 0;
1320
0
        }
1321
0
        if (loc == L_UPPER_LEFT)
1322
0
            ptaAddPt(pta, left, top);
1323
0
        else if (loc == L_UPPER_RIGHT)
1324
0
            ptaAddPt(pta, right, top);
1325
0
        else if (loc == L_LOWER_LEFT)
1326
0
            ptaAddPt(pta, left, bot);
1327
0
        else if (loc == L_LOWER_RIGHT)
1328
0
            ptaAddPt(pta, right, bot);
1329
0
        else if (loc == L_BOX_CENTER)
1330
0
            ptaAddPt(pta, (left + right) / 2, (top + bot) / 2);
1331
0
    }
1332
1333
0
    return pta;
1334
0
}
1335
1336
1337
/*---------------------------------------------------------------------*
1338
 *                            Boxa statistics                          *
1339
 *---------------------------------------------------------------------*/
1340
/*!
1341
 * \brief   boxaGetRankVals()
1342
 *
1343
 * \param[in]    boxa
1344
 * \param[in]    fract   use 0.0 for smallest, 1.0 for largest width and height
1345
 * \param[out]   px      [optional] rank value of x (left side)
1346
 * \param[out]   py      [optional] rank value of y (top side)
1347
 * \param[out]   pr      [optional] rank value of right side
1348
 * \param[out]   pb      [optional] rank value of bottom side
1349
 * \param[out]   pw      [optional] rank value of width
1350
 * \param[out]   ph      [optional] rank value of height
1351
 * \return  0 if OK, 1 on error or if the boxa is empty or has no valid boxes
1352
 *
1353
 * <pre>
1354
 * Notes:
1355
 *      (1) This function does not assume that all boxes in the boxa are valid
1356
 *      (2) The six box parameters are sorted independently.
1357
 *          For rank order, the width and height are sorted in increasing
1358
 *          order.  But what does it mean to sort x and y in "rank order"?
1359
 *          If the boxes are of comparable size and somewhat
1360
 *          aligned (e.g., from multiple images), it makes some sense
1361
 *          to give a "rank order" for x and y by sorting them in
1362
 *          decreasing order.  (By the same argument, we choose to sort
1363
 *          the r and b sides in increasing order.)  In general, the
1364
 *          interpretation of a rank order on x and y (or on r and b)
1365
 *          is highly application dependent.  In summary:
1366
 *             ~ x and y are sorted in decreasing order
1367
 *             ~ r and b are sorted in increasing order
1368
 *             ~ w and h are sorted in increasing order
1369
 * </pre>
1370
 */
1371
l_ok
1372
boxaGetRankVals(BOXA      *boxa,
1373
                l_float32  fract,
1374
                l_int32   *px,
1375
                l_int32   *py,
1376
                l_int32   *pr,
1377
                l_int32   *pb,
1378
                l_int32   *pw,
1379
                l_int32   *ph)
1380
0
{
1381
0
l_float32  xval, yval, rval, bval, wval, hval;
1382
0
NUMA      *nax, *nay, *nar, *nab, *naw, *nah;
1383
1384
0
    if (px) *px = 0;
1385
0
    if (py) *py = 0;
1386
0
    if (pr) *pr = 0;
1387
0
    if (pb) *pb = 0;
1388
0
    if (pw) *pw = 0;
1389
0
    if (ph) *ph = 0;
1390
0
    if (!boxa)
1391
0
        return ERROR_INT("boxa not defined", __func__, 1);
1392
0
    if (fract < 0.0 || fract > 1.0)
1393
0
        return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1);
1394
0
    if (boxaGetValidCount(boxa) == 0)
1395
0
        return ERROR_INT("no valid boxes in boxa", __func__, 1);
1396
1397
        /* Use only the valid boxes */
1398
0
    boxaExtractAsNuma(boxa, &nax, &nay, &nar, &nab, &naw, &nah, 0);
1399
1400
0
    if (px) {
1401
0
        numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval);
1402
0
        *px = (l_int32)xval;
1403
0
    }
1404
0
    if (py) {
1405
0
        numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval);
1406
0
        *py = (l_int32)yval;
1407
0
    }
1408
0
    if (pr) {
1409
0
        numaGetRankValue(nar, fract, NULL, 1, &rval);
1410
0
        *pr = (l_int32)rval;
1411
0
    }
1412
0
    if (pb) {
1413
0
        numaGetRankValue(nab, fract, NULL, 1, &bval);
1414
0
        *pb = (l_int32)bval;
1415
0
    }
1416
0
    if (pw) {
1417
0
        numaGetRankValue(naw, fract, NULL, 1, &wval);
1418
0
        *pw = (l_int32)wval;
1419
0
    }
1420
0
    if (ph) {
1421
0
        numaGetRankValue(nah, fract, NULL, 1, &hval);
1422
0
        *ph = (l_int32)hval;
1423
0
    }
1424
0
    numaDestroy(&nax);
1425
0
    numaDestroy(&nay);
1426
0
    numaDestroy(&nar);
1427
0
    numaDestroy(&nab);
1428
0
    numaDestroy(&naw);
1429
0
    numaDestroy(&nah);
1430
0
    return 0;
1431
0
}
1432
1433
1434
/*!
1435
 * \brief   boxaGetMedianVals()
1436
 *
1437
 * \param[in]    boxa
1438
 * \param[out]   px     [optional] median value of x (left side)
1439
 * \param[out]   py     [optional] median value of y (top side)
1440
 * \param[out]   pr     [optional] median value of right side
1441
 * \param[out]   pb     [optional] median value of bottom side
1442
 * \param[out]   pw     [optional] median value of width
1443
 * \param[out]   ph     [optional] median value of height
1444
 * \return  0 if OK, 1 on error or if the boxa is empty or has no valid boxes
1445
 *
1446
 * <pre>
1447
 * Notes:
1448
 *      (1) See boxaGetRankVals()
1449
 * </pre>
1450
 */
1451
l_ok
1452
boxaGetMedianVals(BOXA     *boxa,
1453
                  l_int32  *px,
1454
                  l_int32  *py,
1455
                  l_int32  *pr,
1456
                  l_int32  *pb,
1457
                  l_int32  *pw,
1458
                  l_int32  *ph)
1459
0
{
1460
0
    if (!boxa)
1461
0
        return ERROR_INT("boxa not defined", __func__, 1);
1462
0
    if (boxaGetValidCount(boxa) == 0)
1463
0
        return ERROR_INT("no valid boxes in boxa", __func__, 1);
1464
1465
0
    return boxaGetRankVals(boxa, 0.5, px, py, pr, pb, pw, ph);
1466
0
}
1467
1468
1469
/*!
1470
 * \brief   boxaGetAverageSize()
1471
 *
1472
 * \param[in]    boxa
1473
 * \param[out]   pw     [optional] average width
1474
 * \param[out]   ph     [optional] average height
1475
 * \return  0 if OK, 1 on error or if the boxa is empty
1476
 */
1477
l_ok
1478
boxaGetAverageSize(BOXA       *boxa,
1479
                   l_float32  *pw,
1480
                   l_float32  *ph)
1481
0
{
1482
0
l_int32    i, n, bw, bh;
1483
0
l_float32  sumw, sumh;
1484
1485
0
    if (pw) *pw = 0.0;
1486
0
    if (ph) *ph = 0.0;
1487
0
    if (!boxa)
1488
0
        return ERROR_INT("boxa not defined", __func__, 1);
1489
0
    if ((n = boxaGetCount(boxa)) == 0)
1490
0
        return ERROR_INT("boxa is empty", __func__, 1);
1491
1492
0
    sumw = sumh = 0.0;
1493
0
    for (i = 0; i < n; i++) {
1494
0
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
1495
0
        sumw += bw;
1496
0
        sumh += bh;
1497
0
    }
1498
1499
0
    if (pw) *pw = sumw / n;
1500
0
    if (ph) *ph = sumh / n;
1501
0
    return 0;
1502
0
}
1503
1504
1505
/*---------------------------------------------------------------------*
1506
 *                        Other Boxaa functions                        *
1507
 *---------------------------------------------------------------------*/
1508
/*!
1509
 * \brief   boxaaGetExtent()
1510
 *
1511
 * \param[in]    baa
1512
 * \param[out]   pw      [optional] width
1513
 * \param[out]   ph      [optional] height
1514
 * \param[out]   pbox    [optional]  minimum box containing all boxa
1515
 *                       in boxaa
1516
 * \param[out]   pboxa   [optional]  boxa containing all boxes in each
1517
 *                       boxa in the boxaa
1518
 * \return  0 if OK, 1 on error
1519
 *
1520
 * <pre>
1521
 * Notes:
1522
 *      (1) The returned w and h are the minimum size image
1523
 *          that would contain all boxes untranslated.
1524
 *      (2) Each box in the returned boxa is the minimum box required to
1525
 *          hold all the boxes in the respective boxa of baa.
1526
 *      (3) If there are no valid boxes in a boxa, the box corresponding
1527
 *          to its extent has all fields set to 0 (an invalid box).
1528
 * </pre>
1529
 */
1530
l_ok
1531
boxaaGetExtent(BOXAA    *baa,
1532
               l_int32  *pw,
1533
               l_int32  *ph,
1534
               BOX     **pbox,
1535
               BOXA    **pboxa)
1536
0
{
1537
0
l_int32  i, n, x, y, w, h, xmax, ymax, xmin, ymin, found;
1538
0
BOX     *box1;
1539
0
BOXA    *boxa, *boxa1;
1540
1541
0
    if (!pw && !ph && !pbox && !pboxa)
1542
0
        return ERROR_INT("no ptrs defined", __func__, 1);
1543
0
    if (pw) *pw = 0;
1544
0
    if (ph) *ph = 0;
1545
0
    if (pbox) *pbox = NULL;
1546
0
    if (pboxa) *pboxa = NULL;
1547
0
    if (!baa)
1548
0
        return ERROR_INT("baa not defined", __func__, 1);
1549
1550
0
    n = boxaaGetCount(baa);
1551
0
    if (n == 0)
1552
0
        return ERROR_INT("no boxa in baa", __func__, 1);
1553
1554
0
    boxa = boxaCreate(n);
1555
0
    xmax = ymax = 0;
1556
0
    xmin = ymin = 100000000;
1557
0
    found = FALSE;
1558
0
    for (i = 0; i < n; i++) {
1559
0
        boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
1560
0
        boxaGetExtent(boxa1, NULL, NULL, &box1);
1561
0
        boxaDestroy(&boxa1);
1562
0
        boxGetGeometry(box1, &x, &y, &w, &h);
1563
0
        if (w > 0 && h > 0) {  /* a valid extent box */
1564
0
            found = TRUE;  /* found at least one valid extent box */
1565
0
            xmin = L_MIN(xmin, x);
1566
0
            ymin = L_MIN(ymin, y);
1567
0
            xmax = L_MAX(xmax, x + w);
1568
0
            ymax = L_MAX(ymax, y + h);
1569
0
        }
1570
0
        boxaAddBox(boxa, box1, L_INSERT);
1571
0
    }
1572
0
    if (found == FALSE)  /* no valid extent boxes */
1573
0
        xmin = ymin = 0;
1574
1575
0
    if (pw) *pw = xmax;
1576
0
    if (ph) *ph = ymax;
1577
0
    if (pbox)
1578
0
        *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);
1579
0
    if (pboxa)
1580
0
        *pboxa = boxa;
1581
0
    else
1582
0
        boxaDestroy(&boxa);
1583
0
    return 0;
1584
0
}
1585
1586
1587
/*!
1588
 * \brief   boxaaFlattenToBoxa()
1589
 *
1590
 * \param[in]    baa
1591
 * \param[out]   pnaindex   [optional] the boxa index in the baa
1592
 * \param[in]    copyflag   L_COPY or L_CLONE
1593
 * \return  boxa, or NULL on error
1594
 *
1595
 * <pre>
1596
 * Notes:
1597
 *      (1) This 'flattens' the baa to a boxa, taking the boxes in
1598
 *          order in the first boxa, then the second, etc.
1599
 *      (2) If a boxa is empty, we generate an invalid, placeholder box
1600
 *          of zero size.  This is useful when converting from a baa
1601
 *          where each boxa has either 0 or 1 boxes, and it is necessary
1602
 *          to maintain a 1:1 correspondence between the initial
1603
 *          boxa array and the resulting box array.
1604
 *      (3) If &naindex is defined, we generate a Numa that gives, for
1605
 *          each box in the baa, the index of the boxa to which it belongs.
1606
 * </pre>
1607
 */
1608
BOXA *
1609
boxaaFlattenToBoxa(BOXAA   *baa,
1610
                   NUMA   **pnaindex,
1611
                   l_int32  copyflag)
1612
0
{
1613
0
l_int32  i, j, m, n;
1614
0
BOXA    *boxa, *boxat;
1615
0
BOX     *box;
1616
0
NUMA    *naindex = NULL;
1617
1618
0
    if (pnaindex) *pnaindex = NULL;
1619
0
    if (!baa)
1620
0
        return (BOXA *)ERROR_PTR("baa not defined", __func__, NULL);
1621
0
    if (copyflag != L_COPY && copyflag != L_CLONE)
1622
0
        return (BOXA *)ERROR_PTR("invalid copyflag", __func__, NULL);
1623
0
    if (pnaindex) {
1624
0
        naindex = numaCreate(0);
1625
0
        *pnaindex = naindex;
1626
0
    }
1627
1628
0
    n = boxaaGetCount(baa);
1629
0
    boxa = boxaCreate(n);
1630
0
    for (i = 0; i < n; i++) {
1631
0
        boxat = boxaaGetBoxa(baa, i, L_CLONE);
1632
0
        m = boxaGetCount(boxat);
1633
0
        if (m == 0) {  /* placeholder box */
1634
0
            box = boxCreate(0, 0, 0, 0);
1635
0
            boxaAddBox(boxa, box, L_INSERT);
1636
0
            if (pnaindex)
1637
0
                numaAddNumber(naindex, i);  /* save 'row' number */
1638
0
        } else {
1639
0
            for (j = 0; j < m; j++) {
1640
0
                box = boxaGetBox(boxat, j, copyflag);
1641
0
                boxaAddBox(boxa, box, L_INSERT);
1642
0
                if (pnaindex)
1643
0
                    numaAddNumber(naindex, i);  /* save 'row' number */
1644
0
            }
1645
0
        }
1646
0
        boxaDestroy(&boxat);
1647
0
    }
1648
1649
0
    return boxa;
1650
0
}
1651
1652
1653
/*!
1654
 * \brief   boxaaFlattenAligned()
1655
 *
1656
 * \param[in]    baa
1657
 * \param[in]    num         number extracted from each
1658
 * \param[in]    fillerbox   [optional] that fills if necessary
1659
 * \param[in]    copyflag    L_COPY or L_CLONE
1660
 * \return  boxa, or NULL on error
1661
 *
1662
 * <pre>
1663
 * Notes:
1664
 *      (1) This 'flattens' the baa to a boxa, taking the first %num
1665
 *          boxes from each boxa.
1666
 *      (2) In each boxa, if there are less than %num boxes, we preserve
1667
 *          the alignment between the input baa and the output boxa
1668
 *          by inserting one or more fillerbox(es) or, if %fillerbox == NULL,
1669
 *          one or more invalid placeholder boxes.
1670
 * </pre>
1671
 */
1672
BOXA *
1673
boxaaFlattenAligned(BOXAA   *baa,
1674
                    l_int32  num,
1675
                    BOX     *fillerbox,
1676
                    l_int32  copyflag)
1677
0
{
1678
0
l_int32  i, j, m, n, mval, nshort;
1679
0
BOXA    *boxat, *boxad;
1680
0
BOX     *box;
1681
1682
0
    if (!baa)
1683
0
        return (BOXA *)ERROR_PTR("baa not defined", __func__, NULL);
1684
0
    if (copyflag != L_COPY && copyflag != L_CLONE)
1685
0
        return (BOXA *)ERROR_PTR("invalid copyflag", __func__, NULL);
1686
1687
0
    n = boxaaGetCount(baa);
1688
0
    boxad = boxaCreate(n);
1689
0
    for (i = 0; i < n; i++) {
1690
0
        boxat = boxaaGetBoxa(baa, i, L_CLONE);
1691
0
        m = boxaGetCount(boxat);
1692
0
        mval = L_MIN(m, num);
1693
0
        nshort = num - mval;
1694
0
        for (j = 0; j < mval; j++) {  /* take the first %num if possible */
1695
0
            box = boxaGetBox(boxat, j, copyflag);
1696
0
            boxaAddBox(boxad, box, L_INSERT);
1697
0
        }
1698
0
        for (j = 0; j < nshort; j++) {  /* add fillers if necessary */
1699
0
            if (fillerbox) {
1700
0
                boxaAddBox(boxad, fillerbox, L_COPY);
1701
0
            } else {
1702
0
                box = boxCreate(0, 0, 0, 0);  /* invalid placeholder box */
1703
0
                boxaAddBox(boxad, box, L_INSERT);
1704
0
            }
1705
0
        }
1706
0
        boxaDestroy(&boxat);
1707
0
    }
1708
1709
0
    return boxad;
1710
0
}
1711
1712
1713
/*!
1714
 * \brief   boxaEncapsulateAligned()
1715
 *
1716
 * \param[in]    boxa
1717
 * \param[in]    num        number put into each boxa in the baa
1718
 * \param[in]    copyflag   L_COPY or L_CLONE
1719
 * \return  baa, or NULL on error
1720
 *
1721
 * <pre>
1722
 * Notes:
1723
 *      (1) This puts %num boxes from the input %boxa into each of a
1724
 *          set of boxa within an output baa.
1725
 *      (2) This assumes that the boxes in %boxa are in sets of %num each.
1726
 * </pre>
1727
 */
1728
BOXAA *
1729
boxaEncapsulateAligned(BOXA    *boxa,
1730
                       l_int32  num,
1731
                       l_int32  copyflag)
1732
0
{
1733
0
l_int32  i, j, n, nbaa, index;
1734
0
BOX     *box;
1735
0
BOXA    *boxat;
1736
0
BOXAA   *baa;
1737
1738
0
    if (!boxa)
1739
0
        return (BOXAA *)ERROR_PTR("boxa not defined", __func__, NULL);
1740
0
    if (copyflag != L_COPY && copyflag != L_CLONE)
1741
0
        return (BOXAA *)ERROR_PTR("invalid copyflag", __func__, NULL);
1742
1743
0
    n = boxaGetCount(boxa);
1744
0
    nbaa = n / num;
1745
0
    if (num * nbaa != n)
1746
0
        L_ERROR("inconsistent alignment: num doesn't divide n\n", __func__);
1747
0
    baa = boxaaCreate(nbaa);
1748
0
    for (i = 0, index = 0; i < nbaa; i++) {
1749
0
        boxat = boxaCreate(num);
1750
0
        for (j = 0; j < num; j++, index++) {
1751
0
            box = boxaGetBox(boxa, index, copyflag);
1752
0
            boxaAddBox(boxat, box, L_INSERT);
1753
0
        }
1754
0
        boxaaAddBoxa(baa, boxat, L_INSERT);
1755
0
    }
1756
1757
0
    return baa;
1758
0
}
1759
1760
1761
/*!
1762
 * \brief   boxaaTranspose()
1763
 *
1764
 * \param[in]    baas
1765
 * \return  baad, or NULL on error
1766
 *
1767
 * <pre>
1768
 * Notes:
1769
 *      (1) If you think of a boxaa as a 2D array of boxes that is accessed
1770
 *          row major, then each row is represented by one of the boxa.
1771
 *          This function creates a new boxaa related to the input boxaa
1772
 *          as a column major traversal of the input boxaa.
1773
 *      (2) For example, if %baas has 2 boxa, each with 10 boxes, then
1774
 *          %baad will have 10 boxa, each with 2 boxes.
1775
 *      (3) Require for this transpose operation that each boxa in
1776
 *          %baas has the same number of boxes.  This operation is useful
1777
 *          when the i-th boxes in each boxa are meaningfully related.
1778
 * </pre>
1779
 */
1780
BOXAA *
1781
boxaaTranspose(BOXAA  *baas)
1782
0
{
1783
0
l_int32   i, j, ny, nb, nbox;
1784
0
BOX      *box;
1785
0
BOXA     *boxa;
1786
0
BOXAA    *baad;
1787
1788
0
    if (!baas)
1789
0
        return (BOXAA *)ERROR_PTR("baas not defined", __func__, NULL);
1790
0
    if ((ny = boxaaGetCount(baas)) == 0)
1791
0
        return (BOXAA *)ERROR_PTR("baas empty", __func__, NULL);
1792
1793
        /* Make sure that each boxa in baas has the same number of boxes */
1794
0
    for (i = 0; i < ny; i++) {
1795
0
        if ((boxa = boxaaGetBoxa(baas, i, L_CLONE)) == NULL)
1796
0
            return (BOXAA *)ERROR_PTR("baas is missing a boxa", __func__, NULL);
1797
0
        nb = boxaGetCount(boxa);
1798
0
        boxaDestroy(&boxa);
1799
0
        if (i == 0)
1800
0
            nbox = nb;
1801
0
        else if (nb != nbox)
1802
0
            return (BOXAA *)ERROR_PTR("boxa are not all the same size",
1803
0
                                      __func__, NULL);
1804
0
    }
1805
1806
        /* baad[i][j] = baas[j][i] */
1807
0
    baad = boxaaCreate(nbox);
1808
0
    for (i = 0; i < nbox; i++) {
1809
0
        boxa = boxaCreate(ny);
1810
0
        for (j = 0; j < ny; j++) {
1811
0
            box = boxaaGetBox(baas, j, i, L_COPY);
1812
0
            boxaAddBox(boxa, box, L_INSERT);
1813
0
        }
1814
0
        boxaaAddBoxa(baad, boxa, L_INSERT);
1815
0
    }
1816
0
    return baad;
1817
0
}
1818
1819
1820
/*!
1821
 * \brief   boxaaAlignBox()
1822
 *
1823
 * \param[in]    baa
1824
 * \param[in]    box      to be aligned with bext boxa in the baa, if possible
1825
 * \param[in]    delta    amount by which consecutive components can miss
1826
 *                        in overlap and still be included in the array
1827
 * \param[out]   pindex   index of boxa with best overlap, or if none match,
1828
 *                        this is the index of the next boxa to be generated
1829
 * \return  0 if OK, 1 on error
1830
 *
1831
 * <pre>
1832
 * Notes:
1833
 *      (1) This is not greedy.  It finds the boxa whose vertical
1834
 *          extent has the closest overlap with the input box.
1835
 * </pre>
1836
 */
1837
l_ok
1838
boxaaAlignBox(BOXAA    *baa,
1839
              BOX      *box,
1840
              l_int32   delta,
1841
              l_int32  *pindex)
1842
0
{
1843
0
l_int32  i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
1844
0
BOX     *boxt;
1845
0
BOXA    *boxa;
1846
1847
0
    if (pindex) *pindex = 0;
1848
0
    if (!baa)
1849
0
        return ERROR_INT("baa not defined", __func__, 1);
1850
0
    if (!box)
1851
0
        return ERROR_INT("box not defined", __func__, 1);
1852
0
    if (!pindex)
1853
0
        return ERROR_INT("&index not defined", __func__, 1);
1854
1855
0
    n = boxaaGetCount(baa);
1856
0
    boxGetGeometry(box, NULL, &y, NULL, &h);
1857
0
    maxovlp = -10000000;
1858
0
    for (i = 0; i < n; i++) {
1859
0
        boxa = boxaaGetBoxa(baa, i, L_CLONE);
1860
0
        if ((m = boxaGetCount(boxa)) == 0) {
1861
0
            boxaDestroy(&boxa);
1862
0
            L_WARNING("no boxes in boxa\n", __func__);
1863
0
            continue;
1864
0
        }
1865
0
        boxaGetExtent(boxa, NULL, NULL, &boxt);
1866
0
        boxGetGeometry(boxt, NULL, &yt, NULL, &ht);
1867
0
        boxDestroy(&boxt);
1868
0
        boxaDestroy(&boxa);
1869
1870
            /* Overlap < 0 means the components do not overlap vertically */
1871
0
        if (yt >= y)
1872
0
            ovlp = y + h - 1 - yt;
1873
0
        else
1874
0
            ovlp = yt + ht - 1 - y;
1875
0
        if (ovlp > maxovlp) {
1876
0
            maxovlp = ovlp;
1877
0
            maxindex = i;
1878
0
        }
1879
0
    }
1880
1881
0
    if (maxovlp + delta >= 0)
1882
0
        *pindex = maxindex;
1883
0
    else
1884
0
        *pindex = n;
1885
0
    return 0;
1886
0
}