Coverage Report

Created: 2025-06-13 06:49

/src/leptonica/src/ptafunc2.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  ptafunc2.c
29
 * <pre>
30
 *
31
 *      --------------------------------------
32
 *      This file has these Pta utilities:
33
 *         - sorting
34
 *         - ordered set operations
35
 *         - hash map operations
36
 *      --------------------------------------
37
 *
38
 *      Sorting
39
 *           PTA        *ptaSort()
40
 *           l_int32     ptaGetSortIndex()
41
 *           PTA        *ptaSortByIndex()
42
 *           PTAA       *ptaaSortByIndex()
43
 *           l_int32     ptaGetRankValue()
44
 *           PTA        *ptaSort2d()
45
 *           l_int32     ptaEqual()
46
 *
47
 *      Set operations using aset (rbtree)
48
 *           L_ASET     *l_asetCreateFromPta()
49
 *           PTA        *ptaRemoveDupsByAset()
50
 *           PTA        *ptaUnionByAset()
51
 *           PTA        *ptaIntersectionByAset()
52
 *
53
 *      Hashmap operations
54
 *          L_HASHMAP   *l_hmapCreateFromPta()
55
 *          l_int32      ptaRemoveDupsByHmap()
56
 *          l_int32      ptaUnionByHmap()
57
 *          l_int32      ptaIntersectionByHmap()
58
 *
59
 * We have two implementations of set operations on an array of points:
60
 *
61
 *   (1) Using an underlying tree (rbtree)
62
 *       This uses a good 64 bit hashing function for the key,
63
 *       that is not expected to have hash collisions (and we do
64
 *       not test for them).  The tree is built up of the keys,
65
 *       values, and is traversed looking for the key in O(log n).
66
 *
67
 *   (2) Building a hashmap from the keys (hashmap)
68
 *       This uses a fast 64 bit hashing function for the key, which
69
 *       is then hashed into a hashtable.  Collisions of hashkeys are
70
 *       very rare, but the hashtable is designed to allow more than one
71
 *       hashitem in a table entry.  The hashitems are put in a list at
72
 *       each hashtable entry, which is traversed looking for the key.
73
 *
74
 * </pre>
75
 */
76
77
#ifdef HAVE_CONFIG_H
78
#include <config_auto.h>
79
#endif  /* HAVE_CONFIG_H */
80
81
#include "allheaders.h"
82
83
/*---------------------------------------------------------------------*
84
 *                               Sorting                               *
85
 *---------------------------------------------------------------------*/
86
/*!
87
 * \brief   ptaSort()
88
 *
89
 * \param[in]    ptas
90
 * \param[in]    sorttype    L_SORT_BY_X, L_SORT_BY_Y
91
 * \param[in]    sortorder   L_SORT_INCREASING, L_SORT_DECREASING
92
 * \param[out]   pnaindex    [optional] index of sorted order into
93
 *                           original array
94
 * \return  ptad sorted version of ptas, or NULL on error
95
 */
96
PTA *
97
ptaSort(PTA     *ptas,
98
        l_int32  sorttype,
99
        l_int32  sortorder,
100
        NUMA   **pnaindex)
101
0
{
102
0
PTA   *ptad;
103
0
NUMA  *naindex;
104
105
0
    if (pnaindex) *pnaindex = NULL;
106
0
    if (!ptas)
107
0
        return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL);
108
0
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)
109
0
        return (PTA *)ERROR_PTR("invalid sort type", __func__, NULL);
110
0
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
111
0
        return (PTA *)ERROR_PTR("invalid sort order", __func__, NULL);
112
113
0
    if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0)
114
0
        return (PTA *)ERROR_PTR("naindex not made", __func__, NULL);
115
116
0
    ptad = ptaSortByIndex(ptas, naindex);
117
0
    if (pnaindex)
118
0
        *pnaindex = naindex;
119
0
    else
120
0
        numaDestroy(&naindex);
121
0
    if (!ptad)
122
0
        return (PTA *)ERROR_PTR("ptad not made", __func__, NULL);
123
0
    return ptad;
124
0
}
125
126
127
/*!
128
 * \brief   ptaGetSortIndex()
129
 *
130
 * \param[in]    ptas
131
 * \param[in]    sorttype    L_SORT_BY_X, L_SORT_BY_Y
132
 * \param[in]    sortorder   L_SORT_INCREASING, L_SORT_DECREASING
133
 * \param[out]   pnaindex    index of sorted order into original array
134
 * \return  0 if OK, 1 on error
135
 */
136
l_ok
137
ptaGetSortIndex(PTA     *ptas,
138
                l_int32  sorttype,
139
                l_int32  sortorder,
140
                NUMA   **pnaindex)
141
0
{
142
0
l_int32    i, n;
143
0
l_float32  x, y;
144
0
NUMA      *na, *nai;
145
146
0
    if (!pnaindex)
147
0
        return ERROR_INT("&naindex not defined", __func__, 1);
148
0
    *pnaindex = NULL;
149
0
    if (!ptas)
150
0
        return ERROR_INT("ptas not defined", __func__, 1);
151
0
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)
152
0
        return ERROR_INT("invalid sort type", __func__, 1);
153
0
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
154
0
        return ERROR_INT("invalid sort order", __func__, 1);
155
156
        /* Build up numa of specific data */
157
0
    n = ptaGetCount(ptas);
158
0
    if ((na = numaCreate(n)) == NULL)
159
0
        return ERROR_INT("na not made", __func__, 1);
160
0
    for (i = 0; i < n; i++) {
161
0
        ptaGetPt(ptas, i, &x, &y);
162
0
        if (sorttype == L_SORT_BY_X)
163
0
            numaAddNumber(na, x);
164
0
        else
165
0
            numaAddNumber(na, y);
166
0
    }
167
168
        /* Get the sort index for data array */
169
0
    nai = numaGetSortIndex(na, sortorder);
170
0
    numaDestroy(&na);
171
0
    if (!nai)
172
0
        return ERROR_INT("naindex not made", __func__, 1);
173
0
    *pnaindex = nai;
174
0
    return 0;
175
0
}
176
177
178
/*!
179
 * \brief   ptaSortByIndex()
180
 *
181
 * \param[in]    ptas
182
 * \param[in]    naindex    na that maps from the new pta to the input pta
183
 * \return  ptad sorted, or NULL on  error
184
 */
185
PTA *
186
ptaSortByIndex(PTA   *ptas,
187
               NUMA  *naindex)
188
0
{
189
0
l_int32    i, index, n;
190
0
l_float32  x, y;
191
0
PTA       *ptad;
192
193
0
    if (!ptas)
194
0
        return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL);
195
0
    if (!naindex)
196
0
        return (PTA *)ERROR_PTR("naindex not defined", __func__, NULL);
197
198
        /* Build up sorted pta using sort index */
199
0
    n = numaGetCount(naindex);
200
0
    if ((ptad = ptaCreate(n)) == NULL)
201
0
        return (PTA *)ERROR_PTR("ptad not made", __func__, NULL);
202
0
    for (i = 0; i < n; i++) {
203
0
        numaGetIValue(naindex, i, &index);
204
0
        ptaGetPt(ptas, index, &x, &y);
205
0
        ptaAddPt(ptad, x, y);
206
0
    }
207
208
0
    return ptad;
209
0
}
210
211
212
/*!
213
 * \brief   ptaaSortByIndex()
214
 *
215
 * \param[in]    ptaas
216
 * \param[in]    naindex    na that maps from the new ptaa to the input ptaa
217
 * \return  ptaad sorted, or NULL on error
218
 */
219
PTAA *
220
ptaaSortByIndex(PTAA  *ptaas,
221
                NUMA  *naindex)
222
0
{
223
0
l_int32  i, n, index;
224
0
PTA     *pta;
225
0
PTAA    *ptaad;
226
227
0
    if (!ptaas)
228
0
        return (PTAA *)ERROR_PTR("ptaas not defined", __func__, NULL);
229
0
    if (!naindex)
230
0
        return (PTAA *)ERROR_PTR("naindex not defined", __func__, NULL);
231
232
0
    n = ptaaGetCount(ptaas);
233
0
    if (numaGetCount(naindex) != n)
234
0
        return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", __func__, NULL);
235
0
    ptaad = ptaaCreate(n);
236
0
    for (i = 0; i < n; i++) {
237
0
        numaGetIValue(naindex, i, &index);
238
0
        pta = ptaaGetPta(ptaas, index, L_COPY);
239
0
        ptaaAddPta(ptaad, pta, L_INSERT);
240
0
    }
241
242
0
    return ptaad;
243
0
}
244
245
246
/*!
247
 * \brief   ptaGetRankValue()
248
 *
249
 * \param[in]    pta
250
 * \param[in]    fract      use 0.0 for smallest, 1.0 for largest
251
 * \param[in]    ptasort    [optional] version of %pta sorted by %sorttype
252
 * \param[in]    sorttype   L_SORT_BY_X, L_SORT_BY_Y
253
 * \param[out]   pval       rankval: the x or y value at %fract
254
 * \return  0 if OK, 1 on error
255
 */
256
l_ok
257
ptaGetRankValue(PTA        *pta,
258
                l_float32   fract,
259
                PTA        *ptasort,
260
                l_int32     sorttype,
261
                l_float32  *pval)
262
0
{
263
0
l_int32  index, n;
264
0
PTA     *ptas;
265
266
0
    if (!pval)
267
0
        return ERROR_INT("&val not defined", __func__, 1);
268
0
    *pval = 0.0;
269
0
    if (!pta)
270
0
        return ERROR_INT("pta not defined", __func__, 1);
271
0
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)
272
0
        return ERROR_INT("invalid sort type", __func__, 1);
273
0
    if (fract < 0.0 || fract > 1.0)
274
0
        return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1);
275
0
    if ((n = ptaGetCount(pta)) == 0)
276
0
        return ERROR_INT("pta empty", __func__, 1);
277
278
0
    if (ptasort)
279
0
        ptas = ptasort;
280
0
    else
281
0
        ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL);
282
283
0
    index = (l_int32)(fract * (l_float32)(n - 1) + 0.5);
284
0
    if (sorttype == L_SORT_BY_X)
285
0
        ptaGetPt(ptas, index, pval, NULL);
286
0
    else  /* sort by y */
287
0
        ptaGetPt(ptas, index, NULL, pval);
288
289
0
    if (!ptasort) ptaDestroy(&ptas);
290
0
    return 0;
291
0
}
292
293
294
/*!
295
 * \brief   ptaSort2d()
296
 *
297
 * \param[in]    ptas
298
 * \return  ptad, or NULL on error
299
 *
300
 * <pre>
301
 * Notes:
302
 *      (1) Sort increasing by row-major, scanning down from the UL corner,
303
 *          where for each value of y, order the pts from left to right.
304
 * </pre>
305
 */
306
PTA *
307
ptaSort2d(PTA  *pta)
308
0
{
309
0
l_int32    index, i, j, n, nx, ny, start, end;
310
0
l_float32  x, y, yp, val;
311
0
NUMA      *na1, *na2, *nas, *nax;
312
0
PTA       *pta1, *ptad;
313
314
0
    if (!pta)
315
0
        return (PTA *)ERROR_PTR("pta not defined", __func__, NULL);
316
317
        /* Sort by row-major (y first, then x).  After sort by y,
318
         * the x values at the same y are not sorted.  */
319
0
    pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
320
321
        /* Find start and ending indices with the same y value */
322
0
    n = ptaGetCount(pta1);
323
0
    na1 = numaCreate(0);  /* holds start index of sequence with same y */
324
0
    na2 = numaCreate(0);  /* holds end index of sequence with same y */
325
0
    numaAddNumber(na1, 0);
326
0
    ptaGetPt(pta1, 0, &x, &yp);
327
0
    for (i = 1; i < n; i++) {
328
0
        ptaGetPt(pta1, i, &x, &y);
329
0
        if (y != yp) {
330
0
            numaAddNumber(na1, i);
331
0
            numaAddNumber(na2, i - 1);
332
0
        }
333
0
        yp = y;
334
0
    }
335
0
    numaAddNumber(na2, n - 1);
336
337
        /* Sort by increasing x each set with the same y value */
338
0
    ptad = ptaCreate(n);
339
0
    ny = numaGetCount(na1);   /* number of distinct y values */
340
0
    for (i = 0, index = 0; i < ny; i++) {
341
0
        numaGetIValue(na1, i, &start);
342
0
        numaGetIValue(na2, i, &end);
343
0
        nx = end - start + 1;  /* number of points with current y value */
344
0
        if (nx == 1) {
345
0
            ptaGetPt(pta1, index++, &x, &y);
346
0
            ptaAddPt(ptad, x, y);
347
0
        } else {
348
                /* More than 1 point; extract and sort the x values */
349
0
            nax = numaCreate(nx);
350
0
            for (j = 0; j < nx; j++) {
351
0
                 ptaGetPt(pta1, index + j, &x, &y);
352
0
                 numaAddNumber(nax, x);
353
0
            }
354
0
            nas = numaSort(NULL, nax, L_SORT_INCREASING);
355
                /* Add the points with x sorted */
356
0
            for (j = 0; j < nx; j++) {
357
0
                numaGetFValue(nas, j, &val);
358
0
                ptaAddPt(ptad, val, y);
359
0
            }
360
0
            index += nx;
361
0
            numaDestroy(&nax);
362
0
            numaDestroy(&nas);
363
0
        }
364
0
    }
365
0
    numaDestroy(&na1);
366
0
    numaDestroy(&na2);
367
0
    ptaDestroy(&pta1);
368
0
    return ptad;
369
0
}
370
371
372
/*!
373
 * \brief   ptaEqual()
374
 *
375
 * \param[in]    pta1
376
 * \param[in]    pta2
377
 * \param[out]   psame  1 if same; 0 if different
378
 * \return  0 if OK; 1 on error
379
 *
380
 * <pre>
381
 * Notes:
382
 *      (1) Equality is defined as having the same set of points,
383
 *          independent of the order in which they are presented.
384
 * </pre>
385
 */
386
l_ok
387
ptaEqual(PTA      *pta1,
388
         PTA      *pta2,
389
         l_int32  *psame)
390
0
{
391
0
l_int32    i, n1, n2;
392
0
l_float32  x1, y1, x2, y2;
393
0
PTA       *ptas1, *ptas2;
394
395
0
    if (!psame)
396
0
        return ERROR_INT("&same not defined", __func__, 1);
397
0
    *psame = 0.0f;
398
0
    if (!pta1 || !pta2)
399
0
        return ERROR_INT("pta1 and pta2 not both defined", __func__, 1);
400
401
0
    n1 = ptaGetCount(pta1);
402
0
    n2 = ptaGetCount(pta2);
403
0
    if (n1 != n2) return 0;
404
405
        /* 2d sort each and compare */
406
0
    ptas1 = ptaSort2d(pta1);
407
0
    ptas2 = ptaSort2d(pta2);
408
0
    for (i = 0; i < n1; i++) {
409
0
        ptaGetPt(ptas1, i, &x1, &y1);
410
0
        ptaGetPt(ptas2, i, &x2, &y2);
411
0
        if (x1 != x2 || y1 != y2) {
412
0
            ptaDestroy(&ptas1);
413
0
            ptaDestroy(&ptas2);
414
0
            return 0;
415
0
        }
416
0
    }
417
418
0
    *psame = 1;
419
0
    ptaDestroy(&ptas1);
420
0
    ptaDestroy(&ptas2);
421
0
    return 0;
422
0
}
423
424
425
/*---------------------------------------------------------------------*
426
 *                   Set operations using aset (rbtree)                *
427
 *---------------------------------------------------------------------*/
428
/*!
429
 * \brief   l_asetCreateFromPta()
430
 *
431
 * \param[in]    pta
432
 * \return  set using a 64-bit hash of (x,y) as the key
433
 */
434
L_ASET *
435
l_asetCreateFromPta(PTA  *pta)
436
0
{
437
0
l_int32   i, n, x, y;
438
0
l_uint64  hash;
439
0
L_ASET   *set;
440
0
RB_TYPE   key;
441
442
0
    if (!pta)
443
0
        return (L_ASET *)ERROR_PTR("pta not defined", __func__, NULL);
444
445
0
    set = l_asetCreate(L_UINT_TYPE);
446
0
    n = ptaGetCount(pta);
447
0
    for (i = 0; i < n; i++) {
448
0
        ptaGetIPt(pta, i, &x, &y);
449
0
        l_hashPtToUint64(x, y, &hash);
450
0
        key.utype = hash;
451
0
        l_asetInsert(set, key);
452
0
    }
453
454
0
    return set;
455
0
}
456
457
458
/*!
459
 * \brief   ptaRemoveDupsByAset()
460
 *
461
 * \param[in]    ptas     assumed to be integer values
462
 * \param[out]   pptad    assumed to be integer values
463
 * \return  0 if OK; 1 on error
464
 *
465
 * <pre>
466
 * Notes:
467
 *      (1) This is slower than ptaRemoveDupsByHmap(), mostly because
468
 *          of the nlogn sort to build up the rbtree.  Do not use for
469
 *          large numbers of points (say, > 100K).
470
 * </pre>
471
 */
472
l_ok
473
ptaRemoveDupsByAset(PTA   *ptas,
474
                    PTA  **pptad)
475
0
{
476
0
l_int32   i, n, x, y;
477
0
PTA      *ptad;
478
0
l_uint64  hash;
479
0
L_ASET   *set;
480
0
RB_TYPE   key;
481
482
0
    if (!pptad)
483
0
        return ERROR_INT("&ptad not defined", __func__, 1);
484
0
    *pptad = NULL;
485
0
    if (!ptas)
486
0
        return ERROR_INT("ptas not defined", __func__, 1);
487
488
0
    set = l_asetCreate(L_UINT_TYPE);
489
0
    n = ptaGetCount(ptas);
490
0
    ptad = ptaCreate(n);
491
0
    *pptad = ptad;
492
0
    for (i = 0; i < n; i++) {
493
0
        ptaGetIPt(ptas, i, &x, &y);
494
0
        l_hashPtToUint64(x, y, &hash);
495
0
        key.utype = hash;
496
0
        if (!l_asetFind(set, key)) {
497
0
            ptaAddPt(ptad, x, y);
498
0
            l_asetInsert(set, key);
499
0
        }
500
0
    }
501
502
0
    l_asetDestroy(&set);
503
0
    return 0;
504
0
}
505
506
507
/*!
508
 * \brief   ptaUnionByAset()
509
 *
510
 * \param[in]    pta1
511
 * \param[in]    pta2
512
 * \param[out]   pptad     union of the two point arrays
513
 * \return  0 if OK; 1 on error
514
 *
515
 * <pre>
516
 * Notes:
517
 *      (1) See sarrayRemoveDupsByAset() for the approach.
518
 *      (2) The key is a 64-bit hash from the (x,y) pair.
519
 *      (3) This is slower than ptaUnionByHmap(), mostly because of the
520
 *          nlogn sort to build up the rbtree.  Do not use for large
521
 *          numbers of points (say, > 100K).
522
 *      (4) The *Aset() functions use the sorted l_Aset, which is just
523
 *          an rbtree in disguise.
524
 * </pre>
525
 */
526
l_ok
527
ptaUnionByAset(PTA   *pta1,
528
               PTA   *pta2,
529
               PTA  **pptad)
530
0
{
531
0
PTA  *pta3;
532
533
0
    if (!pptad)
534
0
        return ERROR_INT("&ptad not defined", __func__, 1);
535
0
    *pptad = NULL;
536
0
    if (!pta1)
537
0
        return ERROR_INT("pta1 not defined", __func__, 1);
538
0
    if (!pta2)
539
0
        return ERROR_INT("pta2 not defined", __func__, 1);
540
541
        /* Join */
542
0
    pta3 = ptaCopy(pta1);
543
0
    ptaJoin(pta3, pta2, 0, -1);
544
545
        /* Eliminate duplicates */
546
0
    ptaRemoveDupsByAset(pta3, pptad);
547
0
    ptaDestroy(&pta3);
548
0
    return 0;
549
0
}
550
551
552
/*!
553
 * \brief   ptaIntersectionByAset()
554
 *
555
 * \param[in]    pta1
556
 * \param[in]    pta2
557
 * \param[out]   pptad       intersection of the two point arrays
558
 * \return  0 if OK; 1 on error
559
 *
560
 * <pre>
561
 * Notes:
562
 *      (1) See sarrayIntersectionByAset() for the approach.
563
 *      (2) The key is a 64-bit hash from the (x,y) pair.
564
 *      (3) This is slower than ptaIntersectionByHmap(), mostly because
565
 *          of the nlogn sort to build up the rbtree.  Do not use for
566
 *          large numbers of points (say, > 100K).
567
 * </pre>
568
 */
569
l_ok
570
ptaIntersectionByAset(PTA   *pta1,
571
                      PTA   *pta2,
572
                      PTA  **pptad)
573
0
{
574
0
l_int32   n1, n2, i, n, x, y;
575
0
l_uint64  hash;
576
0
L_ASET   *set1, *set2;
577
0
RB_TYPE   key;
578
0
PTA      *pta_small, *pta_big, *ptad;
579
580
0
    if (!pptad)
581
0
        return ERROR_INT("&ptad not defined", __func__, 1);
582
0
    *pptad = NULL;
583
0
    if (!pta1)
584
0
        return ERROR_INT("pta1 not defined", __func__, 1);
585
0
    if (!pta2)
586
0
        return ERROR_INT("pta2 not defined", __func__, 1);
587
588
        /* Put the elements of the biggest array into a set */
589
0
    n1 = ptaGetCount(pta1);
590
0
    n2 = ptaGetCount(pta2);
591
0
    pta_small = (n1 < n2) ? pta1 : pta2;   /* do not destroy pta_small */
592
0
    pta_big = (n1 < n2) ? pta2 : pta1;   /* do not destroy pta_big */
593
0
    set1 = l_asetCreateFromPta(pta_big);
594
595
        /* Build up the intersection of points */
596
0
    ptad = ptaCreate(0);
597
0
    *pptad = ptad;
598
0
    n = ptaGetCount(pta_small);
599
0
    set2 = l_asetCreate(L_UINT_TYPE);
600
0
    for (i = 0; i < n; i++) {
601
0
        ptaGetIPt(pta_small, i, &x, &y);
602
0
        l_hashPtToUint64(x, y, &hash);
603
0
        key.utype = hash;
604
0
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
605
0
            ptaAddPt(ptad, x, y);
606
0
            l_asetInsert(set2, key);
607
0
        }
608
0
    }
609
610
0
    l_asetDestroy(&set1);
611
0
    l_asetDestroy(&set2);
612
0
    return 0;
613
0
}
614
615
616
/*--------------------------------------------------------------------------*
617
 *                            Hashmap operations                            *
618
 *--------------------------------------------------------------------------*/
619
/*!
620
 * \brief  l_hmapCreateFromPta()
621
 *
622
 * \param[in]   pta     input pta
623
 * \return      hmap    hashmap, or NULL on error
624
 *
625
 * <pre>
626
 *  Notes:
627
 *       (1) The indices into %pta are stored in the val field of the hashitems.
628
 *           This is necessary so that %hmap and %pta can be used together.
629
 * </pre>
630
 */
631
L_HASHMAP *
632
l_hmapCreateFromPta(PTA  *pta)
633
0
{
634
0
l_int32      i, n, x, y;
635
0
l_uint64     key;
636
0
L_HASHMAP   *hmap;
637
638
0
    if (!pta)
639
0
        return (L_HASHMAP *)ERROR_PTR("pta not defined", __func__, NULL);
640
641
0
    n = ptaGetCount(pta);
642
0
    if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
643
0
        return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL);
644
0
    for (i = 0; i < n; i++) {
645
0
        ptaGetIPt(pta, i, &x, &y);
646
0
        l_hashPtToUint64(x, y, &key);
647
0
        l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
648
0
    }
649
0
    return hmap;
650
0
}
651
652
653
/*!
654
 * \brief  ptaRemoveDupsByHmap()
655
 *
656
 * \param[in]   ptas
657
 * \param[out]  pptad    set of unique values
658
 * \param[out]  phmap    [optional] hashmap used for lookup
659
 * \return  0 if OK; 1 on error
660
 *
661
 * <pre>
662
 *  Notes:
663
 *       (1) Generates a set of (unique) points from %ptas.
664
 * </pre>
665
 */
666
l_ok
667
ptaRemoveDupsByHmap(PTA         *ptas,
668
                    PTA        **pptad,
669
                    L_HASHMAP  **phmap)
670
0
{
671
0
l_int32      i, x, y, tabsize;
672
0
PTA         *ptad;
673
0
L_HASHITEM  *hitem;
674
0
L_HASHMAP   *hmap;
675
676
0
    if (phmap) *phmap = NULL;
677
0
    if (!pptad)
678
0
        return ERROR_INT("&ptad not defined", __func__, 1);
679
0
    *pptad = NULL;
680
0
    if (!ptas)
681
0
        return ERROR_INT("ptas not defined", __func__, 1);
682
683
        /* Traverse the hashtable lists */
684
0
    if ((hmap = l_hmapCreateFromPta(ptas)) == NULL)
685
0
        return ERROR_INT("hmap not made", __func__, 1);
686
0
    ptad = ptaCreate(0);
687
0
    *pptad = ptad;
688
0
    tabsize = hmap->tabsize;
689
0
    for (i = 0; i < tabsize; i++) {
690
0
        hitem = hmap->hashtab[i];
691
0
        while (hitem) {
692
0
            ptaGetIPt(ptas, hitem->val, &x, &y);
693
0
            ptaAddPt(ptad, x, y);
694
0
            hitem = hitem->next;
695
0
        }
696
0
    }
697
698
0
    if (phmap)
699
0
        *phmap = hmap;
700
0
    else
701
0
        l_hmapDestroy(&hmap);
702
0
    return 0;
703
0
}
704
705
706
/*!
707
 * \brief  ptaUnionByHmap()
708
 *
709
 * \param[in]   pta1
710
 * \param[in]   pta2
711
 * \param[out]  pptad     union of the two point arrays
712
 * \return  0 if OK; 1 on error
713
 *
714
 * <pre>
715
 *  Notes:
716
 *       (1) Make pta with points found in either of the input arrays.
717
 * </pre>
718
 */
719
l_ok
720
ptaUnionByHmap(PTA   *pta1,
721
               PTA   *pta2,
722
               PTA  **pptad)
723
0
{
724
0
PTA  *pta3;
725
726
0
    if (!pptad)
727
0
        return ERROR_INT("&ptad not defined", __func__, 1);
728
0
    *pptad = NULL;
729
0
    if (!pta1)
730
0
        return ERROR_INT("pta1 not defined", __func__, 1);
731
0
    if (!pta2)
732
0
        return ERROR_INT("pta2 not defined", __func__, 1);
733
734
0
    pta3 = ptaCopy(pta1);
735
0
    if (ptaJoin(pta3, pta2, 0, -1) == 1) {
736
0
        ptaDestroy(&pta3);
737
0
        return ERROR_INT("pta join failed", __func__, 1);
738
0
    }
739
0
    ptaRemoveDupsByHmap(pta3, pptad, NULL);
740
0
    ptaDestroy(&pta3);
741
0
    return 0;
742
0
}
743
744
745
/*!
746
 * \brief  ptaIntersectionByHmap()
747
 *
748
 * \param[in]    pta1
749
 * \param[in]    pta2
750
 * \param[out]   pptad     intersection of the two point arrays
751
 * \return  0 if OK; 1 on error
752
 *
753
 * <pre>
754
 *  Notes:
755
 *       (1) Make pta with pts common to both input arrays.
756
 * </pre>
757
 */
758
l_ok
759
ptaIntersectionByHmap(PTA   *pta1,
760
                      PTA   *pta2,
761
                      PTA  **pptad)
762
0
{
763
0
l_int32      i, n1, n2, n, x, y;
764
0
l_uint64     key;
765
0
PTA         *pta_small, *pta_big, *ptad;
766
0
L_HASHITEM  *hitem;
767
0
L_HASHMAP   *hmap;
768
769
0
    if (!pptad)
770
0
        return ERROR_INT("&ptad not defined", __func__, 1);
771
0
    *pptad = NULL;
772
0
    if (!pta1)
773
0
        return ERROR_INT("pta1 not defined", __func__, 1);
774
0
    if (!pta2)
775
0
        return ERROR_INT("pta2 not defined", __func__, 1);
776
777
        /* Make a hashmap for the elements of the biggest array */
778
0
    n1 = ptaGetCount(pta1);
779
0
    n2 = ptaGetCount(pta2);
780
0
    pta_small = (n1 < n2) ? pta1 : pta2;   /* do not destroy pta_small */
781
0
    pta_big = (n1 < n2) ? pta2 : pta1;   /* do not destroy pta_big */
782
0
    if ((hmap = l_hmapCreateFromPta(pta_big)) == NULL)
783
0
        return ERROR_INT("hmap not made", __func__, 1);
784
785
        /* Go through the smallest array, doing a lookup of its (x,y) into
786
         * the big array hashmap.  If an hitem is returned, check the count.
787
         * If the count is 0, ignore; otherwise, add the point to the
788
         * output ptad and set the count in the hitem to 0, indicating
789
         * that the point has already been added. */
790
0
    ptad = ptaCreate(0);
791
0
    *pptad = ptad;
792
0
    n = ptaGetCount(pta_small);
793
0
    for (i = 0; i < n; i++) {
794
0
        ptaGetIPt(pta_small, i, &x, &y);
795
0
        l_hashPtToUint64(x, y, &key);
796
0
        hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
797
0
        if (!hitem || hitem->count == 0)
798
0
            continue;
799
0
        ptaAddPt(ptad, x, y);
800
0
        hitem->count = 0;
801
0
    }
802
0
    l_hmapDestroy(&hmap);
803
0
    return 0;
804
0
}
805
806