/src/leptonica/src/ptafunc2.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*====================================================================*  | 
2  |  |  -  Copyright (C) 2001 Leptonica.  All rights reserved.  | 
3  |  |  -  | 
4  |  |  -  Redistribution and use in source and binary forms, with or without  | 
5  |  |  -  modification, are permitted provided that the following conditions  | 
6  |  |  -  are met:  | 
7  |  |  -  1. Redistributions of source code must retain the above copyright  | 
8  |  |  -     notice, this list of conditions and the following disclaimer.  | 
9  |  |  -  2. Redistributions in binary form must reproduce the above  | 
10  |  |  -     copyright notice, this list of conditions and the following  | 
11  |  |  -     disclaimer in the documentation and/or other materials  | 
12  |  |  -     provided with the distribution.  | 
13  |  |  -  | 
14  |  |  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS  | 
15  |  |  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT  | 
16  |  |  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  | 
17  |  |  -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY  | 
18  |  |  -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,  | 
19  |  |  -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,  | 
20  |  |  -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR  | 
21  |  |  -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY  | 
22  |  |  -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING  | 
23  |  |  -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS  | 
24  |  |  -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  | 
25  |  |  *====================================================================*/  | 
26  |  |  | 
27  |  | /*!  | 
28  |  |  * \file  ptafunc2.c  | 
29  |  |  * <pre>  | 
30  |  |  *  | 
31  |  |  *      --------------------------------------  | 
32  |  |  *      This file has these Pta utilities:  | 
33  |  |  *         - sorting  | 
34  |  |  *         - ordered set operations  | 
35  |  |  *         - hash map operations  | 
36  |  |  *      --------------------------------------  | 
37  |  |  *  | 
38  |  |  *      Sorting  | 
39  |  |  *           PTA        *ptaSort()  | 
40  |  |  *           l_int32     ptaGetSortIndex()  | 
41  |  |  *           PTA        *ptaSortByIndex()  | 
42  |  |  *           PTAA       *ptaaSortByIndex()  | 
43  |  |  *           l_int32     ptaGetRankValue()  | 
44  |  |  *           PTA        *ptaSort2d()  | 
45  |  |  *           l_int32     ptaEqual()  | 
46  |  |  *  | 
47  |  |  *      Set operations using aset (rbtree)  | 
48  |  |  *           L_ASET     *l_asetCreateFromPta()  | 
49  |  |  *           PTA        *ptaRemoveDupsByAset()  | 
50  |  |  *           PTA        *ptaUnionByAset()  | 
51  |  |  *           PTA        *ptaIntersectionByAset()  | 
52  |  |  *  | 
53  |  |  *      Hashmap operations  | 
54  |  |  *          L_HASHMAP   *l_hmapCreateFromPta()  | 
55  |  |  *          l_int32      ptaRemoveDupsByHmap()  | 
56  |  |  *          l_int32      ptaUnionByHmap()  | 
57  |  |  *          l_int32      ptaIntersectionByHmap()  | 
58  |  |  *  | 
59  |  |  * We have two implementations of set operations on an array of points:  | 
60  |  |  *  | 
61  |  |  *   (1) Using an underlying tree (rbtree)  | 
62  |  |  *       This uses a good 64 bit hashing function for the key,  | 
63  |  |  *       that is not expected to have hash collisions (and we do  | 
64  |  |  *       not test for them).  The tree is built up of the keys,  | 
65  |  |  *       values, and is traversed looking for the key in O(log n).  | 
66  |  |  *  | 
67  |  |  *   (2) Building a hashmap from the keys (hashmap)  | 
68  |  |  *       This uses a fast 64 bit hashing function for the key, which  | 
69  |  |  *       is then hashed into a hashtable.  Collisions of hashkeys are  | 
70  |  |  *       very rare, but the hashtable is designed to allow more than one  | 
71  |  |  *       hashitem in a table entry.  The hashitems are put in a list at  | 
72  |  |  *       each hashtable entry, which is traversed looking for the key.  | 
73  |  |  *  | 
74  |  |  * </pre>  | 
75  |  |  */  | 
76  |  |  | 
77  |  | #ifdef HAVE_CONFIG_H  | 
78  |  | #include <config_auto.h>  | 
79  |  | #endif  /* HAVE_CONFIG_H */  | 
80  |  |  | 
81  |  | #include "allheaders.h"  | 
82  |  |  | 
83  |  | /*---------------------------------------------------------------------*  | 
84  |  |  *                               Sorting                               *  | 
85  |  |  *---------------------------------------------------------------------*/  | 
86  |  | /*!  | 
87  |  |  * \brief   ptaSort()  | 
88  |  |  *  | 
89  |  |  * \param[in]    ptas  | 
90  |  |  * \param[in]    sorttype    L_SORT_BY_X, L_SORT_BY_Y  | 
91  |  |  * \param[in]    sortorder   L_SORT_INCREASING, L_SORT_DECREASING  | 
92  |  |  * \param[out]   pnaindex    [optional] index of sorted order into  | 
93  |  |  *                           original array  | 
94  |  |  * \return  ptad sorted version of ptas, or NULL on error  | 
95  |  |  */  | 
96  |  | PTA *  | 
97  |  | ptaSort(PTA     *ptas,  | 
98  |  |         l_int32  sorttype,  | 
99  |  |         l_int32  sortorder,  | 
100  |  |         NUMA   **pnaindex)  | 
101  | 0  | { | 
102  | 0  | PTA   *ptad;  | 
103  | 0  | NUMA  *naindex;  | 
104  |  | 
  | 
105  | 0  |     if (pnaindex) *pnaindex = NULL;  | 
106  | 0  |     if (!ptas)  | 
107  | 0  |         return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL); | 
108  | 0  |     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)  | 
109  | 0  |         return (PTA *)ERROR_PTR("invalid sort type", __func__, NULL); | 
110  | 0  |     if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)  | 
111  | 0  |         return (PTA *)ERROR_PTR("invalid sort order", __func__, NULL); | 
112  |  |  | 
113  | 0  |     if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0)  | 
114  | 0  |         return (PTA *)ERROR_PTR("naindex not made", __func__, NULL); | 
115  |  |  | 
116  | 0  |     ptad = ptaSortByIndex(ptas, naindex);  | 
117  | 0  |     if (pnaindex)  | 
118  | 0  |         *pnaindex = naindex;  | 
119  | 0  |     else  | 
120  | 0  |         numaDestroy(&naindex);  | 
121  | 0  |     if (!ptad)  | 
122  | 0  |         return (PTA *)ERROR_PTR("ptad not made", __func__, NULL); | 
123  | 0  |     return ptad;  | 
124  | 0  | }  | 
125  |  |  | 
126  |  |  | 
127  |  | /*!  | 
128  |  |  * \brief   ptaGetSortIndex()  | 
129  |  |  *  | 
130  |  |  * \param[in]    ptas  | 
131  |  |  * \param[in]    sorttype    L_SORT_BY_X, L_SORT_BY_Y  | 
132  |  |  * \param[in]    sortorder   L_SORT_INCREASING, L_SORT_DECREASING  | 
133  |  |  * \param[out]   pnaindex    index of sorted order into original array  | 
134  |  |  * \return  0 if OK, 1 on error  | 
135  |  |  */  | 
136  |  | l_ok  | 
137  |  | ptaGetSortIndex(PTA     *ptas,  | 
138  |  |                 l_int32  sorttype,  | 
139  |  |                 l_int32  sortorder,  | 
140  |  |                 NUMA   **pnaindex)  | 
141  | 0  | { | 
142  | 0  | l_int32    i, n;  | 
143  | 0  | l_float32  x, y;  | 
144  | 0  | NUMA      *na, *nai;  | 
145  |  | 
  | 
146  | 0  |     if (!pnaindex)  | 
147  | 0  |         return ERROR_INT("&naindex not defined", __func__, 1); | 
148  | 0  |     *pnaindex = NULL;  | 
149  | 0  |     if (!ptas)  | 
150  | 0  |         return ERROR_INT("ptas not defined", __func__, 1); | 
151  | 0  |     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)  | 
152  | 0  |         return ERROR_INT("invalid sort type", __func__, 1); | 
153  | 0  |     if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)  | 
154  | 0  |         return ERROR_INT("invalid sort order", __func__, 1); | 
155  |  |  | 
156  |  |         /* Build up numa of specific data */  | 
157  | 0  |     n = ptaGetCount(ptas);  | 
158  | 0  |     if ((na = numaCreate(n)) == NULL)  | 
159  | 0  |         return ERROR_INT("na not made", __func__, 1); | 
160  | 0  |     for (i = 0; i < n; i++) { | 
161  | 0  |         ptaGetPt(ptas, i, &x, &y);  | 
162  | 0  |         if (sorttype == L_SORT_BY_X)  | 
163  | 0  |             numaAddNumber(na, x);  | 
164  | 0  |         else  | 
165  | 0  |             numaAddNumber(na, y);  | 
166  | 0  |     }  | 
167  |  |  | 
168  |  |         /* Get the sort index for data array */  | 
169  | 0  |     nai = numaGetSortIndex(na, sortorder);  | 
170  | 0  |     numaDestroy(&na);  | 
171  | 0  |     if (!nai)  | 
172  | 0  |         return ERROR_INT("naindex not made", __func__, 1); | 
173  | 0  |     *pnaindex = nai;  | 
174  | 0  |     return 0;  | 
175  | 0  | }  | 
176  |  |  | 
177  |  |  | 
178  |  | /*!  | 
179  |  |  * \brief   ptaSortByIndex()  | 
180  |  |  *  | 
181  |  |  * \param[in]    ptas  | 
182  |  |  * \param[in]    naindex    na that maps from the new pta to the input pta  | 
183  |  |  * \return  ptad sorted, or NULL on  error  | 
184  |  |  */  | 
185  |  | PTA *  | 
186  |  | ptaSortByIndex(PTA   *ptas,  | 
187  |  |                NUMA  *naindex)  | 
188  | 0  | { | 
189  | 0  | l_int32    i, index, n;  | 
190  | 0  | l_float32  x, y;  | 
191  | 0  | PTA       *ptad;  | 
192  |  | 
  | 
193  | 0  |     if (!ptas)  | 
194  | 0  |         return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL); | 
195  | 0  |     if (!naindex)  | 
196  | 0  |         return (PTA *)ERROR_PTR("naindex not defined", __func__, NULL); | 
197  |  |  | 
198  |  |         /* Build up sorted pta using sort index */  | 
199  | 0  |     n = numaGetCount(naindex);  | 
200  | 0  |     if ((ptad = ptaCreate(n)) == NULL)  | 
201  | 0  |         return (PTA *)ERROR_PTR("ptad not made", __func__, NULL); | 
202  | 0  |     for (i = 0; i < n; i++) { | 
203  | 0  |         numaGetIValue(naindex, i, &index);  | 
204  | 0  |         ptaGetPt(ptas, index, &x, &y);  | 
205  | 0  |         ptaAddPt(ptad, x, y);  | 
206  | 0  |     }  | 
207  |  | 
  | 
208  | 0  |     return ptad;  | 
209  | 0  | }  | 
210  |  |  | 
211  |  |  | 
212  |  | /*!  | 
213  |  |  * \brief   ptaaSortByIndex()  | 
214  |  |  *  | 
215  |  |  * \param[in]    ptaas  | 
216  |  |  * \param[in]    naindex    na that maps from the new ptaa to the input ptaa  | 
217  |  |  * \return  ptaad sorted, or NULL on error  | 
218  |  |  */  | 
219  |  | PTAA *  | 
220  |  | ptaaSortByIndex(PTAA  *ptaas,  | 
221  |  |                 NUMA  *naindex)  | 
222  | 0  | { | 
223  | 0  | l_int32  i, n, index;  | 
224  | 0  | PTA     *pta;  | 
225  | 0  | PTAA    *ptaad;  | 
226  |  | 
  | 
227  | 0  |     if (!ptaas)  | 
228  | 0  |         return (PTAA *)ERROR_PTR("ptaas not defined", __func__, NULL); | 
229  | 0  |     if (!naindex)  | 
230  | 0  |         return (PTAA *)ERROR_PTR("naindex not defined", __func__, NULL); | 
231  |  |  | 
232  | 0  |     n = ptaaGetCount(ptaas);  | 
233  | 0  |     if (numaGetCount(naindex) != n)  | 
234  | 0  |         return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", __func__, NULL); | 
235  | 0  |     ptaad = ptaaCreate(n);  | 
236  | 0  |     for (i = 0; i < n; i++) { | 
237  | 0  |         numaGetIValue(naindex, i, &index);  | 
238  | 0  |         pta = ptaaGetPta(ptaas, index, L_COPY);  | 
239  | 0  |         ptaaAddPta(ptaad, pta, L_INSERT);  | 
240  | 0  |     }  | 
241  |  | 
  | 
242  | 0  |     return ptaad;  | 
243  | 0  | }  | 
244  |  |  | 
245  |  |  | 
246  |  | /*!  | 
247  |  |  * \brief   ptaGetRankValue()  | 
248  |  |  *  | 
249  |  |  * \param[in]    pta  | 
250  |  |  * \param[in]    fract      use 0.0 for smallest, 1.0 for largest  | 
251  |  |  * \param[in]    ptasort    [optional] version of %pta sorted by %sorttype  | 
252  |  |  * \param[in]    sorttype   L_SORT_BY_X, L_SORT_BY_Y  | 
253  |  |  * \param[out]   pval       rankval: the x or y value at %fract  | 
254  |  |  * \return  0 if OK, 1 on error  | 
255  |  |  */  | 
256  |  | l_ok  | 
257  |  | ptaGetRankValue(PTA        *pta,  | 
258  |  |                 l_float32   fract,  | 
259  |  |                 PTA        *ptasort,  | 
260  |  |                 l_int32     sorttype,  | 
261  |  |                 l_float32  *pval)  | 
262  | 0  | { | 
263  | 0  | l_int32  index, n;  | 
264  | 0  | PTA     *ptas;  | 
265  |  | 
  | 
266  | 0  |     if (!pval)  | 
267  | 0  |         return ERROR_INT("&val not defined", __func__, 1); | 
268  | 0  |     *pval = 0.0;  | 
269  | 0  |     if (!pta)  | 
270  | 0  |         return ERROR_INT("pta not defined", __func__, 1); | 
271  | 0  |     if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)  | 
272  | 0  |         return ERROR_INT("invalid sort type", __func__, 1); | 
273  | 0  |     if (fract < 0.0 || fract > 1.0)  | 
274  | 0  |         return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1); | 
275  | 0  |     if ((n = ptaGetCount(pta)) == 0)  | 
276  | 0  |         return ERROR_INT("pta empty", __func__, 1); | 
277  |  |  | 
278  | 0  |     if (ptasort)  | 
279  | 0  |         ptas = ptasort;  | 
280  | 0  |     else  | 
281  | 0  |         ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL);  | 
282  |  | 
  | 
283  | 0  |     index = (l_int32)(fract * (l_float32)(n - 1) + 0.5);  | 
284  | 0  |     if (sorttype == L_SORT_BY_X)  | 
285  | 0  |         ptaGetPt(ptas, index, pval, NULL);  | 
286  | 0  |     else  /* sort by y */  | 
287  | 0  |         ptaGetPt(ptas, index, NULL, pval);  | 
288  |  | 
  | 
289  | 0  |     if (!ptasort) ptaDestroy(&ptas);  | 
290  | 0  |     return 0;  | 
291  | 0  | }  | 
292  |  |  | 
293  |  |  | 
294  |  | /*!  | 
295  |  |  * \brief   ptaSort2d()  | 
296  |  |  *  | 
297  |  |  * \param[in]    ptas  | 
298  |  |  * \return  ptad, or NULL on error  | 
299  |  |  *  | 
300  |  |  * <pre>  | 
301  |  |  * Notes:  | 
302  |  |  *      (1) Sort increasing by row-major, scanning down from the UL corner,  | 
303  |  |  *          where for each value of y, order the pts from left to right.  | 
304  |  |  * </pre>  | 
305  |  |  */  | 
306  |  | PTA *  | 
307  |  | ptaSort2d(PTA  *pta)  | 
308  | 0  | { | 
309  | 0  | l_int32    index, i, j, n, nx, ny, start, end;  | 
310  | 0  | l_float32  x, y, yp, val;  | 
311  | 0  | NUMA      *na1, *na2, *nas, *nax;  | 
312  | 0  | PTA       *pta1, *ptad;  | 
313  |  | 
  | 
314  | 0  |     if (!pta)  | 
315  | 0  |         return (PTA *)ERROR_PTR("pta not defined", __func__, NULL); | 
316  |  |  | 
317  |  |         /* Sort by row-major (y first, then x).  After sort by y,  | 
318  |  |          * the x values at the same y are not sorted.  */  | 
319  | 0  |     pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL);  | 
320  |  |  | 
321  |  |         /* Find start and ending indices with the same y value */  | 
322  | 0  |     n = ptaGetCount(pta1);  | 
323  | 0  |     na1 = numaCreate(0);  /* holds start index of sequence with same y */  | 
324  | 0  |     na2 = numaCreate(0);  /* holds end index of sequence with same y */  | 
325  | 0  |     numaAddNumber(na1, 0);  | 
326  | 0  |     ptaGetPt(pta1, 0, &x, &yp);  | 
327  | 0  |     for (i = 1; i < n; i++) { | 
328  | 0  |         ptaGetPt(pta1, i, &x, &y);  | 
329  | 0  |         if (y != yp) { | 
330  | 0  |             numaAddNumber(na1, i);  | 
331  | 0  |             numaAddNumber(na2, i - 1);  | 
332  | 0  |         }  | 
333  | 0  |         yp = y;  | 
334  | 0  |     }  | 
335  | 0  |     numaAddNumber(na2, n - 1);  | 
336  |  |  | 
337  |  |         /* Sort by increasing x each set with the same y value */  | 
338  | 0  |     ptad = ptaCreate(n);  | 
339  | 0  |     ny = numaGetCount(na1);   /* number of distinct y values */  | 
340  | 0  |     for (i = 0, index = 0; i < ny; i++) { | 
341  | 0  |         numaGetIValue(na1, i, &start);  | 
342  | 0  |         numaGetIValue(na2, i, &end);  | 
343  | 0  |         nx = end - start + 1;  /* number of points with current y value */  | 
344  | 0  |         if (nx == 1) { | 
345  | 0  |             ptaGetPt(pta1, index++, &x, &y);  | 
346  | 0  |             ptaAddPt(ptad, x, y);  | 
347  | 0  |         } else { | 
348  |  |                 /* More than 1 point; extract and sort the x values */  | 
349  | 0  |             nax = numaCreate(nx);  | 
350  | 0  |             for (j = 0; j < nx; j++) { | 
351  | 0  |                  ptaGetPt(pta1, index + j, &x, &y);  | 
352  | 0  |                  numaAddNumber(nax, x);  | 
353  | 0  |             }  | 
354  | 0  |             nas = numaSort(NULL, nax, L_SORT_INCREASING);  | 
355  |  |                 /* Add the points with x sorted */  | 
356  | 0  |             for (j = 0; j < nx; j++) { | 
357  | 0  |                 numaGetFValue(nas, j, &val);  | 
358  | 0  |                 ptaAddPt(ptad, val, y);  | 
359  | 0  |             }  | 
360  | 0  |             index += nx;  | 
361  | 0  |             numaDestroy(&nax);  | 
362  | 0  |             numaDestroy(&nas);  | 
363  | 0  |         }  | 
364  | 0  |     }  | 
365  | 0  |     numaDestroy(&na1);  | 
366  | 0  |     numaDestroy(&na2);  | 
367  | 0  |     ptaDestroy(&pta1);  | 
368  | 0  |     return ptad;  | 
369  | 0  | }  | 
370  |  |  | 
371  |  |  | 
372  |  | /*!  | 
373  |  |  * \brief   ptaEqual()  | 
374  |  |  *  | 
375  |  |  * \param[in]    pta1  | 
376  |  |  * \param[in]    pta2  | 
377  |  |  * \param[out]   psame  1 if same; 0 if different  | 
378  |  |  * \return  0 if OK; 1 on error  | 
379  |  |  *  | 
380  |  |  * <pre>  | 
381  |  |  * Notes:  | 
382  |  |  *      (1) Equality is defined as having the same set of points,  | 
383  |  |  *          independent of the order in which they are presented.  | 
384  |  |  * </pre>  | 
385  |  |  */  | 
386  |  | l_ok  | 
387  |  | ptaEqual(PTA      *pta1,  | 
388  |  |          PTA      *pta2,  | 
389  |  |          l_int32  *psame)  | 
390  | 0  | { | 
391  | 0  | l_int32    i, n1, n2;  | 
392  | 0  | l_float32  x1, y1, x2, y2;  | 
393  | 0  | PTA       *ptas1, *ptas2;  | 
394  |  | 
  | 
395  | 0  |     if (!psame)  | 
396  | 0  |         return ERROR_INT("&same not defined", __func__, 1); | 
397  | 0  |     *psame = 0.0f;  | 
398  | 0  |     if (!pta1 || !pta2)  | 
399  | 0  |         return ERROR_INT("pta1 and pta2 not both defined", __func__, 1); | 
400  |  |  | 
401  | 0  |     n1 = ptaGetCount(pta1);  | 
402  | 0  |     n2 = ptaGetCount(pta2);  | 
403  | 0  |     if (n1 != n2) return 0;  | 
404  |  |  | 
405  |  |         /* 2d sort each and compare */  | 
406  | 0  |     ptas1 = ptaSort2d(pta1);  | 
407  | 0  |     ptas2 = ptaSort2d(pta2);  | 
408  | 0  |     for (i = 0; i < n1; i++) { | 
409  | 0  |         ptaGetPt(ptas1, i, &x1, &y1);  | 
410  | 0  |         ptaGetPt(ptas2, i, &x2, &y2);  | 
411  | 0  |         if (x1 != x2 || y1 != y2) { | 
412  | 0  |             ptaDestroy(&ptas1);  | 
413  | 0  |             ptaDestroy(&ptas2);  | 
414  | 0  |             return 0;  | 
415  | 0  |         }  | 
416  | 0  |     }  | 
417  |  |  | 
418  | 0  |     *psame = 1;  | 
419  | 0  |     ptaDestroy(&ptas1);  | 
420  | 0  |     ptaDestroy(&ptas2);  | 
421  | 0  |     return 0;  | 
422  | 0  | }  | 
423  |  |  | 
424  |  |  | 
425  |  | /*---------------------------------------------------------------------*  | 
426  |  |  *                   Set operations using aset (rbtree)                *  | 
427  |  |  *---------------------------------------------------------------------*/  | 
428  |  | /*!  | 
429  |  |  * \brief   l_asetCreateFromPta()  | 
430  |  |  *  | 
431  |  |  * \param[in]    pta  | 
432  |  |  * \return  set using a 64-bit hash of (x,y) as the key  | 
433  |  |  */  | 
434  |  | L_ASET *  | 
435  |  | l_asetCreateFromPta(PTA  *pta)  | 
436  | 0  | { | 
437  | 0  | l_int32   i, n, x, y;  | 
438  | 0  | l_uint64  hash;  | 
439  | 0  | L_ASET   *set;  | 
440  | 0  | RB_TYPE   key;  | 
441  |  | 
  | 
442  | 0  |     if (!pta)  | 
443  | 0  |         return (L_ASET *)ERROR_PTR("pta not defined", __func__, NULL); | 
444  |  |  | 
445  | 0  |     set = l_asetCreate(L_UINT_TYPE);  | 
446  | 0  |     n = ptaGetCount(pta);  | 
447  | 0  |     for (i = 0; i < n; i++) { | 
448  | 0  |         ptaGetIPt(pta, i, &x, &y);  | 
449  | 0  |         l_hashPtToUint64(x, y, &hash);  | 
450  | 0  |         key.utype = hash;  | 
451  | 0  |         l_asetInsert(set, key);  | 
452  | 0  |     }  | 
453  |  | 
  | 
454  | 0  |     return set;  | 
455  | 0  | }  | 
456  |  |  | 
457  |  |  | 
458  |  | /*!  | 
459  |  |  * \brief   ptaRemoveDupsByAset()  | 
460  |  |  *  | 
461  |  |  * \param[in]    ptas     assumed to be integer values  | 
462  |  |  * \param[out]   pptad    assumed to be integer values  | 
463  |  |  * \return  0 if OK; 1 on error  | 
464  |  |  *  | 
465  |  |  * <pre>  | 
466  |  |  * Notes:  | 
467  |  |  *      (1) This is slower than ptaRemoveDupsByHmap(), mostly because  | 
468  |  |  *          of the nlogn sort to build up the rbtree.  Do not use for  | 
469  |  |  *          large numbers of points (say, > 100K).  | 
470  |  |  * </pre>  | 
471  |  |  */  | 
472  |  | l_ok  | 
473  |  | ptaRemoveDupsByAset(PTA   *ptas,  | 
474  |  |                     PTA  **pptad)  | 
475  | 0  | { | 
476  | 0  | l_int32   i, n, x, y;  | 
477  | 0  | PTA      *ptad;  | 
478  | 0  | l_uint64  hash;  | 
479  | 0  | L_ASET   *set;  | 
480  | 0  | RB_TYPE   key;  | 
481  |  | 
  | 
482  | 0  |     if (!pptad)  | 
483  | 0  |         return ERROR_INT("&ptad not defined", __func__, 1); | 
484  | 0  |     *pptad = NULL;  | 
485  | 0  |     if (!ptas)  | 
486  | 0  |         return ERROR_INT("ptas not defined", __func__, 1); | 
487  |  |  | 
488  | 0  |     set = l_asetCreate(L_UINT_TYPE);  | 
489  | 0  |     n = ptaGetCount(ptas);  | 
490  | 0  |     ptad = ptaCreate(n);  | 
491  | 0  |     *pptad = ptad;  | 
492  | 0  |     for (i = 0; i < n; i++) { | 
493  | 0  |         ptaGetIPt(ptas, i, &x, &y);  | 
494  | 0  |         l_hashPtToUint64(x, y, &hash);  | 
495  | 0  |         key.utype = hash;  | 
496  | 0  |         if (!l_asetFind(set, key)) { | 
497  | 0  |             ptaAddPt(ptad, x, y);  | 
498  | 0  |             l_asetInsert(set, key);  | 
499  | 0  |         }  | 
500  | 0  |     }  | 
501  |  | 
  | 
502  | 0  |     l_asetDestroy(&set);  | 
503  | 0  |     return 0;  | 
504  | 0  | }  | 
505  |  |  | 
506  |  |  | 
507  |  | /*!  | 
508  |  |  * \brief   ptaUnionByAset()  | 
509  |  |  *  | 
510  |  |  * \param[in]    pta1  | 
511  |  |  * \param[in]    pta2  | 
512  |  |  * \param[out]   pptad     union of the two point arrays  | 
513  |  |  * \return  0 if OK; 1 on error  | 
514  |  |  *  | 
515  |  |  * <pre>  | 
516  |  |  * Notes:  | 
517  |  |  *      (1) See sarrayRemoveDupsByAset() for the approach.  | 
518  |  |  *      (2) The key is a 64-bit hash from the (x,y) pair.  | 
519  |  |  *      (3) This is slower than ptaUnionByHmap(), mostly because of the  | 
520  |  |  *          nlogn sort to build up the rbtree.  Do not use for large  | 
521  |  |  *          numbers of points (say, > 100K).  | 
522  |  |  *      (4) The *Aset() functions use the sorted l_Aset, which is just  | 
523  |  |  *          an rbtree in disguise.  | 
524  |  |  * </pre>  | 
525  |  |  */  | 
526  |  | l_ok  | 
527  |  | ptaUnionByAset(PTA   *pta1,  | 
528  |  |                PTA   *pta2,  | 
529  |  |                PTA  **pptad)  | 
530  | 0  | { | 
531  | 0  | PTA  *pta3;  | 
532  |  | 
  | 
533  | 0  |     if (!pptad)  | 
534  | 0  |         return ERROR_INT("&ptad not defined", __func__, 1); | 
535  | 0  |     *pptad = NULL;  | 
536  | 0  |     if (!pta1)  | 
537  | 0  |         return ERROR_INT("pta1 not defined", __func__, 1); | 
538  | 0  |     if (!pta2)  | 
539  | 0  |         return ERROR_INT("pta2 not defined", __func__, 1); | 
540  |  |  | 
541  |  |         /* Join */  | 
542  | 0  |     pta3 = ptaCopy(pta1);  | 
543  | 0  |     ptaJoin(pta3, pta2, 0, -1);  | 
544  |  |  | 
545  |  |         /* Eliminate duplicates */  | 
546  | 0  |     ptaRemoveDupsByAset(pta3, pptad);  | 
547  | 0  |     ptaDestroy(&pta3);  | 
548  | 0  |     return 0;  | 
549  | 0  | }  | 
550  |  |  | 
551  |  |  | 
552  |  | /*!  | 
553  |  |  * \brief   ptaIntersectionByAset()  | 
554  |  |  *  | 
555  |  |  * \param[in]    pta1  | 
556  |  |  * \param[in]    pta2  | 
557  |  |  * \param[out]   pptad       intersection of the two point arrays  | 
558  |  |  * \return  0 if OK; 1 on error  | 
559  |  |  *  | 
560  |  |  * <pre>  | 
561  |  |  * Notes:  | 
562  |  |  *      (1) See sarrayIntersectionByAset() for the approach.  | 
563  |  |  *      (2) The key is a 64-bit hash from the (x,y) pair.  | 
564  |  |  *      (3) This is slower than ptaIntersectionByHmap(), mostly because  | 
565  |  |  *          of the nlogn sort to build up the rbtree.  Do not use for  | 
566  |  |  *          large numbers of points (say, > 100K).  | 
567  |  |  * </pre>  | 
568  |  |  */  | 
569  |  | l_ok  | 
570  |  | ptaIntersectionByAset(PTA   *pta1,  | 
571  |  |                       PTA   *pta2,  | 
572  |  |                       PTA  **pptad)  | 
573  | 0  | { | 
574  | 0  | l_int32   n1, n2, i, n, x, y;  | 
575  | 0  | l_uint64  hash;  | 
576  | 0  | L_ASET   *set1, *set2;  | 
577  | 0  | RB_TYPE   key;  | 
578  | 0  | PTA      *pta_small, *pta_big, *ptad;  | 
579  |  | 
  | 
580  | 0  |     if (!pptad)  | 
581  | 0  |         return ERROR_INT("&ptad not defined", __func__, 1); | 
582  | 0  |     *pptad = NULL;  | 
583  | 0  |     if (!pta1)  | 
584  | 0  |         return ERROR_INT("pta1 not defined", __func__, 1); | 
585  | 0  |     if (!pta2)  | 
586  | 0  |         return ERROR_INT("pta2 not defined", __func__, 1); | 
587  |  |  | 
588  |  |         /* Put the elements of the biggest array into a set */  | 
589  | 0  |     n1 = ptaGetCount(pta1);  | 
590  | 0  |     n2 = ptaGetCount(pta2);  | 
591  | 0  |     pta_small = (n1 < n2) ? pta1 : pta2;   /* do not destroy pta_small */  | 
592  | 0  |     pta_big = (n1 < n2) ? pta2 : pta1;   /* do not destroy pta_big */  | 
593  | 0  |     set1 = l_asetCreateFromPta(pta_big);  | 
594  |  |  | 
595  |  |         /* Build up the intersection of points */  | 
596  | 0  |     ptad = ptaCreate(0);  | 
597  | 0  |     *pptad = ptad;  | 
598  | 0  |     n = ptaGetCount(pta_small);  | 
599  | 0  |     set2 = l_asetCreate(L_UINT_TYPE);  | 
600  | 0  |     for (i = 0; i < n; i++) { | 
601  | 0  |         ptaGetIPt(pta_small, i, &x, &y);  | 
602  | 0  |         l_hashPtToUint64(x, y, &hash);  | 
603  | 0  |         key.utype = hash;  | 
604  | 0  |         if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { | 
605  | 0  |             ptaAddPt(ptad, x, y);  | 
606  | 0  |             l_asetInsert(set2, key);  | 
607  | 0  |         }  | 
608  | 0  |     }  | 
609  |  | 
  | 
610  | 0  |     l_asetDestroy(&set1);  | 
611  | 0  |     l_asetDestroy(&set2);  | 
612  | 0  |     return 0;  | 
613  | 0  | }  | 
614  |  |  | 
615  |  |  | 
616  |  | /*--------------------------------------------------------------------------*  | 
617  |  |  *                            Hashmap operations                            *  | 
618  |  |  *--------------------------------------------------------------------------*/  | 
619  |  | /*!  | 
620  |  |  * \brief  l_hmapCreateFromPta()  | 
621  |  |  *  | 
622  |  |  * \param[in]   pta     input pta  | 
623  |  |  * \return      hmap    hashmap, or NULL on error  | 
624  |  |  *  | 
625  |  |  * <pre>  | 
626  |  |  *  Notes:  | 
627  |  |  *       (1) The indices into %pta are stored in the val field of the hashitems.  | 
628  |  |  *           This is necessary so that %hmap and %pta can be used together.  | 
629  |  |  * </pre>  | 
630  |  |  */  | 
631  |  | L_HASHMAP *  | 
632  |  | l_hmapCreateFromPta(PTA  *pta)  | 
633  | 0  | { | 
634  | 0  | l_int32      i, n, x, y;  | 
635  | 0  | l_uint64     key;  | 
636  | 0  | L_HASHMAP   *hmap;  | 
637  |  | 
  | 
638  | 0  |     if (!pta)  | 
639  | 0  |         return (L_HASHMAP *)ERROR_PTR("pta not defined", __func__, NULL); | 
640  |  |  | 
641  | 0  |     n = ptaGetCount(pta);  | 
642  | 0  |     if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)  | 
643  | 0  |         return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL); | 
644  | 0  |     for (i = 0; i < n; i++) { | 
645  | 0  |         ptaGetIPt(pta, i, &x, &y);  | 
646  | 0  |         l_hashPtToUint64(x, y, &key);  | 
647  | 0  |         l_hmapLookup(hmap, key, i, L_HMAP_CREATE);  | 
648  | 0  |     }  | 
649  | 0  |     return hmap;  | 
650  | 0  | }  | 
651  |  |  | 
652  |  |  | 
653  |  | /*!  | 
654  |  |  * \brief  ptaRemoveDupsByHmap()  | 
655  |  |  *  | 
656  |  |  * \param[in]   ptas  | 
657  |  |  * \param[out]  pptad    set of unique values  | 
658  |  |  * \param[out]  phmap    [optional] hashmap used for lookup  | 
659  |  |  * \return  0 if OK; 1 on error  | 
660  |  |  *  | 
661  |  |  * <pre>  | 
662  |  |  *  Notes:  | 
663  |  |  *       (1) Generates a set of (unique) points from %ptas.  | 
664  |  |  * </pre>  | 
665  |  |  */  | 
666  |  | l_ok  | 
667  |  | ptaRemoveDupsByHmap(PTA         *ptas,  | 
668  |  |                     PTA        **pptad,  | 
669  |  |                     L_HASHMAP  **phmap)  | 
670  | 0  | { | 
671  | 0  | l_int32      i, x, y, tabsize;  | 
672  | 0  | PTA         *ptad;  | 
673  | 0  | L_HASHITEM  *hitem;  | 
674  | 0  | L_HASHMAP   *hmap;  | 
675  |  | 
  | 
676  | 0  |     if (phmap) *phmap = NULL;  | 
677  | 0  |     if (!pptad)  | 
678  | 0  |         return ERROR_INT("&ptad not defined", __func__, 1); | 
679  | 0  |     *pptad = NULL;  | 
680  | 0  |     if (!ptas)  | 
681  | 0  |         return ERROR_INT("ptas not defined", __func__, 1); | 
682  |  |  | 
683  |  |         /* Traverse the hashtable lists */  | 
684  | 0  |     if ((hmap = l_hmapCreateFromPta(ptas)) == NULL)  | 
685  | 0  |         return ERROR_INT("hmap not made", __func__, 1); | 
686  | 0  |     ptad = ptaCreate(0);  | 
687  | 0  |     *pptad = ptad;  | 
688  | 0  |     tabsize = hmap->tabsize;  | 
689  | 0  |     for (i = 0; i < tabsize; i++) { | 
690  | 0  |         hitem = hmap->hashtab[i];  | 
691  | 0  |         while (hitem) { | 
692  | 0  |             ptaGetIPt(ptas, hitem->val, &x, &y);  | 
693  | 0  |             ptaAddPt(ptad, x, y);  | 
694  | 0  |             hitem = hitem->next;  | 
695  | 0  |         }  | 
696  | 0  |     }  | 
697  |  | 
  | 
698  | 0  |     if (phmap)  | 
699  | 0  |         *phmap = hmap;  | 
700  | 0  |     else  | 
701  | 0  |         l_hmapDestroy(&hmap);  | 
702  | 0  |     return 0;  | 
703  | 0  | }  | 
704  |  |  | 
705  |  |  | 
706  |  | /*!  | 
707  |  |  * \brief  ptaUnionByHmap()  | 
708  |  |  *  | 
709  |  |  * \param[in]   pta1  | 
710  |  |  * \param[in]   pta2  | 
711  |  |  * \param[out]  pptad     union of the two point arrays  | 
712  |  |  * \return  0 if OK; 1 on error  | 
713  |  |  *  | 
714  |  |  * <pre>  | 
715  |  |  *  Notes:  | 
716  |  |  *       (1) Make pta with points found in either of the input arrays.  | 
717  |  |  * </pre>  | 
718  |  |  */  | 
719  |  | l_ok  | 
720  |  | ptaUnionByHmap(PTA   *pta1,  | 
721  |  |                PTA   *pta2,  | 
722  |  |                PTA  **pptad)  | 
723  | 0  | { | 
724  | 0  | PTA  *pta3;  | 
725  |  | 
  | 
726  | 0  |     if (!pptad)  | 
727  | 0  |         return ERROR_INT("&ptad not defined", __func__, 1); | 
728  | 0  |     *pptad = NULL;  | 
729  | 0  |     if (!pta1)  | 
730  | 0  |         return ERROR_INT("pta1 not defined", __func__, 1); | 
731  | 0  |     if (!pta2)  | 
732  | 0  |         return ERROR_INT("pta2 not defined", __func__, 1); | 
733  |  |  | 
734  | 0  |     pta3 = ptaCopy(pta1);  | 
735  | 0  |     if (ptaJoin(pta3, pta2, 0, -1) == 1) { | 
736  | 0  |         ptaDestroy(&pta3);  | 
737  | 0  |         return ERROR_INT("pta join failed", __func__, 1); | 
738  | 0  |     }  | 
739  | 0  |     ptaRemoveDupsByHmap(pta3, pptad, NULL);  | 
740  | 0  |     ptaDestroy(&pta3);  | 
741  | 0  |     return 0;  | 
742  | 0  | }  | 
743  |  |  | 
744  |  |  | 
745  |  | /*!  | 
746  |  |  * \brief  ptaIntersectionByHmap()  | 
747  |  |  *  | 
748  |  |  * \param[in]    pta1  | 
749  |  |  * \param[in]    pta2  | 
750  |  |  * \param[out]   pptad     intersection of the two point arrays  | 
751  |  |  * \return  0 if OK; 1 on error  | 
752  |  |  *  | 
753  |  |  * <pre>  | 
754  |  |  *  Notes:  | 
755  |  |  *       (1) Make pta with pts common to both input arrays.  | 
756  |  |  * </pre>  | 
757  |  |  */  | 
758  |  | l_ok  | 
759  |  | ptaIntersectionByHmap(PTA   *pta1,  | 
760  |  |                       PTA   *pta2,  | 
761  |  |                       PTA  **pptad)  | 
762  | 0  | { | 
763  | 0  | l_int32      i, n1, n2, n, x, y;  | 
764  | 0  | l_uint64     key;  | 
765  | 0  | PTA         *pta_small, *pta_big, *ptad;  | 
766  | 0  | L_HASHITEM  *hitem;  | 
767  | 0  | L_HASHMAP   *hmap;  | 
768  |  | 
  | 
769  | 0  |     if (!pptad)  | 
770  | 0  |         return ERROR_INT("&ptad not defined", __func__, 1); | 
771  | 0  |     *pptad = NULL;  | 
772  | 0  |     if (!pta1)  | 
773  | 0  |         return ERROR_INT("pta1 not defined", __func__, 1); | 
774  | 0  |     if (!pta2)  | 
775  | 0  |         return ERROR_INT("pta2 not defined", __func__, 1); | 
776  |  |  | 
777  |  |         /* Make a hashmap for the elements of the biggest array */  | 
778  | 0  |     n1 = ptaGetCount(pta1);  | 
779  | 0  |     n2 = ptaGetCount(pta2);  | 
780  | 0  |     pta_small = (n1 < n2) ? pta1 : pta2;   /* do not destroy pta_small */  | 
781  | 0  |     pta_big = (n1 < n2) ? pta2 : pta1;   /* do not destroy pta_big */  | 
782  | 0  |     if ((hmap = l_hmapCreateFromPta(pta_big)) == NULL)  | 
783  | 0  |         return ERROR_INT("hmap not made", __func__, 1); | 
784  |  |  | 
785  |  |         /* Go through the smallest array, doing a lookup of its (x,y) into  | 
786  |  |          * the big array hashmap.  If an hitem is returned, check the count.  | 
787  |  |          * If the count is 0, ignore; otherwise, add the point to the  | 
788  |  |          * output ptad and set the count in the hitem to 0, indicating  | 
789  |  |          * that the point has already been added. */  | 
790  | 0  |     ptad = ptaCreate(0);  | 
791  | 0  |     *pptad = ptad;  | 
792  | 0  |     n = ptaGetCount(pta_small);  | 
793  | 0  |     for (i = 0; i < n; i++) { | 
794  | 0  |         ptaGetIPt(pta_small, i, &x, &y);  | 
795  | 0  |         l_hashPtToUint64(x, y, &key);  | 
796  | 0  |         hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);  | 
797  | 0  |         if (!hitem || hitem->count == 0)  | 
798  | 0  |             continue;  | 
799  | 0  |         ptaAddPt(ptad, x, y);  | 
800  | 0  |         hitem->count = 0;  | 
801  | 0  |     }  | 
802  | 0  |     l_hmapDestroy(&hmap);  | 
803  | 0  |     return 0;  | 
804  | 0  | }  | 
805  |  |  | 
806  |  |  |