/src/leptonica/src/ptafunc2.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file ptafunc2.c |
29 | | * <pre> |
30 | | * |
31 | | * -------------------------------------- |
32 | | * This file has these Pta utilities: |
33 | | * - sorting |
34 | | * - ordered set operations |
35 | | * - hash map operations |
36 | | * -------------------------------------- |
37 | | * |
38 | | * Sorting |
39 | | * PTA *ptaSort() |
40 | | * l_int32 ptaGetSortIndex() |
41 | | * PTA *ptaSortByIndex() |
42 | | * PTAA *ptaaSortByIndex() |
43 | | * l_int32 ptaGetRankValue() |
44 | | * PTA *ptaSort2d() |
45 | | * l_int32 ptaEqual() |
46 | | * |
47 | | * Set operations using aset (rbtree) |
48 | | * L_ASET *l_asetCreateFromPta() |
49 | | * PTA *ptaRemoveDupsByAset() |
50 | | * PTA *ptaUnionByAset() |
51 | | * PTA *ptaIntersectionByAset() |
52 | | * |
53 | | * Hashmap operations |
54 | | * L_HASHMAP *l_hmapCreateFromPta() |
55 | | * l_int32 ptaRemoveDupsByHmap() |
56 | | * l_int32 ptaUnionByHmap() |
57 | | * l_int32 ptaIntersectionByHmap() |
58 | | * |
59 | | * We have two implementations of set operations on an array of points: |
60 | | * |
61 | | * (1) Using an underlying tree (rbtree) |
62 | | * This uses a good 64 bit hashing function for the key, |
63 | | * that is not expected to have hash collisions (and we do |
64 | | * not test for them). The tree is built up of the keys, |
65 | | * values, and is traversed looking for the key in O(log n). |
66 | | * |
67 | | * (2) Building a hashmap from the keys (hashmap) |
68 | | * This uses a fast 64 bit hashing function for the key, which |
69 | | * is then hashed into a hashtable. Collisions of hashkeys are |
70 | | * very rare, but the hashtable is designed to allow more than one |
71 | | * hashitem in a table entry. The hashitems are put in a list at |
72 | | * each hashtable entry, which is traversed looking for the key. |
73 | | * |
74 | | * </pre> |
75 | | */ |
76 | | |
77 | | #ifdef HAVE_CONFIG_H |
78 | | #include <config_auto.h> |
79 | | #endif /* HAVE_CONFIG_H */ |
80 | | |
81 | | #include "allheaders.h" |
82 | | |
83 | | /*---------------------------------------------------------------------* |
84 | | * Sorting * |
85 | | *---------------------------------------------------------------------*/ |
86 | | /*! |
87 | | * \brief ptaSort() |
88 | | * |
89 | | * \param[in] ptas |
90 | | * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y |
91 | | * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING |
92 | | * \param[out] pnaindex [optional] index of sorted order into |
93 | | * original array |
94 | | * \return ptad sorted version of ptas, or NULL on error |
95 | | */ |
96 | | PTA * |
97 | | ptaSort(PTA *ptas, |
98 | | l_int32 sorttype, |
99 | | l_int32 sortorder, |
100 | | NUMA **pnaindex) |
101 | 0 | { |
102 | 0 | PTA *ptad; |
103 | 0 | NUMA *naindex; |
104 | |
|
105 | 0 | if (pnaindex) *pnaindex = NULL; |
106 | 0 | if (!ptas) |
107 | 0 | return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL); |
108 | 0 | if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) |
109 | 0 | return (PTA *)ERROR_PTR("invalid sort type", __func__, NULL); |
110 | 0 | if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) |
111 | 0 | return (PTA *)ERROR_PTR("invalid sort order", __func__, NULL); |
112 | | |
113 | 0 | if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0) |
114 | 0 | return (PTA *)ERROR_PTR("naindex not made", __func__, NULL); |
115 | | |
116 | 0 | ptad = ptaSortByIndex(ptas, naindex); |
117 | 0 | if (pnaindex) |
118 | 0 | *pnaindex = naindex; |
119 | 0 | else |
120 | 0 | numaDestroy(&naindex); |
121 | 0 | if (!ptad) |
122 | 0 | return (PTA *)ERROR_PTR("ptad not made", __func__, NULL); |
123 | 0 | return ptad; |
124 | 0 | } |
125 | | |
126 | | |
127 | | /*! |
128 | | * \brief ptaGetSortIndex() |
129 | | * |
130 | | * \param[in] ptas |
131 | | * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y |
132 | | * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING |
133 | | * \param[out] pnaindex index of sorted order into original array |
134 | | * \return 0 if OK, 1 on error |
135 | | */ |
136 | | l_ok |
137 | | ptaGetSortIndex(PTA *ptas, |
138 | | l_int32 sorttype, |
139 | | l_int32 sortorder, |
140 | | NUMA **pnaindex) |
141 | 0 | { |
142 | 0 | l_int32 i, n; |
143 | 0 | l_float32 x, y; |
144 | 0 | NUMA *na, *nai; |
145 | |
|
146 | 0 | if (!pnaindex) |
147 | 0 | return ERROR_INT("&naindex not defined", __func__, 1); |
148 | 0 | *pnaindex = NULL; |
149 | 0 | if (!ptas) |
150 | 0 | return ERROR_INT("ptas not defined", __func__, 1); |
151 | 0 | if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) |
152 | 0 | return ERROR_INT("invalid sort type", __func__, 1); |
153 | 0 | if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) |
154 | 0 | return ERROR_INT("invalid sort order", __func__, 1); |
155 | | |
156 | | /* Build up numa of specific data */ |
157 | 0 | n = ptaGetCount(ptas); |
158 | 0 | if ((na = numaCreate(n)) == NULL) |
159 | 0 | return ERROR_INT("na not made", __func__, 1); |
160 | 0 | for (i = 0; i < n; i++) { |
161 | 0 | ptaGetPt(ptas, i, &x, &y); |
162 | 0 | if (sorttype == L_SORT_BY_X) |
163 | 0 | numaAddNumber(na, x); |
164 | 0 | else |
165 | 0 | numaAddNumber(na, y); |
166 | 0 | } |
167 | | |
168 | | /* Get the sort index for data array */ |
169 | 0 | nai = numaGetSortIndex(na, sortorder); |
170 | 0 | numaDestroy(&na); |
171 | 0 | if (!nai) |
172 | 0 | return ERROR_INT("naindex not made", __func__, 1); |
173 | 0 | *pnaindex = nai; |
174 | 0 | return 0; |
175 | 0 | } |
176 | | |
177 | | |
178 | | /*! |
179 | | * \brief ptaSortByIndex() |
180 | | * |
181 | | * \param[in] ptas |
182 | | * \param[in] naindex na that maps from the new pta to the input pta |
183 | | * \return ptad sorted, or NULL on error |
184 | | */ |
185 | | PTA * |
186 | | ptaSortByIndex(PTA *ptas, |
187 | | NUMA *naindex) |
188 | 0 | { |
189 | 0 | l_int32 i, index, n; |
190 | 0 | l_float32 x, y; |
191 | 0 | PTA *ptad; |
192 | |
|
193 | 0 | if (!ptas) |
194 | 0 | return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL); |
195 | 0 | if (!naindex) |
196 | 0 | return (PTA *)ERROR_PTR("naindex not defined", __func__, NULL); |
197 | | |
198 | | /* Build up sorted pta using sort index */ |
199 | 0 | n = numaGetCount(naindex); |
200 | 0 | if ((ptad = ptaCreate(n)) == NULL) |
201 | 0 | return (PTA *)ERROR_PTR("ptad not made", __func__, NULL); |
202 | 0 | for (i = 0; i < n; i++) { |
203 | 0 | numaGetIValue(naindex, i, &index); |
204 | 0 | ptaGetPt(ptas, index, &x, &y); |
205 | 0 | ptaAddPt(ptad, x, y); |
206 | 0 | } |
207 | |
|
208 | 0 | return ptad; |
209 | 0 | } |
210 | | |
211 | | |
212 | | /*! |
213 | | * \brief ptaaSortByIndex() |
214 | | * |
215 | | * \param[in] ptaas |
216 | | * \param[in] naindex na that maps from the new ptaa to the input ptaa |
217 | | * \return ptaad sorted, or NULL on error |
218 | | */ |
219 | | PTAA * |
220 | | ptaaSortByIndex(PTAA *ptaas, |
221 | | NUMA *naindex) |
222 | 0 | { |
223 | 0 | l_int32 i, n, index; |
224 | 0 | PTA *pta; |
225 | 0 | PTAA *ptaad; |
226 | |
|
227 | 0 | if (!ptaas) |
228 | 0 | return (PTAA *)ERROR_PTR("ptaas not defined", __func__, NULL); |
229 | 0 | if (!naindex) |
230 | 0 | return (PTAA *)ERROR_PTR("naindex not defined", __func__, NULL); |
231 | | |
232 | 0 | n = ptaaGetCount(ptaas); |
233 | 0 | if (numaGetCount(naindex) != n) |
234 | 0 | return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", __func__, NULL); |
235 | 0 | ptaad = ptaaCreate(n); |
236 | 0 | for (i = 0; i < n; i++) { |
237 | 0 | numaGetIValue(naindex, i, &index); |
238 | 0 | pta = ptaaGetPta(ptaas, index, L_COPY); |
239 | 0 | ptaaAddPta(ptaad, pta, L_INSERT); |
240 | 0 | } |
241 | |
|
242 | 0 | return ptaad; |
243 | 0 | } |
244 | | |
245 | | |
246 | | /*! |
247 | | * \brief ptaGetRankValue() |
248 | | * |
249 | | * \param[in] pta |
250 | | * \param[in] fract use 0.0 for smallest, 1.0 for largest |
251 | | * \param[in] ptasort [optional] version of %pta sorted by %sorttype |
252 | | * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y |
253 | | * \param[out] pval rankval: the x or y value at %fract |
254 | | * \return 0 if OK, 1 on error |
255 | | */ |
256 | | l_ok |
257 | | ptaGetRankValue(PTA *pta, |
258 | | l_float32 fract, |
259 | | PTA *ptasort, |
260 | | l_int32 sorttype, |
261 | | l_float32 *pval) |
262 | 0 | { |
263 | 0 | l_int32 index, n; |
264 | 0 | PTA *ptas; |
265 | |
|
266 | 0 | if (!pval) |
267 | 0 | return ERROR_INT("&val not defined", __func__, 1); |
268 | 0 | *pval = 0.0; |
269 | 0 | if (!pta) |
270 | 0 | return ERROR_INT("pta not defined", __func__, 1); |
271 | 0 | if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) |
272 | 0 | return ERROR_INT("invalid sort type", __func__, 1); |
273 | 0 | if (fract < 0.0 || fract > 1.0) |
274 | 0 | return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1); |
275 | 0 | if ((n = ptaGetCount(pta)) == 0) |
276 | 0 | return ERROR_INT("pta empty", __func__, 1); |
277 | | |
278 | 0 | if (ptasort) |
279 | 0 | ptas = ptasort; |
280 | 0 | else |
281 | 0 | ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL); |
282 | |
|
283 | 0 | index = (l_int32)(fract * (l_float32)(n - 1) + 0.5); |
284 | 0 | if (sorttype == L_SORT_BY_X) |
285 | 0 | ptaGetPt(ptas, index, pval, NULL); |
286 | 0 | else /* sort by y */ |
287 | 0 | ptaGetPt(ptas, index, NULL, pval); |
288 | |
|
289 | 0 | if (!ptasort) ptaDestroy(&ptas); |
290 | 0 | return 0; |
291 | 0 | } |
292 | | |
293 | | |
294 | | /*! |
295 | | * \brief ptaSort2d() |
296 | | * |
297 | | * \param[in] ptas |
298 | | * \return ptad, or NULL on error |
299 | | * |
300 | | * <pre> |
301 | | * Notes: |
302 | | * (1) Sort increasing by row-major, scanning down from the UL corner, |
303 | | * where for each value of y, order the pts from left to right. |
304 | | * </pre> |
305 | | */ |
306 | | PTA * |
307 | | ptaSort2d(PTA *pta) |
308 | 0 | { |
309 | 0 | l_int32 index, i, j, n, nx, ny, start, end; |
310 | 0 | l_float32 x, y, yp, val; |
311 | 0 | NUMA *na1, *na2, *nas, *nax; |
312 | 0 | PTA *pta1, *ptad; |
313 | |
|
314 | 0 | if (!pta) |
315 | 0 | return (PTA *)ERROR_PTR("pta not defined", __func__, NULL); |
316 | | |
317 | | /* Sort by row-major (y first, then x). After sort by y, |
318 | | * the x values at the same y are not sorted. */ |
319 | 0 | pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL); |
320 | | |
321 | | /* Find start and ending indices with the same y value */ |
322 | 0 | n = ptaGetCount(pta1); |
323 | 0 | na1 = numaCreate(0); /* holds start index of sequence with same y */ |
324 | 0 | na2 = numaCreate(0); /* holds end index of sequence with same y */ |
325 | 0 | numaAddNumber(na1, 0); |
326 | 0 | ptaGetPt(pta1, 0, &x, &yp); |
327 | 0 | for (i = 1; i < n; i++) { |
328 | 0 | ptaGetPt(pta1, i, &x, &y); |
329 | 0 | if (y != yp) { |
330 | 0 | numaAddNumber(na1, i); |
331 | 0 | numaAddNumber(na2, i - 1); |
332 | 0 | } |
333 | 0 | yp = y; |
334 | 0 | } |
335 | 0 | numaAddNumber(na2, n - 1); |
336 | | |
337 | | /* Sort by increasing x each set with the same y value */ |
338 | 0 | ptad = ptaCreate(n); |
339 | 0 | ny = numaGetCount(na1); /* number of distinct y values */ |
340 | 0 | for (i = 0, index = 0; i < ny; i++) { |
341 | 0 | numaGetIValue(na1, i, &start); |
342 | 0 | numaGetIValue(na2, i, &end); |
343 | 0 | nx = end - start + 1; /* number of points with current y value */ |
344 | 0 | if (nx == 1) { |
345 | 0 | ptaGetPt(pta1, index++, &x, &y); |
346 | 0 | ptaAddPt(ptad, x, y); |
347 | 0 | } else { |
348 | | /* More than 1 point; extract and sort the x values */ |
349 | 0 | nax = numaCreate(nx); |
350 | 0 | for (j = 0; j < nx; j++) { |
351 | 0 | ptaGetPt(pta1, index + j, &x, &y); |
352 | 0 | numaAddNumber(nax, x); |
353 | 0 | } |
354 | 0 | nas = numaSort(NULL, nax, L_SORT_INCREASING); |
355 | | /* Add the points with x sorted */ |
356 | 0 | for (j = 0; j < nx; j++) { |
357 | 0 | numaGetFValue(nas, j, &val); |
358 | 0 | ptaAddPt(ptad, val, y); |
359 | 0 | } |
360 | 0 | index += nx; |
361 | 0 | numaDestroy(&nax); |
362 | 0 | numaDestroy(&nas); |
363 | 0 | } |
364 | 0 | } |
365 | 0 | numaDestroy(&na1); |
366 | 0 | numaDestroy(&na2); |
367 | 0 | ptaDestroy(&pta1); |
368 | 0 | return ptad; |
369 | 0 | } |
370 | | |
371 | | |
372 | | /*! |
373 | | * \brief ptaEqual() |
374 | | * |
375 | | * \param[in] pta1 |
376 | | * \param[in] pta2 |
377 | | * \param[out] psame 1 if same; 0 if different |
378 | | * \return 0 if OK; 1 on error |
379 | | * |
380 | | * <pre> |
381 | | * Notes: |
382 | | * (1) Equality is defined as having the same set of points, |
383 | | * independent of the order in which they are presented. |
384 | | * </pre> |
385 | | */ |
386 | | l_ok |
387 | | ptaEqual(PTA *pta1, |
388 | | PTA *pta2, |
389 | | l_int32 *psame) |
390 | 0 | { |
391 | 0 | l_int32 i, n1, n2; |
392 | 0 | l_float32 x1, y1, x2, y2; |
393 | 0 | PTA *ptas1, *ptas2; |
394 | |
|
395 | 0 | if (!psame) |
396 | 0 | return ERROR_INT("&same not defined", __func__, 1); |
397 | 0 | *psame = 0.0f; |
398 | 0 | if (!pta1 || !pta2) |
399 | 0 | return ERROR_INT("pta1 and pta2 not both defined", __func__, 1); |
400 | | |
401 | 0 | n1 = ptaGetCount(pta1); |
402 | 0 | n2 = ptaGetCount(pta2); |
403 | 0 | if (n1 != n2) return 0; |
404 | | |
405 | | /* 2d sort each and compare */ |
406 | 0 | ptas1 = ptaSort2d(pta1); |
407 | 0 | ptas2 = ptaSort2d(pta2); |
408 | 0 | for (i = 0; i < n1; i++) { |
409 | 0 | ptaGetPt(ptas1, i, &x1, &y1); |
410 | 0 | ptaGetPt(ptas2, i, &x2, &y2); |
411 | 0 | if (x1 != x2 || y1 != y2) { |
412 | 0 | ptaDestroy(&ptas1); |
413 | 0 | ptaDestroy(&ptas2); |
414 | 0 | return 0; |
415 | 0 | } |
416 | 0 | } |
417 | | |
418 | 0 | *psame = 1; |
419 | 0 | ptaDestroy(&ptas1); |
420 | 0 | ptaDestroy(&ptas2); |
421 | 0 | return 0; |
422 | 0 | } |
423 | | |
424 | | |
425 | | /*---------------------------------------------------------------------* |
426 | | * Set operations using aset (rbtree) * |
427 | | *---------------------------------------------------------------------*/ |
428 | | /*! |
429 | | * \brief l_asetCreateFromPta() |
430 | | * |
431 | | * \param[in] pta |
432 | | * \return set using a 64-bit hash of (x,y) as the key |
433 | | */ |
434 | | L_ASET * |
435 | | l_asetCreateFromPta(PTA *pta) |
436 | 0 | { |
437 | 0 | l_int32 i, n, x, y; |
438 | 0 | l_uint64 hash; |
439 | 0 | L_ASET *set; |
440 | 0 | RB_TYPE key; |
441 | |
|
442 | 0 | if (!pta) |
443 | 0 | return (L_ASET *)ERROR_PTR("pta not defined", __func__, NULL); |
444 | | |
445 | 0 | set = l_asetCreate(L_UINT_TYPE); |
446 | 0 | n = ptaGetCount(pta); |
447 | 0 | for (i = 0; i < n; i++) { |
448 | 0 | ptaGetIPt(pta, i, &x, &y); |
449 | 0 | l_hashPtToUint64(x, y, &hash); |
450 | 0 | key.utype = hash; |
451 | 0 | l_asetInsert(set, key); |
452 | 0 | } |
453 | |
|
454 | 0 | return set; |
455 | 0 | } |
456 | | |
457 | | |
458 | | /*! |
459 | | * \brief ptaRemoveDupsByAset() |
460 | | * |
461 | | * \param[in] ptas assumed to be integer values |
462 | | * \param[out] pptad assumed to be integer values |
463 | | * \return 0 if OK; 1 on error |
464 | | * |
465 | | * <pre> |
466 | | * Notes: |
467 | | * (1) This is slower than ptaRemoveDupsByHmap(), mostly because |
468 | | * of the nlogn sort to build up the rbtree. Do not use for |
469 | | * large numbers of points (say, > 100K). |
470 | | * </pre> |
471 | | */ |
472 | | l_ok |
473 | | ptaRemoveDupsByAset(PTA *ptas, |
474 | | PTA **pptad) |
475 | 0 | { |
476 | 0 | l_int32 i, n, x, y; |
477 | 0 | PTA *ptad; |
478 | 0 | l_uint64 hash; |
479 | 0 | L_ASET *set; |
480 | 0 | RB_TYPE key; |
481 | |
|
482 | 0 | if (!pptad) |
483 | 0 | return ERROR_INT("&ptad not defined", __func__, 1); |
484 | 0 | *pptad = NULL; |
485 | 0 | if (!ptas) |
486 | 0 | return ERROR_INT("ptas not defined", __func__, 1); |
487 | | |
488 | 0 | set = l_asetCreate(L_UINT_TYPE); |
489 | 0 | n = ptaGetCount(ptas); |
490 | 0 | ptad = ptaCreate(n); |
491 | 0 | *pptad = ptad; |
492 | 0 | for (i = 0; i < n; i++) { |
493 | 0 | ptaGetIPt(ptas, i, &x, &y); |
494 | 0 | l_hashPtToUint64(x, y, &hash); |
495 | 0 | key.utype = hash; |
496 | 0 | if (!l_asetFind(set, key)) { |
497 | 0 | ptaAddPt(ptad, x, y); |
498 | 0 | l_asetInsert(set, key); |
499 | 0 | } |
500 | 0 | } |
501 | |
|
502 | 0 | l_asetDestroy(&set); |
503 | 0 | return 0; |
504 | 0 | } |
505 | | |
506 | | |
507 | | /*! |
508 | | * \brief ptaUnionByAset() |
509 | | * |
510 | | * \param[in] pta1 |
511 | | * \param[in] pta2 |
512 | | * \param[out] pptad union of the two point arrays |
513 | | * \return 0 if OK; 1 on error |
514 | | * |
515 | | * <pre> |
516 | | * Notes: |
517 | | * (1) See sarrayRemoveDupsByAset() for the approach. |
518 | | * (2) The key is a 64-bit hash from the (x,y) pair. |
519 | | * (3) This is slower than ptaUnionByHmap(), mostly because of the |
520 | | * nlogn sort to build up the rbtree. Do not use for large |
521 | | * numbers of points (say, > 100K). |
522 | | * (4) The *Aset() functions use the sorted l_Aset, which is just |
523 | | * an rbtree in disguise. |
524 | | * </pre> |
525 | | */ |
526 | | l_ok |
527 | | ptaUnionByAset(PTA *pta1, |
528 | | PTA *pta2, |
529 | | PTA **pptad) |
530 | 0 | { |
531 | 0 | PTA *pta3; |
532 | |
|
533 | 0 | if (!pptad) |
534 | 0 | return ERROR_INT("&ptad not defined", __func__, 1); |
535 | 0 | *pptad = NULL; |
536 | 0 | if (!pta1) |
537 | 0 | return ERROR_INT("pta1 not defined", __func__, 1); |
538 | 0 | if (!pta2) |
539 | 0 | return ERROR_INT("pta2 not defined", __func__, 1); |
540 | | |
541 | | /* Join */ |
542 | 0 | pta3 = ptaCopy(pta1); |
543 | 0 | ptaJoin(pta3, pta2, 0, -1); |
544 | | |
545 | | /* Eliminate duplicates */ |
546 | 0 | ptaRemoveDupsByAset(pta3, pptad); |
547 | 0 | ptaDestroy(&pta3); |
548 | 0 | return 0; |
549 | 0 | } |
550 | | |
551 | | |
552 | | /*! |
553 | | * \brief ptaIntersectionByAset() |
554 | | * |
555 | | * \param[in] pta1 |
556 | | * \param[in] pta2 |
557 | | * \param[out] pptad intersection of the two point arrays |
558 | | * \return 0 if OK; 1 on error |
559 | | * |
560 | | * <pre> |
561 | | * Notes: |
562 | | * (1) See sarrayIntersectionByAset() for the approach. |
563 | | * (2) The key is a 64-bit hash from the (x,y) pair. |
564 | | * (3) This is slower than ptaIntersectionByHmap(), mostly because |
565 | | * of the nlogn sort to build up the rbtree. Do not use for |
566 | | * large numbers of points (say, > 100K). |
567 | | * </pre> |
568 | | */ |
569 | | l_ok |
570 | | ptaIntersectionByAset(PTA *pta1, |
571 | | PTA *pta2, |
572 | | PTA **pptad) |
573 | 0 | { |
574 | 0 | l_int32 n1, n2, i, n, x, y; |
575 | 0 | l_uint64 hash; |
576 | 0 | L_ASET *set1, *set2; |
577 | 0 | RB_TYPE key; |
578 | 0 | PTA *pta_small, *pta_big, *ptad; |
579 | |
|
580 | 0 | if (!pptad) |
581 | 0 | return ERROR_INT("&ptad not defined", __func__, 1); |
582 | 0 | *pptad = NULL; |
583 | 0 | if (!pta1) |
584 | 0 | return ERROR_INT("pta1 not defined", __func__, 1); |
585 | 0 | if (!pta2) |
586 | 0 | return ERROR_INT("pta2 not defined", __func__, 1); |
587 | | |
588 | | /* Put the elements of the biggest array into a set */ |
589 | 0 | n1 = ptaGetCount(pta1); |
590 | 0 | n2 = ptaGetCount(pta2); |
591 | 0 | pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ |
592 | 0 | pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ |
593 | 0 | set1 = l_asetCreateFromPta(pta_big); |
594 | | |
595 | | /* Build up the intersection of points */ |
596 | 0 | ptad = ptaCreate(0); |
597 | 0 | *pptad = ptad; |
598 | 0 | n = ptaGetCount(pta_small); |
599 | 0 | set2 = l_asetCreate(L_UINT_TYPE); |
600 | 0 | for (i = 0; i < n; i++) { |
601 | 0 | ptaGetIPt(pta_small, i, &x, &y); |
602 | 0 | l_hashPtToUint64(x, y, &hash); |
603 | 0 | key.utype = hash; |
604 | 0 | if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { |
605 | 0 | ptaAddPt(ptad, x, y); |
606 | 0 | l_asetInsert(set2, key); |
607 | 0 | } |
608 | 0 | } |
609 | |
|
610 | 0 | l_asetDestroy(&set1); |
611 | 0 | l_asetDestroy(&set2); |
612 | 0 | return 0; |
613 | 0 | } |
614 | | |
615 | | |
616 | | /*--------------------------------------------------------------------------* |
617 | | * Hashmap operations * |
618 | | *--------------------------------------------------------------------------*/ |
619 | | /*! |
620 | | * \brief l_hmapCreateFromPta() |
621 | | * |
622 | | * \param[in] pta input pta |
623 | | * \return hmap hashmap, or NULL on error |
624 | | * |
625 | | * <pre> |
626 | | * Notes: |
627 | | * (1) The indices into %pta are stored in the val field of the hashitems. |
628 | | * This is necessary so that %hmap and %pta can be used together. |
629 | | * </pre> |
630 | | */ |
631 | | L_HASHMAP * |
632 | | l_hmapCreateFromPta(PTA *pta) |
633 | 0 | { |
634 | 0 | l_int32 i, n, x, y; |
635 | 0 | l_uint64 key; |
636 | 0 | L_HASHMAP *hmap; |
637 | |
|
638 | 0 | if (!pta) |
639 | 0 | return (L_HASHMAP *)ERROR_PTR("pta not defined", __func__, NULL); |
640 | | |
641 | 0 | n = ptaGetCount(pta); |
642 | 0 | if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL) |
643 | 0 | return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL); |
644 | 0 | for (i = 0; i < n; i++) { |
645 | 0 | ptaGetIPt(pta, i, &x, &y); |
646 | 0 | l_hashPtToUint64(x, y, &key); |
647 | 0 | l_hmapLookup(hmap, key, i, L_HMAP_CREATE); |
648 | 0 | } |
649 | 0 | return hmap; |
650 | 0 | } |
651 | | |
652 | | |
653 | | /*! |
654 | | * \brief ptaRemoveDupsByHmap() |
655 | | * |
656 | | * \param[in] ptas |
657 | | * \param[out] pptad set of unique values |
658 | | * \param[out] phmap [optional] hashmap used for lookup |
659 | | * \return 0 if OK; 1 on error |
660 | | * |
661 | | * <pre> |
662 | | * Notes: |
663 | | * (1) Generates a set of (unique) points from %ptas. |
664 | | * </pre> |
665 | | */ |
666 | | l_ok |
667 | | ptaRemoveDupsByHmap(PTA *ptas, |
668 | | PTA **pptad, |
669 | | L_HASHMAP **phmap) |
670 | 0 | { |
671 | 0 | l_int32 i, x, y, tabsize; |
672 | 0 | PTA *ptad; |
673 | 0 | L_HASHITEM *hitem; |
674 | 0 | L_HASHMAP *hmap; |
675 | |
|
676 | 0 | if (phmap) *phmap = NULL; |
677 | 0 | if (!pptad) |
678 | 0 | return ERROR_INT("&ptad not defined", __func__, 1); |
679 | 0 | *pptad = NULL; |
680 | 0 | if (!ptas) |
681 | 0 | return ERROR_INT("ptas not defined", __func__, 1); |
682 | | |
683 | | /* Traverse the hashtable lists */ |
684 | 0 | if ((hmap = l_hmapCreateFromPta(ptas)) == NULL) |
685 | 0 | return ERROR_INT("hmap not made", __func__, 1); |
686 | 0 | ptad = ptaCreate(0); |
687 | 0 | *pptad = ptad; |
688 | 0 | tabsize = hmap->tabsize; |
689 | 0 | for (i = 0; i < tabsize; i++) { |
690 | 0 | hitem = hmap->hashtab[i]; |
691 | 0 | while (hitem) { |
692 | 0 | ptaGetIPt(ptas, hitem->val, &x, &y); |
693 | 0 | ptaAddPt(ptad, x, y); |
694 | 0 | hitem = hitem->next; |
695 | 0 | } |
696 | 0 | } |
697 | |
|
698 | 0 | if (phmap) |
699 | 0 | *phmap = hmap; |
700 | 0 | else |
701 | 0 | l_hmapDestroy(&hmap); |
702 | 0 | return 0; |
703 | 0 | } |
704 | | |
705 | | |
706 | | /*! |
707 | | * \brief ptaUnionByHmap() |
708 | | * |
709 | | * \param[in] pta1 |
710 | | * \param[in] pta2 |
711 | | * \param[out] pptad union of the two point arrays |
712 | | * \return 0 if OK; 1 on error |
713 | | * |
714 | | * <pre> |
715 | | * Notes: |
716 | | * (1) Make pta with points found in either of the input arrays. |
717 | | * </pre> |
718 | | */ |
719 | | l_ok |
720 | | ptaUnionByHmap(PTA *pta1, |
721 | | PTA *pta2, |
722 | | PTA **pptad) |
723 | 0 | { |
724 | 0 | PTA *pta3; |
725 | |
|
726 | 0 | if (!pptad) |
727 | 0 | return ERROR_INT("&ptad not defined", __func__, 1); |
728 | 0 | *pptad = NULL; |
729 | 0 | if (!pta1) |
730 | 0 | return ERROR_INT("pta1 not defined", __func__, 1); |
731 | 0 | if (!pta2) |
732 | 0 | return ERROR_INT("pta2 not defined", __func__, 1); |
733 | | |
734 | 0 | pta3 = ptaCopy(pta1); |
735 | 0 | if (ptaJoin(pta3, pta2, 0, -1) == 1) { |
736 | 0 | ptaDestroy(&pta3); |
737 | 0 | return ERROR_INT("pta join failed", __func__, 1); |
738 | 0 | } |
739 | 0 | ptaRemoveDupsByHmap(pta3, pptad, NULL); |
740 | 0 | ptaDestroy(&pta3); |
741 | 0 | return 0; |
742 | 0 | } |
743 | | |
744 | | |
745 | | /*! |
746 | | * \brief ptaIntersectionByHmap() |
747 | | * |
748 | | * \param[in] pta1 |
749 | | * \param[in] pta2 |
750 | | * \param[out] pptad intersection of the two point arrays |
751 | | * \return 0 if OK; 1 on error |
752 | | * |
753 | | * <pre> |
754 | | * Notes: |
755 | | * (1) Make pta with pts common to both input arrays. |
756 | | * </pre> |
757 | | */ |
758 | | l_ok |
759 | | ptaIntersectionByHmap(PTA *pta1, |
760 | | PTA *pta2, |
761 | | PTA **pptad) |
762 | 0 | { |
763 | 0 | l_int32 i, n1, n2, n, x, y; |
764 | 0 | l_uint64 key; |
765 | 0 | PTA *pta_small, *pta_big, *ptad; |
766 | 0 | L_HASHITEM *hitem; |
767 | 0 | L_HASHMAP *hmap; |
768 | |
|
769 | 0 | if (!pptad) |
770 | 0 | return ERROR_INT("&ptad not defined", __func__, 1); |
771 | 0 | *pptad = NULL; |
772 | 0 | if (!pta1) |
773 | 0 | return ERROR_INT("pta1 not defined", __func__, 1); |
774 | 0 | if (!pta2) |
775 | 0 | return ERROR_INT("pta2 not defined", __func__, 1); |
776 | | |
777 | | /* Make a hashmap for the elements of the biggest array */ |
778 | 0 | n1 = ptaGetCount(pta1); |
779 | 0 | n2 = ptaGetCount(pta2); |
780 | 0 | pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ |
781 | 0 | pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ |
782 | 0 | if ((hmap = l_hmapCreateFromPta(pta_big)) == NULL) |
783 | 0 | return ERROR_INT("hmap not made", __func__, 1); |
784 | | |
785 | | /* Go through the smallest array, doing a lookup of its (x,y) into |
786 | | * the big array hashmap. If an hitem is returned, check the count. |
787 | | * If the count is 0, ignore; otherwise, add the point to the |
788 | | * output ptad and set the count in the hitem to 0, indicating |
789 | | * that the point has already been added. */ |
790 | 0 | ptad = ptaCreate(0); |
791 | 0 | *pptad = ptad; |
792 | 0 | n = ptaGetCount(pta_small); |
793 | 0 | for (i = 0; i < n; i++) { |
794 | 0 | ptaGetIPt(pta_small, i, &x, &y); |
795 | 0 | l_hashPtToUint64(x, y, &key); |
796 | 0 | hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK); |
797 | 0 | if (!hitem || hitem->count == 0) |
798 | 0 | continue; |
799 | 0 | ptaAddPt(ptad, x, y); |
800 | 0 | hitem->count = 0; |
801 | 0 | } |
802 | 0 | l_hmapDestroy(&hmap); |
803 | 0 | return 0; |
804 | 0 | } |
805 | | |
806 | | |