/src/leptonica/src/dnafunc1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - |
4 | | - Redistribution and use in source and binary forms, with or without |
5 | | - modification, are permitted provided that the following conditions |
6 | | - are met: |
7 | | - 1. Redistributions of source code must retain the above copyright |
8 | | - notice, this list of conditions and the following disclaimer. |
9 | | - 2. Redistributions in binary form must reproduce the above |
10 | | - copyright notice, this list of conditions and the following |
11 | | - disclaimer in the documentation and/or other materials |
12 | | - provided with the distribution. |
13 | | - |
14 | | - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
15 | | - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
16 | | - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
17 | | - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY |
18 | | - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 | | - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 | | - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 | | - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 | | - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
23 | | - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
24 | | - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | *====================================================================*/ |
26 | | |
27 | | /*! |
28 | | * \file dnafunc1.c |
29 | | * <pre> |
30 | | * |
31 | | * Rearrangements |
32 | | * l_int32 *l_dnaJoin() |
33 | | * l_int32 *l_dnaaFlattenToDna() |
34 | | * L_DNA *l_dnaSelectRange() |
35 | | * |
36 | | * Conversion between numa and dna |
37 | | * NUMA *l_dnaConvertToNuma() |
38 | | * L_DNA *numaConvertToDna() |
39 | | * |
40 | | * Conversion from pix data to dna |
41 | | * L_DNA *pixConvertDataToDna() |
42 | | * |
43 | | * Set operations using aset (rbtree) |
44 | | * L_ASET *l_asetCreateFromDna() |
45 | | * L_DNA *l_dnaRemoveDupsByAset() |
46 | | * L_DNA *l_dnaUnionByAset() |
47 | | * L_DNA *l_dnaIntersectionByAset() |
48 | | * |
49 | | * Hashmap operations |
50 | | * L_HASHMAP *l_hmapCreateFromDna() |
51 | | * l_int32 l_dnaRemoveDupsByHmap() |
52 | | * l_int32 l_dnaUnionByHmap() |
53 | | * l_int32 l_dnaIntersectionByHmap() |
54 | | * l_int32 l_dnaMakeHistoByHmap() |
55 | | * |
56 | | * Miscellaneous operations |
57 | | * L_DNA *l_dnaDiffAdjValues() |
58 | | * |
59 | | * |
60 | | * We have two implementations of set operations on an array of doubles: |
61 | | * |
62 | | * (1) Using an underlying tree (rbtree) |
63 | | * The key for each float64 value is the value itself. |
64 | | * No collisions can occur. The tree is sorted by the keys. |
65 | | * Lookup is done in O(log n) by traversing from the root, |
66 | | * looking for the key. |
67 | | * |
68 | | * (2) Building a hashmap from the keys (hashmap) |
69 | | * The keys are made from each float64 by casting into a uint64. |
70 | | * The key is then hashed into a hashtable. Collisions of hashkeys are |
71 | | * very rare, and the hashtable is designed to allow more than one |
72 | | * hashitem in a table entry. The hashitems are put in a list at |
73 | | * each hashtable entry, which is traversed looking for the key. |
74 | | * </pre> |
75 | | */ |
76 | | |
77 | | #ifdef HAVE_CONFIG_H |
78 | | #include <config_auto.h> |
79 | | #endif /* HAVE_CONFIG_H */ |
80 | | |
81 | | #include "allheaders.h" |
82 | | #include "array_internal.h" |
83 | | |
84 | | |
85 | | /*----------------------------------------------------------------------* |
86 | | * Rearrangements * |
87 | | *----------------------------------------------------------------------*/ |
88 | | /*! |
89 | | * \brief l_dnaJoin() |
90 | | * |
91 | | * \param[in] dad dest dna; add to this one |
92 | | * \param[in] das [optional] source dna; add from this one |
93 | | * \param[in] istart starting index in das |
94 | | * \param[in] iend ending index in das; use -1 to cat all |
95 | | * \return 0 if OK, 1 on error |
96 | | * |
97 | | * <pre> |
98 | | * Notes: |
99 | | * (1) istart < 0 is taken to mean 'read from the start' (istart = 0) |
100 | | * (2) iend < 0 means 'read to the end' |
101 | | * (3) if das == NULL, this is a no-op |
102 | | * </pre> |
103 | | */ |
104 | | l_ok |
105 | | l_dnaJoin(L_DNA *dad, |
106 | | L_DNA *das, |
107 | | l_int32 istart, |
108 | | l_int32 iend) |
109 | 0 | { |
110 | 0 | l_int32 n, i; |
111 | 0 | l_float64 val; |
112 | |
|
113 | 0 | if (!dad) |
114 | 0 | return ERROR_INT("dad not defined", __func__, 1); |
115 | 0 | if (!das) |
116 | 0 | return 0; |
117 | | |
118 | 0 | if (istart < 0) |
119 | 0 | istart = 0; |
120 | 0 | n = l_dnaGetCount(das); |
121 | 0 | if (iend < 0 || iend >= n) |
122 | 0 | iend = n - 1; |
123 | 0 | if (istart > iend) |
124 | 0 | return ERROR_INT("istart > iend; nothing to add", __func__, 1); |
125 | | |
126 | 0 | for (i = istart; i <= iend; i++) { |
127 | 0 | l_dnaGetDValue(das, i, &val); |
128 | 0 | if (l_dnaAddNumber(dad, val) == 1) { |
129 | 0 | L_ERROR("failed to add double at i = %d\n", __func__, i); |
130 | 0 | return 1; |
131 | 0 | } |
132 | |
|
133 | 0 | } |
134 | 0 | return 0; |
135 | 0 | } |
136 | | |
137 | | |
138 | | /*! |
139 | | * \brief l_dnaaFlattenToDna() |
140 | | * |
141 | | * \param[in] daa |
142 | | * \return dad, or NULL on error |
143 | | * |
144 | | * <pre> |
145 | | * Notes: |
146 | | * (1) This 'flattens' the dnaa to a dna, by joining successively |
147 | | * each dna in the dnaa. |
148 | | * (2) It leaves the input dnaa unchanged. |
149 | | * </pre> |
150 | | */ |
151 | | L_DNA * |
152 | | l_dnaaFlattenToDna(L_DNAA *daa) |
153 | 0 | { |
154 | 0 | l_int32 i, nalloc; |
155 | 0 | L_DNA *da, *dad; |
156 | 0 | L_DNA **array; |
157 | |
|
158 | 0 | if (!daa) |
159 | 0 | return (L_DNA *)ERROR_PTR("daa not defined", __func__, NULL); |
160 | | |
161 | 0 | nalloc = daa->nalloc; |
162 | 0 | array = daa->dna; |
163 | 0 | dad = l_dnaCreate(0); |
164 | 0 | for (i = 0; i < nalloc; i++) { |
165 | 0 | da = array[i]; |
166 | 0 | if (!da) continue; |
167 | 0 | l_dnaJoin(dad, da, 0, -1); |
168 | 0 | } |
169 | |
|
170 | 0 | return dad; |
171 | 0 | } |
172 | | |
173 | | |
174 | | /*! |
175 | | * \brief l_dnaSelectRange() |
176 | | * |
177 | | * \param[in] das |
178 | | * \param[in] first use 0 to select from the beginning |
179 | | * \param[in] last use -1 to select to the end |
180 | | * \return dad, or NULL on error |
181 | | */ |
182 | | L_DNA * |
183 | | l_dnaSelectRange(L_DNA *das, |
184 | | l_int32 first, |
185 | | l_int32 last) |
186 | 0 | { |
187 | 0 | l_int32 n, i; |
188 | 0 | l_float64 dval; |
189 | 0 | L_DNA *dad; |
190 | |
|
191 | 0 | if (!das) |
192 | 0 | return (L_DNA *)ERROR_PTR("das not defined", __func__, NULL); |
193 | 0 | if ((n = l_dnaGetCount(das)) == 0) { |
194 | 0 | L_WARNING("das is empty\n", __func__); |
195 | 0 | return l_dnaCopy(das); |
196 | 0 | } |
197 | 0 | first = L_MAX(0, first); |
198 | 0 | if (last < 0) last = n - 1; |
199 | 0 | if (first >= n) |
200 | 0 | return (L_DNA *)ERROR_PTR("invalid first", __func__, NULL); |
201 | 0 | if (last >= n) { |
202 | 0 | L_WARNING("last = %d is beyond max index = %d; adjusting\n", |
203 | 0 | __func__, last, n - 1); |
204 | 0 | last = n - 1; |
205 | 0 | } |
206 | 0 | if (first > last) |
207 | 0 | return (L_DNA *)ERROR_PTR("first > last", __func__, NULL); |
208 | | |
209 | 0 | dad = l_dnaCreate(last - first + 1); |
210 | 0 | for (i = first; i <= last; i++) { |
211 | 0 | l_dnaGetDValue(das, i, &dval); |
212 | 0 | l_dnaAddNumber(dad, dval); |
213 | 0 | } |
214 | 0 | return dad; |
215 | 0 | } |
216 | | |
217 | | |
218 | | /*----------------------------------------------------------------------* |
219 | | * Conversion between numa and dna * |
220 | | *----------------------------------------------------------------------*/ |
221 | | /*! |
222 | | * \brief l_dnaConvertToNuma() |
223 | | * |
224 | | * \param[in] da |
225 | | * \return na, or NULL on error |
226 | | */ |
227 | | NUMA * |
228 | | l_dnaConvertToNuma(L_DNA *da) |
229 | 0 | { |
230 | 0 | l_int32 i, n; |
231 | 0 | l_float64 val; |
232 | 0 | NUMA *na; |
233 | |
|
234 | 0 | if (!da) |
235 | 0 | return (NUMA *)ERROR_PTR("da not defined", __func__, NULL); |
236 | | |
237 | 0 | n = l_dnaGetCount(da); |
238 | 0 | na = numaCreate(n); |
239 | 0 | for (i = 0; i < n; i++) { |
240 | 0 | l_dnaGetDValue(da, i, &val); |
241 | 0 | numaAddNumber(na, val); |
242 | 0 | } |
243 | 0 | return na; |
244 | 0 | } |
245 | | |
246 | | |
247 | | /*! |
248 | | * \brief numaConvertToDna |
249 | | * |
250 | | * \param[in] na |
251 | | * \return da, or NULL on error |
252 | | */ |
253 | | L_DNA * |
254 | | numaConvertToDna(NUMA *na) |
255 | 0 | { |
256 | 0 | l_int32 i, n; |
257 | 0 | l_float32 val; |
258 | 0 | L_DNA *da; |
259 | |
|
260 | 0 | if (!na) |
261 | 0 | return (L_DNA *)ERROR_PTR("na not defined", __func__, NULL); |
262 | | |
263 | 0 | n = numaGetCount(na); |
264 | 0 | da = l_dnaCreate(n); |
265 | 0 | for (i = 0; i < n; i++) { |
266 | 0 | numaGetFValue(na, i, &val); |
267 | 0 | l_dnaAddNumber(da, val); |
268 | 0 | } |
269 | 0 | return da; |
270 | 0 | } |
271 | | |
272 | | |
273 | | /*----------------------------------------------------------------------* |
274 | | * Conversion from pix data to dna * |
275 | | *----------------------------------------------------------------------*/ |
276 | | /*! |
277 | | * \brief pixConvertDataToDna() |
278 | | * |
279 | | * \param[in] pix 32 bpp RGB(A) |
280 | | * \return da, or NULL on error |
281 | | * |
282 | | * <pre> |
283 | | * Notes: |
284 | | * (1) This writes the RGBA pixel values into the dna, in row-major order. |
285 | | * </pre> |
286 | | */ |
287 | | L_DNA * |
288 | | pixConvertDataToDna(PIX *pix) |
289 | 0 | { |
290 | 0 | l_int32 i, j, w, h, wpl; |
291 | 0 | l_uint32 *data, *line; |
292 | 0 | L_DNA *da; |
293 | |
|
294 | 0 | if (!pix) |
295 | 0 | return (L_DNA *)ERROR_PTR("pix not defined", __func__, NULL); |
296 | 0 | if (pixGetDepth(pix) != 32) |
297 | 0 | return (L_DNA *)ERROR_PTR("pix not 32 bpp", __func__, NULL); |
298 | | |
299 | 0 | pixGetDimensions(pix, &w, &h, NULL); |
300 | 0 | data = pixGetData(pix); |
301 | 0 | wpl = pixGetWpl(pix); |
302 | 0 | da = l_dnaCreate(w * h); |
303 | 0 | for (i = 0; i < h; i++) { |
304 | 0 | line = data + i * wpl; |
305 | 0 | for (j = 0; j < w; j++) |
306 | 0 | l_dnaAddNumber(da, (l_float64)line[j]); |
307 | 0 | } |
308 | 0 | return da; |
309 | 0 | } |
310 | | |
311 | | |
312 | | /*----------------------------------------------------------------------* |
313 | | * Set operations using aset (rbtree) * |
314 | | *----------------------------------------------------------------------*/ |
315 | | /*! |
316 | | * \brief l_asetCreateFromDna() |
317 | | * |
318 | | * \param[in] da source dna |
319 | | * \return set using the doubles in %da as keys |
320 | | */ |
321 | | L_ASET * |
322 | | l_asetCreateFromDna(L_DNA *da) |
323 | 0 | { |
324 | 0 | l_int32 i, n; |
325 | 0 | l_float64 val; |
326 | 0 | L_ASET *set; |
327 | 0 | RB_TYPE key; |
328 | |
|
329 | 0 | if (!da) |
330 | 0 | return (L_ASET *)ERROR_PTR("da not defined", __func__, NULL); |
331 | | |
332 | 0 | set = l_asetCreate(L_FLOAT_TYPE); |
333 | 0 | n = l_dnaGetCount(da); |
334 | 0 | for (i = 0; i < n; i++) { |
335 | 0 | l_dnaGetDValue(da, i, &val); |
336 | 0 | key.ftype = val; |
337 | 0 | l_asetInsert(set, key); |
338 | 0 | } |
339 | |
|
340 | 0 | return set; |
341 | 0 | } |
342 | | |
343 | | |
344 | | /*! |
345 | | * \brief l_dnaRemoveDupsByAset() |
346 | | * |
347 | | * \param[in] das |
348 | | * \param[out] pdad with duplicated removed |
349 | | * \return 0 if OK; 1 on error |
350 | | */ |
351 | | l_ok |
352 | | l_dnaRemoveDupsByAset(L_DNA *das, |
353 | | L_DNA **pdad) |
354 | 0 | { |
355 | 0 | l_int32 i, n; |
356 | 0 | l_float64 val; |
357 | 0 | L_DNA *dad; |
358 | 0 | L_ASET *set; |
359 | 0 | RB_TYPE key; |
360 | |
|
361 | 0 | if (!pdad) |
362 | 0 | return ERROR_INT("&dad not defined", __func__, 1); |
363 | 0 | *pdad = NULL; |
364 | 0 | if (!das) |
365 | 0 | return ERROR_INT("das not defined", __func__, 1); |
366 | | |
367 | 0 | set = l_asetCreate(L_FLOAT_TYPE); |
368 | 0 | dad = l_dnaCreate(0); |
369 | 0 | *pdad = dad; |
370 | 0 | n = l_dnaGetCount(das); |
371 | 0 | for (i = 0; i < n; i++) { |
372 | 0 | l_dnaGetDValue(das, i, &val); |
373 | 0 | key.ftype = val; |
374 | 0 | if (!l_asetFind(set, key)) { |
375 | 0 | l_dnaAddNumber(dad, val); |
376 | 0 | l_asetInsert(set, key); |
377 | 0 | } |
378 | 0 | } |
379 | |
|
380 | 0 | l_asetDestroy(&set); |
381 | 0 | return 0; |
382 | 0 | } |
383 | | |
384 | | |
385 | | /*! |
386 | | * \brief l_dnaUnionByAset() |
387 | | * |
388 | | * \param[in] da1 |
389 | | * \param[in] da2 |
390 | | * \param[out] pdad union of the two arrays |
391 | | * \return 0 if OK; 1 on error |
392 | | * |
393 | | * <pre> |
394 | | * Notes: |
395 | | * (1) See sarrayUnionByAset() for the approach. |
396 | | * (2) Here, the key in building the sorted tree is the number itself. |
397 | | * (3) Operations using an underlying tree are O(nlogn), which is |
398 | | * typically less efficient than hashing, which is O(n). |
399 | | * </pre> |
400 | | */ |
401 | | l_ok |
402 | | l_dnaUnionByAset(L_DNA *da1, |
403 | | L_DNA *da2, |
404 | | L_DNA **pdad) |
405 | 0 | { |
406 | 0 | L_DNA *da3; |
407 | |
|
408 | 0 | if (!pdad) |
409 | 0 | return ERROR_INT("&dad not defined", __func__, 1); |
410 | 0 | if (!da1) |
411 | 0 | return ERROR_INT("da1 not defined", __func__, 1); |
412 | 0 | if (!da2) |
413 | 0 | return ERROR_INT("da2 not defined", __func__, 1); |
414 | | |
415 | | /* Join */ |
416 | 0 | da3 = l_dnaCopy(da1); |
417 | 0 | if (l_dnaJoin(da3, da2, 0, -1) == 1) { |
418 | 0 | l_dnaDestroy(&da3); |
419 | 0 | return ERROR_INT("join failed for da3", __func__, 1); |
420 | 0 | } |
421 | | |
422 | | /* Eliminate duplicates */ |
423 | 0 | l_dnaRemoveDupsByAset(da3, pdad); |
424 | 0 | l_dnaDestroy(&da3); |
425 | 0 | return 0; |
426 | 0 | } |
427 | | |
428 | | |
429 | | /*! |
430 | | * \brief l_dnaIntersectionByAset() |
431 | | * |
432 | | * \param[in] da1 |
433 | | * \param[in] da2 |
434 | | * \param[out] pdad intersection of the two arrays |
435 | | * \return 0 if OK; 1 on error |
436 | | * |
437 | | * <pre> |
438 | | * Notes: |
439 | | * (1) See sarrayIntersection() for the approach. |
440 | | * (2) Here, the key in building the sorted tree is the number itself. |
441 | | * (3) Operations using an underlying tree are O(nlogn), which is |
442 | | * typically less efficient than hashing, which is O(n). |
443 | | * </pre> |
444 | | */ |
445 | | l_ok |
446 | | l_dnaIntersectionByAset(L_DNA *da1, |
447 | | L_DNA *da2, |
448 | | L_DNA **pdad) |
449 | 0 | { |
450 | 0 | l_int32 n1, n2, i, n; |
451 | 0 | l_float64 val; |
452 | 0 | L_ASET *set1, *set2; |
453 | 0 | RB_TYPE key; |
454 | 0 | L_DNA *da_small, *da_big, *dad; |
455 | |
|
456 | 0 | if (!pdad) |
457 | 0 | return ERROR_INT("&dad not defined", __func__, 1); |
458 | 0 | *pdad = NULL; |
459 | 0 | if (!da1) |
460 | 0 | return ERROR_INT("&da1 not defined", __func__, 1); |
461 | 0 | if (!da2) |
462 | 0 | return ERROR_INT("&da2 not defined", __func__, 1); |
463 | | |
464 | | /* Put the elements of the largest array into a set */ |
465 | 0 | n1 = l_dnaGetCount(da1); |
466 | 0 | n2 = l_dnaGetCount(da2); |
467 | 0 | da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ |
468 | 0 | da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ |
469 | 0 | set1 = l_asetCreateFromDna(da_big); |
470 | | |
471 | | /* Build up the intersection of doubles */ |
472 | 0 | dad = l_dnaCreate(0); |
473 | 0 | *pdad = dad; |
474 | 0 | n = l_dnaGetCount(da_small); |
475 | 0 | set2 = l_asetCreate(L_FLOAT_TYPE); |
476 | 0 | for (i = 0; i < n; i++) { |
477 | 0 | l_dnaGetDValue(da_small, i, &val); |
478 | 0 | key.ftype = val; |
479 | 0 | if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { |
480 | 0 | l_dnaAddNumber(dad, val); |
481 | 0 | l_asetInsert(set2, key); |
482 | 0 | } |
483 | 0 | } |
484 | |
|
485 | 0 | l_asetDestroy(&set1); |
486 | 0 | l_asetDestroy(&set2); |
487 | 0 | return 0; |
488 | 0 | } |
489 | | |
490 | | |
491 | | /*--------------------------------------------------------------------------* |
492 | | * Hashmap operations * |
493 | | *--------------------------------------------------------------------------*/ |
494 | | /*! |
495 | | * \brief l_hmapCreateFromDna() |
496 | | * |
497 | | * \param[in] da input dna |
498 | | * \return hmap hashmap, or NULL on error |
499 | | * |
500 | | * <pre> |
501 | | * Notes: |
502 | | * (1) Derive the hash keys from the values in %da. |
503 | | * (2) The indices into %da are stored in the val field of the hashitems. |
504 | | * This is necessary so that %hmap and %da can be used together. |
505 | | * </pre> |
506 | | */ |
507 | | L_HASHMAP * |
508 | | l_hmapCreateFromDna(L_DNA *da) |
509 | 0 | { |
510 | 0 | l_int32 i, n; |
511 | 0 | l_uint64 key; |
512 | 0 | l_float64 dval; |
513 | 0 | L_HASHMAP *hmap; |
514 | |
|
515 | 0 | if (!da) |
516 | 0 | return (L_HASHMAP *)ERROR_PTR("da not defined", __func__, NULL); |
517 | | |
518 | 0 | n = l_dnaGetCount(da); |
519 | 0 | hmap = l_hmapCreate(0, 0); |
520 | 0 | for (i = 0; i < n; i++) { |
521 | 0 | l_dnaGetDValue(da, i, &dval); |
522 | 0 | l_hashFloat64ToUint64(dval, &key); |
523 | 0 | l_hmapLookup(hmap, key, i, L_HMAP_CREATE); |
524 | 0 | } |
525 | 0 | return hmap; |
526 | 0 | } |
527 | | |
528 | | |
529 | | /*! |
530 | | * \brief l_dnaRemoveDupsByHmap() |
531 | | * |
532 | | * \param[in] das |
533 | | * \param[out] pdad hash set of unique values |
534 | | * \param[out] phmap [optional] hashmap used for lookup |
535 | | * \return 0 if OK; 1 on error |
536 | | * |
537 | | * <pre> |
538 | | * Notes: |
539 | | * (1) Generates the set of (unique) values from %das. |
540 | | * (2) The values in the hashitems are indices into %das. |
541 | | * </pre> |
542 | | */ |
543 | | l_ok |
544 | | l_dnaRemoveDupsByHmap(L_DNA *das, |
545 | | L_DNA **pdad, |
546 | | L_HASHMAP **phmap) |
547 | 0 | { |
548 | 0 | l_int32 i, tabsize; |
549 | 0 | l_float64 dval; |
550 | 0 | L_DNA *dad; |
551 | 0 | L_HASHITEM *hitem; |
552 | 0 | L_HASHMAP *hmap; |
553 | |
|
554 | 0 | if (phmap) *phmap = NULL; |
555 | 0 | if (!pdad) |
556 | 0 | return ERROR_INT("&dad not defined", __func__, 1); |
557 | 0 | *pdad = NULL; |
558 | 0 | if (!das) |
559 | 0 | return ERROR_INT("das not defined", __func__, 1); |
560 | | |
561 | | /* Traverse the hashtable lists */ |
562 | 0 | if ((hmap = l_hmapCreateFromDna(das)) == NULL) |
563 | 0 | return ERROR_INT("hmap not made", __func__, 1); |
564 | 0 | dad = l_dnaCreate(0); |
565 | 0 | *pdad = dad; |
566 | 0 | tabsize = hmap->tabsize; |
567 | 0 | for (i = 0; i < tabsize; i++) { |
568 | 0 | hitem = hmap->hashtab[i]; |
569 | 0 | while (hitem) { |
570 | 0 | l_dnaGetDValue(das, hitem->val, &dval); |
571 | 0 | l_dnaAddNumber(dad, dval); |
572 | 0 | hitem = hitem->next; |
573 | 0 | } |
574 | 0 | } |
575 | |
|
576 | 0 | if (phmap) |
577 | 0 | *phmap = hmap; |
578 | 0 | else |
579 | 0 | l_hmapDestroy(&hmap); |
580 | 0 | return 0; |
581 | 0 | } |
582 | | |
583 | | |
584 | | /*! |
585 | | * \brief l_dnaUnionByHmap() |
586 | | * |
587 | | * \param[in] da1 |
588 | | * \param[in] da2 |
589 | | * \param[out] pdad union of the array values |
590 | | * \return 0 if OK; 1 on error |
591 | | * |
592 | | * <pre> |
593 | | * Notes: |
594 | | * (1) Make dna with numbers found in either of the input arrays. |
595 | | * </pre> |
596 | | */ |
597 | | l_ok |
598 | | l_dnaUnionByHmap(L_DNA *da1, |
599 | | L_DNA *da2, |
600 | | L_DNA **pdad) |
601 | 0 | { |
602 | 0 | L_DNA *da3; |
603 | |
|
604 | 0 | if (!pdad) |
605 | 0 | return ERROR_INT("&dad not defined", __func__, 1); |
606 | 0 | *pdad = NULL; |
607 | 0 | if (!da1) |
608 | 0 | return ERROR_INT("da1 not defined", __func__, 1); |
609 | 0 | if (!da2) |
610 | 0 | return ERROR_INT("da2 not defined", __func__, 1); |
611 | | |
612 | 0 | da3 = l_dnaCopy(da1); |
613 | 0 | if (l_dnaJoin(da3, da2, 0, -1) == 1) { |
614 | 0 | l_dnaDestroy(&da3); |
615 | 0 | return ERROR_INT("da3 join failed", __func__, 1); |
616 | 0 | } |
617 | 0 | l_dnaRemoveDupsByHmap(da3, pdad, NULL); |
618 | 0 | l_dnaDestroy(&da3); |
619 | 0 | return 0; |
620 | 0 | } |
621 | | |
622 | | |
623 | | /*! |
624 | | * \brief l_dnaIntersectionByHmap() |
625 | | * |
626 | | * \param[in] da1 |
627 | | * \param[in] da2 |
628 | | * \param[out] pdad intersection of the array values |
629 | | * \return 0 if OK; 1 on error |
630 | | * |
631 | | * <pre> |
632 | | * Notes: |
633 | | * (1) Make dna with numbers common to both input arrays. |
634 | | * (2) Use the values in the dna as the hash keys. |
635 | | * </pre> |
636 | | */ |
637 | | l_ok |
638 | | l_dnaIntersectionByHmap(L_DNA *da1, |
639 | | L_DNA *da2, |
640 | | L_DNA **pdad) |
641 | 0 | { |
642 | 0 | l_int32 i, n1, n2, n; |
643 | 0 | l_uint64 key; |
644 | 0 | l_float64 dval; |
645 | 0 | L_DNA *da_small, *da_big, *dad; |
646 | 0 | L_HASHITEM *hitem; |
647 | 0 | L_HASHMAP *hmap; |
648 | |
|
649 | 0 | if (!pdad) |
650 | 0 | return ERROR_INT("&dad not defined", __func__, 1); |
651 | 0 | *pdad = NULL; |
652 | 0 | if (!da1) |
653 | 0 | return ERROR_INT("da1 not defined", __func__, 1); |
654 | 0 | if (!da2) |
655 | 0 | return ERROR_INT("da2 not defined", __func__, 1); |
656 | | |
657 | | /* Make a hashmap for the elements of the biggest array */ |
658 | 0 | n1 = l_dnaGetCount(da1); |
659 | 0 | n2 = l_dnaGetCount(da2); |
660 | 0 | da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ |
661 | 0 | da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ |
662 | 0 | if ((hmap = l_hmapCreateFromDna(da_big)) == NULL) |
663 | 0 | return ERROR_INT("hmap not made", __func__, 1); |
664 | | |
665 | | /* Go through the smallest array, doing a lookup of its dval into |
666 | | * the big array hashmap. If an hitem is returned, check the count. |
667 | | * If the count is 0, ignore; otherwise, add the dval to the |
668 | | * output dad and set the count in the hitem to 0, indicating |
669 | | * that the dval has already been added. */ |
670 | 0 | dad = l_dnaCreate(0); |
671 | 0 | *pdad = dad; |
672 | 0 | n = l_dnaGetCount(da_small); |
673 | 0 | for (i = 0; i < n; i++) { |
674 | 0 | l_dnaGetDValue(da_small, i, &dval); |
675 | 0 | l_hashFloat64ToUint64(dval, &key); |
676 | 0 | hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK); |
677 | 0 | if (!hitem || hitem->count == 0) |
678 | 0 | continue; |
679 | 0 | l_dnaAddNumber(dad, dval); |
680 | 0 | hitem->count = 0; |
681 | 0 | } |
682 | 0 | l_hmapDestroy(&hmap); |
683 | 0 | return 0; |
684 | 0 | } |
685 | | |
686 | | |
687 | | /*! |
688 | | * \brief l_dnaMakeHistoByHmap() |
689 | | * |
690 | | * \param[in] das |
691 | | * \param[out] pdav array (set) of unique values |
692 | | * \param[out] pdac array of counts, aligned with the array of values |
693 | | * \return 0 if OK; 1 on error |
694 | | * |
695 | | * <pre> |
696 | | * Notes: |
697 | | * (1) Generates a histogram represented by two aligned arrays: |
698 | | * value and count. |
699 | | * </pre> |
700 | | */ |
701 | | l_ok |
702 | | l_dnaMakeHistoByHmap(L_DNA *das, |
703 | | L_DNA **pdav, |
704 | | L_DNA **pdac) |
705 | 0 | { |
706 | 0 | l_int32 i, tabsize; |
707 | 0 | l_float64 dval; |
708 | 0 | L_DNA *dac, *dav; |
709 | 0 | L_HASHITEM *hitem; |
710 | 0 | L_HASHMAP *hmap; |
711 | |
|
712 | 0 | if (pdav) *pdav = NULL; |
713 | 0 | if (pdac) *pdac = NULL; |
714 | 0 | if (!das) |
715 | 0 | return ERROR_INT("das not defined", __func__, 1); |
716 | 0 | if (!pdav) |
717 | 0 | return ERROR_INT("&dav not defined", __func__, 1); |
718 | 0 | if (!pdac) |
719 | 0 | return ERROR_INT("&dac not defined", __func__, 1); |
720 | | |
721 | | /* Traverse the hashtable lists */ |
722 | 0 | if ((hmap = l_hmapCreateFromDna(das)) == NULL) |
723 | 0 | return ERROR_INT("hmap not made", __func__, 1); |
724 | 0 | dav = l_dnaCreate(0); |
725 | 0 | *pdav = dav; |
726 | 0 | dac = l_dnaCreate(0); |
727 | 0 | *pdac = dac; |
728 | 0 | tabsize = hmap->tabsize; |
729 | 0 | for (i = 0; i < tabsize; i++) { |
730 | 0 | hitem = hmap->hashtab[i]; |
731 | 0 | while (hitem) { |
732 | 0 | l_dnaGetDValue(das, hitem->val, &dval); |
733 | 0 | l_dnaAddNumber(dav, dval); |
734 | 0 | l_dnaAddNumber(dac, hitem->count); |
735 | 0 | hitem = hitem->next; |
736 | 0 | } |
737 | 0 | } |
738 | |
|
739 | 0 | l_hmapDestroy(&hmap); |
740 | 0 | return 0; |
741 | 0 | } |
742 | | |
743 | | |
744 | | /*----------------------------------------------------------------------* |
745 | | * Miscellaneous operations * |
746 | | *----------------------------------------------------------------------*/ |
747 | | /*! |
748 | | * \brief l_dnaDiffAdjValues() |
749 | | * |
750 | | * \param[in] das input l_dna |
751 | | * \return dad of difference values val[i+1] - val[i], |
752 | | * or NULL on error |
753 | | */ |
754 | | L_DNA * |
755 | | l_dnaDiffAdjValues(L_DNA *das) |
756 | 0 | { |
757 | 0 | l_int32 i, n, prev, cur; |
758 | 0 | L_DNA *dad; |
759 | |
|
760 | 0 | if (!das) |
761 | 0 | return (L_DNA *)ERROR_PTR("das not defined", __func__, NULL); |
762 | 0 | n = l_dnaGetCount(das); |
763 | 0 | dad = l_dnaCreate(n - 1); |
764 | 0 | prev = 0; |
765 | 0 | for (i = 1; i < n; i++) { |
766 | 0 | l_dnaGetIValue(das, i, &cur); |
767 | 0 | l_dnaAddNumber(dad, cur - prev); |
768 | 0 | prev = cur; |
769 | 0 | } |
770 | 0 | return dad; |
771 | 0 | } |
772 | | |