Coverage Report

Created: 2025-06-13 07:02

/src/leptonica/src/sarray2.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  sarray2.c
29
 * <pre>
30
 *
31
 *      Sort
32
 *          SARRAY     *sarraySort()
33
 *          SARRAY     *sarraySortByIndex()
34
 *          l_int32     stringCompareLexical()
35
 *
36
 *      Set operations using aset (rbtree)
37
 *          L_ASET     *l_asetCreateFromSarray()
38
 *          l_int32     sarrayRemoveDupsByAset()
39
 *          l_int32     sarrayUnionByAset()
40
 *          l_int32     sarrayIntersectionByAset()
41
 *
42
 *      Hashmap operations
43
 *          L_HASHMAP  *l_hmapCreateFromSarray()
44
 *          l_int32     sarrayRemoveDupsByHmap()
45
 *          l_int32     sarrayUnionByHmap()
46
 *          l_int32     sarrayIntersectionByHmap()
47
 *
48
 *      Miscellaneous operations
49
 *          SARRAY     *sarrayGenerateIntegers()
50
 *          l_int32     sarrayLookupCSKV()
51
 *
52
 *
53
 * We have two implementations of set operations on an array of strings:
54
 *
55
 *   (1) Using an underlying tree (rbtree).
56
 *       This uses a good 64 bit hashing function for the key,
57
 *       that is not expected to have hash collisions (and we do
58
 *       not test for them).  The tree is built up of the hash
59
 *       values, and if the hash is found in the tree, it is
60
 *       assumed that the string has already been found.
61
 *
62
 *   (2) Building a hashmap from the keys (hashmap).
63
 *       This uses a fast 64 bit hashing function for the key, which
64
 *       is then hashed into a hashtable.  Collisions of hashkeys are
65
 *       very rare, but the hashtable is designed to allow more than one
66
 *       hashitem in a table entry.  The hashitems are put in a list at
67
 *       each hashtable entry, which is traversed looking for the key.
68
 * </pre>
69
 */
70
71
#ifdef HAVE_CONFIG_H
72
#include <config_auto.h>
73
#endif  /* HAVE_CONFIG_H */
74
75
#include <string.h>
76
#include "allheaders.h"
77
#include "array_internal.h"
78
79
/*----------------------------------------------------------------------*
80
 *                                   Sort                               *
81
 *----------------------------------------------------------------------*/
82
/*!
83
 * \brief   sarraySort()
84
 *
85
 * \param[in]    saout       output sarray; can be NULL or equal to sain
86
 * \param[in]    sain        input sarray
87
 * \param[in]    sortorder   L_SORT_INCREASING or L_SORT_DECREASING
88
 * \return  saout output sarray, sorted by ascii value, or NULL on error
89
 *
90
 * <pre>
91
 * Notes:
92
 *      (1) Set saout = sain for in-place; otherwise, set naout = NULL.
93
 *      (2) Shell sort, modified from K&R, 2nd edition, p.62.
94
 *          Slow but simple O(n logn) sort.
95
 * </pre>
96
 */
97
SARRAY *
98
sarraySort(SARRAY  *saout,
99
           SARRAY  *sain,
100
           l_int32  sortorder)
101
0
{
102
0
char   **array;
103
0
char    *tmp;
104
0
l_int32  n, i, j, gap;
105
106
0
    if (!sain)
107
0
        return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
108
109
        /* Make saout if necessary; otherwise do in-place */
110
0
    if (!saout)
111
0
        saout = sarrayCopy(sain);
112
0
    else if (sain != saout)
113
0
        return (SARRAY *)ERROR_PTR("invalid: not in-place", __func__, NULL);
114
0
    array = saout->array;  /* operate directly on the array */
115
0
    n = sarrayGetCount(saout);
116
117
        /* Shell sort */
118
0
    for (gap = n/2; gap > 0; gap = gap / 2) {
119
0
        for (i = gap; i < n; i++) {
120
0
            for (j = i - gap; j >= 0; j -= gap) {
121
0
                if ((sortorder == L_SORT_INCREASING &&
122
0
                     stringCompareLexical(array[j], array[j + gap])) ||
123
0
                    (sortorder == L_SORT_DECREASING &&
124
0
                     stringCompareLexical(array[j + gap], array[j])))
125
0
                {
126
0
                    tmp = array[j];
127
0
                    array[j] = array[j + gap];
128
0
                    array[j + gap] = tmp;
129
0
                }
130
0
            }
131
0
        }
132
0
    }
133
134
0
    return saout;
135
0
}
136
137
138
/*!
139
 * \brief   sarraySortByIndex()
140
 *
141
 * \param[in]    sain
142
 * \param[in]    naindex   na that maps from the new sarray to the input sarray
143
 * \return  saout sorted, or NULL on error
144
 */
145
SARRAY *
146
sarraySortByIndex(SARRAY  *sain,
147
                  NUMA    *naindex)
148
0
{
149
0
char    *str;
150
0
l_int32  i, n, index;
151
0
SARRAY  *saout;
152
153
0
    if (!sain)
154
0
        return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
155
0
    if (!naindex)
156
0
        return (SARRAY *)ERROR_PTR("naindex not defined", __func__, NULL);
157
158
0
    n = sarrayGetCount(sain);
159
0
    saout = sarrayCreate(n);
160
0
    for (i = 0; i < n; i++) {
161
0
        numaGetIValue(naindex, i, &index);
162
0
        str = sarrayGetString(sain, index, L_COPY);
163
0
        sarrayAddString(saout, str, L_INSERT);
164
0
    }
165
166
0
    return saout;
167
0
}
168
169
170
/*!
171
 * \brief   stringCompareLexical()
172
 *
173
 * \param[in]    str1
174
 * \param[in]    str2
175
 * \return  1 if str1 > str2 lexically; 0 otherwise
176
 *
177
 * <pre>
178
 * Notes:
179
 *      (1) If the lexical values are identical, return a 0, to
180
 *          indicate that no swapping is required to sort the strings.
181
 * </pre>
182
 */
183
l_int32
184
stringCompareLexical(const char *str1,
185
                     const char *str2)
186
0
{
187
0
l_int32  i, len1, len2, len;
188
189
0
    if (!str1)
190
0
        return ERROR_INT("str1 not defined", __func__, 1);
191
0
    if (!str2)
192
0
        return ERROR_INT("str2 not defined", __func__, 1);
193
194
0
    len1 = strlen(str1);
195
0
    len2 = strlen(str2);
196
0
    len = L_MIN(len1, len2);
197
198
0
    for (i = 0; i < len; i++) {
199
0
        if (str1[i] == str2[i])
200
0
            continue;
201
0
        if (str1[i] > str2[i])
202
0
            return 1;
203
0
        else
204
0
            return 0;
205
0
    }
206
207
0
    if (len1 > len2)
208
0
        return 1;
209
0
    else
210
0
        return 0;
211
0
}
212
213
214
/*----------------------------------------------------------------------*
215
 *                   Set operations using aset (rbtree)                 *
216
 *----------------------------------------------------------------------*/
217
/*!
218
 * \brief   l_asetCreateFromSarray()
219
 *
220
 * \param[in]    sa
221
 * \return  set using a string hash into a uint64 as the key
222
 */
223
L_ASET *
224
l_asetCreateFromSarray(SARRAY  *sa)
225
0
{
226
0
char     *str;
227
0
l_int32   i, n;
228
0
l_uint64  hash;
229
0
L_ASET   *set;
230
0
RB_TYPE   key;
231
232
0
    if (!sa)
233
0
        return (L_ASET *)ERROR_PTR("sa not defined", __func__, NULL);
234
235
0
    set = l_asetCreate(L_UINT_TYPE);
236
0
    n = sarrayGetCount(sa);
237
0
    for (i = 0; i < n; i++) {
238
0
        str = sarrayGetString(sa, i, L_NOCOPY);
239
0
        l_hashStringToUint64Fast(str, &hash);
240
0
        key.utype = hash;
241
0
        l_asetInsert(set, key);
242
0
    }
243
244
0
    return set;
245
0
}
246
247
248
/*!
249
 * \brief   sarrayRemoveDupsByAset()
250
 *
251
 * \param[in]    sas
252
 * \param[out]   psad      with duplicates removed
253
 * \return  0 if OK; 1 on error
254
 *
255
 * <pre>
256
 * Notes:
257
 *      (1) This is O(nlogn), considerably slower than
258
 *          sarrayRemoveDupsByHmap() for large string arrays.
259
 *      (2) The key for each string is a 64-bit hash.
260
 *      (3) Build a set, using hashed strings as keys.  As the set is
261
 *          built, first do a find; if not found, add the key to the
262
 *          set and add the string to the output sarray.
263
 * </pre>
264
 */
265
l_ok
266
sarrayRemoveDupsByAset(SARRAY   *sas,
267
                       SARRAY  **psad)
268
0
{
269
0
char     *str;
270
0
l_int32   i, n;
271
0
l_uint64  hash;
272
0
L_ASET   *set;
273
0
RB_TYPE   key;
274
0
SARRAY   *sad;
275
276
0
    if (!psad)
277
0
        return ERROR_INT("&sad not defined", __func__, 1);
278
0
    *psad = NULL;
279
0
    if (!sas)
280
0
        return ERROR_INT("sas not defined", __func__, 1);
281
282
0
    set = l_asetCreate(L_UINT_TYPE);
283
0
    sad = sarrayCreate(0);
284
0
    *psad = sad;
285
0
    n = sarrayGetCount(sas);
286
0
    for (i = 0; i < n; i++) {
287
0
        str = sarrayGetString(sas, i, L_NOCOPY);
288
0
        l_hashStringToUint64Fast(str, &hash);
289
0
        key.utype = hash;
290
0
        if (!l_asetFind(set, key)) {
291
0
            sarrayAddString(sad, str, L_COPY);
292
0
            l_asetInsert(set, key);
293
0
        }
294
0
    }
295
296
0
    l_asetDestroy(&set);
297
0
    return 0;
298
0
}
299
300
301
/*!
302
 * \brief   sarrayUnionByAset()
303
 *
304
 * \param[in]    sa1
305
 * \param[in]    sa2
306
 * \param[out]   psad      union of the two string arrays
307
 * \return  0 if OK; 1 on error
308
 *
309
 * <pre>
310
 * Notes:
311
 *      (1) Duplicates are removed from the concatenation of the two arrays.
312
 *      (2) The key for each string is a 64-bit hash.
313
 *      (2) Algorithm: Concatenate the two sarrays.  Then build a set,
314
 *          using hashed strings as keys.  As the set is built, first do
315
 *          a find; if not found, add the key to the set and add the string
316
 *          to the output sarray.  This is O(nlogn).
317
 * </pre>
318
 */
319
l_ok
320
sarrayUnionByAset(SARRAY   *sa1,
321
                  SARRAY   *sa2,
322
                  SARRAY  **psad)
323
0
{
324
0
SARRAY  *sa3;
325
326
0
    if (!psad)
327
0
        return ERROR_INT("&sad not defined", __func__, 1);
328
0
    *psad = NULL;
329
0
    if (!sa1)
330
0
        return ERROR_INT("sa1 not defined", __func__, 1);
331
0
    if (!sa2)
332
0
        return ERROR_INT("sa2 not defined", __func__, 1);
333
334
        /* Join */
335
0
    sa3 = sarrayCopy(sa1);
336
0
    if (sarrayJoin(sa3, sa2) == 1) {
337
0
        sarrayDestroy(&sa3);
338
0
        return ERROR_INT("join failed for sa3", __func__, 1);
339
0
    }
340
341
        /* Eliminate duplicates */
342
0
    sarrayRemoveDupsByAset(sa3, psad);
343
0
    sarrayDestroy(&sa3);
344
0
    return 0;
345
0
}
346
347
348
/*!
349
 * \brief   sarrayIntersectionByAset()
350
 *
351
 * \param[in]    sa1
352
 * \param[in]    sa2
353
 * \param[out]   psad      intersection of the two string arrays
354
 * \return  0 if OK; 1 on error
355
 *
356
 * <pre>
357
 * Notes:
358
 *      (1) Algorithm: put the larger sarray into a set, using the string
359
 *          hashes as the key values.  Then run through the smaller sarray,
360
 *          building an output sarray and a second set from the strings
361
 *          in the larger array: if a string is in the first set but
362
 *          not in the second, add the string to the output sarray and hash
363
 *          it into the second set.  The second set is required to make
364
 *          sure only one instance of each string is put into the output sarray.
365
 *          This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
366
 * </pre>
367
 */
368
l_ok
369
sarrayIntersectionByAset(SARRAY   *sa1,
370
                         SARRAY   *sa2,
371
                         SARRAY  **psad)
372
0
{
373
0
char     *str;
374
0
l_int32   n1, n2, i, n;
375
0
l_uint64  hash;
376
0
L_ASET   *set1, *set2;
377
0
RB_TYPE   key;
378
0
SARRAY   *sa_small, *sa_big, *sad;
379
380
0
    if (!psad)
381
0
        return ERROR_INT("&sad not defined", __func__, 1);
382
0
    *psad = NULL;
383
0
    if (!sa1)
384
0
        return ERROR_INT("sa1 not defined", __func__, 1);
385
0
    if (!sa2)
386
0
        return ERROR_INT("sa2 not defined", __func__, 1);
387
388
        /* Put the elements of the biggest array into a set */
389
0
    n1 = sarrayGetCount(sa1);
390
0
    n2 = sarrayGetCount(sa2);
391
0
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
392
0
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
393
0
    set1 = l_asetCreateFromSarray(sa_big);
394
395
        /* Build up the intersection of strings */
396
0
    sad = sarrayCreate(0);
397
0
    *psad = sad;
398
0
    n = sarrayGetCount(sa_small);
399
0
    set2 = l_asetCreate(L_UINT_TYPE);
400
0
    for (i = 0; i < n; i++) {
401
0
        str = sarrayGetString(sa_small, i, L_NOCOPY);
402
0
        l_hashStringToUint64Fast(str, &hash);
403
0
        key.utype = hash;
404
0
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
405
0
            sarrayAddString(sad, str, L_COPY);
406
0
            l_asetInsert(set2, key);
407
0
        }
408
0
    }
409
410
0
    l_asetDestroy(&set1);
411
0
    l_asetDestroy(&set2);
412
0
    return 0;
413
0
}
414
415
416
/*----------------------------------------------------------------------*
417
 *                          Hashmap operations                          *
418
 *----------------------------------------------------------------------*/
419
/*!
420
 * \brief  l_hmapCreateFromSarray()
421
 *
422
 * \param[in]   sa     input sarray
423
 * \return      hmap   hashmap, or NULL on error
424
 */
425
L_HASHMAP *
426
l_hmapCreateFromSarray(SARRAY  *sa)
427
0
{
428
0
l_int32      i, n;
429
0
l_uint64     key;
430
0
char        *str;
431
0
L_HASHMAP   *hmap;
432
433
0
    if (!sa)
434
0
        return (L_HASHMAP *)ERROR_PTR("sa not defined", __func__, NULL);
435
436
0
    n = sarrayGetCount(sa);
437
0
    if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
438
0
        return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL);
439
0
    for (i = 0; i < n; i++) {
440
0
        str = sarrayGetString(sa, i, L_NOCOPY);
441
0
        l_hashStringToUint64Fast(str, &key);
442
0
        l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
443
0
    }
444
0
    return hmap;
445
0
}
446
447
448
/*!
449
 * \brief  sarrayRemoveDupsByHmap()
450
 *
451
 * \param[in]   sas
452
 * \param[out]  psad    hash set of unique values
453
 * \param[out]  phmap   [optional] hashmap used for lookup
454
 * \return  0 if OK; 1 on error
455
 */
456
l_ok
457
sarrayRemoveDupsByHmap(SARRAY      *sas,
458
                       SARRAY     **psad,
459
                       L_HASHMAP  **phmap)
460
0
{
461
0
l_int32      i, tabsize;
462
0
char        *str;
463
0
SARRAY      *sad;
464
0
L_HASHITEM  *hitem;
465
0
L_HASHMAP   *hmap;
466
467
0
    if (phmap) *phmap = NULL;
468
0
    if (!psad)
469
0
        return ERROR_INT("&sad not defined", __func__, 1);
470
0
    *psad = NULL;
471
0
    if (!sas)
472
0
        return ERROR_INT("sas not defined", __func__, 1);
473
474
        /* Traverse the hashtable lists */
475
0
    if ((hmap = l_hmapCreateFromSarray(sas)) == NULL)
476
0
        return ERROR_INT("hmap not made", __func__, 1);
477
0
    sad = sarrayCreate(0);
478
0
    *psad = sad;
479
0
    tabsize = hmap->tabsize;
480
0
    for (i = 0; i < tabsize; i++) {
481
0
        hitem = hmap->hashtab[i];
482
0
        while (hitem) {
483
0
            str = sarrayGetString(sas, hitem->val, L_COPY);
484
0
            sarrayAddString(sad, str, L_INSERT);
485
0
            hitem = hitem->next;
486
0
        }
487
0
    }
488
489
0
    if (phmap)
490
0
        *phmap = hmap;
491
0
    else
492
0
        l_hmapDestroy(&hmap);
493
0
    return 0;
494
0
}
495
496
497
/*!
498
 * \brief  sarrayUnionByHmap()
499
 *
500
 * \param[in]   sa1
501
 * \param[in]   sa2
502
 * \param[out]  *psad     union of the array values
503
 * \return  0 if OK; 1 on error
504
 */
505
l_ok
506
sarrayUnionByHmap(SARRAY   *sa1,
507
                  SARRAY   *sa2,
508
                  SARRAY  **psad)
509
0
{
510
0
SARRAY  *sa3;
511
512
0
    if (!psad)
513
0
        return ERROR_INT("&sad not defined", __func__, 1);
514
0
    *psad = NULL;
515
0
    if (!sa1)
516
0
        return ERROR_INT("sa1 not defined", __func__, 1);
517
0
    if (!sa2)
518
0
        return ERROR_INT("sa2 not defined", __func__, 1);
519
520
0
    sa3 = sarrayCopy(sa1);
521
0
    if (sarrayJoin(sa3, sa2) == 1) {
522
0
        sarrayDestroy(&sa3);
523
0
        return ERROR_INT("sa3 join failed", __func__, 1);
524
0
    }
525
0
    sarrayRemoveDupsByHmap(sa3, psad, NULL);
526
0
    sarrayDestroy(&sa3);
527
0
    return 0;
528
0
}
529
530
531
/*!
532
 * \brief  sarrayIntersectionByHmap()
533
 *
534
 * \param[in]   sa1
535
 * \param[in]   sa2
536
 * \param[out]  *psad     intersection of the array values
537
 * \return  0 if OK; 1 on error
538
 */
539
l_ok
540
sarrayIntersectionByHmap(SARRAY   *sa1,
541
                         SARRAY   *sa2,
542
                         SARRAY  **psad)
543
0
{
544
0
l_int32      i, n1, n2, n;
545
0
l_uint64     key;
546
0
char        *str;
547
0
SARRAY      *sa_small, *sa_big, *sa3, *sad;
548
0
L_HASHITEM  *hitem;
549
0
L_HASHMAP   *hmap;
550
551
0
    if (!psad)
552
0
        return ERROR_INT("&sad not defined", __func__, 1);
553
0
    *psad = NULL;
554
0
    if (!sa1)
555
0
        return ERROR_INT("sa1 not defined", __func__, 1);
556
0
    if (!sa2)
557
0
        return ERROR_INT("sa2 not defined", __func__, 1);
558
559
        /* Make a hashmap for the elements of the biggest array */
560
0
    n1 = sarrayGetCount(sa1);
561
0
    n2 = sarrayGetCount(sa2);
562
0
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
563
0
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
564
0
    if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL)
565
0
        return ERROR_INT("hmap not made", __func__, 1);
566
567
        /* Remove duplicates from the smallest array.  Alternatively,
568
         * we can skip this step and avoid counting duplicates in
569
         * sa_small by modifying the count fields in the sa_big hashitems;
570
         * e.g., see l_hmapIntersectionDna(). */
571
0
    sarrayRemoveDupsByHmap(sa_small, &sa3, NULL);
572
573
        /* Go through sa3, the set of strings derived from the smallest array,
574
         * hashing into the big array table.  Any string found belongs to both,
575
         * so add it to the output array. */
576
0
    sad = sarrayCreate(0);
577
0
    *psad = sad;
578
0
    n = sarrayGetCount(sa3);
579
0
    for (i = 0; i < n; i++) {
580
0
        str = sarrayGetString(sa3, i, L_NOCOPY);
581
0
        l_hashStringToUint64Fast(str, &key);
582
0
        hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
583
0
        if (hitem)
584
0
            sarrayAddString(sad, str, L_COPY);
585
0
    }
586
0
    l_hmapDestroy(&hmap);
587
0
    sarrayDestroy(&sa3);
588
0
    return 0;
589
0
}
590
591
592
/*----------------------------------------------------------------------*
593
 *                      Miscellaneous operations                        *
594
 *----------------------------------------------------------------------*/
595
/*!
596
 * \brief   sarrayGenerateIntegers()
597
 *
598
 * \param[in]   n
599
 * \return  sa  of printed numbers, 1 - n, or NULL on error
600
 */
601
SARRAY *
602
sarrayGenerateIntegers(l_int32  n)
603
0
{
604
0
char     buf[32];
605
0
l_int32  i;
606
0
SARRAY  *sa;
607
608
0
    if ((sa = sarrayCreate(n)) == NULL)
609
0
        return (SARRAY *)ERROR_PTR("sa not made", __func__, NULL);
610
0
    for (i = 0; i < n; i++) {
611
0
        snprintf(buf, sizeof(buf), "%d", i);
612
0
        sarrayAddString(sa, buf, L_COPY);
613
0
    }
614
0
    return sa;
615
0
}
616
617
618
/*!
619
 * \brief   sarrayLookupCSKV()
620
 *
621
 * \param[in]    sa          of strings, each being a comma-separated pair
622
 *                           of strings, the first being a key and the
623
 *                           second a value
624
 * \param[in]    keystring   an input string to match with each key in %sa
625
 * \param[out]   pvalstring  the returned value string corresponding to the
626
 *                           input key string, if found; otherwise NULL
627
 * \return  0 if OK, 1 on error
628
 *
629
 * <pre>
630
 * Notes:
631
 *      (1) The input %sa can have other strings that are not in
632
 *          comma-separated key-value format.  These will be ignored.
633
 *      (2) This returns a copy of the first value string in %sa whose
634
 *          key string matches the input %keystring.
635
 *      (3) White space is not ignored; all white space before the ','
636
 *          is used for the keystring in matching.  This allows the
637
 *          key and val strings to have white space (e.g., multiple words).
638
 * </pre>
639
 */
640
l_ok
641
sarrayLookupCSKV(SARRAY      *sa,
642
                 const char  *keystring,
643
                 char       **pvalstring)
644
0
{
645
0
char    *key, *val, *str;
646
0
l_int32  i, n;
647
0
SARRAY  *sa1;
648
649
0
    if (!pvalstring)
650
0
        return ERROR_INT("&valstring not defined", __func__, 1);
651
0
    *pvalstring = NULL;
652
0
    if (!sa)
653
0
        return ERROR_INT("sa not defined", __func__, 1);
654
0
    if (!keystring)
655
0
        return ERROR_INT("keystring not defined", __func__, 1);
656
657
0
    n = sarrayGetCount(sa);
658
0
    for (i = 0; i < n; i++) {
659
0
        str = sarrayGetString(sa, i, L_NOCOPY);
660
0
        sa1 = sarrayCreate(2);
661
0
        sarraySplitString(sa1, str, ",");
662
0
        if (sarrayGetCount(sa1) != 2) {
663
0
            sarrayDestroy(&sa1);
664
0
            continue;
665
0
        }
666
0
        key = sarrayGetString(sa1, 0, L_NOCOPY);
667
0
        val = sarrayGetString(sa1, 1, L_NOCOPY);
668
0
        if (!strcmp(key, keystring)) {
669
0
            *pvalstring = stringNew(val);
670
0
            sarrayDestroy(&sa1);
671
0
            return 0;
672
0
        }
673
0
        sarrayDestroy(&sa1);
674
0
    }
675
676
0
    return 0;
677
0
}