Coverage Report

Created: 2024-06-18 06:04

/src/leptonica/src/bytearray.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -
4
 -  Redistribution and use in source and binary forms, with or without
5
 -  modification, are permitted provided that the following conditions
6
 -  are met:
7
 -  1. Redistributions of source code must retain the above copyright
8
 -     notice, this list of conditions and the following disclaimer.
9
 -  2. Redistributions in binary form must reproduce the above
10
 -     copyright notice, this list of conditions and the following
11
 -     disclaimer in the documentation and/or other materials
12
 -     provided with the distribution.
13
 -
14
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
18
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
 *====================================================================*/
26
27
/*!
28
 * \file  bytearray.c
29
 * <pre>
30
 *
31
 *   Functions for handling byte arrays, in analogy with C++ 'strings'
32
 *
33
 *      Creation, copy, clone, destruction
34
 *           L_BYTEA      *l_byteaCreate()
35
 *           L_BYTEA      *l_byteaInitFromMem()
36
 *           L_BYTEA      *l_byteaInitFromFile()
37
 *           L_BYTEA      *l_byteaInitFromStream()
38
 *           L_BYTEA      *l_byteaCopy()
39
 *           void          l_byteaDestroy()
40
 *
41
 *      Accessors
42
 *           size_t        l_byteaGetSize()
43
 *           l_uint8      *l_byteaGetData()
44
 *           l_uint8      *l_byteaCopyData()
45
 *
46
 *      Appending
47
 *           l_int32       l_byteaAppendData()
48
 *           l_int32       l_byteaAppendString()
49
 *           static l_int32  l_byteaExtendArrayToSize()
50
 *
51
 *      Join/Split
52
 *           l_int32       l_byteaJoin()
53
 *           l_int32       l_byteaSplit()
54
 *
55
 *      Search
56
 *           l_int32       l_byteaFindEachSequence()
57
 *
58
 *      Output to file
59
 *           l_int32       l_byteaWrite()
60
 *           l_int32       l_byteaWriteStream()
61
 *
62
 *   The internal data array is always null-terminated, for ease of use
63
 *   in the event that it is an ascii string without null bytes.
64
 * </pre>
65
 */
66
67
#ifdef HAVE_CONFIG_H
68
#include <config_auto.h>
69
#endif  /* HAVE_CONFIG_H */
70
71
#include <string.h>
72
#include "allheaders.h"
73
#include "array_internal.h"
74
75
    /* Bounds on array size */
76
static const l_uint32  MaxArraySize = 1000000000;   /*!< upper bound on array size: 10^9 bytes */
77
static const l_int32   InitialArraySize = 200;      /*!< arbitrary default size */
78
79
    /* Static function */
80
static l_int32 l_byteaExtendArrayToSize(L_BYTEA *ba, size_t size);
81
82
/*---------------------------------------------------------------------*
83
 *                  Creation, copy, clone, destruction                 *
84
 *---------------------------------------------------------------------*/
85
/*!
86
 * \brief   l_byteaCreate()
87
 *
88
 * \param[in]    nbytes    determines initial size of data array
89
 * \return  l_bytea, or NULL on error
90
 *
91
 * <pre>
92
 * Notes:
93
 *      (1) The allocated array is n + 1 bytes.  This allows room
94
 *          for null termination.
95
 * </pre>
96
 */
97
L_BYTEA *
98
l_byteaCreate(size_t  nbytes)
99
0
{
100
0
L_BYTEA  *ba;
101
102
0
    if (nbytes <= 0 || nbytes > MaxArraySize)
103
0
        nbytes = InitialArraySize;
104
0
    ba = (L_BYTEA *)LEPT_CALLOC(1, sizeof(L_BYTEA));
105
0
    ba->data = (l_uint8 *)LEPT_CALLOC(nbytes + 1, sizeof(l_uint8));
106
0
    if (!ba->data) {
107
0
        l_byteaDestroy(&ba);
108
0
        return (L_BYTEA *)ERROR_PTR("ba array not made", __func__, NULL);
109
0
    }
110
0
    ba->nalloc = nbytes + 1;
111
0
    ba->refcount = 1;
112
0
    return ba;
113
0
}
114
115
116
/*!
117
 * \brief   l_byteaInitFromMem()
118
 *
119
 * \param[in]    data    to be copied to the array
120
 * \param[in]    size    amount of data
121
 * \return  l_bytea, or NULL on error
122
 */
123
L_BYTEA *
124
l_byteaInitFromMem(const l_uint8  *data,
125
                   size_t          size)
126
0
{
127
0
L_BYTEA  *ba;
128
129
0
    if (!data)
130
0
        return (L_BYTEA *)ERROR_PTR("data not defined", __func__, NULL);
131
0
    if (size <= 0)
132
0
        return (L_BYTEA *)ERROR_PTR("no bytes to initialize", __func__, NULL);
133
0
    if (size > MaxArraySize)
134
0
        return (L_BYTEA *)ERROR_PTR("size is too big", __func__, NULL);
135
136
0
    if ((ba = l_byteaCreate(size)) == NULL)
137
0
        return (L_BYTEA *)ERROR_PTR("ba not made", __func__, NULL);
138
0
    memcpy(ba->data, data, size);
139
0
    ba->size = size;
140
0
    return ba;
141
0
}
142
143
144
/*!
145
 * \brief   l_byteaInitFromFile()
146
 *
147
 * \param[in]    fname
148
 * \return  l_bytea, or NULL on error
149
 */
150
L_BYTEA *
151
l_byteaInitFromFile(const char  *fname)
152
0
{
153
0
FILE     *fp;
154
0
L_BYTEA  *ba;
155
156
0
    if (!fname)
157
0
        return (L_BYTEA *)ERROR_PTR("fname not defined", __func__, NULL);
158
159
0
    if ((fp = fopenReadStream(fname)) == NULL)
160
0
        return (L_BYTEA *)ERROR_PTR_1("file stream not opened",
161
0
                                      fname, __func__, NULL);
162
0
    ba = l_byteaInitFromStream(fp);
163
0
    fclose(fp);
164
0
    if (!ba)
165
0
        return (L_BYTEA *)ERROR_PTR_1("ba not made", fname, __func__, NULL);
166
0
    return ba;
167
0
}
168
169
170
/*!
171
 * \brief   l_byteaInitFromStream()
172
 *
173
 * \param[in]    fp    file stream
174
 * \return  l_bytea, or NULL on error
175
 */
176
L_BYTEA *
177
l_byteaInitFromStream(FILE  *fp)
178
0
{
179
0
l_uint8  *data;
180
0
size_t    nbytes;
181
0
L_BYTEA  *ba;
182
183
0
    if (!fp)
184
0
        return (L_BYTEA *)ERROR_PTR("stream not defined", __func__, NULL);
185
186
0
    if ((data = l_binaryReadStream(fp, &nbytes)) == NULL)
187
0
        return (L_BYTEA *)ERROR_PTR("data not read", __func__, NULL);
188
0
    if ((ba = l_byteaCreate(nbytes)) == NULL) {
189
0
        LEPT_FREE(data);
190
0
        return (L_BYTEA *)ERROR_PTR("ba not made", __func__, NULL);
191
0
    }
192
0
    memcpy(ba->data, data, nbytes);
193
0
    ba->size = nbytes;
194
0
    LEPT_FREE(data);
195
0
    return ba;
196
0
}
197
198
199
/*!
200
 * \brief   l_byteaCopy()
201
 *
202
 * \param[in]    bas        source lba
203
 * \param[in]    copyflag   L_COPY, L_CLONE
204
 * \return  clone or copy of bas, or NULL on error
205
 *
206
 * <pre>
207
 * Notes:
208
 *      (1) If cloning, up the refcount and return a ptr to %bas.
209
 * </pre>
210
 */
211
L_BYTEA *
212
l_byteaCopy(L_BYTEA  *bas,
213
            l_int32   copyflag)
214
0
{
215
0
    if (!bas)
216
0
        return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL);
217
218
0
    if (copyflag == L_CLONE) {
219
0
        bas->refcount++;
220
0
        return bas;
221
0
    }
222
223
0
    return l_byteaInitFromMem(bas->data, bas->size);
224
0
}
225
226
227
/*!
228
 * \brief   l_byteaDestroy()
229
 *
230
 * \param[in,out]   pba    will be set to null before returning
231
 * \return  void
232
 *
233
 * <pre>
234
 * Notes:
235
 *      (1) Decrements the ref count and, if 0, destroys the lba.
236
 *      (2) Always nulls the input ptr.
237
 *      (3) If the data has been previously removed, the lba will
238
 *          have been nulled, so this will do nothing.
239
 * </pre>
240
 */
241
void
242
l_byteaDestroy(L_BYTEA  **pba)
243
0
{
244
0
L_BYTEA  *ba;
245
246
0
    if (pba == NULL) {
247
0
        L_WARNING("ptr address is null!\n", __func__);
248
0
        return;
249
0
    }
250
251
0
    if ((ba = *pba) == NULL)
252
0
        return;
253
254
        /* Decrement the ref count.  If it is 0, destroy the lba. */
255
0
    if (--ba->refcount == 0) {
256
0
        if (ba->data) LEPT_FREE(ba->data);
257
0
        LEPT_FREE(ba);
258
0
    }
259
0
    *pba = NULL;
260
0
}
261
262
263
/*---------------------------------------------------------------------*
264
 *                               Accessors                             *
265
 *---------------------------------------------------------------------*/
266
/*!
267
 * \brief   l_byteaGetSize()
268
 *
269
 * \param[in]    ba
270
 * \return  size of stored byte array, or 0 on error
271
 */
272
size_t
273
l_byteaGetSize(L_BYTEA  *ba)
274
0
{
275
0
    if (!ba)
276
0
        return ERROR_INT("ba not defined", __func__, 0);
277
0
    return ba->size;
278
0
}
279
280
281
/*!
282
 * \brief   l_byteaGetData()
283
 *
284
 * \param[in]    ba
285
 * \param[out]   psize     size of data in lba
286
 * \return  ptr to existing data array, or NULL on error
287
 *
288
 * <pre>
289
 * Notes:
290
 *      (1) The returned ptr is owned by %ba.  Do not free it!
291
 * </pre>
292
 */
293
l_uint8 *
294
l_byteaGetData(L_BYTEA  *ba,
295
               size_t   *psize)
296
0
{
297
0
    if (!ba)
298
0
        return (l_uint8 *)ERROR_PTR("ba not defined", __func__, NULL);
299
0
    if (!psize)
300
0
        return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL);
301
302
0
    *psize = ba->size;
303
0
    return ba->data;
304
0
}
305
306
307
/*!
308
 * \brief   l_byteaCopyData()
309
 *
310
 * \param[in]    ba
311
 * \param[out]   psize     size of data in lba
312
 * \return  copy of data in use in the data array, or NULL on error.
313
 *
314
 * <pre>
315
 * Notes:
316
 *      (1) The returned data is owned by the caller.  The input %ba
317
 *          still owns the original data array.
318
 * </pre>
319
 */
320
l_uint8 *
321
l_byteaCopyData(L_BYTEA  *ba,
322
                size_t   *psize)
323
0
{
324
0
l_uint8  *data;
325
326
0
    if (!psize)
327
0
        return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL);
328
0
    *psize = 0;
329
0
    if (!ba)
330
0
        return (l_uint8 *)ERROR_PTR("ba not defined", __func__, NULL);
331
332
0
    data = l_byteaGetData(ba, psize);
333
0
    return l_binaryCopy(data, *psize);
334
0
}
335
336
337
/*---------------------------------------------------------------------*
338
 *                               Appending                             *
339
 *---------------------------------------------------------------------*/
340
/*!
341
 * \brief   l_byteaAppendData()
342
 *
343
 * \param[in]    ba
344
 * \param[in]    newdata    byte array to be appended
345
 * \param[in]    newbytes   size of data array
346
 * \return  0 if OK, 1 on error
347
 */
348
l_ok
349
l_byteaAppendData(L_BYTEA        *ba,
350
                  const l_uint8  *newdata,
351
                  size_t          newbytes)
352
0
{
353
0
size_t  size, nalloc, reqsize;
354
355
0
    if (!ba)
356
0
        return ERROR_INT("ba not defined", __func__, 1);
357
0
    if (!newdata)
358
0
        return ERROR_INT("newdata not defined", __func__, 1);
359
360
0
    size = l_byteaGetSize(ba);
361
0
    reqsize = size + newbytes + 1;
362
0
    nalloc = ba->nalloc;
363
0
    if (nalloc < reqsize) {
364
0
        if (l_byteaExtendArrayToSize(ba, 2 * reqsize))
365
0
            return ERROR_INT("extension failed", __func__, 1);
366
0
    }
367
368
0
    memcpy(ba->data + size, newdata, newbytes);
369
0
    ba->size += newbytes;
370
0
    return 0;
371
0
}
372
373
374
/*!
375
 * \brief   l_byteaAppendString()
376
 *
377
 * \param[in]    ba
378
 * \param[in]    str    null-terminated string to be appended
379
 * \return  0 if OK, 1 on error
380
 */
381
l_ok
382
l_byteaAppendString(L_BYTEA     *ba,
383
                    const char  *str)
384
0
{
385
0
size_t  size, len, nalloc, reqsize;
386
387
0
    if (!ba)
388
0
        return ERROR_INT("ba not defined", __func__, 1);
389
0
    if (!str)
390
0
        return ERROR_INT("str not defined", __func__, 1);
391
392
0
    size = l_byteaGetSize(ba);
393
0
    len = strlen(str);
394
0
    reqsize = size + len + 1;
395
0
    nalloc = ba->nalloc;
396
0
    if (nalloc < reqsize) {
397
0
        if (l_byteaExtendArrayToSize(ba, 2 * reqsize))
398
0
            return ERROR_INT("extension failed", __func__, 1);
399
0
    }
400
401
0
    memcpy(ba->data + size, str, len);
402
0
    ba->size += len;
403
0
    return 0;
404
0
}
405
406
407
/*!
408
 * \brief   l_byteaExtendArrayToSize()
409
 *
410
 * \param[in]    ba
411
 * \param[in]    size    new size of lba data array
412
 * \return  0 if OK; 1 on error
413
 *
414
 * <pre>
415
 * Notes:
416
 *      (1) If necessary, reallocs the byte array to %size.
417
 *      (2) The max buffer size is 1 GB.
418
 * </pre>
419
 */
420
static l_int32
421
l_byteaExtendArrayToSize(L_BYTEA  *ba,
422
                         size_t    size)
423
0
{
424
0
    if (!ba)
425
0
        return ERROR_INT("ba not defined", __func__, 1);
426
0
    if (ba->nalloc > MaxArraySize)  /* belt & suspenders */
427
0
        return ERROR_INT("ba has too many ptrs", __func__, 1);
428
0
    if (size > MaxArraySize)
429
0
        return ERROR_INT("size > 1 GB; too large", __func__, 1);
430
0
    if (size <= ba->nalloc) {
431
0
        L_INFO("size too small; no extension\n", __func__);
432
0
        return 0;
433
0
    }
434
435
0
    if ((ba->data =
436
0
        (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size)) == NULL)
437
0
        return ERROR_INT("new array not returned", __func__, 1);
438
0
    ba->nalloc = size;
439
0
    return 0;
440
0
}
441
442
443
/*---------------------------------------------------------------------*
444
 *                        String join/split                            *
445
 *---------------------------------------------------------------------*/
446
/*!
447
 * \brief   l_byteaJoin()
448
 *
449
 * \param[in]       ba1
450
 * \param[in,out]   pba2    data array is added to the one in ba1;
451
 *                          then ba2 is destroyed and its pointer is nulled.
452
 * \return  0 if OK, 1 on error
453
 *
454
 * <pre>
455
 * Notes:
456
 *      (1) It is a no-op, not an error, for %ba2 to be null.
457
 * </pre>
458
 */
459
l_ok
460
l_byteaJoin(L_BYTEA   *ba1,
461
            L_BYTEA  **pba2)
462
0
{
463
0
l_uint8  *data2;
464
0
size_t    nbytes2;
465
0
L_BYTEA  *ba2;
466
467
0
    if (!ba1)
468
0
        return ERROR_INT("ba1 not defined", __func__, 1);
469
0
    if (!pba2)
470
0
        return ERROR_INT("&ba2 not defined", __func__, 1);
471
0
    if ((ba2 = *pba2) == NULL) return 0;
472
473
0
    data2 = l_byteaGetData(ba2, &nbytes2);
474
0
    l_byteaAppendData(ba1, data2, nbytes2);
475
476
0
    l_byteaDestroy(pba2);
477
0
    return 0;
478
0
}
479
480
481
/*!
482
 * \brief   l_byteaSplit()
483
 *
484
 * \param[in]    ba1       lba to split; array bytes nulled beyond the split loc
485
 * \param[in]    splitloc  location in ba1 to split; ba2 begins there
486
 * \param[out]   pba2      with data starting at splitloc
487
 * \return  0 if OK, 1 on error
488
 */
489
l_ok
490
l_byteaSplit(L_BYTEA   *ba1,
491
             size_t     splitloc,
492
             L_BYTEA  **pba2)
493
0
{
494
0
l_uint8  *data1;
495
0
size_t    nbytes1, nbytes2;
496
497
0
    if (!pba2)
498
0
        return ERROR_INT("&ba2 not defined", __func__, 1);
499
0
    *pba2 = NULL;
500
0
    if (!ba1)
501
0
        return ERROR_INT("ba1 not defined", __func__, 1);
502
503
0
    data1 = l_byteaGetData(ba1, &nbytes1);
504
0
    if (splitloc >= nbytes1)
505
0
        return ERROR_INT("splitloc invalid", __func__, 1);
506
0
    nbytes2 = nbytes1 - splitloc;
507
508
        /* Make the new lba */
509
0
    *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2);
510
511
        /* Null the removed bytes in the input lba */
512
0
    memset(data1 + splitloc, 0, nbytes2);
513
0
    ba1->size = splitloc;
514
0
    return 0;
515
0
}
516
517
518
/*---------------------------------------------------------------------*
519
 *                                Search                               *
520
 *---------------------------------------------------------------------*/
521
/*!
522
 * \brief   l_byteaFindEachSequence()
523
 *
524
 * \param[in]    ba
525
 * \param[in]    sequence   subarray of bytes to find in data
526
 * \param[in]    seqlen     length of sequence, in bytes
527
 * \param[out]   pda        byte positions of each occurrence of %sequence
528
 * \return  0 if OK, 1 on error
529
 */
530
l_ok
531
l_byteaFindEachSequence(L_BYTEA        *ba,
532
                        const l_uint8  *sequence,
533
                        size_t          seqlen,
534
                        L_DNA         **pda)
535
0
{
536
0
l_uint8  *data;
537
0
size_t    size;
538
539
0
    if (!pda)
540
0
        return ERROR_INT("&da not defined", __func__, 1);
541
0
    *pda = NULL;
542
0
    if (!ba)
543
0
        return ERROR_INT("ba not defined", __func__, 1);
544
0
    if (!sequence)
545
0
        return ERROR_INT("sequence not defined", __func__, 1);
546
547
0
    data = l_byteaGetData(ba, &size);
548
0
    *pda = arrayFindEachSequence(data, size, sequence, seqlen);
549
0
    return 0;
550
0
}
551
552
553
/*---------------------------------------------------------------------*
554
 *                              Output to file                         *
555
 *---------------------------------------------------------------------*/
556
/*!
557
 * \brief   l_byteaWrite()
558
 *
559
 * \param[in]    fname      output file
560
 * \param[in]    ba
561
 * \param[in]    startloc   first byte to output
562
 * \param[in]    nbytes     number of bytes to write; use 0 to write to
563
 *                          the end of the data array
564
 * \return  0 if OK, 1 on error
565
 */
566
l_ok
567
l_byteaWrite(const char  *fname,
568
             L_BYTEA     *ba,
569
             size_t       startloc,
570
             size_t       nbytes)
571
0
{
572
0
l_int32  ret;
573
0
FILE    *fp;
574
575
0
    if (!fname)
576
0
        return ERROR_INT("fname not defined", __func__, 1);
577
0
    if (!ba)
578
0
        return ERROR_INT("ba not defined", __func__, 1);
579
580
0
    if ((fp = fopenWriteStream(fname, "wb")) == NULL)
581
0
        return ERROR_INT_1("stream not opened", fname, __func__, 1);
582
0
    ret = l_byteaWriteStream(fp, ba, startloc, nbytes);
583
0
    fclose(fp);
584
0
    return ret;
585
0
}
586
587
588
/*!
589
 * \brief   l_byteaWriteStream()
590
 *
591
 * \param[in]    fp         file stream opened for binary write
592
 * \param[in]    ba
593
 * \param[in]    startloc   first byte to output
594
 * \param[in]    nbytes     number of bytes to write; use 0 to write to
595
 *                          the end of the data array
596
 * \return  0 if OK, 1 on error
597
 */
598
l_ok
599
l_byteaWriteStream(FILE     *fp,
600
                   L_BYTEA  *ba,
601
                   size_t    startloc,
602
                   size_t    nbytes)
603
0
{
604
0
l_uint8  *data;
605
0
size_t    size, maxbytes;
606
607
0
    if (!fp)
608
0
        return ERROR_INT("stream not defined", __func__, 1);
609
0
    if (!ba)
610
0
        return ERROR_INT("ba not defined", __func__, 1);
611
612
0
    data = l_byteaGetData(ba, &size);
613
0
    if (startloc >= size)
614
0
        return ERROR_INT("invalid startloc", __func__, 1);
615
0
    maxbytes = size - startloc;
616
0
    nbytes = (nbytes == 0) ? maxbytes : L_MIN(nbytes, maxbytes);
617
618
0
    fwrite(data + startloc, 1, nbytes, fp);
619
0
    return 0;
620
0
}