Coverage Report

Created: 2025-06-13 07:15

/src/leptonica/src/encoding.c
Line
Count
Source (jump to first uncovered line)
1
/*====================================================================*
2
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
3
 -  This software is distributed in the hope that it will be
4
 -  useful, but with NO WARRANTY OF ANY KIND.
5
 -  No author or distributor accepts responsibility to anyone for the
6
 -  consequences of using this software, or for whether it serves any
7
 -  particular purpose or works at all, unless he or she says so in
8
 -  writing.  Everyone is granted permission to copy, modify and
9
 -  redistribute this source code, for commercial or non-commercial
10
 -  purposes, with the following restrictions: (1) the origin of this
11
 -  source code must not be misrepresented; (2) modified versions must
12
 -  be plainly marked as such; and (3) this notice may not be removed
13
 -  or altered from any source or modified source distribution.
14
 *====================================================================*/
15
16
/*
17
 *  encoding.c
18
 *
19
 *    Base64
20
 *        char           *encodeBase64()
21
 *        l_uint8        *decodeBase64()
22
 *        static l_int32  isBase64()
23
 *        static l_int32 *genReverseTab64()
24
 *        static void     byteConvert3to4()
25
 *        static void     byteConvert4to3()
26
 *
27
 *    Ascii85
28
 *        char           *encodeAscii85()
29
 *        l_uint8        *decodeAscii85()
30
 *        static l_int32  convertChunkToAscii85()
31
 *
32
 *        char           *encodeAscii85WithComp()
33
 *        l_uint8        *decodeAscii85WithComp()
34
 *
35
 *    String reformatting for base 64 encoded data
36
 *        char           *reformatPacked64()
37
 *
38
 *  Base64 encoding is useful for encoding binary data in a restricted set of
39
 *  64 printable ascii symbols, that includes the 62 alphanumerics and '+'
40
 *  and '/'.  Notably it does not include quotes, so that base64 encoded
41
 *  strings can be used in situations where quotes are used for formatting.
42
 *  64 symbols was chosen because it is the smallest number that can be used
43
 *  in 4-for-3 byte encoding of binary data:
44
 *         log2(64) / log2(256) = 0.75 = 3/4
45
 *
46
 *  Ascii85 encoding is used in PostScript and some pdf files for
47
 *  representing binary data (for example, a compressed image) in printable
48
 *  ascii symbols.  It has a dictionary of 85 symbols; 85 was chosen because
49
 *  it is the smallest number that can be used in 5-for-4 byte encoding
50
 *  of binary data (256 possible input values).  This can be seen from
51
 *  the max information content in such a sequence:
52
 *         log2(84) / log2(256) = 0.799 < 4/5
53
 *         log2(85) / log2(256) = 0.801 > 4/5
54
 */
55
56
#ifdef HAVE_CONFIG_H
57
#include <config_auto.h>
58
#endif  /* HAVE_CONFIG_H */
59
60
#include <ctype.h>
61
#include <string.h>
62
#include "allheaders.h"
63
64
    /* Base64 encoding table in string representation */
65
static const l_int32  MAX_BASE64_LINE   = 72;  /* max line length base64 */
66
static const char *tablechar64 =
67
             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
68
             "abcdefghijklmnopqrstuvwxyz"
69
             "0123456789+/";
70
71
static l_int32 isBase64(char);
72
static l_int32 *genReverseTab64(void);
73
static void byteConvert3to4(l_uint8 *in3, l_uint8 *out4);
74
static void byteConvert4to3(l_uint8 *in4, l_uint8 *out3);
75
76
    /* Ascii85 encoding */
77
static const l_int32  MAX_ASCII85_LINE   = 64;  /* max line length ascii85 */
78
static const l_uint32  power85[5] = {1,
79
                                     85,
80
                                     85 * 85,
81
                                     85 * 85 * 85,
82
                                     85 * 85 * 85 * 85};
83
84
static l_int32 convertChunkToAscii85(const l_uint8 *inarray, size_t insize,
85
                                     l_int32 *pindex, char *outbuf,
86
                                     l_int32 *pnbout);
87
88
/*-------------------------------------------------------------*
89
 *      Utility for encoding and decoding data with base64     *
90
 *-------------------------------------------------------------*/
91
/*!
92
 * \brief   encodeBase64()
93
 *
94
 * \param[in]    inarray     input binary data
95
 * \param[in]    insize      number of bytes in input array
96
 * \param[out]   poutsize    number of bytes in output char array
97
 * \return  chara with MAX_BASE64_LINE characters + \n in each line
98
 *
99
 * <pre>
100
 * Notes:
101
 *      (1) The input character data is unrestricted binary.
102
 *          The output encoded data consists of the 64 characters
103
 *          in the base64 set, plus newlines and the pad character '='.
104
 * </pre>
105
 */
106
char *
107
encodeBase64(const l_uint8 *inarray,
108
             l_int32        insize,
109
             l_int32       *poutsize)
110
0
{
111
0
char          *chara;
112
0
const l_uint8 *bytea;
113
0
l_uint8        array3[3], array4[4];
114
0
l_int32        outsize, i, j, index, linecount;
115
116
0
    if (!poutsize)
117
0
        return (char *)ERROR_PTR("&outsize not defined", __func__, NULL);
118
0
    *poutsize = 0;
119
0
    if (!inarray)
120
0
        return (char *)ERROR_PTR("inarray not defined", __func__, NULL);
121
0
    if (insize <= 0)
122
0
        return (char *)ERROR_PTR("insize not > 0", __func__, NULL);
123
124
        /* The output array is padded to a multiple of 4 bytes, not
125
         * counting the newlines.  We just need to allocate a large
126
         * enough array, and add 4 bytes to make sure it is big enough. */
127
0
    outsize = 4 * ((insize + 2) / 3);  /* without newlines */
128
0
    outsize += outsize / MAX_BASE64_LINE + 4;  /* with the newlines */
129
0
    if ((chara = (char *)LEPT_CALLOC(outsize, sizeof(char))) == NULL)
130
0
        return (char *)ERROR_PTR("chara not made", __func__, NULL);
131
132
        /* Read all the input data, and convert in sets of 3 input
133
         * bytes --> 4 output bytes. */
134
0
    i = index = linecount = 0;
135
0
    bytea = inarray;
136
0
    while (insize--) {
137
0
        if (linecount == MAX_BASE64_LINE) {
138
0
            chara[index++] = '\n';
139
0
            linecount = 0;
140
0
        }
141
0
        array3[i++] = *bytea++;
142
0
        if (i == 3) {  /* convert 3 to 4 and save */
143
0
            byteConvert3to4(array3, array4);
144
0
            for (j = 0; j < 4; j++)
145
0
                chara[index++] = tablechar64[array4[j]];
146
0
            i = 0;
147
0
            linecount += 4;
148
0
        }
149
0
    }
150
151
        /* Suppose 1 or 2 bytes has been read but not yet processed.
152
         * If 1 byte has been read, this will generate 2 bytes of
153
         * output, with 6 bits to the first byte and 2 bits to the second.
154
         * We will add two bytes of '=' for padding.
155
         * If 2 bytes has been read, this will generate 3 bytes of output,
156
         * with 6 bits to the first 2 bytes and 4 bits to the third, and
157
         * we add a fourth padding byte ('='). */
158
0
    if (i > 0) {  /* left-over 1 or 2 input bytes */
159
0
        for (j = i; j < 3; j++)
160
0
            array3[j] = '\0';  /* zero the remaining input bytes */
161
0
        byteConvert3to4(array3, array4);
162
0
        for (j = 0; j <= i; j++)
163
0
            chara[index++] = tablechar64[array4[j]];
164
0
        for (j = i + 1; j < 4; j++)
165
0
            chara[index++] = '=';
166
0
    }
167
0
    *poutsize = index;
168
169
0
    return chara;
170
0
}
171
172
173
/*!
174
 * \brief   decodeBase64()
175
 *
176
 * \param[in]    inarray    input encoded char data, with 72 chars/line)
177
 * \param[in]    insize     number of bytes in input array
178
 * \param[out]   poutsize   number of bytes in output byte array
179
 * \return  bytea decoded byte data, or NULL on error
180
 *
181
 * <pre>
182
 * Notes:
183
 *      (1) The input character data should have only 66 different characters:
184
 *          The 64 character set for base64 encoding, plus the pad
185
 *          character '=' and newlines for formatting with fixed line
186
 *          lengths.  If there are any other characters, the decoder
187
 *          will declare the input data to be invalid and return NULL.
188
 *      (2) The decoder ignores newlines and, for a valid input string,
189
 *          stops reading input when a pad byte is found.
190
 * </pre>
191
 */
192
l_uint8 *
193
decodeBase64(const char  *inarray,
194
             l_int32      insize,
195
             l_int32     *poutsize)
196
2
{
197
2
char      inchar;
198
2
l_uint8  *bytea;
199
2
l_uint8   array3[3], array4[4];
200
2
l_int32  *rtable64;
201
2
l_int32   i, j, outsize, in_index, out_index;
202
203
2
    if (!poutsize)
204
0
        return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL);
205
2
    *poutsize = 0;
206
2
    if (!inarray)
207
0
        return (l_uint8 *)ERROR_PTR("inarray not defined", __func__, NULL);
208
2
    if (insize <= 0)
209
0
        return (l_uint8 *)ERROR_PTR("insize not > 0", __func__, NULL);
210
211
        /* Validate the input data */
212
9.81k
    for (i = 0; i < insize; i++) {
213
9.81k
        inchar = inarray[i];
214
9.81k
        if (inchar == '\n') continue;
215
9.81k
        if (isBase64(inchar) == 0 && inchar != '=')
216
0
            return (l_uint8 *)ERROR_PTR("invalid char in inarray",
217
9.81k
                                        __func__, NULL);
218
9.81k
    }
219
220
        /* The input array typically is made with a newline every
221
         * MAX_BASE64_LINE input bytes.  However, as a printed string, the
222
         * newlines would be stripped.  So when we allocate the output
223
         * array, assume the input array is all data, but strip
224
         * out the newlines during decoding.  This guarantees that
225
         * the allocated array is large enough. */
226
2
    outsize = 3 * ((insize + 3) / 4) + 4;
227
2
    if ((bytea = (l_uint8 *)LEPT_CALLOC(outsize, sizeof(l_uint8))) == NULL)
228
0
        return (l_uint8 *)ERROR_PTR("bytea not made", __func__, NULL);
229
230
        /* The number of encoded input data bytes is always a multiple of 4.
231
         * Read all the data, until you reach either the end or
232
         * the first pad character '='.  The data is processed in
233
         * units of 4 input bytes, generating 3 output decoded bytes
234
         * of binary data.  Newlines are ignored.  If there are no
235
         * pad bytes, i == 0 at the end of this section. */
236
2
    rtable64 = genReverseTab64();
237
2
    i = in_index = out_index = 0;
238
9.81k
    for (in_index = 0; in_index < insize; in_index++) {
239
9.81k
        inchar = inarray[in_index];
240
9.81k
        if (inchar == '\n') continue;
241
9.81k
        if (inchar == '=') break;
242
9.81k
        array4[i++] = rtable64[(unsigned char)inchar];
243
9.81k
        if (i < 4) {
244
7.36k
            continue;
245
7.36k
        } else {  /* i == 4; convert 4 to 3 and save */
246
2.45k
            byteConvert4to3(array4, array3);
247
9.80k
            for (j = 0; j < 3; j++)
248
7.35k
                bytea[out_index++] = array3[j];
249
2.45k
            i = 0;
250
2.45k
        }
251
9.81k
    }
252
253
        /* If i > 0, we ran into pad bytes ('=').  If i == 2, there are
254
         * two input pad bytes and one output data byte.  If i == 3,
255
         * there is one input pad byte and two output data bytes. */
256
2
    if (i > 0) {
257
4
        for (j = i; j < 4; j++)
258
2
            array4[j] = '\0';  /* zero the remaining input bytes */
259
2
        byteConvert4to3(array4, array3);
260
6
        for (j = 0; j < i - 1; j++)
261
4
            bytea[out_index++] = array3[j];
262
2
    }
263
2
    *poutsize = out_index;
264
265
2
    LEPT_FREE(rtable64);
266
2
    return bytea;
267
2
}
268
269
270
/*!
271
 * \brief   isBase64()
272
 */
273
static l_int32
274
isBase64(char  c)
275
9.81k
{
276
9.81k
    return (isalnum(((int)c)) || ((c) == '+') || ((c) == '/')) ? 1 : 0;
277
9.81k
}
278
279
/*!
280
 * \brief   genReverseTab64()
281
 */
282
static l_int32 *
283
genReverseTab64(void)
284
2
{
285
2
l_int32   i;
286
2
l_int32  *rtable64;
287
288
2
    rtable64 = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32));
289
130
    for (i = 0; i < 64; i++) {
290
128
        rtable64[(unsigned char)tablechar64[i]] = i;
291
128
    }
292
2
    return rtable64;
293
2
}
294
295
/*!
296
 * \brief   byteConvert3to4()
297
 */
298
static void
299
byteConvert3to4(l_uint8  *in3,
300
                l_uint8  *out4)
301
0
{
302
0
    out4[0] = in3[0] >> 2;
303
0
    out4[1] = ((in3[0] & 0x03) << 4) | (in3[1] >> 4);
304
0
    out4[2] = ((in3[1] & 0x0f) << 2) | (in3[2] >> 6);
305
0
    out4[3] = in3[2] & 0x3f;
306
0
    return;
307
0
}
308
309
/*!
310
 * \brief   byteConvert4to3()
311
 */
312
static void
313
byteConvert4to3(l_uint8  *in4,
314
                l_uint8  *out3)
315
2.45k
{
316
2.45k
    out3[0] = (in4[0] << 2) | (in4[1] >> 4);
317
2.45k
    out3[1] = ((in4[1] & 0x0f) << 4) | (in4[2] >> 2);
318
2.45k
    out3[2] = ((in4[2] & 0x03) << 6) | in4[3];
319
2.45k
    return;
320
2.45k
}
321
322
323
/*-------------------------------------------------------------*
324
 *      Utility for encoding and decoding data with ascii85    *
325
 *-------------------------------------------------------------*/
326
/*!
327
 * \brief   encodeAscii85()
328
 *
329
 * \param[in]    inarray    input data
330
 * \param[in]    insize     number of bytes in input array
331
 * \param[out]   poutsize   number of bytes in output char array
332
 * \return  chara with 64 characters + \n in each line
333
 *
334
 * <pre>
335
 * Notes:
336
 *      (1) Ghostscript has a stack break if the last line of
337
 *          data only has a '>', so we avoid the problem by
338
 *          always putting '~>' on the last line.
339
 * </pre>
340
 */
341
char *
342
encodeAscii85(const l_uint8  *inarray,
343
              size_t          insize,
344
              size_t         *poutsize)
345
0
{
346
0
char    *chara;
347
0
char     outbuf[8];
348
0
l_int32  maxsize, i, index, linecount, nbout, eof;
349
0
size_t   outindex;
350
351
0
    if (!poutsize)
352
0
        return (char *)ERROR_PTR("&outsize not defined", __func__, NULL);
353
0
    *poutsize = 0;
354
0
    if (!inarray)
355
0
        return (char *)ERROR_PTR("inarray not defined", __func__, NULL);
356
0
    if (insize <= 0)
357
0
        return (char *)ERROR_PTR("insize not > 0", __func__, NULL);
358
359
        /* Accumulate results in char array */
360
0
    maxsize = (l_int32)(80. + (insize * 5. / 4.) *
361
0
                        (1. + 2. / MAX_ASCII85_LINE));
362
0
    if ((chara = (char *)LEPT_CALLOC(maxsize, sizeof(char))) == NULL)
363
0
        return (char *)ERROR_PTR("chara not made", __func__, NULL);
364
365
0
    linecount = 0;
366
0
    index = 0;
367
0
    outindex = 0;
368
0
    while (1) {
369
0
        eof = convertChunkToAscii85(inarray, insize, &index, outbuf, &nbout);
370
0
        for (i = 0; i < nbout; i++) {
371
0
            chara[outindex++] = outbuf[i];
372
0
            linecount++;
373
0
            if (linecount >= MAX_ASCII85_LINE) {
374
0
                chara[outindex++] = '\n';
375
0
                linecount = 0;
376
0
            }
377
0
        }
378
0
        if (eof == TRUE) {
379
0
            if (linecount != 0)
380
0
                chara[outindex++] = '\n';
381
0
            chara[outindex++] = '~';
382
0
            chara[outindex++] = '>';
383
0
            chara[outindex++] = '\n';
384
0
            break;
385
0
        }
386
0
    }
387
388
0
    *poutsize = outindex;
389
0
    return chara;
390
0
}
391
392
393
/*!
394
 * \brief   convertChunkToAscii85()
395
 *
396
 * \param[in]    inarray    input data
397
 * \param[in]    insize     number of bytes in input array
398
 * \param[out]   pindex     use and -- ptr
399
 * \param[in]    outbuf     holds 8 ascii chars; we use no more than 7
400
 * \param[out]   pnbsout    number of bytes written to outbuf
401
 * \return  boolean for eof 0 if more data, 1 if end of file
402
 *
403
 * <pre>
404
 * Notes:
405
 *      (1) Attempts to read 4 bytes and write 5.
406
 *      (2) Writes 1 byte if the value is 0.
407
 * </pre>
408
 */
409
static l_int32
410
convertChunkToAscii85(const l_uint8 *inarray,
411
                      size_t         insize,
412
                      l_int32       *pindex,
413
                      char          *outbuf,
414
                      l_int32       *pnbout)
415
0
{
416
0
l_uint8   inbyte;
417
0
l_uint32  inword, val;
418
0
l_int32   eof, index, nread, nbout, i;
419
420
0
    eof = FALSE;
421
0
    index = *pindex;
422
0
    nread = L_MIN(4, (insize - index));
423
0
    if (insize == index + nread)
424
0
        eof = TRUE;
425
0
    *pindex += nread;  /* save new index */
426
427
        /* Read input data and save in l_uint32 */
428
0
    inword = 0;
429
0
    for (i = 0; i < nread; i++) {
430
0
        inbyte = inarray[index + i];
431
0
        inword += (l_uint32)inbyte << (8 * (3 - i));
432
0
    }
433
434
#if 0
435
    lept_stderr("index = %d, nread = %d\n", index, nread);
436
    lept_stderr("inword = %x\n", inword);
437
    lept_stderr("eof = %d\n", eof);
438
#endif
439
440
        /* Special case: output 1 byte only */
441
0
    if (inword == 0) {
442
0
        outbuf[0] = 'z';
443
0
        nbout = 1;
444
0
    } else { /* output nread + 1 bytes */
445
0
        for (i = 4; i >= 4 - nread; i--) {
446
0
            val = inword / power85[i];
447
0
            outbuf[4 - i] = (l_uint8)(val + '!');
448
0
            inword -= val * power85[i];
449
0
        }
450
0
        nbout = nread + 1;
451
0
    }
452
0
    *pnbout = nbout;
453
454
0
    return eof;
455
0
}
456
457
458
/*!
459
 * \brief   decodeAscii85()
460
 *
461
 * \param[in]    inarray     ascii85 input data
462
 * \param[in]    insize      number of bytes in input array
463
 * \param[out]   poutsize    number of bytes in output l_uint8 array
464
 * \return  outarray binary
465
 *
466
 * <pre>
467
 * Notes:
468
 *      (1) We assume the data is properly encoded, so we do not check
469
 *          for invalid characters or the final '>' character.
470
 *      (2) We permit whitespace to be added to the encoding in an
471
 *          arbitrary way.
472
 * </pre>
473
 */
474
l_uint8 *
475
decodeAscii85(const char *inarray,
476
              size_t      insize,
477
              size_t     *poutsize)
478
0
{
479
0
char        inc;
480
0
const char *pin;
481
0
l_uint8     val;
482
0
l_uint8    *outa;
483
0
l_int32     maxsize, ocount, bytecount, index;
484
0
l_uint32    oword;
485
486
0
    if (!poutsize)
487
0
        return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL);
488
0
    *poutsize = 0;
489
0
    if (!inarray)
490
0
        return (l_uint8 *)ERROR_PTR("inarray not defined", __func__, NULL);
491
0
    if (insize <= 0)
492
0
        return (l_uint8 *)ERROR_PTR("insize not > 0", __func__, NULL);
493
494
        /* Accumulate results in outa */
495
0
    maxsize = (l_int32)(80. + (insize * 4. / 5.));  /* plenty big */
496
0
    if ((outa = (l_uint8 *)LEPT_CALLOC(maxsize, sizeof(l_uint8))) == NULL)
497
0
        return (l_uint8 *)ERROR_PTR("outa not made", __func__, NULL);
498
499
0
    pin = inarray;
500
0
    ocount = 0;  /* byte index into outa */
501
0
    oword = 0;
502
0
    for (index = 0, bytecount = 0; index < insize; index++, pin++) {
503
0
        inc = *pin;
504
505
0
        if (inc == ' ' || inc == '\t' || inc == '\n' ||
506
0
            inc == '\f' || inc == '\r' || inc == '\v')  /* ignore white space */
507
0
            continue;
508
509
0
        val = inc - '!';
510
0
        if (val < 85) {
511
0
            oword = oword * 85 + val;
512
0
            if (bytecount < 4) {
513
0
                bytecount++;
514
0
            } else {  /* we have all 5 input chars for the oword */
515
0
                outa[ocount] = (oword >> 24) & 0xff;
516
0
                outa[ocount + 1] = (oword >> 16) & 0xff;
517
0
                outa[ocount + 2] = (oword >> 8) & 0xff;
518
0
                outa[ocount + 3] = oword & 0xff;
519
0
                ocount += 4;
520
0
                bytecount = 0;
521
0
                oword = 0;
522
0
            }
523
0
        } else if (inc == 'z' && bytecount == 0) {
524
0
            outa[ocount] = 0;
525
0
            outa[ocount + 1] = 0;
526
0
            outa[ocount + 2] = 0;
527
0
            outa[ocount + 3] = 0;
528
0
            ocount += 4;
529
0
        } else if (inc == '~') {  /* end of data */
530
0
            L_INFO(" %d extra bytes output\n", __func__, bytecount - 1);
531
0
            switch (bytecount) {
532
0
            case 0:   /* normal eof */
533
0
            case 1:   /* error */
534
0
                break;
535
0
            case 2:   /* 1 extra byte */
536
0
                oword = oword * power85[3] + 0xffffff;
537
0
                outa[ocount] = (oword >> 24) & 0xff;
538
0
                break;
539
0
            case 3:   /* 2 extra bytes */
540
0
                oword = oword * power85[2] + 0xffff;
541
0
                outa[ocount] = (oword >> 24) & 0xff;
542
0
                outa[ocount + 1] = (oword >> 16) & 0xff;
543
0
                break;
544
0
            case 4:   /* 3 extra bytes */
545
0
                oword = oword * 85 + 0xff;
546
0
                outa[ocount] = (oword >> 24) & 0xff;
547
0
                outa[ocount + 1] = (oword >> 16) & 0xff;
548
0
                outa[ocount + 2] = (oword >> 8) & 0xff;
549
0
                break;
550
0
            }
551
0
            if (bytecount > 1)
552
0
                ocount += (bytecount - 1);
553
0
            break;
554
0
        }
555
0
    }
556
0
    *poutsize = ocount;
557
558
0
    return outa;
559
0
}
560
561
562
/*!
563
 * \brief   encodeAscii85WithComp)
564
 *
565
 * \param[in]    indata     input binary data
566
 * \param[in]    insize     number of bytes in input data
567
 * \param[out]   poutsize   number of bytes in output string
568
 * \return  outstr with 64 characters + \n in each line
569
 *
570
 * <pre>
571
 * Notes:
572
 *      (1) Compress the input data; then encode ascii85.  For ascii
573
 *          input, a first compression step will significantly reduce
574
 *          the final encoded output size.
575
 * </pre>
576
 */
577
char *
578
encodeAscii85WithComp(const l_uint8  *indata,
579
                      size_t          insize,
580
                      size_t         *poutsize)
581
0
{
582
0
char     *outstr;
583
0
size_t    size1;
584
0
l_uint8  *data1;
585
586
0
    if (!poutsize)
587
0
        return (char *)ERROR_PTR("&outsize not defined", __func__, NULL);
588
0
    *poutsize = 0;
589
0
    if (!indata)
590
0
        return (char *)ERROR_PTR("indata not defined", __func__, NULL);
591
592
0
    if ((data1 = zlibCompress(indata, insize, &size1)) == NULL)
593
0
        return (char *)ERROR_PTR("data1 not made", __func__, NULL);
594
0
    outstr = encodeAscii85(data1, size1, poutsize);
595
0
    LEPT_FREE(data1);
596
0
    return outstr;
597
0
}
598
599
600
/*!
601
 * \brief   decodeAscii85WithComp()
602
 *
603
 * \param[in]    instr       ascii85 input data string
604
 * \param[in]    insize      number of bytes in input data
605
 * \param[out]   poutsize    number of bytes in output binary data
606
 * \return  outdata   binary data before compression and ascii85 encoding
607
 *
608
 * <pre>
609
 * Notes:
610
 *      (1) We assume the input data has been zlib compressed and then
611
 *          properly encoded, so we reverse the procedure.  This is the
612
 *          inverse of encodeAscii85WithComp().
613
 *      (2) Set %insize == 0 to use strlen(%instr).
614
 * </pre>
615
 */
616
l_uint8 *
617
decodeAscii85WithComp(const char  *instr,
618
                      size_t       insize,
619
                      size_t      *poutsize)
620
0
{
621
0
size_t    size1;
622
0
l_uint8  *data1, *outdata;
623
624
0
    if (!poutsize)
625
0
        return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL);
626
0
    *poutsize = 0;
627
0
    if (!instr)
628
0
        return (l_uint8 *)ERROR_PTR("instr not defined", __func__, NULL);
629
630
0
    if (insize == 0) insize = strlen(instr);
631
0
    if ((data1 = decodeAscii85(instr, insize, &size1)) == NULL)
632
0
        return (l_uint8 *)ERROR_PTR("data1 not made", __func__, NULL);
633
0
    outdata = zlibUncompress(data1, size1, poutsize);
634
0
    LEPT_FREE(data1);
635
0
    return outdata;
636
0
}
637
638
639
/*-------------------------------------------------------------*
640
 *       String reformatting for base 64 encoded data          *
641
 *-------------------------------------------------------------*/
642
/*!
643
 * \brief   reformatPacked64()
644
 *
645
 * \param[in]    inarray     base64 encoded string with newlines
646
 * \param[in]    insize      number of bytes in input array
647
 * \param[in]    leadspace   number of spaces in each line before the data
648
 * \param[in]    linechars   number of bytes of data in each line; multiple of 4
649
 * \param[in]    addquotes   1 to add quotes to each line of data; 0 to skip
650
 * \param[out]   poutsize    number of bytes in output char array
651
 * \return  outarray ascii
652
 *
653
 * <pre>
654
 * Notes:
655
 *      (1) Each line in the output array has %leadspace space characters,
656
 *          followed optionally by a double-quote, followed by %linechars
657
 *          bytes of base64 data, followed optionally by a double-quote,
658
 *          followed by a newline.
659
 *      (2) This can be used to convert a base64 encoded string to a
660
 *          string formatted for inclusion in a C source file.
661
 * </pre>
662
 */
663
char *
664
reformatPacked64(const char *inarray,
665
                 l_int32     insize,
666
                 l_int32     leadspace,
667
                 l_int32     linechars,
668
                 l_int32     addquotes,
669
                 l_int32    *poutsize)
670
0
{
671
0
char    *flata, *outa;
672
0
l_int32  i, j, flatindex, flatsize, outindex, nlines, linewithpad, linecount;
673
674
0
    if (!poutsize)
675
0
        return (char *)ERROR_PTR("&outsize not defined", __func__, NULL);
676
0
    *poutsize = 0;
677
0
    if (!inarray)
678
0
        return (char *)ERROR_PTR("inarray not defined", __func__, NULL);
679
0
    if (insize <= 0)
680
0
        return (char *)ERROR_PTR("insize not > 0", __func__, NULL);
681
0
    if (leadspace < 0)
682
0
        return (char *)ERROR_PTR("leadspace must be >= 0", __func__, NULL);
683
0
    if (linechars % 4)
684
0
        return (char *)ERROR_PTR("linechars % 4 must be 0", __func__, NULL);
685
686
        /* Remove all white space */
687
0
    if ((flata = (char *)LEPT_CALLOC(insize, sizeof(char))) == NULL)
688
0
        return (char *)ERROR_PTR("flata not made", __func__, NULL);
689
0
    for (i = 0, flatindex = 0; i < insize; i++) {
690
0
        if (isBase64(inarray[i]) || inarray[i] == '=')
691
0
            flata[flatindex++] = inarray[i];
692
0
    }
693
694
        /* Generate output string */
695
0
    flatsize = flatindex;
696
0
    nlines = (flatsize + linechars - 1) / linechars;
697
0
    linewithpad = leadspace + linechars + 1;  /* including newline */
698
0
    if (addquotes) linewithpad += 2;
699
0
    if ((outa = (char *)LEPT_CALLOC((size_t)nlines * linewithpad,
700
0
                                    sizeof(char))) == NULL) {
701
0
        LEPT_FREE(flata);
702
0
        return (char *)ERROR_PTR("outa not made", __func__, NULL);
703
0
    }
704
0
    for (j = 0, outindex = 0; j < leadspace; j++)
705
0
        outa[outindex++] = ' ';
706
0
    if (addquotes) outa[outindex++] = '"';
707
0
    for (i = 0, linecount = 0; i < flatsize; i++) {
708
0
        if (linecount == linechars) {
709
0
            if (addquotes) outa[outindex++] = '"';
710
0
            outa[outindex++] = '\n';
711
0
            for (j = 0; j < leadspace; j++)
712
0
                outa[outindex++] = ' ';
713
0
            if (addquotes) outa[outindex++] = '"';
714
0
            linecount = 0;
715
0
        }
716
0
        outa[outindex++] = flata[i];
717
0
        linecount++;
718
0
    }
719
0
    if (addquotes) outa[outindex++] = '"';
720
0
    *poutsize = outindex;
721
722
0
    LEPT_FREE(flata);
723
0
    return outa;
724
0
}