/src/leptonica/src/encoding.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*====================================================================* |
2 | | - Copyright (C) 2001 Leptonica. All rights reserved. |
3 | | - This software is distributed in the hope that it will be |
4 | | - useful, but with NO WARRANTY OF ANY KIND. |
5 | | - No author or distributor accepts responsibility to anyone for the |
6 | | - consequences of using this software, or for whether it serves any |
7 | | - particular purpose or works at all, unless he or she says so in |
8 | | - writing. Everyone is granted permission to copy, modify and |
9 | | - redistribute this source code, for commercial or non-commercial |
10 | | - purposes, with the following restrictions: (1) the origin of this |
11 | | - source code must not be misrepresented; (2) modified versions must |
12 | | - be plainly marked as such; and (3) this notice may not be removed |
13 | | - or altered from any source or modified source distribution. |
14 | | *====================================================================*/ |
15 | | |
16 | | /* |
17 | | * encoding.c |
18 | | * |
19 | | * Base64 |
20 | | * char *encodeBase64() |
21 | | * l_uint8 *decodeBase64() |
22 | | * static l_int32 isBase64() |
23 | | * static l_int32 *genReverseTab64() |
24 | | * static void byteConvert3to4() |
25 | | * static void byteConvert4to3() |
26 | | * |
27 | | * Ascii85 |
28 | | * char *encodeAscii85() |
29 | | * l_uint8 *decodeAscii85() |
30 | | * static l_int32 convertChunkToAscii85() |
31 | | * |
32 | | * char *encodeAscii85WithComp() |
33 | | * l_uint8 *decodeAscii85WithComp() |
34 | | * |
35 | | * String reformatting for base 64 encoded data |
36 | | * char *reformatPacked64() |
37 | | * |
38 | | * Base64 encoding is useful for encoding binary data in a restricted set of |
39 | | * 64 printable ascii symbols, that includes the 62 alphanumerics and '+' |
40 | | * and '/'. Notably it does not include quotes, so that base64 encoded |
41 | | * strings can be used in situations where quotes are used for formatting. |
42 | | * 64 symbols was chosen because it is the smallest number that can be used |
43 | | * in 4-for-3 byte encoding of binary data: |
44 | | * log2(64) / log2(256) = 0.75 = 3/4 |
45 | | * |
46 | | * Ascii85 encoding is used in PostScript and some pdf files for |
47 | | * representing binary data (for example, a compressed image) in printable |
48 | | * ascii symbols. It has a dictionary of 85 symbols; 85 was chosen because |
49 | | * it is the smallest number that can be used in 5-for-4 byte encoding |
50 | | * of binary data (256 possible input values). This can be seen from |
51 | | * the max information content in such a sequence: |
52 | | * log2(84) / log2(256) = 0.799 < 4/5 |
53 | | * log2(85) / log2(256) = 0.801 > 4/5 |
54 | | */ |
55 | | |
56 | | #ifdef HAVE_CONFIG_H |
57 | | #include <config_auto.h> |
58 | | #endif /* HAVE_CONFIG_H */ |
59 | | |
60 | | #include <ctype.h> |
61 | | #include <string.h> |
62 | | #include "allheaders.h" |
63 | | |
64 | | /* Base64 encoding table in string representation */ |
65 | | static const l_int32 MAX_BASE64_LINE = 72; /* max line length base64 */ |
66 | | static const char *tablechar64 = |
67 | | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
68 | | "abcdefghijklmnopqrstuvwxyz" |
69 | | "0123456789+/"; |
70 | | |
71 | | static l_int32 isBase64(char); |
72 | | static l_int32 *genReverseTab64(void); |
73 | | static void byteConvert3to4(l_uint8 *in3, l_uint8 *out4); |
74 | | static void byteConvert4to3(l_uint8 *in4, l_uint8 *out3); |
75 | | |
76 | | /* Ascii85 encoding */ |
77 | | static const l_int32 MAX_ASCII85_LINE = 64; /* max line length ascii85 */ |
78 | | static const l_uint32 power85[5] = {1, |
79 | | 85, |
80 | | 85 * 85, |
81 | | 85 * 85 * 85, |
82 | | 85 * 85 * 85 * 85}; |
83 | | |
84 | | static l_int32 convertChunkToAscii85(const l_uint8 *inarray, size_t insize, |
85 | | l_int32 *pindex, char *outbuf, |
86 | | l_int32 *pnbout); |
87 | | |
88 | | /*-------------------------------------------------------------* |
89 | | * Utility for encoding and decoding data with base64 * |
90 | | *-------------------------------------------------------------*/ |
91 | | /*! |
92 | | * \brief encodeBase64() |
93 | | * |
94 | | * \param[in] inarray input binary data |
95 | | * \param[in] insize number of bytes in input array |
96 | | * \param[out] poutsize number of bytes in output char array |
97 | | * \return chara with MAX_BASE64_LINE characters + \n in each line |
98 | | * |
99 | | * <pre> |
100 | | * Notes: |
101 | | * (1) The input character data is unrestricted binary. |
102 | | * The output encoded data consists of the 64 characters |
103 | | * in the base64 set, plus newlines and the pad character '='. |
104 | | * </pre> |
105 | | */ |
106 | | char * |
107 | | encodeBase64(const l_uint8 *inarray, |
108 | | l_int32 insize, |
109 | | l_int32 *poutsize) |
110 | 0 | { |
111 | 0 | char *chara; |
112 | 0 | const l_uint8 *bytea; |
113 | 0 | l_uint8 array3[3], array4[4]; |
114 | 0 | l_int32 outsize, i, j, index, linecount; |
115 | |
|
116 | 0 | if (!poutsize) |
117 | 0 | return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); |
118 | 0 | *poutsize = 0; |
119 | 0 | if (!inarray) |
120 | 0 | return (char *)ERROR_PTR("inarray not defined", __func__, NULL); |
121 | 0 | if (insize <= 0) |
122 | 0 | return (char *)ERROR_PTR("insize not > 0", __func__, NULL); |
123 | | |
124 | | /* The output array is padded to a multiple of 4 bytes, not |
125 | | * counting the newlines. We just need to allocate a large |
126 | | * enough array, and add 4 bytes to make sure it is big enough. */ |
127 | 0 | outsize = 4 * ((insize + 2) / 3); /* without newlines */ |
128 | 0 | outsize += outsize / MAX_BASE64_LINE + 4; /* with the newlines */ |
129 | 0 | if ((chara = (char *)LEPT_CALLOC(outsize, sizeof(char))) == NULL) |
130 | 0 | return (char *)ERROR_PTR("chara not made", __func__, NULL); |
131 | | |
132 | | /* Read all the input data, and convert in sets of 3 input |
133 | | * bytes --> 4 output bytes. */ |
134 | 0 | i = index = linecount = 0; |
135 | 0 | bytea = inarray; |
136 | 0 | while (insize--) { |
137 | 0 | if (linecount == MAX_BASE64_LINE) { |
138 | 0 | chara[index++] = '\n'; |
139 | 0 | linecount = 0; |
140 | 0 | } |
141 | 0 | array3[i++] = *bytea++; |
142 | 0 | if (i == 3) { /* convert 3 to 4 and save */ |
143 | 0 | byteConvert3to4(array3, array4); |
144 | 0 | for (j = 0; j < 4; j++) |
145 | 0 | chara[index++] = tablechar64[array4[j]]; |
146 | 0 | i = 0; |
147 | 0 | linecount += 4; |
148 | 0 | } |
149 | 0 | } |
150 | | |
151 | | /* Suppose 1 or 2 bytes has been read but not yet processed. |
152 | | * If 1 byte has been read, this will generate 2 bytes of |
153 | | * output, with 6 bits to the first byte and 2 bits to the second. |
154 | | * We will add two bytes of '=' for padding. |
155 | | * If 2 bytes has been read, this will generate 3 bytes of output, |
156 | | * with 6 bits to the first 2 bytes and 4 bits to the third, and |
157 | | * we add a fourth padding byte ('='). */ |
158 | 0 | if (i > 0) { /* left-over 1 or 2 input bytes */ |
159 | 0 | for (j = i; j < 3; j++) |
160 | 0 | array3[j] = '\0'; /* zero the remaining input bytes */ |
161 | 0 | byteConvert3to4(array3, array4); |
162 | 0 | for (j = 0; j <= i; j++) |
163 | 0 | chara[index++] = tablechar64[array4[j]]; |
164 | 0 | for (j = i + 1; j < 4; j++) |
165 | 0 | chara[index++] = '='; |
166 | 0 | } |
167 | 0 | *poutsize = index; |
168 | |
|
169 | 0 | return chara; |
170 | 0 | } |
171 | | |
172 | | |
173 | | /*! |
174 | | * \brief decodeBase64() |
175 | | * |
176 | | * \param[in] inarray input encoded char data, with 72 chars/line) |
177 | | * \param[in] insize number of bytes in input array |
178 | | * \param[out] poutsize number of bytes in output byte array |
179 | | * \return bytea decoded byte data, or NULL on error |
180 | | * |
181 | | * <pre> |
182 | | * Notes: |
183 | | * (1) The input character data should have only 66 different characters: |
184 | | * The 64 character set for base64 encoding, plus the pad |
185 | | * character '=' and newlines for formatting with fixed line |
186 | | * lengths. If there are any other characters, the decoder |
187 | | * will declare the input data to be invalid and return NULL. |
188 | | * (2) The decoder ignores newlines and, for a valid input string, |
189 | | * stops reading input when a pad byte is found. |
190 | | * </pre> |
191 | | */ |
192 | | l_uint8 * |
193 | | decodeBase64(const char *inarray, |
194 | | l_int32 insize, |
195 | | l_int32 *poutsize) |
196 | 2 | { |
197 | 2 | char inchar; |
198 | 2 | l_uint8 *bytea; |
199 | 2 | l_uint8 array3[3], array4[4]; |
200 | 2 | l_int32 *rtable64; |
201 | 2 | l_int32 i, j, outsize, in_index, out_index; |
202 | | |
203 | 2 | if (!poutsize) |
204 | 0 | return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL); |
205 | 2 | *poutsize = 0; |
206 | 2 | if (!inarray) |
207 | 0 | return (l_uint8 *)ERROR_PTR("inarray not defined", __func__, NULL); |
208 | 2 | if (insize <= 0) |
209 | 0 | return (l_uint8 *)ERROR_PTR("insize not > 0", __func__, NULL); |
210 | | |
211 | | /* Validate the input data */ |
212 | 9.81k | for (i = 0; i < insize; i++) { |
213 | 9.81k | inchar = inarray[i]; |
214 | 9.81k | if (inchar == '\n') continue; |
215 | 9.81k | if (isBase64(inchar) == 0 && inchar != '=') |
216 | 0 | return (l_uint8 *)ERROR_PTR("invalid char in inarray", |
217 | 9.81k | __func__, NULL); |
218 | 9.81k | } |
219 | | |
220 | | /* The input array typically is made with a newline every |
221 | | * MAX_BASE64_LINE input bytes. However, as a printed string, the |
222 | | * newlines would be stripped. So when we allocate the output |
223 | | * array, assume the input array is all data, but strip |
224 | | * out the newlines during decoding. This guarantees that |
225 | | * the allocated array is large enough. */ |
226 | 2 | outsize = 3 * ((insize + 3) / 4) + 4; |
227 | 2 | if ((bytea = (l_uint8 *)LEPT_CALLOC(outsize, sizeof(l_uint8))) == NULL) |
228 | 0 | return (l_uint8 *)ERROR_PTR("bytea not made", __func__, NULL); |
229 | | |
230 | | /* The number of encoded input data bytes is always a multiple of 4. |
231 | | * Read all the data, until you reach either the end or |
232 | | * the first pad character '='. The data is processed in |
233 | | * units of 4 input bytes, generating 3 output decoded bytes |
234 | | * of binary data. Newlines are ignored. If there are no |
235 | | * pad bytes, i == 0 at the end of this section. */ |
236 | 2 | rtable64 = genReverseTab64(); |
237 | 2 | i = in_index = out_index = 0; |
238 | 9.81k | for (in_index = 0; in_index < insize; in_index++) { |
239 | 9.81k | inchar = inarray[in_index]; |
240 | 9.81k | if (inchar == '\n') continue; |
241 | 9.81k | if (inchar == '=') break; |
242 | 9.81k | array4[i++] = rtable64[(unsigned char)inchar]; |
243 | 9.81k | if (i < 4) { |
244 | 7.36k | continue; |
245 | 7.36k | } else { /* i == 4; convert 4 to 3 and save */ |
246 | 2.45k | byteConvert4to3(array4, array3); |
247 | 9.80k | for (j = 0; j < 3; j++) |
248 | 7.35k | bytea[out_index++] = array3[j]; |
249 | 2.45k | i = 0; |
250 | 2.45k | } |
251 | 9.81k | } |
252 | | |
253 | | /* If i > 0, we ran into pad bytes ('='). If i == 2, there are |
254 | | * two input pad bytes and one output data byte. If i == 3, |
255 | | * there is one input pad byte and two output data bytes. */ |
256 | 2 | if (i > 0) { |
257 | 4 | for (j = i; j < 4; j++) |
258 | 2 | array4[j] = '\0'; /* zero the remaining input bytes */ |
259 | 2 | byteConvert4to3(array4, array3); |
260 | 6 | for (j = 0; j < i - 1; j++) |
261 | 4 | bytea[out_index++] = array3[j]; |
262 | 2 | } |
263 | 2 | *poutsize = out_index; |
264 | | |
265 | 2 | LEPT_FREE(rtable64); |
266 | 2 | return bytea; |
267 | 2 | } |
268 | | |
269 | | |
270 | | /*! |
271 | | * \brief isBase64() |
272 | | */ |
273 | | static l_int32 |
274 | | isBase64(char c) |
275 | 9.81k | { |
276 | 9.81k | return (isalnum(((int)c)) || ((c) == '+') || ((c) == '/')) ? 1 : 0; |
277 | 9.81k | } |
278 | | |
279 | | /*! |
280 | | * \brief genReverseTab64() |
281 | | */ |
282 | | static l_int32 * |
283 | | genReverseTab64(void) |
284 | 2 | { |
285 | 2 | l_int32 i; |
286 | 2 | l_int32 *rtable64; |
287 | | |
288 | 2 | rtable64 = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); |
289 | 130 | for (i = 0; i < 64; i++) { |
290 | 128 | rtable64[(unsigned char)tablechar64[i]] = i; |
291 | 128 | } |
292 | 2 | return rtable64; |
293 | 2 | } |
294 | | |
295 | | /*! |
296 | | * \brief byteConvert3to4() |
297 | | */ |
298 | | static void |
299 | | byteConvert3to4(l_uint8 *in3, |
300 | | l_uint8 *out4) |
301 | 0 | { |
302 | 0 | out4[0] = in3[0] >> 2; |
303 | 0 | out4[1] = ((in3[0] & 0x03) << 4) | (in3[1] >> 4); |
304 | 0 | out4[2] = ((in3[1] & 0x0f) << 2) | (in3[2] >> 6); |
305 | 0 | out4[3] = in3[2] & 0x3f; |
306 | 0 | return; |
307 | 0 | } |
308 | | |
309 | | /*! |
310 | | * \brief byteConvert4to3() |
311 | | */ |
312 | | static void |
313 | | byteConvert4to3(l_uint8 *in4, |
314 | | l_uint8 *out3) |
315 | 2.45k | { |
316 | 2.45k | out3[0] = (in4[0] << 2) | (in4[1] >> 4); |
317 | 2.45k | out3[1] = ((in4[1] & 0x0f) << 4) | (in4[2] >> 2); |
318 | 2.45k | out3[2] = ((in4[2] & 0x03) << 6) | in4[3]; |
319 | 2.45k | return; |
320 | 2.45k | } |
321 | | |
322 | | |
323 | | /*-------------------------------------------------------------* |
324 | | * Utility for encoding and decoding data with ascii85 * |
325 | | *-------------------------------------------------------------*/ |
326 | | /*! |
327 | | * \brief encodeAscii85() |
328 | | * |
329 | | * \param[in] inarray input data |
330 | | * \param[in] insize number of bytes in input array |
331 | | * \param[out] poutsize number of bytes in output char array |
332 | | * \return chara with 64 characters + \n in each line |
333 | | * |
334 | | * <pre> |
335 | | * Notes: |
336 | | * (1) Ghostscript has a stack break if the last line of |
337 | | * data only has a '>', so we avoid the problem by |
338 | | * always putting '~>' on the last line. |
339 | | * </pre> |
340 | | */ |
341 | | char * |
342 | | encodeAscii85(const l_uint8 *inarray, |
343 | | size_t insize, |
344 | | size_t *poutsize) |
345 | 0 | { |
346 | 0 | char *chara; |
347 | 0 | char outbuf[8]; |
348 | 0 | l_int32 maxsize, i, index, linecount, nbout, eof; |
349 | 0 | size_t outindex; |
350 | |
|
351 | 0 | if (!poutsize) |
352 | 0 | return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); |
353 | 0 | *poutsize = 0; |
354 | 0 | if (!inarray) |
355 | 0 | return (char *)ERROR_PTR("inarray not defined", __func__, NULL); |
356 | 0 | if (insize <= 0) |
357 | 0 | return (char *)ERROR_PTR("insize not > 0", __func__, NULL); |
358 | | |
359 | | /* Accumulate results in char array */ |
360 | 0 | maxsize = (l_int32)(80. + (insize * 5. / 4.) * |
361 | 0 | (1. + 2. / MAX_ASCII85_LINE)); |
362 | 0 | if ((chara = (char *)LEPT_CALLOC(maxsize, sizeof(char))) == NULL) |
363 | 0 | return (char *)ERROR_PTR("chara not made", __func__, NULL); |
364 | | |
365 | 0 | linecount = 0; |
366 | 0 | index = 0; |
367 | 0 | outindex = 0; |
368 | 0 | while (1) { |
369 | 0 | eof = convertChunkToAscii85(inarray, insize, &index, outbuf, &nbout); |
370 | 0 | for (i = 0; i < nbout; i++) { |
371 | 0 | chara[outindex++] = outbuf[i]; |
372 | 0 | linecount++; |
373 | 0 | if (linecount >= MAX_ASCII85_LINE) { |
374 | 0 | chara[outindex++] = '\n'; |
375 | 0 | linecount = 0; |
376 | 0 | } |
377 | 0 | } |
378 | 0 | if (eof == TRUE) { |
379 | 0 | if (linecount != 0) |
380 | 0 | chara[outindex++] = '\n'; |
381 | 0 | chara[outindex++] = '~'; |
382 | 0 | chara[outindex++] = '>'; |
383 | 0 | chara[outindex++] = '\n'; |
384 | 0 | break; |
385 | 0 | } |
386 | 0 | } |
387 | |
|
388 | 0 | *poutsize = outindex; |
389 | 0 | return chara; |
390 | 0 | } |
391 | | |
392 | | |
393 | | /*! |
394 | | * \brief convertChunkToAscii85() |
395 | | * |
396 | | * \param[in] inarray input data |
397 | | * \param[in] insize number of bytes in input array |
398 | | * \param[out] pindex use and -- ptr |
399 | | * \param[in] outbuf holds 8 ascii chars; we use no more than 7 |
400 | | * \param[out] pnbsout number of bytes written to outbuf |
401 | | * \return boolean for eof 0 if more data, 1 if end of file |
402 | | * |
403 | | * <pre> |
404 | | * Notes: |
405 | | * (1) Attempts to read 4 bytes and write 5. |
406 | | * (2) Writes 1 byte if the value is 0. |
407 | | * </pre> |
408 | | */ |
409 | | static l_int32 |
410 | | convertChunkToAscii85(const l_uint8 *inarray, |
411 | | size_t insize, |
412 | | l_int32 *pindex, |
413 | | char *outbuf, |
414 | | l_int32 *pnbout) |
415 | 0 | { |
416 | 0 | l_uint8 inbyte; |
417 | 0 | l_uint32 inword, val; |
418 | 0 | l_int32 eof, index, nread, nbout, i; |
419 | |
|
420 | 0 | eof = FALSE; |
421 | 0 | index = *pindex; |
422 | 0 | nread = L_MIN(4, (insize - index)); |
423 | 0 | if (insize == index + nread) |
424 | 0 | eof = TRUE; |
425 | 0 | *pindex += nread; /* save new index */ |
426 | | |
427 | | /* Read input data and save in l_uint32 */ |
428 | 0 | inword = 0; |
429 | 0 | for (i = 0; i < nread; i++) { |
430 | 0 | inbyte = inarray[index + i]; |
431 | 0 | inword += (l_uint32)inbyte << (8 * (3 - i)); |
432 | 0 | } |
433 | |
|
434 | | #if 0 |
435 | | lept_stderr("index = %d, nread = %d\n", index, nread); |
436 | | lept_stderr("inword = %x\n", inword); |
437 | | lept_stderr("eof = %d\n", eof); |
438 | | #endif |
439 | | |
440 | | /* Special case: output 1 byte only */ |
441 | 0 | if (inword == 0) { |
442 | 0 | outbuf[0] = 'z'; |
443 | 0 | nbout = 1; |
444 | 0 | } else { /* output nread + 1 bytes */ |
445 | 0 | for (i = 4; i >= 4 - nread; i--) { |
446 | 0 | val = inword / power85[i]; |
447 | 0 | outbuf[4 - i] = (l_uint8)(val + '!'); |
448 | 0 | inword -= val * power85[i]; |
449 | 0 | } |
450 | 0 | nbout = nread + 1; |
451 | 0 | } |
452 | 0 | *pnbout = nbout; |
453 | |
|
454 | 0 | return eof; |
455 | 0 | } |
456 | | |
457 | | |
458 | | /*! |
459 | | * \brief decodeAscii85() |
460 | | * |
461 | | * \param[in] inarray ascii85 input data |
462 | | * \param[in] insize number of bytes in input array |
463 | | * \param[out] poutsize number of bytes in output l_uint8 array |
464 | | * \return outarray binary |
465 | | * |
466 | | * <pre> |
467 | | * Notes: |
468 | | * (1) We assume the data is properly encoded, so we do not check |
469 | | * for invalid characters or the final '>' character. |
470 | | * (2) We permit whitespace to be added to the encoding in an |
471 | | * arbitrary way. |
472 | | * </pre> |
473 | | */ |
474 | | l_uint8 * |
475 | | decodeAscii85(const char *inarray, |
476 | | size_t insize, |
477 | | size_t *poutsize) |
478 | 0 | { |
479 | 0 | char inc; |
480 | 0 | const char *pin; |
481 | 0 | l_uint8 val; |
482 | 0 | l_uint8 *outa; |
483 | 0 | l_int32 maxsize, ocount, bytecount, index; |
484 | 0 | l_uint32 oword; |
485 | |
|
486 | 0 | if (!poutsize) |
487 | 0 | return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL); |
488 | 0 | *poutsize = 0; |
489 | 0 | if (!inarray) |
490 | 0 | return (l_uint8 *)ERROR_PTR("inarray not defined", __func__, NULL); |
491 | 0 | if (insize <= 0) |
492 | 0 | return (l_uint8 *)ERROR_PTR("insize not > 0", __func__, NULL); |
493 | | |
494 | | /* Accumulate results in outa */ |
495 | 0 | maxsize = (l_int32)(80. + (insize * 4. / 5.)); /* plenty big */ |
496 | 0 | if ((outa = (l_uint8 *)LEPT_CALLOC(maxsize, sizeof(l_uint8))) == NULL) |
497 | 0 | return (l_uint8 *)ERROR_PTR("outa not made", __func__, NULL); |
498 | | |
499 | 0 | pin = inarray; |
500 | 0 | ocount = 0; /* byte index into outa */ |
501 | 0 | oword = 0; |
502 | 0 | for (index = 0, bytecount = 0; index < insize; index++, pin++) { |
503 | 0 | inc = *pin; |
504 | |
|
505 | 0 | if (inc == ' ' || inc == '\t' || inc == '\n' || |
506 | 0 | inc == '\f' || inc == '\r' || inc == '\v') /* ignore white space */ |
507 | 0 | continue; |
508 | | |
509 | 0 | val = inc - '!'; |
510 | 0 | if (val < 85) { |
511 | 0 | oword = oword * 85 + val; |
512 | 0 | if (bytecount < 4) { |
513 | 0 | bytecount++; |
514 | 0 | } else { /* we have all 5 input chars for the oword */ |
515 | 0 | outa[ocount] = (oword >> 24) & 0xff; |
516 | 0 | outa[ocount + 1] = (oword >> 16) & 0xff; |
517 | 0 | outa[ocount + 2] = (oword >> 8) & 0xff; |
518 | 0 | outa[ocount + 3] = oword & 0xff; |
519 | 0 | ocount += 4; |
520 | 0 | bytecount = 0; |
521 | 0 | oword = 0; |
522 | 0 | } |
523 | 0 | } else if (inc == 'z' && bytecount == 0) { |
524 | 0 | outa[ocount] = 0; |
525 | 0 | outa[ocount + 1] = 0; |
526 | 0 | outa[ocount + 2] = 0; |
527 | 0 | outa[ocount + 3] = 0; |
528 | 0 | ocount += 4; |
529 | 0 | } else if (inc == '~') { /* end of data */ |
530 | 0 | L_INFO(" %d extra bytes output\n", __func__, bytecount - 1); |
531 | 0 | switch (bytecount) { |
532 | 0 | case 0: /* normal eof */ |
533 | 0 | case 1: /* error */ |
534 | 0 | break; |
535 | 0 | case 2: /* 1 extra byte */ |
536 | 0 | oword = oword * power85[3] + 0xffffff; |
537 | 0 | outa[ocount] = (oword >> 24) & 0xff; |
538 | 0 | break; |
539 | 0 | case 3: /* 2 extra bytes */ |
540 | 0 | oword = oword * power85[2] + 0xffff; |
541 | 0 | outa[ocount] = (oword >> 24) & 0xff; |
542 | 0 | outa[ocount + 1] = (oword >> 16) & 0xff; |
543 | 0 | break; |
544 | 0 | case 4: /* 3 extra bytes */ |
545 | 0 | oword = oword * 85 + 0xff; |
546 | 0 | outa[ocount] = (oword >> 24) & 0xff; |
547 | 0 | outa[ocount + 1] = (oword >> 16) & 0xff; |
548 | 0 | outa[ocount + 2] = (oword >> 8) & 0xff; |
549 | 0 | break; |
550 | 0 | } |
551 | 0 | if (bytecount > 1) |
552 | 0 | ocount += (bytecount - 1); |
553 | 0 | break; |
554 | 0 | } |
555 | 0 | } |
556 | 0 | *poutsize = ocount; |
557 | |
|
558 | 0 | return outa; |
559 | 0 | } |
560 | | |
561 | | |
562 | | /*! |
563 | | * \brief encodeAscii85WithComp) |
564 | | * |
565 | | * \param[in] indata input binary data |
566 | | * \param[in] insize number of bytes in input data |
567 | | * \param[out] poutsize number of bytes in output string |
568 | | * \return outstr with 64 characters + \n in each line |
569 | | * |
570 | | * <pre> |
571 | | * Notes: |
572 | | * (1) Compress the input data; then encode ascii85. For ascii |
573 | | * input, a first compression step will significantly reduce |
574 | | * the final encoded output size. |
575 | | * </pre> |
576 | | */ |
577 | | char * |
578 | | encodeAscii85WithComp(const l_uint8 *indata, |
579 | | size_t insize, |
580 | | size_t *poutsize) |
581 | 0 | { |
582 | 0 | char *outstr; |
583 | 0 | size_t size1; |
584 | 0 | l_uint8 *data1; |
585 | |
|
586 | 0 | if (!poutsize) |
587 | 0 | return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); |
588 | 0 | *poutsize = 0; |
589 | 0 | if (!indata) |
590 | 0 | return (char *)ERROR_PTR("indata not defined", __func__, NULL); |
591 | | |
592 | 0 | if ((data1 = zlibCompress(indata, insize, &size1)) == NULL) |
593 | 0 | return (char *)ERROR_PTR("data1 not made", __func__, NULL); |
594 | 0 | outstr = encodeAscii85(data1, size1, poutsize); |
595 | 0 | LEPT_FREE(data1); |
596 | 0 | return outstr; |
597 | 0 | } |
598 | | |
599 | | |
600 | | /*! |
601 | | * \brief decodeAscii85WithComp() |
602 | | * |
603 | | * \param[in] instr ascii85 input data string |
604 | | * \param[in] insize number of bytes in input data |
605 | | * \param[out] poutsize number of bytes in output binary data |
606 | | * \return outdata binary data before compression and ascii85 encoding |
607 | | * |
608 | | * <pre> |
609 | | * Notes: |
610 | | * (1) We assume the input data has been zlib compressed and then |
611 | | * properly encoded, so we reverse the procedure. This is the |
612 | | * inverse of encodeAscii85WithComp(). |
613 | | * (2) Set %insize == 0 to use strlen(%instr). |
614 | | * </pre> |
615 | | */ |
616 | | l_uint8 * |
617 | | decodeAscii85WithComp(const char *instr, |
618 | | size_t insize, |
619 | | size_t *poutsize) |
620 | 0 | { |
621 | 0 | size_t size1; |
622 | 0 | l_uint8 *data1, *outdata; |
623 | |
|
624 | 0 | if (!poutsize) |
625 | 0 | return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL); |
626 | 0 | *poutsize = 0; |
627 | 0 | if (!instr) |
628 | 0 | return (l_uint8 *)ERROR_PTR("instr not defined", __func__, NULL); |
629 | | |
630 | 0 | if (insize == 0) insize = strlen(instr); |
631 | 0 | if ((data1 = decodeAscii85(instr, insize, &size1)) == NULL) |
632 | 0 | return (l_uint8 *)ERROR_PTR("data1 not made", __func__, NULL); |
633 | 0 | outdata = zlibUncompress(data1, size1, poutsize); |
634 | 0 | LEPT_FREE(data1); |
635 | 0 | return outdata; |
636 | 0 | } |
637 | | |
638 | | |
639 | | /*-------------------------------------------------------------* |
640 | | * String reformatting for base 64 encoded data * |
641 | | *-------------------------------------------------------------*/ |
642 | | /*! |
643 | | * \brief reformatPacked64() |
644 | | * |
645 | | * \param[in] inarray base64 encoded string with newlines |
646 | | * \param[in] insize number of bytes in input array |
647 | | * \param[in] leadspace number of spaces in each line before the data |
648 | | * \param[in] linechars number of bytes of data in each line; multiple of 4 |
649 | | * \param[in] addquotes 1 to add quotes to each line of data; 0 to skip |
650 | | * \param[out] poutsize number of bytes in output char array |
651 | | * \return outarray ascii |
652 | | * |
653 | | * <pre> |
654 | | * Notes: |
655 | | * (1) Each line in the output array has %leadspace space characters, |
656 | | * followed optionally by a double-quote, followed by %linechars |
657 | | * bytes of base64 data, followed optionally by a double-quote, |
658 | | * followed by a newline. |
659 | | * (2) This can be used to convert a base64 encoded string to a |
660 | | * string formatted for inclusion in a C source file. |
661 | | * </pre> |
662 | | */ |
663 | | char * |
664 | | reformatPacked64(const char *inarray, |
665 | | l_int32 insize, |
666 | | l_int32 leadspace, |
667 | | l_int32 linechars, |
668 | | l_int32 addquotes, |
669 | | l_int32 *poutsize) |
670 | 0 | { |
671 | 0 | char *flata, *outa; |
672 | 0 | l_int32 i, j, flatindex, flatsize, outindex, nlines, linewithpad, linecount; |
673 | |
|
674 | 0 | if (!poutsize) |
675 | 0 | return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); |
676 | 0 | *poutsize = 0; |
677 | 0 | if (!inarray) |
678 | 0 | return (char *)ERROR_PTR("inarray not defined", __func__, NULL); |
679 | 0 | if (insize <= 0) |
680 | 0 | return (char *)ERROR_PTR("insize not > 0", __func__, NULL); |
681 | 0 | if (leadspace < 0) |
682 | 0 | return (char *)ERROR_PTR("leadspace must be >= 0", __func__, NULL); |
683 | 0 | if (linechars % 4) |
684 | 0 | return (char *)ERROR_PTR("linechars % 4 must be 0", __func__, NULL); |
685 | | |
686 | | /* Remove all white space */ |
687 | 0 | if ((flata = (char *)LEPT_CALLOC(insize, sizeof(char))) == NULL) |
688 | 0 | return (char *)ERROR_PTR("flata not made", __func__, NULL); |
689 | 0 | for (i = 0, flatindex = 0; i < insize; i++) { |
690 | 0 | if (isBase64(inarray[i]) || inarray[i] == '=') |
691 | 0 | flata[flatindex++] = inarray[i]; |
692 | 0 | } |
693 | | |
694 | | /* Generate output string */ |
695 | 0 | flatsize = flatindex; |
696 | 0 | nlines = (flatsize + linechars - 1) / linechars; |
697 | 0 | linewithpad = leadspace + linechars + 1; /* including newline */ |
698 | 0 | if (addquotes) linewithpad += 2; |
699 | 0 | if ((outa = (char *)LEPT_CALLOC((size_t)nlines * linewithpad, |
700 | 0 | sizeof(char))) == NULL) { |
701 | 0 | LEPT_FREE(flata); |
702 | 0 | return (char *)ERROR_PTR("outa not made", __func__, NULL); |
703 | 0 | } |
704 | 0 | for (j = 0, outindex = 0; j < leadspace; j++) |
705 | 0 | outa[outindex++] = ' '; |
706 | 0 | if (addquotes) outa[outindex++] = '"'; |
707 | 0 | for (i = 0, linecount = 0; i < flatsize; i++) { |
708 | 0 | if (linecount == linechars) { |
709 | 0 | if (addquotes) outa[outindex++] = '"'; |
710 | 0 | outa[outindex++] = '\n'; |
711 | 0 | for (j = 0; j < leadspace; j++) |
712 | 0 | outa[outindex++] = ' '; |
713 | 0 | if (addquotes) outa[outindex++] = '"'; |
714 | 0 | linecount = 0; |
715 | 0 | } |
716 | 0 | outa[outindex++] = flata[i]; |
717 | 0 | linecount++; |
718 | 0 | } |
719 | 0 | if (addquotes) outa[outindex++] = '"'; |
720 | 0 | *poutsize = outindex; |
721 | |
|
722 | 0 | LEPT_FREE(flata); |
723 | 0 | return outa; |
724 | 0 | } |