/src/ghostpdl/base/slzwd.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* LZW decoding filter */ |
18 | | #include "stdio_.h" /* includes std.h */ |
19 | | #include "gdebug.h" |
20 | | #include "strimpl.h" |
21 | | #include "slzwx.h" |
22 | | |
23 | | /* ------ LZWDecode ------ */ |
24 | | |
25 | | /********************************************************/ |
26 | | /* LZW routines are based on: */ |
27 | | /* Dr. Dobbs Journal --- Oct. 1989. */ |
28 | | /* Article on LZW Data Compression by Mark R. Nelson */ |
29 | | /********************************************************/ |
30 | | |
31 | | /* Define the special codes in terms of code_escape, which is */ |
32 | | /* 1 << InitialCodeLength. */ |
33 | 1.33k | #define code_reset (code_escape + 0) |
34 | 46.2k | #define code_eod (code_escape + 1) |
35 | 746 | #define code_0 (code_escape + 2) /* first assignable code */ |
36 | | |
37 | | struct lzw_decode_s { |
38 | | byte datum; |
39 | | byte len; /* length of code */ |
40 | | ushort prefix; /* code to be prefixed */ |
41 | | }; |
42 | | |
43 | | gs_private_st_simple(st_lzw_decode, lzw_decode, "lzw_decode"); |
44 | | /* We can use a simple type as the element type, */ |
45 | | /* because there are no pointers to enumerate or relocate. */ |
46 | | #define st_lzw_decode_element st_lzw_decode |
47 | 68.5k | #define lzw_decode_max 4096 /* must be 4096 */ |
48 | | |
49 | | /* Initialize LZWDecode filter */ |
50 | | /* We separate out the reset function for some non-stream clients. */ |
51 | | static int |
52 | | s_LZWD_reset(stream_state * st) |
53 | 176 | { |
54 | 176 | stream_LZW_state *const ss = (stream_LZW_state *) st; |
55 | 176 | register lzw_decode *dc = ss->table.decode; |
56 | 176 | register int i; |
57 | 176 | uint code_escape = 1 << ss->InitialCodeLength; |
58 | | |
59 | 176 | ss->bits_left = 0; |
60 | 176 | ss->bytes_left = 0; |
61 | 176 | ss->next_code = code_0; |
62 | 176 | ss->code_size = ss->InitialCodeLength + 1; |
63 | 176 | ss->prev_code = -1; |
64 | 176 | ss->copy_code = -1; |
65 | 176 | dc[code_reset].len = 255; |
66 | 176 | dc[code_eod].len = 255; |
67 | 45.2k | for (i = 0; i < code_escape; i++, dc++) |
68 | 45.0k | dc->datum = i, dc->len = 1, dc->prefix = code_eod; |
69 | 176 | return 0; |
70 | 176 | } |
71 | | static int |
72 | | s_LZWD_init(stream_state * st) |
73 | 176 | { |
74 | 176 | stream_LZW_state *const ss = (stream_LZW_state *) st; |
75 | 176 | lzw_decode *dc = |
76 | 176 | gs_alloc_struct_array(st->memory, lzw_decode_max + 1, |
77 | 176 | lzw_decode, &st_lzw_decode_element, |
78 | 176 | "LZWDecode(init)"); |
79 | | |
80 | 176 | if (dc == 0) |
81 | 0 | return ERRC; |
82 | | /****** WRONG ******/ |
83 | 176 | ss->table.decode = dc; |
84 | 176 | ss->min_left = 1; |
85 | 176 | return s_LZWD_reset(st); |
86 | 176 | } |
87 | | |
88 | | /* Process a buffer */ |
89 | | static int |
90 | | s_LZWD_process(stream_state * st, stream_cursor_read * pr, |
91 | | stream_cursor_write * pw, bool last) |
92 | 986 | { |
93 | 986 | stream_LZW_state *const ss = (stream_LZW_state *) st; |
94 | 986 | register const byte *p = pr->ptr; |
95 | 986 | register byte *q = pw->ptr; |
96 | | |
97 | | #ifdef DEBUG |
98 | | byte *q0 = q; |
99 | | |
100 | | #endif |
101 | 986 | const byte *rlimit = pr->limit; /* constant pointer */ |
102 | 986 | byte *wlimit = pw->limit; /* constant pointer */ |
103 | 986 | int status = 0; |
104 | 986 | int code = ss->copy_code; |
105 | 986 | int prev_code = ss->prev_code; |
106 | 986 | uint prev_len = ss->prev_len; |
107 | 986 | byte bits = ss->bits; |
108 | 986 | int bits_left = ss->bits_left; |
109 | 986 | int bytes_left = ss->bytes_left; |
110 | 986 | int code_size = ss->code_size; |
111 | 986 | int code_mask; |
112 | 986 | int switch_code; |
113 | 986 | int next_code = ss->next_code; |
114 | 986 | lzw_decode *table = ss->table.decode; /* constant pointer */ |
115 | 986 | lzw_decode *dc_next = table + next_code; /* invariant */ |
116 | 986 | lzw_decode *dc; |
117 | 986 | int code_escape = 1 << ss->InitialCodeLength; |
118 | 986 | int eod = code_eod; |
119 | 986 | bool low_order = ss->FirstBitLowOrder; |
120 | 986 | bool old_tiff = ss->OldTiff; |
121 | 986 | uint len; |
122 | 986 | int c; |
123 | 986 | byte b; |
124 | 986 | byte *q1; |
125 | | |
126 | 986 | if_debug2m('w', ss->memory, "[w]process decode: code_size=%d next_code=%d\n", |
127 | 986 | code_size, next_code); |
128 | 986 | #define set_code_size()\ |
129 | 1.28k | code_mask = (1 << code_size) - 1,\ |
130 | 1.28k | switch_code = code_mask + 1 - ss->EarlyChange |
131 | 986 | set_code_size(); |
132 | 986 | if (!ss->BlockData) |
133 | 986 | bytes_left = rlimit - p + 2; /* never stop for bytes_left */ |
134 | | /* If we are in the middle of copying a string, */ |
135 | | /* do some more now. */ |
136 | 986 | if (code >= 0) { |
137 | 611 | int rlen = ss->copy_left; |
138 | 611 | int wlen = wlimit - q; |
139 | 611 | int n = len = min(rlen, wlen); |
140 | | |
141 | 611 | c = code; |
142 | 611 | ss->copy_left = rlen -= len; |
143 | 611 | if_debug3m('W', ss->memory, "[W]copying 0x%x, %d byte(s) out of %d left\n", |
144 | 611 | code, len, rlen + len); |
145 | 611 | while (rlen) |
146 | 0 | c = table[c].prefix, |
147 | 0 | rlen--; |
148 | 611 | q1 = q += len; |
149 | 611 | n = len; |
150 | 117k | while (--n >= 0) { |
151 | 117k | *q1-- = (dc = &table[c])->datum; |
152 | 117k | c = dc->prefix; |
153 | 117k | } |
154 | 611 | if (ss->copy_left) { /* more to do */ |
155 | 0 | pw->ptr = q; |
156 | 0 | return 1; |
157 | 0 | } |
158 | 611 | ss->copy_code = -1; |
159 | 611 | len = ss->copy_len; |
160 | | /* Retrieve the first byte of the code just copied. */ |
161 | 611 | if (c == eod) { /* We just copied the entire code, */ |
162 | | /* so the byte we want is immediately available. */ |
163 | 611 | b = q1[1]; |
164 | 611 | } else { /* We have to scan to the beginning of the code. */ |
165 | 0 | for (; c != eod; c = table[c].prefix) |
166 | 0 | b = (byte) c; |
167 | 0 | } |
168 | 611 | goto add; |
169 | 611 | } |
170 | 35.2k | while (1) /* Loop while we have data to handle */ |
171 | 35.2k | { |
172 | 35.2k | if (code_size > bits_left) { |
173 | 35.2k | if (bytes_left == 0) { |
174 | 0 | if (p == rlimit) |
175 | 0 | goto out; |
176 | 0 | bytes_left = *++p; |
177 | 0 | if_debug1m('W', ss->memory, "[W]block count %d\n", bytes_left); |
178 | 0 | if (bytes_left == 0) { |
179 | 0 | status = EOFC; |
180 | 0 | goto out; |
181 | 0 | } |
182 | 0 | continue; |
183 | 0 | } |
184 | 35.2k | if (low_order) |
185 | 0 | code = bits >> (8 - bits_left); |
186 | 35.2k | else |
187 | 35.2k | code = (uint) bits << (code_size - bits_left); |
188 | 35.2k | if (bits_left + 8 < code_size) { /* Need 2 more data bytes */ |
189 | 6.01k | if (bytes_left == 1) { |
190 | 0 | if (rlimit - p < 3) |
191 | 0 | goto out; |
192 | 0 | bytes_left = p[2]; |
193 | 0 | if_debug1m('W', ss->memory, "[W]block count %d\n", |
194 | 0 | bytes_left); |
195 | 0 | if (bytes_left == 0) { |
196 | 0 | status = EOFC; |
197 | 0 | goto out; |
198 | 0 | } |
199 | 0 | bytes_left++; |
200 | 0 | bits = p[1]; |
201 | 0 | p++; |
202 | 6.01k | } else { |
203 | 6.01k | if (rlimit - p < 2) |
204 | 196 | goto out; |
205 | 5.81k | bits = p[1]; |
206 | 5.81k | } |
207 | 5.81k | if (low_order) |
208 | 0 | code += (uint) bits << bits_left; |
209 | 5.81k | else |
210 | 5.81k | code += (uint) bits << (code_size - 8 - bits_left); |
211 | 5.81k | bits_left += 8; |
212 | 5.81k | bits = p[2]; |
213 | 5.81k | p += 2; |
214 | 5.81k | bytes_left -= 2; |
215 | 29.2k | } else { |
216 | 29.2k | if (p == rlimit) |
217 | 16 | goto out; |
218 | 29.1k | bits = *++p; |
219 | 29.1k | bytes_left--; |
220 | 29.1k | } |
221 | 35.0k | if (low_order) |
222 | 0 | code += (uint) bits << bits_left, |
223 | 0 | bits_left += 8 - code_size; |
224 | 35.0k | else |
225 | 35.0k | bits_left += 8 - code_size, |
226 | 35.0k | code += bits >> bits_left; |
227 | 35.0k | } else { |
228 | 0 | if (low_order) |
229 | 0 | code = bits >> (8 - bits_left), |
230 | 0 | bits_left -= code_size; |
231 | 0 | else |
232 | 0 | bits_left -= code_size, |
233 | 0 | code = bits >> bits_left; |
234 | 0 | } |
235 | 35.0k | code &= code_mask; |
236 | 35.0k | if_debug2m('W', ss->memory, "[W]reading 0x%x,%d\n", code, code_size); |
237 | | /* |
238 | | * There is an anomalous case where a code S is followed |
239 | | * immediately by another occurrence of the S string. |
240 | | * In this case, the next available code will be defined as |
241 | | * S followed by the first character of S, and will be |
242 | | * emitted immediately after the code S. We have to |
243 | | * recognize this case specially, by noting that the code is |
244 | | * equal to next_code. |
245 | | */ |
246 | 35.0k | if (code >= next_code) { |
247 | 11.5k | if ((code > next_code) || (prev_code < 0)) { |
248 | | #ifdef DEBUG |
249 | | mlprintf3(ss->memory, "[W]code = %d > next_code = %d or prev_code = %d < 0\n", |
250 | | code, next_code, prev_code); |
251 | | #endif |
252 | 143 | status = ERRC; |
253 | 143 | goto out; |
254 | 143 | } |
255 | | /* Fabricate the entry for the code. It will be */ |
256 | | /* overwritten immediately, of course. */ |
257 | 704k | for (c = prev_code; c != eod; c = table[c].prefix) |
258 | 693k | dc_next->datum = c; |
259 | 11.4k | len = prev_len + 1; |
260 | 11.4k | dc_next->len = min(len, 255); |
261 | 11.4k | dc_next->prefix = prev_code; |
262 | 11.4k | if_debug3m('w', ss->memory, "[w]decoding anomalous 0x%x=0x%x+%c\n", |
263 | 11.4k | next_code, prev_code, dc_next->datum); |
264 | 11.4k | } |
265 | | /* See if there is enough room for the code. */ |
266 | 34.8k | while (1) /* Loop while we have codes to handle */ |
267 | 34.8k | { |
268 | 34.8k | len = table[code].len; |
269 | 34.8k | if (len == 255) { /* Check for special code (reset or end). */ |
270 | | /* We set their lengths to 255 to avoid doing */ |
271 | | /* an extra check in the normal case. */ |
272 | 1.15k | if (code == code_reset) { |
273 | 285 | if_debug1m('w', ss->memory, "[w]reset: next_code was %d\n", |
274 | 285 | next_code); |
275 | 285 | next_code = code_0; |
276 | 285 | dc_next = table + code_0; |
277 | 285 | code_size = ss->InitialCodeLength + 1; |
278 | 285 | set_code_size(); |
279 | 285 | prev_code = -1; |
280 | 285 | goto loop; |
281 | 874 | } else if (code == eod) { |
282 | 20 | status = EOFC; |
283 | 20 | goto out; |
284 | 20 | } |
285 | | /* The code length won't fit in a byte, */ |
286 | | /* compute it the hard way. */ |
287 | 334k | for (c = code, len = 0; c != eod; len++) |
288 | 333k | c = table[c].prefix; |
289 | 854 | if_debug2m('w', ss->memory, "[w]long code %d, length=%d\n", code, len); |
290 | 854 | } |
291 | 34.5k | if (wlimit - q < len) { |
292 | 611 | ss->copy_code = code; |
293 | 611 | ss->copy_left = ss->copy_len = len; |
294 | 611 | status = 1; |
295 | 611 | goto out; |
296 | 611 | } |
297 | | /* Copy the string to the buffer (back to front). */ |
298 | | /* Optimize for short codes, which are the most frequent. */ |
299 | 33.9k | dc = &table[code]; |
300 | 33.9k | switch (len) { |
301 | 14.4k | default: |
302 | 14.4k | { |
303 | 14.4k | byte *q1 = q += len; |
304 | | |
305 | 14.4k | c = code; |
306 | 1.06M | do { |
307 | 1.06M | *q1-- = (dc = &table[c])->datum; |
308 | 1.06M | } |
309 | 1.06M | while ((c = dc->prefix) != eod); |
310 | 14.4k | b = q1[1]; |
311 | 14.4k | break; |
312 | 0 | } |
313 | 1.50k | case 3: |
314 | 1.50k | q[3] = dc->datum; |
315 | 1.50k | dc = &table[dc->prefix]; |
316 | 5.76k | case 2: |
317 | 5.76k | q[2] = dc->datum; |
318 | 5.76k | dc = &table[dc->prefix]; |
319 | 19.5k | case 1: |
320 | 19.5k | q[1] = b = dc->datum; |
321 | 19.5k | q += len; |
322 | 33.9k | } |
323 | 34.5k | add: /* Add a new entry to the table */ |
324 | 34.5k | if (prev_code >= 0) { |
325 | | /* |
326 | | * Unfortunately, we have to check for next_code == |
327 | | * lzw_decode_max every time: just checking at power |
328 | | * of 2 boundaries stops us one code too soon. |
329 | | */ |
330 | 34.2k | if (!old_tiff && next_code == lzw_decode_max) { |
331 | | /* |
332 | | * A few anomalous files have one data item too many before the |
333 | | * reset code. We think this is a bug in the application that |
334 | | * produced the files, but Acrobat accepts the files, so we do |
335 | | * too. |
336 | | */ |
337 | 0 | if (!ss->BlockData) { /* don't do this for GIF */ |
338 | 0 | if (bits_left < 8 && p >= rlimit && last) { |
339 | | /* We're at EOD. */ |
340 | 0 | goto out; |
341 | 0 | } |
342 | 0 | if (bits_left + ((rlimit - p) << 3) < code_size) { |
343 | | /* |
344 | | * We need more data to decide whether a reset is next. |
345 | | * Return an error if we cannot get more. |
346 | | */ |
347 | 0 | if (last) |
348 | 0 | status = ERRC; |
349 | 0 | goto out; |
350 | 0 | } |
351 | 0 | if (low_order) { |
352 | 0 | code = bits >> (8 - bits_left); |
353 | 0 | code += (bits = *++p) << bits_left; |
354 | 0 | if (bits_left + 8 < code_size) |
355 | 0 | code += (bits = *++p) << (bits_left + 8); |
356 | 0 | } else { |
357 | 0 | code = bits & ((1 << bits_left) - 1); |
358 | 0 | code = (code << 8) + (bits = *++p); |
359 | 0 | if (bits_left + 8 < code_size) |
360 | 0 | code = (code << 8) + (bits = *++p); |
361 | 0 | code >>= (bits_left - code_size) & 7; |
362 | 0 | } |
363 | 0 | bits_left = (bits_left - code_size) & 7; |
364 | 0 | if (code == code_reset) |
365 | 0 | continue; /* Loop back to handle the reset */ |
366 | 0 | } |
367 | 0 | status = ERRC; |
368 | 0 | goto out; |
369 | 0 | } |
370 | 34.2k | if (next_code < lzw_decode_max) { |
371 | 34.2k | dc_next->datum = b; /* added char of string */ |
372 | 34.2k | dc_next->len = min(prev_len, 254) + 1; |
373 | 34.2k | dc_next->prefix = prev_code; |
374 | 34.2k | dc_next++; |
375 | 34.2k | if_debug4m('W', ss->memory, "[W]adding 0x%x=0x%x+%c(%d)\n", |
376 | 34.2k | next_code, prev_code, b, min(len, 255)); |
377 | 34.2k | } |
378 | 34.2k | if (++next_code == switch_code) { /* Crossed a power of 2. */ |
379 | | /* We have to make a strange special check for */ |
380 | | /* reaching the end of the code space. */ |
381 | 10 | if (next_code < lzw_decode_max - 1) { |
382 | 9 | code_size++; |
383 | 9 | set_code_size(); |
384 | 9 | if_debug2m('w', ss->memory, "[w]crossed power of 2: new code_size=%d, next_code=%d\n", |
385 | 9 | code_size, next_code); |
386 | 9 | } |
387 | 10 | } |
388 | 34.2k | } |
389 | 34.5k | break; /* No more codes to handle */ |
390 | 34.5k | } |
391 | 34.5k | prev_code = code; |
392 | 34.5k | prev_len = len; |
393 | 34.8k | loop: {} |
394 | 34.8k | } /* Loop back to the top */ |
395 | 986 | out:pr->ptr = p; |
396 | 986 | pw->ptr = q; |
397 | 986 | ss->code_size = code_size; |
398 | 986 | ss->prev_code = prev_code; |
399 | 986 | ss->prev_len = prev_len; |
400 | 986 | ss->bits = bits; |
401 | 986 | ss->bits_left = bits_left; |
402 | 986 | ss->bytes_left = bytes_left; |
403 | 986 | ss->next_code = next_code; |
404 | 986 | if_debug3m('w', ss->memory, "[w]decoded %d bytes, prev_code=%d, next_code=%d\n", |
405 | 986 | (int)(q - q0), prev_code, next_code); |
406 | 986 | return status; |
407 | 375 | } |
408 | | |
409 | | /* Stream template */ |
410 | | const stream_template s_LZWD_template = |
411 | | {&st_LZW_state, s_LZWD_init, s_LZWD_process, 3, 1, s_LZW_release, |
412 | | s_LZW_set_defaults, s_LZWD_reset |
413 | | }; |