/src/ghostpdl/base/slzwd.c
Line | Count | Source |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* LZW decoding filter */ |
18 | | #include "stdio_.h" /* includes std.h */ |
19 | | #include "gdebug.h" |
20 | | #include "strimpl.h" |
21 | | #include "slzwx.h" |
22 | | |
23 | | /* ------ LZWDecode ------ */ |
24 | | |
25 | | /********************************************************/ |
26 | | /* LZW routines are based on: */ |
27 | | /* Dr. Dobbs Journal --- Oct. 1989. */ |
28 | | /* Article on LZW Data Compression by Mark R. Nelson */ |
29 | | /********************************************************/ |
30 | | |
31 | | /* Define the special codes in terms of code_escape, which is */ |
32 | | /* 1 << InitialCodeLength. */ |
33 | 657 | #define code_reset (code_escape + 0) |
34 | 61.6k | #define code_eod (code_escape + 1) |
35 | 1.01k | #define code_0 (code_escape + 2) /* first assignable code */ |
36 | | |
37 | | struct lzw_decode_s { |
38 | | byte datum; |
39 | | byte len; /* length of code */ |
40 | | ushort prefix; /* code to be prefixed */ |
41 | | }; |
42 | | |
43 | | gs_private_st_simple(st_lzw_decode, lzw_decode, "lzw_decode"); |
44 | | /* We can use a simple type as the element type, */ |
45 | | /* because there are no pointers to enumerate or relocate. */ |
46 | | #define st_lzw_decode_element st_lzw_decode |
47 | 81.3k | #define lzw_decode_max 4096 /* must be 4096 */ |
48 | | |
49 | | /* Initialize LZWDecode filter */ |
50 | | /* We separate out the reset function for some non-stream clients. */ |
51 | | static int |
52 | | s_LZWD_reset(stream_state * st) |
53 | 236 | { |
54 | 236 | stream_LZW_state *const ss = (stream_LZW_state *) st; |
55 | 236 | register lzw_decode *dc = ss->table.decode; |
56 | 236 | register int i; |
57 | 236 | uint code_escape = 1 << ss->InitialCodeLength; |
58 | | |
59 | 236 | ss->bits_left = 0; |
60 | 236 | ss->bytes_left = 0; |
61 | 236 | ss->next_code = code_0; |
62 | 236 | ss->code_size = ss->InitialCodeLength + 1; |
63 | 236 | ss->prev_code = -1; |
64 | 236 | ss->copy_code = -1; |
65 | 236 | dc[code_reset].len = 255; |
66 | 236 | dc[code_eod].len = 255; |
67 | 60.6k | for (i = 0; i < code_escape; i++, dc++) |
68 | 60.4k | dc->datum = i, dc->len = 1, dc->prefix = code_eod; |
69 | 236 | return 0; |
70 | 236 | } |
71 | | static int |
72 | | s_LZWD_init(stream_state * st) |
73 | 236 | { |
74 | 236 | stream_LZW_state *const ss = (stream_LZW_state *) st; |
75 | 236 | lzw_decode *dc = |
76 | 236 | gs_alloc_struct_array(st->memory, lzw_decode_max + 1, |
77 | 236 | lzw_decode, &st_lzw_decode_element, |
78 | 236 | "LZWDecode(init)"); |
79 | | |
80 | 236 | if (dc == 0) |
81 | 0 | return ERRC; |
82 | | /****** WRONG ******/ |
83 | 236 | ss->table.decode = dc; |
84 | 236 | ss->min_left = 1; |
85 | 236 | return s_LZWD_reset(st); |
86 | 236 | } |
87 | | |
88 | | /* Process a buffer */ |
89 | | static int |
90 | | s_LZWD_process(stream_state * st, stream_cursor_read * pr, |
91 | | stream_cursor_write * pw, bool last) |
92 | 975 | { |
93 | 975 | stream_LZW_state *const ss = (stream_LZW_state *) st; |
94 | 975 | register const byte *p = pr->ptr; |
95 | 975 | register byte *q = pw->ptr; |
96 | | |
97 | | #ifdef DEBUG |
98 | | byte *q0 = q; |
99 | | |
100 | | #endif |
101 | 975 | const byte *rlimit = pr->limit; /* constant pointer */ |
102 | 975 | byte *wlimit = pw->limit; /* constant pointer */ |
103 | 975 | int status = 0; |
104 | 975 | int code = ss->copy_code; |
105 | 975 | int prev_code = ss->prev_code; |
106 | 975 | uint prev_len = ss->prev_len; |
107 | 975 | byte bits = ss->bits; |
108 | 975 | int bits_left = ss->bits_left; |
109 | 975 | int bytes_left = ss->bytes_left; |
110 | 975 | int code_size = ss->code_size; |
111 | 975 | int code_mask; |
112 | 975 | int switch_code; |
113 | 975 | int next_code = ss->next_code; |
114 | 975 | lzw_decode *table = ss->table.decode; /* constant pointer */ |
115 | 975 | lzw_decode *dc_next = table + next_code; /* invariant */ |
116 | 975 | lzw_decode *dc; |
117 | 975 | int code_escape = 1 << ss->InitialCodeLength; |
118 | 975 | int eod = code_eod; |
119 | 975 | bool low_order = ss->FirstBitLowOrder; |
120 | 975 | bool old_tiff = ss->OldTiff; |
121 | 975 | uint len; |
122 | 975 | int c; |
123 | 975 | byte b; |
124 | 975 | byte *q1; |
125 | | |
126 | 975 | if_debug2m('w', ss->memory, "[w]process decode: code_size=%d next_code=%d\n", |
127 | 975 | code_size, next_code); |
128 | 975 | #define set_code_size()\ |
129 | 1.36k | code_mask = (1 << code_size) - 1,\ |
130 | 1.36k | switch_code = code_mask + 1 - ss->EarlyChange |
131 | 975 | set_code_size(); |
132 | 975 | if (!ss->BlockData) |
133 | 975 | bytes_left = rlimit - p + 2; /* never stop for bytes_left */ |
134 | | /* If we are in the middle of copying a string, */ |
135 | | /* do some more now. */ |
136 | 975 | if (code >= 0) { |
137 | 476 | int rlen = ss->copy_left; |
138 | 476 | int wlen = wlimit - q; |
139 | 476 | int n = len = min(rlen, wlen); |
140 | | |
141 | 476 | c = code; |
142 | 476 | ss->copy_left = rlen -= len; |
143 | 476 | if_debug3m('W', ss->memory, "[W]copying 0x%x, %d byte(s) out of %d left\n", |
144 | 476 | code, len, rlen + len); |
145 | 476 | while (rlen) |
146 | 0 | c = table[c].prefix, |
147 | 0 | rlen--; |
148 | 476 | q1 = q += len; |
149 | 476 | n = len; |
150 | 37.9k | while (--n >= 0) { |
151 | 37.5k | *q1-- = (dc = &table[c])->datum; |
152 | 37.5k | c = dc->prefix; |
153 | 37.5k | } |
154 | 476 | if (ss->copy_left) { /* more to do */ |
155 | 0 | pw->ptr = q; |
156 | 0 | return 1; |
157 | 0 | } |
158 | 476 | ss->copy_code = -1; |
159 | 476 | len = ss->copy_len; |
160 | | /* Retrieve the first byte of the code just copied. */ |
161 | 476 | if (c == eod) { /* We just copied the entire code, */ |
162 | | /* so the byte we want is immediately available. */ |
163 | 476 | b = q1[1]; |
164 | 476 | } else { /* We have to scan to the beginning of the code. */ |
165 | 0 | for (; c != eod; c = table[c].prefix) |
166 | 0 | b = (byte) c; |
167 | 0 | } |
168 | 476 | goto add; |
169 | 476 | } |
170 | 41.9k | while (1) /* Loop while we have data to handle */ |
171 | 41.9k | { |
172 | 41.9k | if (code_size > bits_left) { |
173 | 41.9k | if (bytes_left == 0) { |
174 | 0 | if (p == rlimit) |
175 | 0 | goto out; |
176 | 0 | bytes_left = *++p; |
177 | 0 | if_debug1m('W', ss->memory, "[W]block count %d\n", bytes_left); |
178 | 0 | if (bytes_left == 0) { |
179 | 0 | status = EOFC; |
180 | 0 | goto out; |
181 | 0 | } |
182 | 0 | continue; |
183 | 0 | } |
184 | 41.9k | if (low_order) |
185 | 0 | code = bits >> (8 - bits_left); |
186 | 41.9k | else |
187 | 41.9k | code = (uint) bits << (code_size - bits_left); |
188 | 41.9k | if (bits_left + 8 < code_size) { /* Need 2 more data bytes */ |
189 | 5.57k | if (bytes_left == 1) { |
190 | 0 | if (rlimit - p < 3) |
191 | 0 | goto out; |
192 | 0 | bytes_left = p[2]; |
193 | 0 | if_debug1m('W', ss->memory, "[W]block count %d\n", |
194 | 0 | bytes_left); |
195 | 0 | if (bytes_left == 0) { |
196 | 0 | status = EOFC; |
197 | 0 | goto out; |
198 | 0 | } |
199 | 0 | bytes_left++; |
200 | 0 | bits = p[1]; |
201 | 0 | p++; |
202 | 5.57k | } else { |
203 | 5.57k | if (rlimit - p < 2) |
204 | 258 | goto out; |
205 | 5.31k | bits = p[1]; |
206 | 5.31k | } |
207 | 5.31k | if (low_order) |
208 | 0 | code += (uint) bits << bits_left; |
209 | 5.31k | else |
210 | 5.31k | code += (uint) bits << (code_size - 8 - bits_left); |
211 | 5.31k | bits_left += 8; |
212 | 5.31k | bits = p[2]; |
213 | 5.31k | p += 2; |
214 | 5.31k | bytes_left -= 2; |
215 | 36.3k | } else { |
216 | 36.3k | if (p == rlimit) |
217 | 22 | goto out; |
218 | 36.3k | bits = *++p; |
219 | 36.3k | bytes_left--; |
220 | 36.3k | } |
221 | 41.6k | if (low_order) |
222 | 0 | code += (uint) bits << bits_left, |
223 | 0 | bits_left += 8 - code_size; |
224 | 41.6k | else |
225 | 41.6k | bits_left += 8 - code_size, |
226 | 41.6k | code += bits >> bits_left; |
227 | 41.6k | } else { |
228 | 0 | if (low_order) |
229 | 0 | code = bits >> (8 - bits_left), |
230 | 0 | bits_left -= code_size; |
231 | 0 | else |
232 | 0 | bits_left -= code_size, |
233 | 0 | code = bits >> bits_left; |
234 | 0 | } |
235 | 41.6k | code &= code_mask; |
236 | 41.6k | if_debug2m('W', ss->memory, "[W]reading 0x%x,%d\n", code, code_size); |
237 | | /* |
238 | | * There is an anomalous case where a code S is followed |
239 | | * immediately by another occurrence of the S string. |
240 | | * In this case, the next available code will be defined as |
241 | | * S followed by the first character of S, and will be |
242 | | * emitted immediately after the code S. We have to |
243 | | * recognize this case specially, by noting that the code is |
244 | | * equal to next_code. |
245 | | */ |
246 | 41.6k | if (code >= next_code) { |
247 | 15.3k | if ((code > next_code) || (prev_code < 0)) { |
248 | | #ifdef DEBUG |
249 | | mlprintf3(ss->memory, "[W]code = %d > next_code = %d or prev_code = %d < 0\n", |
250 | | code, next_code, prev_code); |
251 | | #endif |
252 | 188 | status = ERRC; |
253 | 188 | goto out; |
254 | 188 | } |
255 | | /* Fabricate the entry for the code. It will be */ |
256 | | /* overwritten immediately, of course. */ |
257 | 838k | for (c = prev_code; c != eod; c = table[c].prefix) |
258 | 823k | dc_next->datum = c; |
259 | 15.1k | len = prev_len + 1; |
260 | 15.1k | dc_next->len = min(len, 255); |
261 | 15.1k | dc_next->prefix = prev_code; |
262 | 15.1k | if_debug3m('w', ss->memory, "[w]decoding anomalous 0x%x=0x%x+%c\n", |
263 | 15.1k | next_code, prev_code, dc_next->datum); |
264 | 15.1k | } |
265 | | /* See if there is enough room for the code. */ |
266 | 41.4k | while (1) /* Loop while we have codes to handle */ |
267 | 41.4k | { |
268 | 41.4k | len = table[code].len; |
269 | 41.4k | if (len == 255) { /* Check for special code (reset or end). */ |
270 | | /* We set their lengths to 255 to avoid doing */ |
271 | | /* an extra check in the normal case. */ |
272 | 421 | if (code == code_reset) { |
273 | 390 | if_debug1m('w', ss->memory, "[w]reset: next_code was %d\n", |
274 | 390 | next_code); |
275 | 390 | next_code = code_0; |
276 | 390 | dc_next = table + code_0; |
277 | 390 | code_size = ss->InitialCodeLength + 1; |
278 | 390 | set_code_size(); |
279 | 390 | prev_code = -1; |
280 | 390 | goto loop; |
281 | 390 | } else if (code == eod) { |
282 | 31 | status = EOFC; |
283 | 31 | goto out; |
284 | 31 | } |
285 | | /* The code length won't fit in a byte, */ |
286 | | /* compute it the hard way. */ |
287 | 0 | for (c = code, len = 0; c != eod; len++) |
288 | 0 | c = table[c].prefix; |
289 | 0 | if_debug2m('w', ss->memory, "[w]long code %d, length=%d\n", code, len); |
290 | 0 | } |
291 | 41.0k | if (wlimit - q < len) { |
292 | 476 | ss->copy_code = code; |
293 | 476 | ss->copy_left = ss->copy_len = len; |
294 | 476 | status = 1; |
295 | 476 | goto out; |
296 | 476 | } |
297 | | /* Copy the string to the buffer (back to front). */ |
298 | | /* Optimize for short codes, which are the most frequent. */ |
299 | 40.5k | dc = &table[code]; |
300 | 40.5k | switch (len) { |
301 | 17.4k | default: |
302 | 17.4k | { |
303 | 17.4k | byte *q1 = q += len; |
304 | | |
305 | 17.4k | c = code; |
306 | 916k | do { |
307 | 916k | *q1-- = (dc = &table[c])->datum; |
308 | 916k | } |
309 | 916k | while ((c = dc->prefix) != eod); |
310 | 17.4k | b = q1[1]; |
311 | 17.4k | break; |
312 | 0 | } |
313 | 1.33k | case 3: |
314 | 1.33k | q[3] = dc->datum; |
315 | 1.33k | dc = &table[dc->prefix]; |
316 | 6.16k | case 2: |
317 | 6.16k | q[2] = dc->datum; |
318 | 6.16k | dc = &table[dc->prefix]; |
319 | 23.1k | case 1: |
320 | 23.1k | q[1] = b = dc->datum; |
321 | 23.1k | q += len; |
322 | 40.5k | } |
323 | 41.0k | add: /* Add a new entry to the table */ |
324 | 41.0k | if (prev_code >= 0) { |
325 | | /* |
326 | | * Unfortunately, we have to check for next_code == |
327 | | * lzw_decode_max every time: just checking at power |
328 | | * of 2 boundaries stops us one code too soon. |
329 | | */ |
330 | 40.6k | if (!old_tiff && next_code == lzw_decode_max) { |
331 | | /* |
332 | | * A few anomalous files have one data item too many before the |
333 | | * reset code. We think this is a bug in the application that |
334 | | * produced the files, but Acrobat accepts the files, so we do |
335 | | * too. |
336 | | */ |
337 | 0 | if (!ss->BlockData) { /* don't do this for GIF */ |
338 | 0 | if (bits_left < 8 && p >= rlimit && last) { |
339 | | /* We're at EOD. */ |
340 | 0 | goto out; |
341 | 0 | } |
342 | 0 | if (bits_left + ((rlimit - p) << 3) < code_size) { |
343 | | /* |
344 | | * We need more data to decide whether a reset is next. |
345 | | * Return an error if we cannot get more. |
346 | | */ |
347 | 0 | if (last) |
348 | 0 | status = ERRC; |
349 | 0 | goto out; |
350 | 0 | } |
351 | 0 | if (low_order) { |
352 | 0 | code = bits >> (8 - bits_left); |
353 | 0 | code += (bits = *++p) << bits_left; |
354 | 0 | if (bits_left + 8 < code_size) |
355 | 0 | code += (bits = *++p) << (bits_left + 8); |
356 | 0 | } else { |
357 | 0 | code = bits & ((1 << bits_left) - 1); |
358 | 0 | code = (code << 8) + (bits = *++p); |
359 | 0 | if (bits_left + 8 < code_size) |
360 | 0 | code = (code << 8) + (bits = *++p); |
361 | 0 | code >>= (bits_left - code_size) & 7; |
362 | 0 | } |
363 | 0 | bits_left = (bits_left - code_size) & 7; |
364 | 0 | if (code == code_reset) |
365 | 0 | continue; /* Loop back to handle the reset */ |
366 | 0 | } |
367 | 0 | status = ERRC; |
368 | 0 | goto out; |
369 | 0 | } |
370 | 40.6k | if (next_code < lzw_decode_max) { |
371 | 40.6k | dc_next->datum = b; /* added char of string */ |
372 | 40.6k | dc_next->len = min(prev_len, 254) + 1; |
373 | 40.6k | dc_next->prefix = prev_code; |
374 | 40.6k | dc_next++; |
375 | 40.6k | if_debug4m('W', ss->memory, "[W]adding 0x%x=0x%x+%c(%d)\n", |
376 | 40.6k | next_code, prev_code, b, min(len, 255)); |
377 | 40.6k | } |
378 | 40.6k | if (++next_code == switch_code) { /* Crossed a power of 2. */ |
379 | | /* We have to make a strange special check for */ |
380 | | /* reaching the end of the code space. */ |
381 | 4 | if (next_code < lzw_decode_max - 1) { |
382 | 4 | code_size++; |
383 | 4 | set_code_size(); |
384 | 4 | if_debug2m('w', ss->memory, "[w]crossed power of 2: new code_size=%d, next_code=%d\n", |
385 | 4 | code_size, next_code); |
386 | 4 | } |
387 | 4 | } |
388 | 40.6k | } |
389 | 41.0k | break; /* No more codes to handle */ |
390 | 41.0k | } |
391 | 41.0k | prev_code = code; |
392 | 41.0k | prev_len = len; |
393 | 41.4k | loop: {} |
394 | 41.4k | } /* Loop back to the top */ |
395 | 975 | out:pr->ptr = p; |
396 | 975 | pw->ptr = q; |
397 | 975 | ss->code_size = code_size; |
398 | 975 | ss->prev_code = prev_code; |
399 | 975 | ss->prev_len = prev_len; |
400 | 975 | ss->bits = bits; |
401 | 975 | ss->bits_left = bits_left; |
402 | 975 | ss->bytes_left = bytes_left; |
403 | 975 | ss->next_code = next_code; |
404 | 975 | if_debug3m('w', ss->memory, "[w]decoded %d bytes, prev_code=%d, next_code=%d\n", |
405 | 975 | (int)(q - q0), prev_code, next_code); |
406 | 975 | return status; |
407 | 499 | } |
408 | | |
409 | | /* Stream template */ |
410 | | const stream_template s_LZWD_template = |
411 | | {&st_LZW_state, s_LZWD_init, s_LZWD_process, 3, 1, s_LZW_release, |
412 | | s_LZW_set_defaults, s_LZWD_reset |
413 | | }; |