/src/ghostpdl/psi/iscanbin.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2021 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Ghostscript binary token scanner and writer */ |
18 | | #include "math_.h" |
19 | | #include "memory_.h" |
20 | | #include "ghost.h" |
21 | | #include "gsutil.h" |
22 | | #include "gxalloc.h" /* for names_array in allocator */ |
23 | | #include "stream.h" |
24 | | #include "strimpl.h" /* for sfilter.h */ |
25 | | #include "sfilter.h" /* for iscan.h */ |
26 | | #include "ierrors.h" |
27 | | #include "ialloc.h" |
28 | | #include "iddict.h" |
29 | | #include "dstack.h" /* for immediately evaluated names */ |
30 | | #include "ostack.h" /* must precede iscan.h */ |
31 | | #include "iname.h" |
32 | | #include "iscan.h" /* for scan_Refill */ |
33 | | #include "iscanbin.h" |
34 | | #include "iutil.h" |
35 | | #include "ivmspace.h" |
36 | | #include "store.h" |
37 | | #include "btoken.h" |
38 | | #include "ibnum.h" |
39 | | |
/*
 * Binary token types.  The numeric value of each enumerator is the token
 * type byte itself; the four sequence types share all bits except the low
 * two, which is what BT_IS_SEQ relies on.
 */
typedef enum {
    /* Binary object sequences (BT_SEQ is the base value of the group). */
    BT_SEQ = 128,
    BT_SEQ_IEEE_MSB = 128,      /* IEEE floats, big-endian */
    BT_SEQ_IEEE_LSB = 129,      /* IEEE floats, little-endian */
    BT_SEQ_NATIVE_MSB = 130,    /* native floats, big-endian */
    BT_SEQ_NATIVE_LSB = 131,    /* native floats, little-endian */

    /* Numbers. */
    BT_INT32_MSB = 132,
    BT_INT32_LSB = 133,
    BT_INT16_MSB = 134,
    BT_INT16_LSB = 135,
    BT_INT8 = 136,
    BT_FIXED = 137,
    BT_FLOAT_IEEE_MSB = 138,
    BT_FLOAT_IEEE_LSB = 139,
    BT_FLOAT_NATIVE = 140,

    /* Booleans and strings. */
    BT_BOOLEAN = 141,
    BT_STRING_256 = 142,
    BT_STRING_64K_MSB = 143,
    BT_STRING_64K_LSB = 144,

    /* Names taken from the system / user name tables. */
    BT_LITNAME_SYSTEM = 145,
    BT_EXECNAME_SYSTEM = 146,
    BT_LITNAME_USER = 147,
    BT_EXECNAME_USER = 148,

    /* Homogeneous number arrays. */
    BT_NUM_ARRAY = 149
} bin_token_type_t;

/* True iff btype is one of the four binary object sequence types. */
#define BT_IS_SEQ(btype) (((btype) & ~3) == BT_SEQ)
67 | | |
68 | 0 | #define MIN_BIN_TOKEN_TYPE 128 |
69 | | #define MAX_BIN_TOKEN_TYPE 159 |
70 | | #define NUM_BIN_TOKEN_TYPES (MAX_BIN_TOKEN_TYPE - MIN_BIN_TOKEN_TYPE + 1) |
71 | | |
72 | | /* Define the number of required initial bytes for binary tokens. */ |
73 | | static const byte bin_token_bytes[NUM_BIN_TOKEN_TYPES] = |
74 | | { |
75 | | 4, 4, 4, 4, 5, 5, 3, 3, 2, 2, 5, 5, 5, |
76 | | 2, 2, 3, 3, 2, 2, 2, 2, 4, |
77 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* undefined */ |
78 | | }; |
79 | | |
80 | | /* Define the number formats for those binary tokens that need them. */ |
81 | | static const byte bin_token_num_formats[NUM_BIN_TOKEN_TYPES] = |
82 | | { |
83 | | num_msb + num_float_IEEE, /* BT_SEQ_IEEE_MSB */ |
84 | | num_lsb + num_float_IEEE, /* BT_SEQ_IEEE_LSB */ |
85 | | #if ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS |
86 | | /* Treat native floats like IEEE floats for byte swapping. */ |
87 | | num_msb + num_float_IEEE, /* BT_SEQ_NATIVE_MSB */ |
88 | | num_lsb + num_float_IEEE, /* BT_SEQ_NATIVE_LSB */ |
89 | | #else |
90 | | num_msb + num_float_native, /* BT_SEQ_NATIVE_MSB */ |
91 | | num_lsb + num_float_native, /* BT_SEQ_NATIVE_LSB */ |
92 | | #endif |
93 | | num_msb + num_int32, /* BT_INT32_MSB */ |
94 | | num_lsb + num_int32, /* BT_INT32_LSB */ |
95 | | num_msb + num_int16, /* BT_INT16_MSB */ |
96 | | num_lsb + num_int16, /* BT_INT16_LSB */ |
97 | | 0, /* BT_INT8, not used */ |
98 | | 0, /* BT_FIXED, not used */ |
99 | | num_msb + num_float_IEEE, /* BT_FLOAT_IEEE_MSB */ |
100 | | num_lsb + num_float_IEEE, /* BT_FLOAT_IEEE_LSB */ |
101 | | num_float_native, /* BT_FLOAT_NATIVE */ |
102 | | 0, /* BT_BOOLEAN, not used */ |
103 | | 0, /* BT_STRING_256, not used */ |
104 | | num_msb, /* BT_STRING_64K_MSB */ |
105 | | num_lsb /* BT_STRING_64K_LSB */ |
106 | | /* rest not used */ |
107 | | }; |
108 | | |
/*
 * Element types that may appear in the type byte of an object inside a
 * binary object sequence.
 */
typedef enum {
    BS_TYPE_NULL = 0,
    BS_TYPE_INTEGER = 1,
    BS_TYPE_REAL = 2,
    BS_TYPE_NAME = 3,
    BS_TYPE_BOOLEAN = 4,
    BS_TYPE_STRING = 5,
    BS_TYPE_EVAL_NAME = 6,
    BS_TYPE_ARRAY = 9,
    BS_TYPE_MARK = 10,

    /*
     * Extension to the PostScript language definition: dictionaries are
     * allowed in binary object sequences.  A dictionary is encoded like
     * an array, except that:
     *   - an even size means the value is the index of the first of a
     *     series of alternating keys and values;
     *   - a size of 1 means the value is the index of another object
     *     (which must itself be a dictionary whose size is not 1), and
     *     this object stands for that same dictionary.
     */
    BS_TYPE_DICTIONARY = 15
} bin_seq_type_t;
133 | | |
134 | 0 | #define BS_EXECUTABLE 128 |
135 | 0 | #define SIZEOF_BIN_SEQ_OBJ ((uint)8) |
136 | | |
137 | | /* Forward references */ |
138 | | static int scan_bos(i_ctx_t *, ref *, scanner_state *); |
139 | | static void scan_bos_error(scanner_state *, const char *); |
140 | | static int scan_bin_scalar(i_ctx_t *, ref *, scanner_state *); |
141 | | static int scan_bin_get_name(scanner_state *, const gs_memory_t *mem, const ref *, int, ref *, const char *); |
142 | | static int scan_bin_num_array_continue(i_ctx_t *, ref *, scanner_state *); |
143 | | static int scan_bin_string_continue(i_ctx_t *, ref *, scanner_state *); |
144 | | static int scan_bos_continue(i_ctx_t *, ref *, scanner_state *); |
145 | | static byte *scan_bos_resize(i_ctx_t *, scanner_state *, uint, uint); |
146 | | static int scan_bos_string_continue(i_ctx_t *, ref *, scanner_state *); |
147 | | |
148 | | /* Scan a binary token. Called from the main scanner */ |
149 | | /* when it encounters an ASCII code 128-159, */ |
150 | | /* if binary tokens are being recognized (object format != 0). */ |
151 | | int |
152 | | scan_binary_token(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate) |
153 | 0 | { |
154 | 0 | stream *const s = pstate->s_file.value.pfile; |
155 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
156 | 0 | s_declare_inline(s, p, rlimit); |
157 | 0 | int btype, code; |
158 | 0 | uint wanted; |
159 | |
|
160 | 0 | s_begin_inline(s, p, rlimit); |
161 | 0 | pbs->token_type = btype = *p; |
162 | 0 | wanted = bin_token_bytes[btype - MIN_BIN_TOKEN_TYPE] - 1; |
163 | 0 | if (rlimit - p < wanted) { |
164 | 0 | s_end_inline(s, p - 1, rlimit); |
165 | 0 | pstate->s_scan_type = scanning_none; |
166 | 0 | code = scan_Refill; |
167 | 0 | } else { |
168 | 0 | pbs->num_format = bin_token_num_formats[btype - MIN_BIN_TOKEN_TYPE]; |
169 | 0 | if (BT_IS_SEQ(btype)) |
170 | 0 | code = scan_bos(i_ctx_p, pref, pstate); |
171 | 0 | else |
172 | 0 | code = scan_bin_scalar(i_ctx_p, pref, pstate); |
173 | 0 | } |
174 | 0 | if (code == scan_Refill && s->end_status == EOFC) |
175 | 0 | code = gs_note_error(gs_error_syntaxerror); |
176 | 0 | if (code < 0 && pstate->s_error.string[0] == 0) |
177 | 0 | snprintf(pstate->s_error.string, sizeof(pstate->s_error.string), |
178 | 0 | "binary token, type=%d", btype); |
179 | 0 | return code; |
180 | 0 | } |
181 | | |
182 | | /* Scan a binary object sequence. */ |
183 | | static int |
184 | | scan_bos(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate) |
185 | 0 | { |
186 | 0 | stream *const s = pstate->s_file.value.pfile; |
187 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
188 | 0 | s_declare_inline(s, p, rlimit); |
189 | 0 | int num_format = pbs->num_format; |
190 | 0 | int code; |
191 | |
|
192 | 0 | s_begin_inline(s, p, rlimit); |
193 | 0 | { |
194 | 0 | uint rcnt = rlimit - p; |
195 | 0 | uint top_size = p[1]; |
196 | 0 | uint hsize, size; |
197 | |
|
198 | 0 | if (top_size == 0) { |
199 | | /* Extended header (2-byte array size, 4-byte length) */ |
200 | |
|
201 | 0 | if (rcnt < 7) { |
202 | 0 | s_end_inline(s, p - 1, rlimit); |
203 | 0 | pstate->s_scan_type = scanning_none; |
204 | 0 | return scan_Refill; |
205 | 0 | } |
206 | 0 | pbs->top_size = top_size = sdecodeushort(p + 2, num_format); |
207 | 0 | pbs->lsize = size = sdecodeint32(p + 4, num_format); |
208 | 0 | hsize = 8; |
209 | 0 | } else { |
210 | | /* Normal header (1-byte array size, 2-byte length). */ |
211 | | /* We already checked rcnt >= 3. */ |
212 | 0 | pbs->top_size = top_size; |
213 | 0 | pbs->lsize = size = sdecodeushort(p + 2, num_format); |
214 | 0 | hsize = 4; |
215 | 0 | } |
216 | 0 | if (size < hsize || (size - hsize) >> 3 < top_size) { |
217 | 0 | scan_bos_error(pstate, "sequence too short"); |
218 | 0 | return_error(gs_error_syntaxerror); /* size too small */ |
219 | 0 | } |
220 | 0 | { /* Preliminary syntax check to avoid potentialy large |
221 | | * memory allocation on junk data. Bug 688833 |
222 | | */ |
223 | 0 | const unsigned char *q, *rend = p + hsize + top_size*8; |
224 | |
|
225 | 0 | if (rend > rlimit) |
226 | 0 | rend = rlimit; |
227 | 0 | for (q = p + hsize + 1; q < rend; q += 8) { |
228 | 0 | int c = q[-1] & 0x7f; |
229 | 0 | if (c > 10 && c != BS_TYPE_DICTIONARY) { |
230 | 0 | scan_bos_error(pstate, "invalid object type"); |
231 | 0 | return_error(gs_error_syntaxerror); |
232 | 0 | } |
233 | 0 | if (*q != 0) { |
234 | 0 | scan_bos_error(pstate, "non-zero unused field"); |
235 | 0 | return_error(gs_error_syntaxerror); |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } |
239 | | /* |
240 | | * Preallocate an array large enough for the worst case, |
241 | | * namely, all objects and no strings. Note that we must |
242 | | * divide size by 8, not sizeof(ref), since array elements |
243 | | * in binary tokens always occupy 8 bytes regardless of the |
244 | | * size of a ref. |
245 | | */ |
246 | 0 | code = ialloc_ref_array(&pbs->bin_array, |
247 | 0 | a_all + a_executable, size / 8, |
248 | 0 | "binary object sequence(objects)"); |
249 | 0 | if (code < 0) |
250 | 0 | return code; |
251 | 0 | p += hsize - 1; |
252 | 0 | size -= hsize; |
253 | 0 | s_end_inline(s, p, rlimit); |
254 | 0 | pbs->max_array_index = pbs->top_size = top_size; |
255 | 0 | pbs->min_string_index = pbs->size = size; |
256 | 0 | pbs->index = 0; |
257 | 0 | pstate->s_da.is_dynamic = false; |
258 | 0 | pstate->s_da.base = pstate->s_da.next = |
259 | 0 | pstate->s_da.limit = pstate->s_da.buf; |
260 | 0 | code = scan_bos_continue(i_ctx_p, pref, pstate); |
261 | 0 | if ((code == scan_Refill || code < 0) && pbs->index < r_size(&pbs->bin_array)) { |
262 | | /* Clean up array for GC. */ |
263 | 0 | uint index = pbs->index; |
264 | |
|
265 | 0 | refset_null(pbs->bin_array.value.refs + index, |
266 | 0 | r_size(&pbs->bin_array) - index); |
267 | 0 | } |
268 | 0 | return code; |
269 | 0 | } |
270 | 0 | } |
271 | | |
272 | | /* Report an error in a binary object sequence. */ |
273 | | static void |
274 | | scan_bos_error(scanner_state *pstate, const char *msg) |
275 | 0 | { |
276 | 0 | snprintf(pstate->s_error.string, sizeof(pstate->s_error.string), |
277 | 0 | "bin obj seq, type=%d, elements=%u, size=%lu, %s", |
278 | 0 | pstate->s_ss.binary.token_type, |
279 | 0 | pstate->s_ss.binary.top_size, |
280 | 0 | pstate->s_ss.binary.lsize, msg); |
281 | 0 | } |
282 | | |
283 | | /* Scan a non-sequence binary token. */ |
284 | | static int |
285 | | scan_bin_scalar(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate) |
286 | 0 | { |
287 | 0 | stream *const s = pstate->s_file.value.pfile; |
288 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
289 | 0 | s_declare_inline(s, p, rlimit); |
290 | 0 | int num_format = pbs->num_format, code; |
291 | 0 | uint wanted, arg; |
292 | |
|
293 | 0 | s_begin_inline(s, p, rlimit); |
294 | 0 | wanted = bin_token_bytes[*p - MIN_BIN_TOKEN_TYPE] - 1; |
295 | 0 | switch (*p) { |
296 | 0 | case BT_INT8: |
297 | 0 | make_int(pref, (p[1] ^ 128) - 128); |
298 | 0 | s_end_inline(s, p + 1, rlimit); |
299 | 0 | return 0; |
300 | 0 | case BT_FIXED: |
301 | 0 | num_format = p[1]; |
302 | 0 | if (!num_is_valid(num_format)) |
303 | 0 | return_error(gs_error_syntaxerror); |
304 | 0 | wanted = 1 + encoded_number_bytes(num_format); |
305 | 0 | if (rlimit - p < wanted) { |
306 | 0 | s_end_inline(s, p - 1, rlimit); |
307 | 0 | pstate->s_scan_type = scanning_none; |
308 | 0 | return scan_Refill; |
309 | 0 | } |
310 | 0 | code = sdecode_number(p + 2, num_format, pref); |
311 | 0 | goto rnum; |
312 | 0 | case BT_INT32_MSB: |
313 | 0 | case BT_INT32_LSB: |
314 | 0 | case BT_INT16_MSB: |
315 | 0 | case BT_INT16_LSB: |
316 | 0 | case BT_FLOAT_IEEE_MSB: |
317 | 0 | case BT_FLOAT_IEEE_LSB: |
318 | 0 | case BT_FLOAT_NATIVE: |
319 | 0 | code = sdecode_number(p + 1, num_format, pref); |
320 | 0 | rnum: |
321 | 0 | switch (code) { |
322 | 0 | case t_integer: |
323 | 0 | case t_real: |
324 | 0 | r_set_type(pref, code); |
325 | 0 | break; |
326 | 0 | case t_null: |
327 | 0 | return_error(gs_error_syntaxerror); |
328 | 0 | default: |
329 | 0 | return code; |
330 | 0 | } |
331 | 0 | s_end_inline(s, p + wanted, rlimit); |
332 | 0 | return 0; |
333 | 0 | case BT_BOOLEAN: |
334 | 0 | arg = p[1]; |
335 | 0 | if (arg & ~1) |
336 | 0 | return_error(gs_error_syntaxerror); |
337 | 0 | make_bool(pref, arg); |
338 | 0 | s_end_inline(s, p + 1, rlimit); |
339 | 0 | return 0; |
340 | 0 | case BT_STRING_256: |
341 | 0 | arg = *++p; |
342 | 0 | goto str; |
343 | 0 | case BT_STRING_64K_MSB: |
344 | 0 | case BT_STRING_64K_LSB: |
345 | 0 | arg = sdecodeushort(p + 1, num_format); |
346 | 0 | p += 2; |
347 | 0 | str: |
348 | 0 | if (s->foreign && rlimit - p >= arg) { |
349 | | /* |
350 | | * Reference the string directly in the buffer. It is |
351 | | * marked writable for consistency with the non-direct |
352 | | * case, but since the "buffer" may be data compiled into |
353 | | * the executable, it is probably actually read-only. |
354 | | */ |
355 | 0 | s_end_inline(s, p, rlimit); |
356 | 0 | make_const_string(pref, a_all | avm_foreign, arg, sbufptr(s)); |
357 | 0 | return sbufskip(s, arg); |
358 | 0 | } else { |
359 | 0 | byte *str = ialloc_string(arg, "string token"); |
360 | |
|
361 | 0 | if (str == 0) |
362 | 0 | return_error(gs_error_VMerror); |
363 | 0 | s_end_inline(s, p, rlimit); |
364 | 0 | pstate->s_da.base = pstate->s_da.next = str; |
365 | 0 | pstate->s_da.limit = str + arg; |
366 | 0 | code = scan_bin_string_continue(i_ctx_p, pref, pstate); |
367 | 0 | if (code == scan_Refill || code < 0) { |
368 | 0 | pstate->s_da.is_dynamic = true; |
369 | 0 | make_null(&pbs->bin_array); /* clean up for GC */ |
370 | 0 | pbs->cont = scan_bin_string_continue; |
371 | 0 | } |
372 | 0 | return code; |
373 | 0 | } |
374 | 0 | case BT_LITNAME_SYSTEM: |
375 | 0 | code = scan_bin_get_name(pstate, imemory, system_names_p, p[1], |
376 | 0 | pref, "system"); |
377 | 0 | goto lname; |
378 | 0 | case BT_EXECNAME_SYSTEM: |
379 | 0 | code = scan_bin_get_name(pstate, imemory, system_names_p, p[1], |
380 | 0 | pref, "system"); |
381 | 0 | goto xname; |
382 | 0 | case BT_LITNAME_USER: |
383 | 0 | code = scan_bin_get_name(pstate, imemory, user_names_p, p[1], |
384 | 0 | pref, "user"); |
385 | 0 | lname: |
386 | 0 | if (code < 0) |
387 | 0 | return code; |
388 | 0 | s_end_inline(s, p + 1, rlimit); |
389 | 0 | return 0; |
390 | 0 | case BT_EXECNAME_USER: |
391 | 0 | code = scan_bin_get_name(pstate, imemory, user_names_p, p[1], |
392 | 0 | pref, "user"); |
393 | 0 | xname: |
394 | 0 | if (code < 0) |
395 | 0 | return code; |
396 | 0 | r_set_attrs(pref, a_executable); |
397 | 0 | s_end_inline(s, p + 1, rlimit); |
398 | 0 | return 0; |
399 | 0 | case BT_NUM_ARRAY: |
400 | 0 | num_format = p[1]; |
401 | 0 | if (!num_is_valid(num_format)) |
402 | 0 | return_error(gs_error_syntaxerror); |
403 | 0 | arg = sdecodeushort(p + 2, num_format); |
404 | 0 | code = ialloc_ref_array(&pbs->bin_array, a_all, arg, |
405 | 0 | "number array token"); |
406 | 0 | if (code < 0) |
407 | 0 | return code; |
408 | 0 | pbs->num_format = num_format; |
409 | 0 | pbs->index = 0; |
410 | 0 | p += 3; |
411 | 0 | s_end_inline(s, p, rlimit); |
412 | 0 | code = scan_bin_num_array_continue(i_ctx_p, pref, pstate); |
413 | 0 | if (code == scan_Refill || code < 0) { |
414 | | /* Make sure the array is clean for the GC. */ |
415 | 0 | refset_null(pbs->bin_array.value.refs + pbs->index, |
416 | 0 | arg - pbs->index); |
417 | 0 | pbs->cont = scan_bin_num_array_continue; |
418 | 0 | } |
419 | 0 | return code; |
420 | 0 | } |
421 | 0 | return_error(gs_error_syntaxerror); |
422 | 0 | } |
423 | | |
424 | | /* Get a system or user name. */ |
425 | | static int |
426 | | scan_bin_get_name(scanner_state *pstate, const gs_memory_t *mem, |
427 | | const ref *pnames /*t_array*/, int index, ref *pref, |
428 | | const char *usstring) |
429 | 0 | { |
430 | | /* Convert all errors to gs_error_undefined to match Adobe. */ |
431 | 0 | if (pnames == 0 || array_get(mem, pnames, (long)index, pref) < 0 || |
432 | 0 | !r_has_type(pref, t_name)) { |
433 | 0 | snprintf(pstate->s_error.string, |
434 | 0 | sizeof(pstate->s_error.string), |
435 | 0 | "%s%d", usstring, index); |
436 | 0 | pstate->s_error.is_name = true; |
437 | 0 | return_error(gs_error_undefined); |
438 | 0 | } |
439 | 0 | return 0; |
440 | 0 | } |
441 | | |
442 | | /* Continue collecting a binary string. */ |
443 | | static int |
444 | | scan_bin_string_continue(i_ctx_t *i_ctx_p, ref * pref, scanner_state * pstate) |
445 | 0 | { |
446 | 0 | stream *const s = pstate->s_file.value.pfile; |
447 | 0 | byte *q = pstate->s_da.next; |
448 | 0 | uint wanted = pstate->s_da.limit - q; |
449 | 0 | uint rcnt; |
450 | | |
451 | | /* We don't check the return status from 'sgets' here. |
452 | | If there is an error in sgets, the condition rcnt==wanted |
453 | | would be false and this function will return scan_Refill. |
454 | | */ |
455 | 0 | sgets(s, q, wanted, &rcnt); |
456 | 0 | if (rcnt == wanted) { |
457 | | /* Finished collecting the string. */ |
458 | 0 | make_string(pref, a_all | icurrent_space, |
459 | 0 | pstate->s_da.limit - pstate->s_da.base, |
460 | 0 | pstate->s_da.base); |
461 | 0 | return 0; |
462 | 0 | } |
463 | 0 | pstate->s_da.next = q + rcnt; |
464 | 0 | pstate->s_scan_type = scanning_binary; |
465 | 0 | return scan_Refill; |
466 | 0 | } |
467 | | |
468 | | /* Continue scanning a binary number array. */ |
469 | | static int |
470 | | scan_bin_num_array_continue(i_ctx_t *i_ctx_p, ref * pref, |
471 | | scanner_state * pstate) |
472 | 0 | { |
473 | 0 | stream *const s = pstate->s_file.value.pfile; |
474 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
475 | 0 | uint index = pbs->index; |
476 | 0 | ref *np = pbs->bin_array.value.refs + index; |
477 | 0 | uint wanted = encoded_number_bytes(pbs->num_format); |
478 | |
|
479 | 0 | for (; index < r_size(&pbs->bin_array); index++, np++) { |
480 | 0 | int code; |
481 | |
|
482 | 0 | if (sbufavailable(s) < wanted) { |
483 | 0 | pbs->index = index; |
484 | 0 | pstate->s_scan_type = scanning_binary; |
485 | 0 | return scan_Refill; |
486 | 0 | } |
487 | 0 | code = sdecode_number(sbufptr(s), pbs->num_format, np); |
488 | 0 | switch (code) { |
489 | 0 | case t_integer: |
490 | 0 | case t_real: |
491 | 0 | r_set_type(np, code); |
492 | 0 | (void)sbufskip(s, wanted); |
493 | 0 | break; |
494 | 0 | case t_null: |
495 | 0 | scan_bos_error(pstate, "bad number format"); |
496 | 0 | return_error(gs_error_syntaxerror); |
497 | 0 | default: |
498 | 0 | return code; |
499 | 0 | } |
500 | 0 | } |
501 | 0 | *pref = pbs->bin_array; |
502 | 0 | return 0; |
503 | 0 | } |
504 | | |
505 | | /* |
506 | | * Continue scanning a binary object sequence. We preallocated space for |
507 | | * the largest possible number of objects, but not for strings, since |
508 | | * the latter would probably be a gross over-estimate. Instead, |
509 | | * we wait until we see the first string or name, and allocate string space |
510 | | * based on the hope that its string index is the smallest one we will see. |
511 | | * If this turns out to be wrong, we may have to reallocate, and adjust |
512 | | * all the pointers. |
513 | | */ |
514 | | static int |
515 | | scan_bos_continue(i_ctx_t *i_ctx_p, ref * pref, scanner_state * pstate) |
516 | 0 | { |
517 | 0 | stream *const s = pstate->s_file.value.pfile; |
518 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
519 | 0 | s_declare_inline(s, p, rlimit); |
520 | 0 | uint max_array_index = pbs->max_array_index; |
521 | 0 | uint min_string_index = pbs->min_string_index; |
522 | 0 | int num_format = pbs->num_format; |
523 | 0 | uint index = pbs->index; |
524 | 0 | uint size = pbs->size; |
525 | 0 | ref *abase = pbs->bin_array.value.refs; |
526 | 0 | int code; |
527 | |
|
528 | 0 | pbs->cont = scan_bos_continue; /* in case of premature return */ |
529 | 0 | s_begin_inline(s, p, rlimit); |
530 | 0 | for (; index < max_array_index; p += SIZEOF_BIN_SEQ_OBJ, index++) { |
531 | 0 | ref *op = abase + index; |
532 | 0 | uint osize; |
533 | 0 | int value, atype, attrs; |
534 | |
|
535 | 0 | s_end_inline(s, p, rlimit); /* in case of error */ |
536 | 0 | if (rlimit - p < SIZEOF_BIN_SEQ_OBJ) { |
537 | 0 | pbs->index = index; |
538 | 0 | pbs->max_array_index = max_array_index; |
539 | 0 | pbs->min_string_index = min_string_index; |
540 | 0 | pstate->s_scan_type = scanning_binary; |
541 | 0 | return scan_Refill; |
542 | 0 | } |
543 | 0 | if (p[2] != 0) { /* reserved, must be 0 */ |
544 | 0 | scan_bos_error(pstate, "non-zero unused field"); |
545 | 0 | return_error(gs_error_syntaxerror); |
546 | 0 | } |
547 | 0 | attrs = (p[1] & 128 ? a_executable : 0); |
548 | | /* |
549 | | * We always decode all 8 bytes of the object, so we can signal |
550 | | * syntaxerror if any unused field is non-zero (per PLRM). |
551 | | */ |
552 | 0 | osize = sdecodeushort(p + 3, num_format); |
553 | 0 | value = sdecodeint32(p + 5, num_format); |
554 | 0 | switch (p[1] & 0x7f) { |
555 | 0 | case BS_TYPE_NULL: |
556 | 0 | if (osize | value) { /* unused */ |
557 | 0 | scan_bos_error(pstate, "non-zero unused field"); |
558 | 0 | return_error(gs_error_syntaxerror); |
559 | 0 | } |
560 | 0 | make_null(op); |
561 | 0 | break; |
562 | 0 | case BS_TYPE_INTEGER: |
563 | 0 | if (osize) { /* unused */ |
564 | 0 | scan_bos_error(pstate, "non-zero unused field"); |
565 | 0 | return_error(gs_error_syntaxerror); |
566 | 0 | } |
567 | 0 | make_int(op, value); |
568 | 0 | break; |
569 | 0 | case BS_TYPE_REAL:{ |
570 | 0 | float vreal; |
571 | |
|
572 | 0 | if (osize != 0) { /* fixed-point number */ |
573 | 0 | if (osize > 31) { |
574 | 0 | scan_bos_error(pstate, "invalid number format"); |
575 | 0 | return_error(gs_error_syntaxerror); |
576 | 0 | } |
577 | | /* ldexp requires a signed 2nd argument.... */ |
578 | 0 | vreal = (float)ldexp((double)value, -(int)osize); |
579 | 0 | } else { |
580 | 0 | code = sdecode_float(p + 5, num_format, &vreal); |
581 | 0 | if (code < 0) { |
582 | 0 | scan_bos_error(pstate, "invalid real number"); |
583 | 0 | return code; |
584 | 0 | } |
585 | 0 | } |
586 | 0 | make_real(op, vreal); |
587 | 0 | break; |
588 | 0 | } |
589 | 0 | case BS_TYPE_BOOLEAN: |
590 | 0 | if (osize) { /* unused */ |
591 | 0 | scan_bos_error(pstate, "non-zero unused field"); |
592 | 0 | return_error(gs_error_syntaxerror); |
593 | 0 | } |
594 | 0 | make_bool(op, value != 0); |
595 | 0 | break; |
596 | 0 | case BS_TYPE_STRING: |
597 | 0 | attrs |= a_all; |
598 | 0 | str: |
599 | 0 | if (osize == 0) { |
600 | | /* For zero-length strings, the offset */ |
601 | | /* doesn't matter, and may be zero. */ |
602 | 0 | make_empty_string(op, attrs); |
603 | 0 | break; |
604 | 0 | } |
605 | 0 | if (value < (int)(max_array_index * SIZEOF_BIN_SEQ_OBJ) || |
606 | 0 | value + osize > size |
607 | 0 | ) { |
608 | 0 | scan_bos_error(pstate, "invalid string offset"); |
609 | 0 | return_error(gs_error_syntaxerror); |
610 | 0 | } |
611 | 0 | if (value < (int)min_string_index) { |
612 | | /* We have to (re)allocate the strings. */ |
613 | 0 | uint str_size = size - value; |
614 | 0 | byte *sbase; |
615 | |
|
616 | 0 | if (pstate->s_da.is_dynamic) |
617 | 0 | sbase = scan_bos_resize(i_ctx_p, pstate, str_size, |
618 | 0 | index); |
619 | 0 | else |
620 | 0 | sbase = ialloc_string(str_size, |
621 | 0 | "bos strings"); |
622 | 0 | if (sbase == 0) |
623 | 0 | return_error(gs_error_VMerror); |
624 | 0 | pstate->s_da.is_dynamic = true; |
625 | 0 | pstate->s_da.base = pstate->s_da.next = sbase; |
626 | 0 | pstate->s_da.limit = sbase + str_size; |
627 | 0 | min_string_index = value; |
628 | 0 | } |
629 | 0 | make_string(op, attrs | icurrent_space, osize, |
630 | 0 | pstate->s_da.base + |
631 | 0 | (value - min_string_index)); |
632 | 0 | break; |
633 | 0 | case BS_TYPE_EVAL_NAME: |
634 | 0 | attrs |= a_readonly; /* mark as executable for later */ |
635 | | /* falls through */ |
636 | 0 | case BS_TYPE_NAME: |
637 | 0 | switch (osize) { |
638 | 0 | case 0: |
639 | 0 | code = scan_bin_get_name(pstate, imemory, |
640 | 0 | user_names_p, value, op, |
641 | 0 | "user"); |
642 | 0 | goto usn; |
643 | 0 | case 0xffff: |
644 | 0 | code = scan_bin_get_name(pstate, imemory, |
645 | 0 | system_names_p, value, op, |
646 | 0 | "system"); |
647 | 0 | usn: |
648 | 0 | if (code < 0) |
649 | 0 | return code; |
650 | 0 | r_set_attrs(op, attrs); |
651 | 0 | break; |
652 | 0 | default: |
653 | 0 | goto str; |
654 | 0 | } |
655 | 0 | break; |
656 | 0 | case BS_TYPE_ARRAY: |
657 | 0 | atype = t_array; |
658 | 0 | arr: |
659 | 0 | if (value + osize > (int)min_string_index || |
660 | 0 | value & (SIZEOF_BIN_SEQ_OBJ - 1) |
661 | 0 | ) { |
662 | 0 | scan_bos_error(pstate, "bad array offset"); |
663 | 0 | return_error(gs_error_syntaxerror); |
664 | 0 | } |
665 | 0 | if (osize > (size / 8)) { |
666 | 0 | scan_bos_error(pstate, "bad array length"); |
667 | 0 | return_error(gs_error_syntaxerror); |
668 | 0 | } |
669 | 0 | { |
670 | 0 | uint aindex = value / SIZEOF_BIN_SEQ_OBJ; |
671 | |
|
672 | 0 | max_array_index = |
673 | 0 | max(max_array_index, aindex + osize); |
674 | 0 | make_tasv_new(op, atype, |
675 | 0 | attrs | a_all | icurrent_space, |
676 | 0 | osize, refs, abase + aindex); |
677 | 0 | } |
678 | 0 | break; |
679 | 0 | case BS_TYPE_DICTIONARY: /* EXTENSION */ |
680 | 0 | if ((osize & 1) != 0 && osize != 1) |
681 | 0 | return_error(gs_error_syntaxerror); |
682 | 0 | atype = t_mixedarray; /* mark as dictionary */ |
683 | 0 | goto arr; |
684 | 0 | case BS_TYPE_MARK: |
685 | 0 | if (osize | value) { /* unused */ |
686 | 0 | scan_bos_error(pstate, "non-zero unused field"); |
687 | 0 | return_error(gs_error_syntaxerror); |
688 | 0 | } |
689 | 0 | make_mark(op); |
690 | 0 | break; |
691 | 0 | default: |
692 | 0 | scan_bos_error(pstate, "invalid object type"); |
693 | 0 | return_error(gs_error_syntaxerror); |
694 | 0 | } |
695 | 0 | } |
696 | 0 | s_end_inline(s, p, rlimit); |
697 | | /* Shorten the objects to remove the space that turned out */ |
698 | | /* to be used for strings. */ |
699 | 0 | pbs->index = max_array_index; |
700 | 0 | iresize_ref_array(&pbs->bin_array, max_array_index, |
701 | 0 | "binary object sequence(objects)"); |
702 | 0 | code = scan_bos_string_continue(i_ctx_p, pref, pstate); |
703 | 0 | if (code == scan_Refill) |
704 | 0 | pbs->cont = scan_bos_string_continue; |
705 | 0 | return code; |
706 | 0 | } |
707 | | |
708 | | /* Reallocate the strings for a binary object sequence, */ |
709 | | /* adjusting all the pointers to them from objects. */ |
710 | | static byte * |
711 | | scan_bos_resize(i_ctx_t *i_ctx_p, scanner_state * pstate, uint new_size, |
712 | | uint index) |
713 | 0 | { |
714 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
715 | 0 | uint old_size = da_size(&pstate->s_da); |
716 | 0 | byte *old_base = pstate->s_da.base; |
717 | 0 | byte *new_base = iresize_string(old_base, old_size, new_size, |
718 | 0 | "scan_bos_resize"); |
719 | 0 | byte *relocated_base = new_base + (new_size - old_size); |
720 | 0 | uint i; |
721 | 0 | ref *aptr = pbs->bin_array.value.refs; |
722 | |
|
723 | 0 | if (new_base == 0) |
724 | 0 | return 0; |
725 | | /* Since the allocator normally extends strings downward, */ |
726 | | /* it's quite possible that new and old addresses are the same. */ |
727 | 0 | if (relocated_base != old_base) |
728 | 0 | for (i = index; i != 0; i--, aptr++) |
729 | 0 | if (r_has_type(aptr, t_string) && r_size(aptr) != 0) |
730 | 0 | aptr->value.bytes = |
731 | 0 | aptr->value.bytes - old_base + relocated_base; |
732 | 0 | return new_base; |
733 | 0 | } |
734 | | |
735 | | /* Continue reading the strings for a binary object sequence. */ |
736 | | static int |
737 | | scan_bos_string_continue(i_ctx_t *i_ctx_p, ref * pref, |
738 | | scanner_state * pstate) |
739 | 0 | { |
740 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
741 | 0 | ref rstr; |
742 | 0 | ref *op; |
743 | 0 | int code = scan_bin_string_continue(i_ctx_p, &rstr, pstate); |
744 | 0 | uint space = ialloc_space(idmemory); |
745 | 0 | bool rescan = false; |
746 | 0 | uint i; |
747 | |
|
748 | 0 | if (code != 0) |
749 | 0 | return code; |
750 | | |
751 | | /* Fix up names. We must do this before creating dictionaries. */ |
752 | | |
753 | 0 | for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array); |
754 | 0 | i != 0; i--, op++ |
755 | 0 | ) |
756 | 0 | switch (r_type(op)) { |
757 | 0 | case t_string: |
758 | 0 | if (r_has_attr(op, a_write)) /* a real string */ |
759 | 0 | break; |
760 | | /* This is actually a name; look it up now. */ |
761 | 0 | { |
762 | 0 | uint attrs = r_type_attrs(op) & (a_read | a_executable); |
763 | |
|
764 | 0 | code = name_ref(imemory, op->value.bytes, r_size(op), op, 1); |
765 | 0 | if (code < 0) |
766 | 0 | return code; |
767 | 0 | r_set_attrs(op, attrs); |
768 | 0 | } |
769 | | /* falls through */ |
770 | 0 | case t_name: |
771 | 0 | if (r_has_attr(op, a_read)) { /* BS_TYPE_EVAL_NAME */ |
772 | 0 | ref *defp = dict_find_name(op); |
773 | |
|
774 | 0 | if (defp == 0) |
775 | 0 | return_error(gs_error_undefined); |
776 | 0 | store_check_space(space, defp); |
777 | 0 | ref_assign(op, defp); |
778 | 0 | } |
779 | 0 | break; |
780 | 0 | case t_mixedarray: /* actually a dictionary */ |
781 | 0 | rescan = true; |
782 | 0 | } |
783 | | |
784 | | /* Create dictionaries, if any. */ |
785 | | |
786 | 0 | if (rescan) { |
787 | 0 | rescan = false; |
788 | 0 | for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array); |
789 | 0 | i != 0; i--, op++ |
790 | 0 | ) |
791 | 0 | switch (r_type(op)) { |
792 | 0 | case t_mixedarray: /* actually a dictionary */ |
793 | 0 | { |
794 | 0 | uint count = r_size(op); |
795 | 0 | ref rdict; |
796 | |
|
797 | 0 | if (count == 1) { |
798 | | /* Indirect reference. */ |
799 | 0 | if (op->value.refs < op) |
800 | 0 | ref_assign(&rdict, op->value.refs); |
801 | 0 | else { |
802 | 0 | rescan = true; |
803 | 0 | continue; |
804 | 0 | } |
805 | 0 | } else { |
806 | 0 | code = dict_create(count >> 1, &rdict); |
807 | 0 | if (code < 0) |
808 | 0 | return code; |
809 | 0 | while (count) { |
810 | 0 | count -= 2; |
811 | 0 | code = idict_put(&rdict, |
812 | 0 | &op->value.refs[count], |
813 | 0 | &op->value.refs[count + 1]); |
814 | 0 | if (code < 0) |
815 | 0 | return code; |
816 | 0 | } |
817 | 0 | } |
818 | 0 | r_set_attrs(&rdict, a_all); |
819 | 0 | r_copy_attrs(&rdict, a_executable, op); |
820 | 0 | ref_assign(op, &rdict); |
821 | 0 | } |
822 | 0 | break; |
823 | 0 | } |
824 | 0 | } |
825 | | |
826 | | /* If there were any forward indirect references, fix them up now. */ |
827 | | |
828 | 0 | if (rescan) |
829 | 0 | for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array); |
830 | 0 | i != 0; i--, op++ |
831 | 0 | ) |
832 | 0 | if (r_has_type(op, t_mixedarray)) { |
833 | 0 | const ref *piref = op->value.const_refs; |
834 | 0 | ref rdict; |
835 | |
|
836 | 0 | if (r_has_type(piref, t_mixedarray)) /* ref to indirect */ |
837 | 0 | return_error(gs_error_syntaxerror); |
838 | 0 | ref_assign(&rdict, piref); |
839 | 0 | r_copy_attrs(&rdict, a_executable, op); |
840 | 0 | ref_assign(op, &rdict); |
841 | 0 | } |
842 | | |
843 | 0 | ref_assign(pref, &pbs->bin_array); |
844 | 0 | r_set_size(pref, pbs->top_size); |
845 | 0 | return scan_BOS; |
846 | 0 | } |
847 | | |
848 | | /* ---------------- Writing ---------------- */ |
849 | | |
850 | | /* |
851 | | * Encode a single object for a binary object sequence, for printobject and |
852 | | * write object. Note that this does not modify the always-unused byte (1), |
853 | | * but it always write bytes 0 and 2-7. |
854 | | */ |
855 | | int |
856 | | encode_binary_token(i_ctx_t *i_ctx_p, const ref *obj, ps_int *ref_offset, |
857 | | ps_int *char_offset, byte *str) |
858 | 0 | { |
859 | 0 | bin_seq_type_t type; |
860 | 0 | uint size = 0; |
861 | 0 | int format = (int)ref_binary_object_format.value.intval; |
862 | 0 | ps_int value = 0; |
863 | 0 | ref nstr; |
864 | |
|
865 | 0 | switch (r_type(obj)) { |
866 | 0 | case t_null: |
867 | 0 | type = BS_TYPE_NULL; |
868 | 0 | break; /* always set all fields */ |
869 | 0 | case t_mark: |
870 | 0 | type = BS_TYPE_MARK; |
871 | 0 | break; /* always set all fields */ |
872 | 0 | case t_integer: |
873 | 0 | type = BS_TYPE_INTEGER; |
874 | 0 | value = obj->value.intval; |
875 | 0 | break; |
876 | 0 | case t_real: |
877 | 0 | type = BS_TYPE_REAL; |
878 | 0 | if (sizeof(obj->value.realval) != sizeof(int)) { |
879 | | /* The PLRM allocates exactly 4 bytes for reals. */ |
880 | 0 | return_error(gs_error_rangecheck); |
881 | 0 | } |
882 | 0 | value = *(const ps_int *)&obj->value.realval; |
883 | | #if !(ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS) |
884 | | if (format >= 3) { |
885 | | /* Never byte-swap native reals -- use native byte order. */ |
886 | | format = 4 - ARCH_IS_BIG_ENDIAN; |
887 | | } |
888 | | #endif |
889 | 0 | break; |
890 | 0 | case t_boolean: |
891 | 0 | type = BS_TYPE_BOOLEAN; |
892 | 0 | value = obj->value.boolval; |
893 | 0 | break; |
894 | 0 | case t_array: |
895 | 0 | type = BS_TYPE_ARRAY; |
896 | 0 | size = r_size(obj); |
897 | 0 | goto aod; |
898 | 0 | case t_dictionary: /* EXTENSION */ |
899 | 0 | type = BS_TYPE_DICTIONARY; |
900 | 0 | size = dict_length(obj) << 1; |
901 | 0 | aod:value = *ref_offset; |
902 | 0 | *ref_offset += size * SIZEOF_BIN_SEQ_OBJ; |
903 | 0 | break; |
904 | 0 | case t_string: |
905 | 0 | type = BS_TYPE_STRING; |
906 | 0 | nos: |
907 | 0 | size = r_size(obj); |
908 | 0 | value = *char_offset; |
909 | 0 | *char_offset += size; |
910 | 0 | break; |
911 | 0 | case t_name: |
912 | 0 | type = BS_TYPE_NAME; |
913 | 0 | name_string_ref(imemory, obj, &nstr); |
914 | 0 | r_copy_attrs(&nstr, a_executable, obj); |
915 | 0 | obj = &nstr; |
916 | 0 | goto nos; |
917 | 0 | default: |
918 | 0 | return_error(gs_error_rangecheck); |
919 | 0 | } |
920 | 0 | { |
921 | 0 | byte s0 = (byte) size, s1 = (byte) (size >> 8); |
922 | 0 | byte v0 = (byte) value, v1 = (byte) (value >> 8), |
923 | 0 | v2 = (byte) (value >> 16), v3 = (byte) (value >> 24); |
924 | |
|
925 | 0 | if (format & 1) { |
926 | | /* Store big-endian */ |
927 | 0 | str[2] = s1, str[3] = s0; |
928 | 0 | str[4] = v3, str[5] = v2, str[6] = v1, str[7] = v0; |
929 | 0 | } else { |
930 | | /* Store little-endian */ |
931 | 0 | str[2] = s0, str[3] = s1; |
932 | 0 | str[4] = v0, str[5] = v1, str[6] = v2, str[7] = v3; |
933 | 0 | } |
934 | 0 | } |
935 | 0 | if (r_has_attr(obj, a_executable)) |
936 | 0 | type += BS_EXECUTABLE; |
937 | 0 | str[0] = (byte) type; |
938 | 0 | return 0; |
939 | 0 | } |