/src/ghostpdl/psi/iscanbin.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Ghostscript binary token scanner and writer */ |
18 | | #include "math_.h" |
19 | | #include "memory_.h" |
20 | | #include "ghost.h" |
21 | | #include "gsutil.h" |
22 | | #include "gxalloc.h" /* for names_array in allocator */ |
23 | | #include "stream.h" |
24 | | #include "strimpl.h" /* for sfilter.h */ |
25 | | #include "sfilter.h" /* for iscan.h */ |
26 | | #include "ierrors.h" |
27 | | #include "ialloc.h" |
28 | | #include "iddict.h" |
29 | | #include "dstack.h" /* for immediately evaluated names */ |
30 | | #include "ostack.h" /* must precede iscan.h */ |
31 | | #include "iname.h" |
32 | | #include "iscan.h" /* for scan_Refill */ |
33 | | #include "iscanbin.h" |
34 | | #include "iutil.h" |
35 | | #include "ivmspace.h" |
36 | | #include "store.h" |
37 | | #include "btoken.h" |
38 | | #include "ibnum.h" |
39 | | |
/*
 * Define the binary token types.  These are the values of the first
 * byte of a binary token; the enumerators below run consecutively
 * from 128 through 149.
 */
typedef enum {
    BT_SEQ = 128,               /* binary object sequence: */
    BT_SEQ_IEEE_MSB = BT_SEQ,   /* 128: IEEE floats, big-endian */
    BT_SEQ_IEEE_LSB,            /* 129: IEEE float, little-endian */
    BT_SEQ_NATIVE_MSB,          /* 130: native floats, big-endian */
    BT_SEQ_NATIVE_LSB,          /* 131: native floats, little-endian */
    BT_INT32_MSB,               /* 132 */
    BT_INT32_LSB,               /* 133 */
    BT_INT16_MSB,               /* 134 */
    BT_INT16_LSB,               /* 135 */
    BT_INT8,                    /* 136 */
    BT_FIXED,                   /* 137 */
    BT_FLOAT_IEEE_MSB,          /* 138 */
    BT_FLOAT_IEEE_LSB,          /* 139 */
    BT_FLOAT_NATIVE,            /* 140 */
    BT_BOOLEAN,                 /* 141 */
    BT_STRING_256,              /* 142 */
    BT_STRING_64K_MSB,          /* 143 */
    BT_STRING_64K_LSB,          /* 144 */
    BT_LITNAME_SYSTEM,          /* 145 */
    BT_EXECNAME_SYSTEM,         /* 146 */
    BT_LITNAME_USER,            /* 147 */
    BT_EXECNAME_USER,           /* 148 */
    BT_NUM_ARRAY                /* 149 */
} bin_token_type_t;

/* True iff t is one of the four BT_SEQ_* codes (128..131). */
#define BT_IS_SEQ(t) (((t) & ~3) == BT_SEQ)
67 | | |
/*
 * Range of byte values treated as binary token type codes, and the
 * number of codes in that range.  The per-type tables below are
 * indexed by (type - MIN_BIN_TOKEN_TYPE).
 */
#define MIN_BIN_TOKEN_TYPE 128
#define MAX_BIN_TOKEN_TYPE 159
#define NUM_BIN_TOKEN_TYPES (MAX_BIN_TOKEN_TYPE - MIN_BIN_TOKEN_TYPE + 1)
71 | | |
72 | | /* Define the number of required initial bytes for binary tokens. */ |
73 | | static const byte bin_token_bytes[NUM_BIN_TOKEN_TYPES] = |
74 | | { |
75 | | 4, 4, 4, 4, 5, 5, 3, 3, 2, 2, 5, 5, 5, |
76 | | 2, 2, 3, 3, 2, 2, 2, 2, 4, |
77 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* undefined */ |
78 | | }; |
79 | | |
80 | | /* Define the number formats for those binary tokens that need them. */ |
81 | | static const byte bin_token_num_formats[NUM_BIN_TOKEN_TYPES] = |
82 | | { |
83 | | num_msb + num_float_IEEE, /* BT_SEQ_IEEE_MSB */ |
84 | | num_lsb + num_float_IEEE, /* BT_SEQ_IEEE_LSB */ |
85 | | #if ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS |
86 | | /* Treat native floats like IEEE floats for byte swapping. */ |
87 | | num_msb + num_float_IEEE, /* BT_SEQ_NATIVE_MSB */ |
88 | | num_lsb + num_float_IEEE, /* BT_SEQ_NATIVE_LSB */ |
89 | | #else |
90 | | num_msb + num_float_native, /* BT_SEQ_NATIVE_MSB */ |
91 | | num_lsb + num_float_native, /* BT_SEQ_NATIVE_LSB */ |
92 | | #endif |
93 | | num_msb + num_int32, /* BT_INT32_MSB */ |
94 | | num_lsb + num_int32, /* BT_INT32_LSB */ |
95 | | num_msb + num_int16, /* BT_INT16_MSB */ |
96 | | num_lsb + num_int16, /* BT_INT16_LSB */ |
97 | | 0, /* BT_INT8, not used */ |
98 | | 0, /* BT_FIXED, not used */ |
99 | | num_msb + num_float_IEEE, /* BT_FLOAT_IEEE_MSB */ |
100 | | num_lsb + num_float_IEEE, /* BT_FLOAT_IEEE_LSB */ |
101 | | num_float_native, /* BT_FLOAT_NATIVE */ |
102 | | 0, /* BT_BOOLEAN, not used */ |
103 | | 0, /* BT_STRING_256, not used */ |
104 | | num_msb, /* BT_STRING_64K_MSB */ |
105 | | num_lsb /* BT_STRING_64K_LSB */ |
106 | | /* rest not used */ |
107 | | }; |
108 | | |
/*
 * Binary object sequence element types.  These are the low 7 bits of
 * the first byte of each 8-byte object; values 7 and 8 are not
 * assigned here.
 *
 * Note: no trailing comma after the last enumerator, for consistency
 * with bin_token_type_t and so that the declaration is valid C90.
 */
typedef enum {
    BS_TYPE_NULL = 0,
    BS_TYPE_INTEGER = 1,
    BS_TYPE_REAL = 2,
    BS_TYPE_NAME = 3,
    BS_TYPE_BOOLEAN = 4,
    BS_TYPE_STRING = 5,
    BS_TYPE_EVAL_NAME = 6,
    BS_TYPE_ARRAY = 9,
    BS_TYPE_MARK = 10
} bin_seq_type_t;
121 | | |
/* Flag added to the element type byte when the object is executable. */
#define BS_EXECUTABLE 128
/* Size in bytes of one encoded object in a binary object sequence. */
#define SIZEOF_BIN_SEQ_OBJ ((uint)8)
124 | | |
125 | | /* Forward references */ |
126 | | static int scan_bos(i_ctx_t *, ref *, scanner_state *); |
127 | | static void scan_bos_error(scanner_state *, const char *); |
128 | | static int scan_bin_scalar(i_ctx_t *, ref *, scanner_state *); |
129 | | static int scan_bin_get_name(scanner_state *, const gs_memory_t *mem, const ref *, int, ref *, const char *); |
130 | | static int scan_bin_num_array_continue(i_ctx_t *, ref *, scanner_state *); |
131 | | static int scan_bin_string_continue(i_ctx_t *, ref *, scanner_state *); |
132 | | static int scan_bos_continue(i_ctx_t *, ref *, scanner_state *); |
133 | | static byte *scan_bos_resize(i_ctx_t *, scanner_state *, uint, uint); |
134 | | static int scan_bos_string_continue(i_ctx_t *, ref *, scanner_state *); |
135 | | |
136 | | /* Scan a binary token. Called from the main scanner */ |
137 | | /* when it encounters an ASCII code 128-159, */ |
138 | | /* if binary tokens are being recognized (object format != 0). */ |
139 | | int |
140 | | scan_binary_token(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate) |
141 | 120k | { |
142 | 120k | stream *const s = pstate->s_file.value.pfile; |
143 | 120k | scan_binary_state *const pbs = &pstate->s_ss.binary; |
144 | 120k | s_declare_inline(s, p, rlimit); |
145 | 120k | int btype, code; |
146 | 120k | uint wanted; |
147 | | |
148 | 120k | s_begin_inline(s, p, rlimit); |
149 | 120k | pbs->token_type = btype = *p; |
150 | 120k | wanted = bin_token_bytes[btype - MIN_BIN_TOKEN_TYPE] - 1; |
151 | 120k | if (rlimit - p < wanted) { |
152 | 479 | s_end_inline(s, p - 1, rlimit); |
153 | 479 | pstate->s_scan_type = scanning_none; |
154 | 479 | code = scan_Refill; |
155 | 120k | } else { |
156 | 120k | pbs->num_format = bin_token_num_formats[btype - MIN_BIN_TOKEN_TYPE]; |
157 | 120k | if (BT_IS_SEQ(btype)) |
158 | 2.68k | code = scan_bos(i_ctx_p, pref, pstate); |
159 | 117k | else |
160 | 117k | code = scan_bin_scalar(i_ctx_p, pref, pstate); |
161 | 120k | } |
162 | 120k | if (code == scan_Refill && s->end_status == EOFC) |
163 | 239 | code = gs_note_error(gs_error_syntaxerror); |
164 | 120k | if (code < 0 && pstate->s_error.string[0] == 0) |
165 | 299 | snprintf(pstate->s_error.string, sizeof(pstate->s_error.string), |
166 | 299 | "binary token, type=%d", btype); |
167 | 120k | return code; |
168 | 120k | } |
169 | | |
/*
 * Scan a binary object sequence.
 *
 * Decodes the sequence header found at the stream's inline read pointer
 * (p[0] is the token type byte), validates the declared sizes, runs a
 * cheap syntax pre-check over whatever top-level objects are already
 * buffered, preallocates the object array, initializes the binary
 * scanner state and then calls scan_bos_continue.
 *
 * Returns scan_Refill if an extended header is not completely buffered,
 * a negative error code for a malformed header or a failed allocation,
 * and otherwise the result of scan_bos_continue.
 */
static int
scan_bos(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate)
{
    stream *const s = pstate->s_file.value.pfile;
    scan_binary_state *const pbs = &pstate->s_ss.binary;
    s_declare_inline(s, p, rlimit);
    int num_format = pbs->num_format;
    int code;

    s_begin_inline(s, p, rlimit);
    {
        uint rcnt = rlimit - p;
        uint top_size = p[1];
        uint hsize, size;

        if (top_size == 0) {
            /* Extended header (2-byte array size, 4-byte length) */

            if (rcnt < 7) {
                /* Header incomplete: hand p - 1 back and request more data. */
                s_end_inline(s, p - 1, rlimit);
                pstate->s_scan_type = scanning_none;
                return scan_Refill;
            }
            pbs->top_size = top_size = sdecodeushort(p + 2, num_format);
            pbs->lsize = size = sdecodeint32(p + 4, num_format);
            hsize = 8;
        } else {
            /* Normal header (1-byte array size, 2-byte length). */
            /* We already checked rcnt >= 3. */
            pbs->top_size = top_size;
            pbs->lsize = size = sdecodeushort(p + 2, num_format);
            hsize = 4;
        }
        /*
         * The total size must cover the header plus 8 bytes for each
         * top-level object.
         */
        if (size < hsize || (size - hsize) >> 3 < top_size) {
            scan_bos_error(pstate, "sequence too short");
            return_error(gs_error_syntaxerror); /* size too small */
        }
        {   /* Preliminary syntax check to avoid potentially large
             * memory allocation on junk data. Bug 688833
             */
            const unsigned char *q, *rend = p + hsize + top_size*8;

            /* Only inspect the objects that are already in the buffer. */
            if (rend > rlimit)
                rend = rlimit;
            /* q addresses byte 1 of each object; q[-1] is its type byte. */
            for (q = p + hsize + 1; q < rend; q += 8) {
                int c = q[-1] & 0x7f;
                if (c > 10) {
                    scan_bos_error(pstate, "invalid object type");
                    return_error(gs_error_syntaxerror);
                }
                if (*q != 0) {
                    scan_bos_error(pstate, "non-zero unused field");
                    return_error(gs_error_syntaxerror);
                }
            }
        }
        /*
         * Preallocate an array large enough for the worst case,
         * namely, all objects and no strings.  Note that we must
         * divide size by 8, not sizeof(ref), since array elements
         * in binary tokens always occupy 8 bytes regardless of the
         * size of a ref.
         */
        code = ialloc_ref_array(&pbs->bin_array,
                                a_all + a_executable, size / 8,
                                "binary object sequence(objects)");
        if (code < 0)
            return code;
        /* Step past the header; from here on size excludes the header. */
        p += hsize - 1;
        size -= hsize;
        s_end_inline(s, p, rlimit);
        pbs->max_array_index = pbs->top_size = top_size;
        pbs->min_string_index = pbs->size = size;
        pbs->index = 0;
        /* No string storage has been allocated yet. */
        pstate->s_da.is_dynamic = false;
        pstate->s_da.base = pstate->s_da.next =
            pstate->s_da.limit = pstate->s_da.buf;
        code = scan_bos_continue(i_ctx_p, pref, pstate);
        if ((code == scan_Refill || code < 0) && pbs->index < r_size(&pbs->bin_array)) {
            /* Clean up array for GC. */
            uint index = pbs->index;

            refset_null(pbs->bin_array.value.refs + index,
                        r_size(&pbs->bin_array) - index);
        }
        return code;
    }
}
259 | | |
260 | | /* Report an error in a binary object sequence. */ |
261 | | static void |
262 | | scan_bos_error(scanner_state *pstate, const char *msg) |
263 | 52 | { |
264 | 52 | snprintf(pstate->s_error.string, sizeof(pstate->s_error.string), |
265 | 52 | "bin obj seq, type=%d, elements=%u, size=%lu, %s", |
266 | 52 | pstate->s_ss.binary.token_type, |
267 | 52 | pstate->s_ss.binary.top_size, |
268 | 52 | pstate->s_ss.binary.lsize, msg); |
269 | 52 | } |
270 | | |
/*
 * Scan a non-sequence binary token.
 *
 * On entry the inline read pointer p addresses the token type byte, and
 * the caller (scan_binary_token) has checked that the fixed-size prefix
 * given by bin_token_bytes is buffered.  Returns 0 with *pref set on
 * success, scan_Refill if more data is needed, or a negative error code.
 * For strings and number arrays that cannot be completed immediately,
 * pbs->cont is set to the procedure that resumes the scan.
 */
static int
scan_bin_scalar(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate)
{
    stream *const s = pstate->s_file.value.pfile;
    scan_binary_state *const pbs = &pstate->s_ss.binary;
    s_declare_inline(s, p, rlimit);
    int num_format = pbs->num_format, code;
    uint wanted, arg;

    s_begin_inline(s, p, rlimit);
    /* Number of bytes that follow the type byte for this token type. */
    wanted = bin_token_bytes[*p - MIN_BIN_TOKEN_TYPE] - 1;
    switch (*p) {
        case BT_INT8:
            /* (x ^ 128) - 128 sign-extends the 8-bit value. */
            make_int(pref, (p[1] ^ 128) - 128);
            s_end_inline(s, p + 1, rlimit);
            return 0;
        case BT_FIXED:
            /* The byte after the type is the number format. */
            num_format = p[1];
            if (!num_is_valid(num_format))
                return_error(gs_error_syntaxerror);
            /* Format byte plus the encoded number itself. */
            wanted = 1 + encoded_number_bytes(num_format);
            if (rlimit - p < wanted) {
                s_end_inline(s, p - 1, rlimit);
                pstate->s_scan_type = scanning_none;
                return scan_Refill;
            }
            code = sdecode_number(p + 2, num_format, pref);
            goto rnum;
        case BT_INT32_MSB:
        case BT_INT32_LSB:
        case BT_INT16_MSB:
        case BT_INT16_LSB:
        case BT_FLOAT_IEEE_MSB:
        case BT_FLOAT_IEEE_LSB:
        case BT_FLOAT_NATIVE:
            code = sdecode_number(p + 1, num_format, pref);
          rnum:
            /* sdecode_number returns the ref type of the result, or an error. */
            switch (code) {
                case t_integer:
                case t_real:
                    r_set_type(pref, code);
                    break;
                case t_null:
                    return_error(gs_error_syntaxerror);
                default:
                    return code;
            }
            s_end_inline(s, p + wanted, rlimit);
            return 0;
        case BT_BOOLEAN:
            arg = p[1];
            /* Only 0 and 1 are accepted. */
            if (arg & ~1)
                return_error(gs_error_syntaxerror);
            make_bool(pref, arg);
            s_end_inline(s, p + 1, rlimit);
            return 0;
        case BT_STRING_256:
            /* 1-byte length. */
            arg = *++p;
            goto str;
        case BT_STRING_64K_MSB:
        case BT_STRING_64K_LSB:
            /* 2-byte length in the byte order given by num_format. */
            arg = sdecodeushort(p + 1, num_format);
            p += 2;
          str:
            /* arg is the string length; p addresses the last length byte. */
            if (s->foreign && rlimit - p >= arg) {
                /*
                 * Reference the string directly in the buffer.  It is
                 * marked writable for consistency with the non-direct
                 * case, but since the "buffer" may be data compiled into
                 * the executable, it is probably actually read-only.
                 */
                s_end_inline(s, p, rlimit);
                make_const_string(pref, a_all | avm_foreign, arg, sbufptr(s));
                return sbufskip(s, arg);
            } else {
                byte *str = ialloc_string(arg, "string token");

                if (str == 0)
                    return_error(gs_error_VMerror);
                s_end_inline(s, p, rlimit);
                /* s_da tracks how much of the string has been filled in. */
                pstate->s_da.base = pstate->s_da.next = str;
                pstate->s_da.limit = str + arg;
                code = scan_bin_string_continue(i_ctx_p, pref, pstate);
                if (code == scan_Refill || code < 0) {
                    pstate->s_da.is_dynamic = true;
                    make_null(&pbs->bin_array);         /* clean up for GC */
                    pbs->cont = scan_bin_string_continue;
                }
                return code;
            }
        case BT_LITNAME_SYSTEM:
            code = scan_bin_get_name(pstate, imemory, system_names_p, p[1],
                                     pref, "system");
            goto lname;
        case BT_EXECNAME_SYSTEM:
            code = scan_bin_get_name(pstate, imemory, system_names_p, p[1],
                                     pref, "system");
            goto xname;
        case BT_LITNAME_USER:
            code = scan_bin_get_name(pstate, imemory, user_names_p, p[1],
                                     pref, "user");
          lname:
            /* Literal name: leave the attributes as returned. */
            if (code < 0)
                return code;
            s_end_inline(s, p + 1, rlimit);
            return 0;
        case BT_EXECNAME_USER:
            code = scan_bin_get_name(pstate, imemory, user_names_p, p[1],
                                     pref, "user");
          xname:
            /* Executable name: additionally set a_executable. */
            if (code < 0)
                return code;
            r_set_attrs(pref, a_executable);
            s_end_inline(s, p + 1, rlimit);
            return 0;
        case BT_NUM_ARRAY:
            /* Header: format byte, then a 2-byte element count. */
            num_format = p[1];
            if (!num_is_valid(num_format))
                return_error(gs_error_syntaxerror);
            arg = sdecodeushort(p + 2, num_format);
            code = ialloc_ref_array(&pbs->bin_array, a_all, arg,
                                    "number array token");
            if (code < 0)
                return code;
            pbs->num_format = num_format;
            pbs->index = 0;
            p += 3;
            s_end_inline(s, p, rlimit);
            code = scan_bin_num_array_continue(i_ctx_p, pref, pstate);
            if (code == scan_Refill || code < 0) {
                /* Make sure the array is clean for the GC. */
                refset_null(pbs->bin_array.value.refs + pbs->index,
                            arg - pbs->index);
                pbs->cont = scan_bin_num_array_continue;
            }
            return code;
    }
    /* Token type with no case above (including the undefined codes). */
    return_error(gs_error_syntaxerror);
}
411 | | |
412 | | /* Get a system or user name. */ |
413 | | static int |
414 | | scan_bin_get_name(scanner_state *pstate, const gs_memory_t *mem, |
415 | | const ref *pnames /*t_array*/, int index, ref *pref, |
416 | | const char *usstring) |
417 | 41.0k | { |
418 | | /* Convert all errors to gs_error_undefined to match Adobe. */ |
419 | 41.0k | if (pnames == 0 || array_get(mem, pnames, (long)index, pref) < 0 || |
420 | 41.0k | !r_has_type(pref, t_name)) { |
421 | 10 | snprintf(pstate->s_error.string, |
422 | 10 | sizeof(pstate->s_error.string), |
423 | 10 | "%s%d", usstring, index); |
424 | 10 | pstate->s_error.is_name = true; |
425 | 10 | return_error(gs_error_undefined); |
426 | 10 | } |
427 | 41.0k | return 0; |
428 | 41.0k | } |
429 | | |
430 | | /* Continue collecting a binary string. */ |
431 | | static int |
432 | | scan_bin_string_continue(i_ctx_t *i_ctx_p, ref * pref, scanner_state * pstate) |
433 | 6.00k | { |
434 | 6.00k | stream *const s = pstate->s_file.value.pfile; |
435 | 6.00k | byte *q = pstate->s_da.next; |
436 | 6.00k | uint wanted = pstate->s_da.limit - q; |
437 | 6.00k | uint rcnt; |
438 | | |
439 | | /* We don't check the return status from 'sgets' here. |
440 | | If there is an error in sgets, the condition rcnt==wanted |
441 | | would be false and this function will return scan_Refill. |
442 | | */ |
443 | 6.00k | sgets(s, q, wanted, &rcnt); |
444 | 6.00k | if (rcnt == wanted) { |
445 | | /* Finished collecting the string. */ |
446 | 2.99k | make_string(pref, a_all | icurrent_space, |
447 | 2.99k | pstate->s_da.limit - pstate->s_da.base, |
448 | 2.99k | pstate->s_da.base); |
449 | 2.99k | return 0; |
450 | 2.99k | } |
451 | 3.01k | pstate->s_da.next = q + rcnt; |
452 | 3.01k | pstate->s_scan_type = scanning_binary; |
453 | 3.01k | return scan_Refill; |
454 | 6.00k | } |
455 | | |
456 | | /* Continue scanning a binary number array. */ |
457 | | static int |
458 | | scan_bin_num_array_continue(i_ctx_t *i_ctx_p, ref * pref, |
459 | | scanner_state * pstate) |
460 | 2.11k | { |
461 | 2.11k | stream *const s = pstate->s_file.value.pfile; |
462 | 2.11k | scan_binary_state *const pbs = &pstate->s_ss.binary; |
463 | 2.11k | uint index = pbs->index; |
464 | 2.11k | ref *np = pbs->bin_array.value.refs + index; |
465 | 2.11k | uint wanted = encoded_number_bytes(pbs->num_format); |
466 | | |
467 | 142k | for (; index < r_size(&pbs->bin_array); index++, np++) { |
468 | 141k | int code; |
469 | | |
470 | 141k | if (sbufavailable(s) < wanted) { |
471 | 623 | pbs->index = index; |
472 | 623 | pstate->s_scan_type = scanning_binary; |
473 | 623 | return scan_Refill; |
474 | 623 | } |
475 | 140k | code = sdecode_number(sbufptr(s), pbs->num_format, np); |
476 | 140k | switch (code) { |
477 | 4.27k | case t_integer: |
478 | 140k | case t_real: |
479 | 140k | r_set_type(np, code); |
480 | 140k | (void)sbufskip(s, wanted); |
481 | 140k | break; |
482 | 0 | case t_null: |
483 | 0 | scan_bos_error(pstate, "bad number format"); |
484 | 0 | return_error(gs_error_syntaxerror); |
485 | 1 | default: |
486 | 1 | return code; |
487 | 140k | } |
488 | 140k | } |
489 | 1.48k | *pref = pbs->bin_array; |
490 | 1.48k | return 0; |
491 | 2.11k | } |
492 | | |
/*
 * Continue scanning a binary object sequence.  We preallocated space for
 * the largest possible number of objects, but not for strings, since
 * the latter would probably be a gross over-estimate.  Instead,
 * we wait until we see the first string or name, and allocate string space
 * based on the hope that its string index is the smallest one we will see.
 * If this turns out to be wrong, we may have to reallocate, and adjust
 * all the pointers.
 *
 * Decodes 8-byte objects into pbs->bin_array from pbs->index up to
 * max_array_index (which grows as nested arrays are encountered), then
 * shrinks the array to the number of objects actually used and calls
 * scan_bos_string_continue to read the string data.
 *
 * Returns scan_Refill (with index, max_array_index and min_string_index
 * saved in *pbs) if fewer than 8 bytes are buffered for the next object,
 * a negative error code for malformed input or allocation failure, and
 * otherwise the result of scan_bos_string_continue.
 */
static int
scan_bos_continue(i_ctx_t *i_ctx_p, ref * pref, scanner_state * pstate)
{
    stream *const s = pstate->s_file.value.pfile;
    scan_binary_state *const pbs = &pstate->s_ss.binary;
    s_declare_inline(s, p, rlimit);
    uint max_array_index = pbs->max_array_index;
    uint min_string_index = pbs->min_string_index;
    int num_format = pbs->num_format;
    uint index = pbs->index;
    uint size = pbs->size;
    ref *abase = pbs->bin_array.value.refs;
    int code;

    pbs->cont = scan_bos_continue;  /* in case of premature return */
    s_begin_inline(s, p, rlimit);
    for (; index < max_array_index; p += SIZEOF_BIN_SEQ_OBJ, index++) {
        ref *op = abase + index;
        uint osize;
        int value, atype, attrs;

        s_end_inline(s, p, rlimit);     /* in case of error */
        if (rlimit - p < SIZEOF_BIN_SEQ_OBJ) {
            /* Next object not fully buffered: save progress and suspend. */
            pbs->index = index;
            pbs->max_array_index = max_array_index;
            pbs->min_string_index = min_string_index;
            pstate->s_scan_type = scanning_binary;
            return scan_Refill;
        }
        /* Object layout relative to p: [1] type, [2] unused, */
        /* [3..4] length, [5..8] value. */
        if (p[2] != 0) { /* reserved, must be 0 */
            scan_bos_error(pstate, "non-zero unused field");
            return_error(gs_error_syntaxerror);
        }
        attrs = (p[1] & 128 ? a_executable : 0);
        /*
         * We always decode all 8 bytes of the object, so we can signal
         * syntaxerror if any unused field is non-zero (per PLRM).
         */
        osize = sdecodeushort(p + 3, num_format);
        value = sdecodeint32(p + 5, num_format);
        switch (p[1] & 0x7f) {
            case BS_TYPE_NULL:
                if (osize | value) { /* unused */
                    scan_bos_error(pstate, "non-zero unused field");
                    return_error(gs_error_syntaxerror);
                }
                make_null(op);
                break;
            case BS_TYPE_INTEGER:
                if (osize) {    /* unused */
                    scan_bos_error(pstate, "non-zero unused field");
                    return_error(gs_error_syntaxerror);
                }
                make_int(op, value);
                break;
            case BS_TYPE_REAL:{
                    float vreal;

                    if (osize != 0) {   /* fixed-point number */
                        /* osize is the scale: the result is value * 2^-osize. */
                        if (osize > 31) {
                            scan_bos_error(pstate, "invalid number format");
                            return_error(gs_error_syntaxerror);
                        }
                        /* ldexp requires a signed 2nd argument.... */
                        vreal = (float)ldexp((double)value, -(int)osize);
                    } else {
                        code = sdecode_float(p + 5, num_format, &vreal);
                        if (code < 0) {
                            scan_bos_error(pstate, "invalid real number");
                            return code;
                        }
                    }
                    make_real(op, vreal);
                    break;
                }
            case BS_TYPE_BOOLEAN:
                if (osize) {    /* unused */
                    scan_bos_error(pstate, "non-zero unused field");
                    return_error(gs_error_syntaxerror);
                }
                make_bool(op, value != 0);
                break;
            case BS_TYPE_STRING:
                attrs |= a_all;
              str:
                /*
                 * Also reached from the name cases below, which do not
                 * add a_all: scan_bos_string_continue later treats a
                 * t_string without a_write as a name to be looked up.
                 */
                if (osize == 0) {
                    /* For zero-length strings, the offset */
                    /* doesn't matter, and may be zero. */
                    make_empty_string(op, attrs);
                    break;
                }
                {
                    const uint beg_ofs = (uint)value;
                    const uint end_ofs = beg_ofs + osize;

                    /* String data must lie after the objects and within the sequence. */
                    if (beg_ofs < max_array_index * SIZEOF_BIN_SEQ_OBJ || beg_ofs > size) {
                        scan_bos_error(pstate, "invalid string offset");
                        return_error(gs_error_syntaxerror);
                    }
                    /* end_ofs < beg_ofs detects unsigned wrap-around. */
                    if (end_ofs < beg_ofs || end_ofs > size) {
                        scan_bos_error(pstate, "invalid string length");
                        return_error(gs_error_syntaxerror);
                    }
                    if (beg_ofs < min_string_index) {
                        /* We have to (re)allocate the strings. */
                        uint str_size = size - beg_ofs;
                        byte *sbase;

                        if (pstate->s_da.is_dynamic)
                            sbase = scan_bos_resize(i_ctx_p, pstate, str_size,
                                                    index);
                        else
                            sbase = ialloc_string(str_size,
                                                  "bos strings");
                        if (sbase == 0)
                            return_error(gs_error_VMerror);
                        pstate->s_da.is_dynamic = true;
                        pstate->s_da.base = pstate->s_da.next = sbase;
                        pstate->s_da.limit = sbase + str_size;
                        min_string_index = beg_ofs;
                    }
                    /* Point at where this string's bytes will be read into. */
                    make_string(op, attrs | icurrent_space, osize,
                                pstate->s_da.base +
                                (beg_ofs - min_string_index));
                }
                break;
            case BS_TYPE_EVAL_NAME:
                attrs |= a_readonly;    /* mark as executable for later */
                /* falls through */
            case BS_TYPE_NAME:
                /* The length field selects how the name is specified. */
                switch (osize) {
                    case 0:
                        /* value is an index into the user name table. */
                        code = scan_bin_get_name(pstate, imemory,
                                                 user_names_p, value, op,
                                                 "user");
                        goto usn;
                    case 0xffff:
                        /* value is an index into the system name table. */
                        code = scan_bin_get_name(pstate, imemory,
                                                 system_names_p, value, op,
                                                 "system");
                      usn:
                        if (code < 0)
                            return code;
                        r_set_attrs(op, attrs);
                        break;
                    default:
                        /* The name is given by its text in the string area. */
                        goto str;
                }
                break;
            case BS_TYPE_ARRAY:
                atype = t_array;
                {
                    const uint beg_ofs = (uint)value;
                    const uint end_ofs = beg_ofs + osize * SIZEOF_BIN_SEQ_OBJ;
                    const uint beg_idx = beg_ofs / SIZEOF_BIN_SEQ_OBJ;
                    const uint end_idx = end_ofs / SIZEOF_BIN_SEQ_OBJ;

                    /* The array must start on an object boundary, before any strings. */
                    if (beg_ofs > min_string_index || beg_ofs & (SIZEOF_BIN_SEQ_OBJ - 1)) {
                        scan_bos_error(pstate, "bad array offset");
                        return_error(gs_error_syntaxerror);
                    }
                    if (osize > (size / 8) || end_ofs < beg_ofs || end_ofs > min_string_index) {
                        scan_bos_error(pstate, "bad array length");
                        return_error(gs_error_syntaxerror);
                    }

                    /* The elements of this array must be scanned too. */
                    max_array_index = max(max_array_index, end_idx);
                    make_tasv_new(op, atype,
                                  attrs | a_all | icurrent_space,
                                  osize, refs, abase + beg_idx);
                }
                break;
            case BS_TYPE_MARK:
                if (osize | value) { /* unused */
                    scan_bos_error(pstate, "non-zero unused field");
                    return_error(gs_error_syntaxerror);
                }
                make_mark(op);
                break;
            default:
                scan_bos_error(pstate, "invalid object type");
                return_error(gs_error_syntaxerror);
        }
    }
    s_end_inline(s, p, rlimit);
    /* Shorten the objects to remove the space that turned out */
    /* to be used for strings. */
    pbs->index = max_array_index;
    iresize_ref_array(&pbs->bin_array, max_array_index,
                      "binary object sequence(objects)");
    code = scan_bos_string_continue(i_ctx_p, pref, pstate);
    if (code == scan_Refill)
        pbs->cont = scan_bos_string_continue;
    return code;
}
697 | | |
698 | | /* Reallocate the strings for a binary object sequence, */ |
699 | | /* adjusting all the pointers to them from objects. */ |
700 | | static byte * |
701 | | scan_bos_resize(i_ctx_t *i_ctx_p, scanner_state * pstate, uint new_size, |
702 | | uint index) |
703 | 0 | { |
704 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
705 | 0 | uint old_size = da_size(&pstate->s_da); |
706 | 0 | byte *old_base = pstate->s_da.base; |
707 | 0 | byte *new_base = iresize_string(old_base, old_size, new_size, |
708 | 0 | "scan_bos_resize"); |
709 | 0 | byte *relocated_base = new_base + (new_size - old_size); |
710 | 0 | uint i; |
711 | 0 | ref *aptr = pbs->bin_array.value.refs; |
712 | |
|
713 | 0 | if (new_base == 0) |
714 | 0 | return 0; |
715 | | /* Since the allocator normally extends strings downward, */ |
716 | | /* it's quite possible that new and old addresses are the same. */ |
717 | 0 | if (relocated_base != old_base) |
718 | 0 | for (i = index; i != 0; i--, aptr++) |
719 | 0 | if (r_has_type(aptr, t_string) && r_size(aptr) != 0) |
720 | 0 | aptr->value.bytes = |
721 | 0 | aptr->value.bytes - old_base + relocated_base; |
722 | 0 | return new_base; |
723 | 0 | } |
724 | | |
725 | | /* Continue reading the strings for a binary object sequence. */ |
726 | | static int |
727 | | scan_bos_string_continue(i_ctx_t *i_ctx_p, ref * pref, |
728 | | scanner_state * pstate) |
729 | 2.54k | { |
730 | 2.54k | scan_binary_state *const pbs = &pstate->s_ss.binary; |
731 | 2.54k | ref rstr; |
732 | 2.54k | ref *op; |
733 | 2.54k | int code = scan_bin_string_continue(i_ctx_p, &rstr, pstate); |
734 | 2.54k | uint space = ialloc_space(idmemory); |
735 | 2.54k | uint i; |
736 | | |
737 | 2.54k | if (code != 0) |
738 | 0 | return code; |
739 | | |
740 | | /* Fix up names. We must do this before creating dictionaries. */ |
741 | | |
742 | 2.54k | for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array); |
743 | 2.56k | i != 0; i--, op++ |
744 | 2.54k | ) |
745 | 20 | switch (r_type(op)) { |
746 | 0 | case t_string: |
747 | 0 | if (r_has_attr(op, a_write)) /* a real string */ |
748 | 0 | break; |
749 | | /* This is actually a name; look it up now. */ |
750 | 0 | { |
751 | 0 | uint attrs = r_type_attrs(op) & (a_read | a_executable); |
752 | |
|
753 | 0 | code = name_ref(imemory, op->value.bytes, r_size(op), op, 1); |
754 | 0 | if (code < 0) |
755 | 0 | return code; |
756 | 0 | r_set_attrs(op, attrs); |
757 | 0 | } |
758 | | /* falls through */ |
759 | 0 | case t_name: |
760 | 0 | if (r_has_attr(op, a_read)) { /* BS_TYPE_EVAL_NAME */ |
761 | 0 | ref *defp = dict_find_name(op); |
762 | |
|
763 | 0 | if (defp == 0) |
764 | 0 | return_error(gs_error_undefined); |
765 | 0 | store_check_space(space, defp); |
766 | 0 | ref_assign(op, defp); |
767 | 0 | } |
768 | 0 | break; |
769 | 20 | } |
770 | | |
771 | 2.54k | ref_assign(pref, &pbs->bin_array); |
772 | 2.54k | r_set_size(pref, pbs->top_size); |
773 | 2.54k | return scan_BOS; |
774 | 2.54k | } |
775 | | |
776 | | /* ---------------- Writing ---------------- */ |
777 | | |
778 | | /* |
779 | | * Encode a single object for a binary object sequence, for printobject and |
780 | | * write object. Note that this does not modify the always-unused byte (1), |
781 | | * but it always write bytes 0 and 2-7. |
782 | | */ |
783 | | int |
784 | | encode_binary_token(i_ctx_t *i_ctx_p, const ref *obj, ps_int *ref_offset, |
785 | | ps_int *char_offset, byte *str) |
786 | 0 | { |
787 | 0 | bin_seq_type_t type; |
788 | 0 | uint size = 0; |
789 | 0 | int format = (int)ref_binary_object_format.value.intval; |
790 | 0 | ps_int value = 0; |
791 | 0 | ref nstr; |
792 | |
|
793 | 0 | switch (r_type(obj)) { |
794 | 0 | case t_null: |
795 | 0 | type = BS_TYPE_NULL; |
796 | 0 | break; /* always set all fields */ |
797 | 0 | case t_mark: |
798 | 0 | type = BS_TYPE_MARK; |
799 | 0 | break; /* always set all fields */ |
800 | 0 | case t_integer: |
801 | 0 | type = BS_TYPE_INTEGER; |
802 | 0 | value = obj->value.intval; |
803 | 0 | break; |
804 | 0 | case t_real: |
805 | 0 | type = BS_TYPE_REAL; |
806 | 0 | if (sizeof(obj->value.realval) != sizeof(int)) { |
807 | | /* The PLRM allocates exactly 4 bytes for reals. */ |
808 | 0 | return_error(gs_error_rangecheck); |
809 | 0 | } |
810 | 0 | value = *(const ps_int *)&obj->value.realval; |
811 | | #if !(ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS) |
812 | | if (format >= 3) { |
813 | | /* Never byte-swap native reals -- use native byte order. */ |
814 | | format = 4 - ARCH_IS_BIG_ENDIAN; |
815 | | } |
816 | | #endif |
817 | 0 | break; |
818 | 0 | case t_boolean: |
819 | 0 | type = BS_TYPE_BOOLEAN; |
820 | 0 | value = obj->value.boolval; |
821 | 0 | break; |
822 | 0 | case t_array: |
823 | 0 | type = BS_TYPE_ARRAY; |
824 | 0 | size = r_size(obj); |
825 | 0 | value = *ref_offset; |
826 | 0 | *ref_offset += size * SIZEOF_BIN_SEQ_OBJ; |
827 | 0 | break; |
828 | 0 | case t_string: |
829 | 0 | type = BS_TYPE_STRING; |
830 | 0 | nos: |
831 | 0 | size = r_size(obj); |
832 | 0 | value = *char_offset; |
833 | 0 | *char_offset += size; |
834 | 0 | break; |
835 | 0 | case t_name: |
836 | 0 | type = BS_TYPE_NAME; |
837 | 0 | name_string_ref(imemory, obj, &nstr); |
838 | 0 | r_copy_attrs(&nstr, a_executable, obj); |
839 | 0 | obj = &nstr; |
840 | 0 | goto nos; |
841 | 0 | default: |
842 | 0 | return_error(gs_error_rangecheck); |
843 | 0 | } |
844 | 0 | { |
845 | 0 | byte s0 = (byte) size, s1 = (byte) (size >> 8); |
846 | 0 | byte v0 = (byte) value, v1 = (byte) (value >> 8), |
847 | 0 | v2 = (byte) (value >> 16), v3 = (byte) (value >> 24); |
848 | |
|
849 | 0 | if (format & 1) { |
850 | | /* Store big-endian */ |
851 | 0 | str[2] = s1, str[3] = s0; |
852 | 0 | str[4] = v3, str[5] = v2, str[6] = v1, str[7] = v0; |
853 | 0 | } else { |
854 | | /* Store little-endian */ |
855 | 0 | str[2] = s0, str[3] = s1; |
856 | 0 | str[4] = v0, str[5] = v1, str[6] = v2, str[7] = v3; |
857 | 0 | } |
858 | 0 | } |
859 | 0 | if (r_has_attr(obj, a_executable)) |
860 | 0 | type += BS_EXECUTABLE; |
861 | 0 | str[0] = (byte) type; |
862 | 0 | return 0; |
863 | 0 | } |