/src/ghostpdl/psi/iscanbin.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Ghostscript binary token scanner and writer */ |
18 | | #include "math_.h" |
19 | | #include "memory_.h" |
20 | | #include "ghost.h" |
21 | | #include "gsutil.h" |
22 | | #include "gxalloc.h" /* for names_array in allocator */ |
23 | | #include "stream.h" |
24 | | #include "strimpl.h" /* for sfilter.h */ |
25 | | #include "sfilter.h" /* for iscan.h */ |
26 | | #include "ierrors.h" |
27 | | #include "ialloc.h" |
28 | | #include "iddict.h" |
29 | | #include "dstack.h" /* for immediately evaluated names */ |
30 | | #include "ostack.h" /* must precede iscan.h */ |
31 | | #include "iname.h" |
32 | | #include "iscan.h" /* for scan_Refill */ |
33 | | #include "iscanbin.h" |
34 | | #include "iutil.h" |
35 | | #include "ivmspace.h" |
36 | | #include "store.h" |
37 | | #include "btoken.h" |
38 | | #include "ibnum.h" |
39 | | |
/*
 * Binary token type codes.  scan_binary_token reads one of these values
 * as the leading byte of a binary token.
 */
typedef enum {
    /* Binary object sequences; BT_SEQ names the first code of the group. */
    BT_SEQ = 128,
    BT_SEQ_IEEE_MSB = 128,      /* IEEE floats, big-endian */
    BT_SEQ_IEEE_LSB = 129,      /* IEEE float, little-endian */
    BT_SEQ_NATIVE_MSB = 130,    /* native floats, big-endian */
    BT_SEQ_NATIVE_LSB = 131,    /* native floats, little-endian */

    /* Scalar numbers. */
    BT_INT32_MSB = 132,
    BT_INT32_LSB = 133,
    BT_INT16_MSB = 134,
    BT_INT16_LSB = 135,
    BT_INT8 = 136,
    BT_FIXED = 137,
    BT_FLOAT_IEEE_MSB = 138,
    BT_FLOAT_IEEE_LSB = 139,
    BT_FLOAT_NATIVE = 140,

    /* Booleans and strings. */
    BT_BOOLEAN = 141,
    BT_STRING_256 = 142,
    BT_STRING_64K_MSB = 143,
    BT_STRING_64K_LSB = 144,

    /* Names looked up by index in the system / user name tables. */
    BT_LITNAME_SYSTEM = 145,
    BT_EXECNAME_SYSTEM = 146,
    BT_LITNAME_USER = 147,
    BT_EXECNAME_USER = 148,

    /* Homogeneous number array. */
    BT_NUM_ARRAY = 149
} bin_token_type_t;

/* True iff btype is one of the four binary object sequence codes. */
#define BT_IS_SEQ(btype) (((btype) & ~3) == BT_SEQ)
67 | | |
/* Inclusive range of byte values that introduce a binary token. */
#define MIN_BIN_TOKEN_TYPE      128
#define MAX_BIN_TOKEN_TYPE      159
/* Number of codes in that range; used to size the per-type tables. */
#define NUM_BIN_TOKEN_TYPES     (MAX_BIN_TOKEN_TYPE - MIN_BIN_TOKEN_TYPE + 1)
71 | | |
72 | | /* Define the number of required initial bytes for binary tokens. */ |
73 | | static const byte bin_token_bytes[NUM_BIN_TOKEN_TYPES] = |
74 | | { |
75 | | 4, 4, 4, 4, 5, 5, 3, 3, 2, 2, 5, 5, 5, |
76 | | 2, 2, 3, 3, 2, 2, 2, 2, 4, |
77 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* undefined */ |
78 | | }; |
79 | | |
80 | | /* Define the number formats for those binary tokens that need them. */ |
81 | | static const byte bin_token_num_formats[NUM_BIN_TOKEN_TYPES] = |
82 | | { |
83 | | num_msb + num_float_IEEE, /* BT_SEQ_IEEE_MSB */ |
84 | | num_lsb + num_float_IEEE, /* BT_SEQ_IEEE_LSB */ |
85 | | #if ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS |
86 | | /* Treat native floats like IEEE floats for byte swapping. */ |
87 | | num_msb + num_float_IEEE, /* BT_SEQ_NATIVE_MSB */ |
88 | | num_lsb + num_float_IEEE, /* BT_SEQ_NATIVE_LSB */ |
89 | | #else |
90 | | num_msb + num_float_native, /* BT_SEQ_NATIVE_MSB */ |
91 | | num_lsb + num_float_native, /* BT_SEQ_NATIVE_LSB */ |
92 | | #endif |
93 | | num_msb + num_int32, /* BT_INT32_MSB */ |
94 | | num_lsb + num_int32, /* BT_INT32_LSB */ |
95 | | num_msb + num_int16, /* BT_INT16_MSB */ |
96 | | num_lsb + num_int16, /* BT_INT16_LSB */ |
97 | | 0, /* BT_INT8, not used */ |
98 | | 0, /* BT_FIXED, not used */ |
99 | | num_msb + num_float_IEEE, /* BT_FLOAT_IEEE_MSB */ |
100 | | num_lsb + num_float_IEEE, /* BT_FLOAT_IEEE_LSB */ |
101 | | num_float_native, /* BT_FLOAT_NATIVE */ |
102 | | 0, /* BT_BOOLEAN, not used */ |
103 | | 0, /* BT_STRING_256, not used */ |
104 | | num_msb, /* BT_STRING_64K_MSB */ |
105 | | num_lsb /* BT_STRING_64K_LSB */ |
106 | | /* rest not used */ |
107 | | }; |
108 | | |
/*
 * Type codes of the objects inside a binary object sequence.  The scanner
 * takes the code from the low 7 bits of an object's type byte.
 */
typedef enum {
    BS_TYPE_NULL      = 0,
    BS_TYPE_INTEGER   = 1,
    BS_TYPE_REAL      = 2,
    BS_TYPE_NAME      = 3,
    BS_TYPE_BOOLEAN   = 4,
    BS_TYPE_STRING    = 5,
    BS_TYPE_EVAL_NAME = 6,
    /* codes 7 and 8 are not defined here */
    BS_TYPE_ARRAY     = 9,
    BS_TYPE_MARK      = 10,
} bin_seq_type_t;
121 | | |
/* Added to an object's type code by the encoder when it is executable. */
#define BS_EXECUTABLE           128
/* Every object in a binary object sequence occupies this many bytes. */
#define SIZEOF_BIN_SEQ_OBJ      ((uint)8)
124 | | |
125 | | /* Forward references */ |
126 | | static int scan_bos(i_ctx_t *, ref *, scanner_state *); |
127 | | static void scan_bos_error(scanner_state *, const char *); |
128 | | static int scan_bin_scalar(i_ctx_t *, ref *, scanner_state *); |
129 | | static int scan_bin_get_name(scanner_state *, const gs_memory_t *mem, const ref *, int, ref *, const char *); |
130 | | static int scan_bin_num_array_continue(i_ctx_t *, ref *, scanner_state *); |
131 | | static int scan_bin_string_continue(i_ctx_t *, ref *, scanner_state *); |
132 | | static int scan_bos_continue(i_ctx_t *, ref *, scanner_state *); |
133 | | static byte *scan_bos_resize(i_ctx_t *, scanner_state *, uint, uint); |
134 | | static int scan_bos_string_continue(i_ctx_t *, ref *, scanner_state *); |
135 | | |
136 | | /* Scan a binary token. Called from the main scanner */ |
137 | | /* when it encounters an ASCII code 128-159, */ |
138 | | /* if binary tokens are being recognized (object format != 0). */ |
139 | | int |
140 | | scan_binary_token(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate) |
141 | 2.34M | { |
142 | 2.34M | stream *const s = pstate->s_file.value.pfile; |
143 | 2.34M | scan_binary_state *const pbs = &pstate->s_ss.binary; |
144 | 2.34M | s_declare_inline(s, p, rlimit); |
145 | 2.34M | int btype, code; |
146 | 2.34M | uint wanted; |
147 | | |
148 | 2.34M | s_begin_inline(s, p, rlimit); |
149 | 2.34M | pbs->token_type = btype = *p; |
150 | 2.34M | wanted = bin_token_bytes[btype - MIN_BIN_TOKEN_TYPE] - 1; |
151 | 2.34M | if (rlimit - p < wanted) { |
152 | 8.39k | s_end_inline(s, p - 1, rlimit); |
153 | 8.39k | pstate->s_scan_type = scanning_none; |
154 | 8.39k | code = scan_Refill; |
155 | 2.33M | } else { |
156 | 2.33M | pbs->num_format = bin_token_num_formats[btype - MIN_BIN_TOKEN_TYPE]; |
157 | 2.33M | if (BT_IS_SEQ(btype)) |
158 | 31.2k | code = scan_bos(i_ctx_p, pref, pstate); |
159 | 2.30M | else |
160 | 2.30M | code = scan_bin_scalar(i_ctx_p, pref, pstate); |
161 | 2.33M | } |
162 | 2.34M | if (code == scan_Refill && s->end_status == EOFC) |
163 | 5.17k | code = gs_note_error(gs_error_syntaxerror); |
164 | 2.34M | if (code < 0 && pstate->s_error.string[0] == 0) |
165 | 6.06k | snprintf(pstate->s_error.string, sizeof(pstate->s_error.string), |
166 | 6.06k | "binary token, type=%d", btype); |
167 | 2.34M | return code; |
168 | 2.34M | } |
169 | | |
170 | | /* Scan a binary object sequence. */
/*
 * Decodes the sequence header (normal 4-byte or extended 8-byte form),
 * checks the declared size against the element count, pre-validates the
 * object entries that are already buffered, allocates the object array,
 * initializes the binary scan state and then calls scan_bos_continue.
 *
 * Returns scan_Refill if an extended header is not fully buffered, a
 * negative error code for a malformed header or a failed allocation,
 * and otherwise the result of scan_bos_continue.
 */
171 | | static int
172 | | scan_bos(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate)
173 | 31.2k | {
174 | 31.2k | stream *const s = pstate->s_file.value.pfile;
175 | 31.2k | scan_binary_state *const pbs = &pstate->s_ss.binary;
176 | 31.2k | s_declare_inline(s, p, rlimit);
177 | 31.2k | int num_format = pbs->num_format;
178 | 31.2k | int code;
179 | |
180 | 31.2k | s_begin_inline(s, p, rlimit);
181 | 31.2k | {
182 | 31.2k | uint rcnt = rlimit - p;
/* p[1] is the 1-byte array size; 0 selects the extended header. */
183 | 31.2k | uint top_size = p[1];
184 | 31.2k | uint hsize, size;
185 | |
186 | 31.2k | if (top_size == 0) {
187 | | /* Extended header (2-byte array size, 4-byte length) */
188 | |
189 | 30.0k | if (rcnt < 7) {
/* Step the read pointer back by one before asking for a refill. */
190 | 334 | s_end_inline(s, p - 1, rlimit);
191 | 334 | pstate->s_scan_type = scanning_none;
192 | 334 | return scan_Refill;
193 | 334 | }
194 | 29.7k | pbs->top_size = top_size = sdecodeushort(p + 2, num_format);
195 | 29.7k | pbs->lsize = size = sdecodeint32(p + 4, num_format);
196 | 29.7k | hsize = 8;
197 | 29.7k | } else {
198 | | /* Normal header (1-byte array size, 2-byte length). */
199 | | /* We already checked rcnt >= 3. */
200 | 1.24k | pbs->top_size = top_size;
201 | 1.24k | pbs->lsize = size = sdecodeushort(p + 2, num_format);
202 | 1.24k | hsize = 4;
203 | 1.24k | }
/* The declared size must cover the header plus 8 bytes per top-level object. */
204 | 30.9k | if (size < hsize || (size - hsize) >> 3 < top_size) {
205 | 97 | scan_bos_error(pstate, "sequence too short");
206 | 97 | return_error(gs_error_syntaxerror); /* size too small */
207 | 97 | }
208 | 30.8k | { /* Preliminary syntax check to avoid potentially large
209 | | * memory allocation on junk data. Bug 688833
210 | | */
211 | 30.8k | const unsigned char *q, *rend = p + hsize + top_size*8;
212 | |
/* Only inspect entries that are already in the buffer. */
213 | 30.8k | if (rend > rlimit)
214 | 1.60k | rend = rlimit;
/* q[-1] is an entry's type byte and *q is the byte after it, which must be 0. */
215 | 44.6k | for (q = p + hsize + 1; q < rend; q += 8) {
216 | 14.1k | int c = q[-1] & 0x7f;
217 | 14.1k | if (c > 10) {
218 | 251 | scan_bos_error(pstate, "invalid object type");
219 | 251 | return_error(gs_error_syntaxerror);
220 | 251 | }
221 | 13.8k | if (*q != 0) {
222 | 56 | scan_bos_error(pstate, "non-zero unused field");
223 | 56 | return_error(gs_error_syntaxerror);
224 | 56 | }
225 | 13.8k | }
226 | 30.8k | }
227 | | /*
228 | | * Preallocate an array large enough for the worst case,
229 | | * namely, all objects and no strings. Note that we must
230 | | * divide size by 8, not sizeof(ref), since array elements
231 | | * in binary tokens always occupy 8 bytes regardless of the
232 | | * size of a ref.
233 | | */
234 | 30.5k | code = ialloc_ref_array(&pbs->bin_array,
235 | 30.5k | a_all + a_executable, size / 8,
236 | 30.5k | "binary object sequence(objects)");
237 | 30.5k | if (code < 0)
238 | 293 | return code;
/* Skip the header: p now addresses its last byte, and size excludes it. */
239 | 30.2k | p += hsize - 1;
240 | 30.2k | size -= hsize;
241 | 30.2k | s_end_inline(s, p, rlimit);
242 | 30.2k | pbs->max_array_index = pbs->top_size = top_size;
243 | 30.2k | pbs->min_string_index = pbs->size = size;
244 | 30.2k | pbs->index = 0;
/* No string storage has been allocated yet; start with an empty range. */
245 | 30.2k | pstate->s_da.is_dynamic = false;
246 | 30.2k | pstate->s_da.base = pstate->s_da.next =
247 | 30.2k | pstate->s_da.limit = pstate->s_da.buf;
248 | 30.2k | code = scan_bos_continue(i_ctx_p, pref, pstate);
249 | 30.2k | if ((code == scan_Refill || code < 0) && pbs->index < r_size(&pbs->bin_array)) {
250 | | /* Clean up array for GC. */
251 | 1.05k | uint index = pbs->index;
252 | |
253 | 1.05k | refset_null(pbs->bin_array.value.refs + index,
254 | 1.05k | r_size(&pbs->bin_array) - index);
255 | 1.05k | }
256 | 30.2k | return code;
257 | 30.5k | }
258 | 30.5k | }
259 | | |
260 | | /* Report an error in a binary object sequence. */ |
261 | | static void |
262 | | scan_bos_error(scanner_state *pstate, const char *msg) |
263 | 769 | { |
264 | 769 | snprintf(pstate->s_error.string, sizeof(pstate->s_error.string), |
265 | 769 | "bin obj seq, type=%d, elements=%u, size=%lu, %s", |
266 | 769 | pstate->s_ss.binary.token_type, |
267 | 769 | pstate->s_ss.binary.top_size, |
268 | 769 | pstate->s_ss.binary.lsize, msg); |
269 | 769 | } |
270 | | |
271 | | /* Scan a non-sequence binary token. */
/*
 * Decodes the token whose type byte is at the current read position.
 * For numbers, booleans and indexed names, the object is stored in
 * *pref, the read pointer is advanced past the token, and 0 is returned.
 * Strings and number arrays are handed to their "continue" routines,
 * whose result is returned; if that is scan_Refill or an error,
 * pbs->cont is set to the continue routine.
 * Other returns: scan_Refill when a BT_FIXED body is not fully buffered,
 * or a negative error code for malformed input or failed allocation.
 */
272 | | static int
273 | | scan_bin_scalar(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate)
274 | 2.30M | {
275 | 2.30M | stream *const s = pstate->s_file.value.pfile;
276 | 2.30M | scan_binary_state *const pbs = &pstate->s_ss.binary;
277 | 2.30M | s_declare_inline(s, p, rlimit);
278 | 2.30M | int num_format = pbs->num_format, code;
279 | 2.30M | uint wanted, arg;
280 | |
281 | 2.30M | s_begin_inline(s, p, rlimit);
/* Bytes that follow the type byte (the table counts the type byte too). */
282 | 2.30M | wanted = bin_token_bytes[*p - MIN_BIN_TOKEN_TYPE] - 1;
283 | 2.30M | switch (*p) {
284 | 133k | case BT_INT8:
/* (x ^ 128) - 128 sign-extends the 8-bit value. */
285 | 133k | make_int(pref, (p[1] ^ 128) - 128);
286 | 133k | s_end_inline(s, p + 1, rlimit);
287 | 133k | return 0;
288 | 291k | case BT_FIXED:
/* p[1] is the number format byte; the encoded value starts at p + 2. */
289 | 291k | num_format = p[1];
290 | 291k | if (!num_is_valid(num_format))
291 | 38 | return_error(gs_error_syntaxerror);
292 | 291k | wanted = 1 + encoded_number_bytes(num_format);
293 | 291k | if (rlimit - p < wanted) {
294 | 1.79k | s_end_inline(s, p - 1, rlimit);
295 | 1.79k | pstate->s_scan_type = scanning_none;
296 | 1.79k | return scan_Refill;
297 | 1.79k | }
298 | 289k | code = sdecode_number(p + 2, num_format, pref);
299 | 289k | goto rnum;
300 | 60.5k | case BT_INT32_MSB:
301 | 70.6k | case BT_INT32_LSB:
302 | 81.9k | case BT_INT16_MSB:
303 | 88.9k | case BT_INT16_LSB:
304 | 800k | case BT_FLOAT_IEEE_MSB:
305 | 862k | case BT_FLOAT_IEEE_LSB:
306 | 872k | case BT_FLOAT_NATIVE:
307 | 872k | code = sdecode_number(p + 1, num_format, pref);
308 | 1.16M | rnum:
/* code is the decoded ref type, t_null for a bad number, or another value passed through. */
309 | 1.16M | switch (code) {
310 | 89.6k | case t_integer:
311 | 1.16M | case t_real:
312 | 1.16M | r_set_type(pref, code);
313 | 1.16M | break;
314 | 0 | case t_null:
315 | 0 | return_error(gs_error_syntaxerror);
316 | 19 | default:
317 | 19 | return code;
318 | 1.16M | }
319 | 1.16M | s_end_inline(s, p + wanted, rlimit);
320 | 1.16M | return 0;
321 | 6.61k | case BT_BOOLEAN:
322 | 6.61k | arg = p[1];
/* Only the values 0 and 1 are accepted. */
323 | 6.61k | if (arg & ~1)
324 | 77 | return_error(gs_error_syntaxerror);
325 | 6.53k | make_bool(pref, arg);
326 | 6.53k | s_end_inline(s, p + 1, rlimit);
327 | 6.53k | return 0;
328 | 6.32k | case BT_STRING_256:
/* 1-byte length; p is left on the length byte. */
329 | 6.32k | arg = *++p;
330 | 6.32k | goto str;
331 | 1.72k | case BT_STRING_64K_MSB:
332 | 4.37k | case BT_STRING_64K_LSB:
/* 2-byte length; p is left on the last length byte. */
333 | 4.37k | arg = sdecodeushort(p + 1, num_format);
334 | 4.37k | p += 2;
335 | 10.6k | str:
/* arg is the string length in bytes. */
336 | 10.6k | if (s->foreign && rlimit - p >= arg) {
337 | | /*
338 | | * Reference the string directly in the buffer. It is
339 | | * marked writable for consistency with the non-direct
340 | | * case, but since the "buffer" may be data compiled into
341 | | * the executable, it is probably actually read-only.
342 | | */
343 | 0 | s_end_inline(s, p, rlimit);
344 | 0 | make_const_string(pref, a_all | avm_foreign, arg, sbufptr(s));
345 | 0 | return sbufskip(s, arg);
346 | 10.6k | } else {
347 | 10.6k | byte *str = ialloc_string(arg, "string token");
348 | |
349 | 10.6k | if (str == 0)
350 | 0 | return_error(gs_error_VMerror);
351 | 10.6k | s_end_inline(s, p, rlimit);
/* Point the scanner's dynamic area at the new string so it can be filled incrementally. */
352 | 10.6k | pstate->s_da.base = pstate->s_da.next = str;
353 | 10.6k | pstate->s_da.limit = str + arg;
354 | 10.6k | code = scan_bin_string_continue(i_ctx_p, pref, pstate);
355 | 10.6k | if (code == scan_Refill || code < 0) {
356 | 741 | pstate->s_da.is_dynamic = true;
357 | 741 | make_null(&pbs->bin_array); /* clean up for GC */
358 | 741 | pbs->cont = scan_bin_string_continue;
359 | 741 | }
360 | 10.6k | return code;
361 | 10.6k | }
/* Name tokens: p[1] is an index into the system or user name table. */
362 | 668k | case BT_LITNAME_SYSTEM:
363 | 668k | code = scan_bin_get_name(pstate, imemory, system_names_p, p[1],
364 | 668k | pref, "system");
365 | 668k | goto lname;
366 | 301k | case BT_EXECNAME_SYSTEM:
367 | 301k | code = scan_bin_get_name(pstate, imemory, system_names_p, p[1],
368 | 301k | pref, "system");
369 | 301k | goto xname;
370 | 47 | case BT_LITNAME_USER:
371 | 47 | code = scan_bin_get_name(pstate, imemory, user_names_p, p[1],
372 | 47 | pref, "user");
373 | 668k | lname:
374 | 668k | if (code < 0)
375 | 67 | return code;
376 | 668k | s_end_inline(s, p + 1, rlimit);
377 | 668k | return 0;
378 | 31 | case BT_EXECNAME_USER:
379 | 31 | code = scan_bin_get_name(pstate, imemory, user_names_p, p[1],
380 | 31 | pref, "user");
381 | 301k | xname:
382 | 301k | if (code < 0)
383 | 123 | return code;
384 | 301k | r_set_attrs(pref, a_executable);
385 | 301k | s_end_inline(s, p + 1, rlimit);
386 | 301k | return 0;
387 | 15.8k | case BT_NUM_ARRAY:
/* p[1] is the number format; the 16-bit element count is at p + 2. */
388 | 15.8k | num_format = p[1];
389 | 15.8k | if (!num_is_valid(num_format))
390 | 20 | return_error(gs_error_syntaxerror);
391 | 15.8k | arg = sdecodeushort(p + 2, num_format);
392 | 15.8k | code = ialloc_ref_array(&pbs->bin_array, a_all, arg,
393 | 15.8k | "number array token");
394 | 15.8k | if (code < 0)
395 | 0 | return code;
396 | 15.8k | pbs->num_format = num_format;
397 | 15.8k | pbs->index = 0;
/* Advance to the last header byte; the elements follow. */
398 | 15.8k | p += 3;
399 | 15.8k | s_end_inline(s, p, rlimit);
400 | 15.8k | code = scan_bin_num_array_continue(i_ctx_p, pref, pstate);
401 | 15.8k | if (code == scan_Refill || code < 0) {
402 | | /* Make sure the array is clean for the GC. */
403 | 1.16k | refset_null(pbs->bin_array.value.refs + pbs->index,
404 | 1.16k | arg - pbs->index);
405 | 1.16k | pbs->cont = scan_bin_num_array_continue;
406 | 1.16k | }
407 | 15.8k | return code;
408 | 2.30M | }
/* Any type byte without a case above is a syntax error. */
409 | 2.30M | return_error(gs_error_syntaxerror);
410 | 2.30M | }
411 | | |
412 | | /* Get a system or user name. */ |
413 | | static int |
414 | | scan_bin_get_name(scanner_state *pstate, const gs_memory_t *mem, |
415 | | const ref *pnames /*t_array*/, int index, ref *pref, |
416 | | const char *usstring) |
417 | 970k | { |
418 | | /* Convert all errors to gs_error_undefined to match Adobe. */ |
419 | 970k | if (pnames == 0 || array_get(mem, pnames, (long)index, pref) < 0 || |
420 | 970k | !r_has_type(pref, t_name)) { |
421 | 215 | snprintf(pstate->s_error.string, |
422 | 215 | sizeof(pstate->s_error.string), |
423 | 215 | "%s%d", usstring, index); |
424 | 215 | pstate->s_error.is_name = true; |
425 | 215 | return_error(gs_error_undefined); |
426 | 215 | } |
427 | 970k | return 0; |
428 | 970k | } |
429 | | |
430 | | /* Continue collecting a binary string. */ |
431 | | static int |
432 | | scan_bin_string_continue(i_ctx_t *i_ctx_p, ref * pref, scanner_state * pstate) |
433 | 58.6k | { |
434 | 58.6k | stream *const s = pstate->s_file.value.pfile; |
435 | 58.6k | byte *q = pstate->s_da.next; |
436 | 58.6k | uint wanted = pstate->s_da.limit - q; |
437 | 58.6k | uint rcnt; |
438 | | |
439 | | /* We don't check the return status from 'sgets' here. |
440 | | If there is an error in sgets, the condition rcnt==wanted |
441 | | would be false and this function will return scan_Refill. |
442 | | */ |
443 | 58.6k | sgets(s, q, wanted, &rcnt); |
444 | 58.6k | if (rcnt == wanted) { |
445 | | /* Finished collecting the string. */ |
446 | 39.1k | make_string(pref, a_all | icurrent_space, |
447 | 39.1k | pstate->s_da.limit - pstate->s_da.base, |
448 | 39.1k | pstate->s_da.base); |
449 | 39.1k | return 0; |
450 | 39.1k | } |
451 | 19.4k | pstate->s_da.next = q + rcnt; |
452 | 19.4k | pstate->s_scan_type = scanning_binary; |
453 | 19.4k | return scan_Refill; |
454 | 58.6k | } |
455 | | |
456 | | /* Continue scanning a binary number array. */ |
457 | | static int |
458 | | scan_bin_num_array_continue(i_ctx_t *i_ctx_p, ref * pref, |
459 | | scanner_state * pstate) |
460 | 23.7k | { |
461 | 23.7k | stream *const s = pstate->s_file.value.pfile; |
462 | 23.7k | scan_binary_state *const pbs = &pstate->s_ss.binary; |
463 | 23.7k | uint index = pbs->index; |
464 | 23.7k | ref *np = pbs->bin_array.value.refs + index; |
465 | 23.7k | uint wanted = encoded_number_bytes(pbs->num_format); |
466 | | |
467 | 2.05M | for (; index < r_size(&pbs->bin_array); index++, np++) { |
468 | 2.04M | int code; |
469 | | |
470 | 2.04M | if (sbufavailable(s) < wanted) { |
471 | 8.86k | pbs->index = index; |
472 | 8.86k | pstate->s_scan_type = scanning_binary; |
473 | 8.86k | return scan_Refill; |
474 | 8.86k | } |
475 | 2.03M | code = sdecode_number(sbufptr(s), pbs->num_format, np); |
476 | 2.03M | switch (code) { |
477 | 137k | case t_integer: |
478 | 2.03M | case t_real: |
479 | 2.03M | r_set_type(np, code); |
480 | 2.03M | (void)sbufskip(s, wanted); |
481 | 2.03M | break; |
482 | 0 | case t_null: |
483 | 0 | scan_bos_error(pstate, "bad number format"); |
484 | 0 | return_error(gs_error_syntaxerror); |
485 | 15 | default: |
486 | 15 | return code; |
487 | 2.03M | } |
488 | 2.03M | } |
489 | 14.8k | *pref = pbs->bin_array; |
490 | 14.8k | return 0; |
491 | 23.7k | } |
492 | | |
493 | | /*
494 | | * Continue scanning a binary object sequence. We preallocated space for
495 | | * the largest possible number of objects, but not for strings, since
496 | | * the latter would probably be a gross over-estimate. Instead,
497 | | * we wait until we see the first string or name, and allocate string space
498 | | * based on the hope that its string index is the smallest one we will see.
499 | | * If this turns out to be wrong, we may have to reallocate, and adjust
500 | | * all the pointers.
501 | | *
     * Returns scan_Refill (after saving index, max_array_index and
     * min_string_index in the scan state) when the next 8-byte object is
     * not fully buffered, a negative error code on malformed input or a
     * failed allocation, and otherwise the result of
     * scan_bos_string_continue once all objects have been decoded.
501 | | */
502 | | static int
503 | | scan_bos_continue(i_ctx_t *i_ctx_p, ref * pref, scanner_state * pstate)
504 | 30.4k | {
505 | 30.4k | stream *const s = pstate->s_file.value.pfile;
506 | 30.4k | scan_binary_state *const pbs = &pstate->s_ss.binary;
507 | 30.4k | s_declare_inline(s, p, rlimit);
508 | 30.4k | uint max_array_index = pbs->max_array_index;
509 | 30.4k | uint min_string_index = pbs->min_string_index;
510 | 30.4k | int num_format = pbs->num_format;
511 | 30.4k | uint index = pbs->index;
512 | 30.4k | uint size = pbs->size;
513 | 30.4k | ref *abase = pbs->bin_array.value.refs;
514 | 30.4k | int code;
515 | |
516 | 30.4k | pbs->cont = scan_bos_continue; /* in case of premature return */
517 | 30.4k | s_begin_inline(s, p, rlimit);
/*
 * Each object is 8 bytes starting at p + 1: the type byte (bit 7 means
 * executable), a reserved byte that must be 0, a 16-bit size field and
 * a 32-bit value field.
 */
518 | 54.1k | for (; index < max_array_index; p += SIZEOF_BIN_SEQ_OBJ, index++) {
519 | 24.8k | ref *op = abase + index;
520 | 24.8k | uint osize;
521 | 24.8k | int value, atype, attrs;
522 | |
523 | 24.8k | s_end_inline(s, p, rlimit); /* in case of error */
524 | 24.8k | if (rlimit - p < SIZEOF_BIN_SEQ_OBJ) {
525 | 825 | pbs->index = index;
526 | 825 | pbs->max_array_index = max_array_index;
527 | 825 | pbs->min_string_index = min_string_index;
528 | 825 | pstate->s_scan_type = scanning_binary;
529 | 825 | return scan_Refill;
530 | 825 | }
531 | 24.0k | if (p[2] != 0) { /* reserved, must be 0 */
532 | 10 | scan_bos_error(pstate, "non-zero unused field");
533 | 10 | return_error(gs_error_syntaxerror);
534 | 10 | }
535 | 24.0k | attrs = (p[1] & 128 ? a_executable : 0);
536 | | /*
537 | | * We always decode all 8 bytes of the object, so we can signal
538 | | * syntaxerror if any unused field is non-zero (per PLRM).
539 | | */
540 | 24.0k | osize = sdecodeushort(p + 3, num_format);
541 | 24.0k | value = sdecodeint32(p + 5, num_format);
542 | 24.0k | switch (p[1] & 0x7f) {
543 | 22.1k | case BS_TYPE_NULL:
544 | 22.1k | if (osize | value) { /* unused */
545 | 126 | scan_bos_error(pstate, "non-zero unused field");
546 | 126 | return_error(gs_error_syntaxerror);
547 | 126 | }
548 | 21.9k | make_null(op);
549 | 21.9k | break;
550 | 59 | case BS_TYPE_INTEGER:
551 | 59 | if (osize) { /* unused */
552 | 16 | scan_bos_error(pstate, "non-zero unused field");
553 | 16 | return_error(gs_error_syntaxerror);
554 | 16 | }
555 | 43 | make_int(op, value);
556 | 43 | break;
557 | 1.52k | case BS_TYPE_REAL:{
558 | 1.52k | float vreal;
559 | |
/* A non-zero size field means value is fixed-point, scaled by 2^-osize. */
560 | 1.52k | if (osize != 0) { /* fixed-point number */
561 | 35 | if (osize > 31) {
562 | 16 | scan_bos_error(pstate, "invalid number format");
563 | 16 | return_error(gs_error_syntaxerror);
564 | 16 | }
565 | | /* ldexp requires a signed 2nd argument.... */
566 | 19 | vreal = (float)ldexp((double)value, -(int)osize);
567 | 1.48k | } else {
568 | 1.48k | code = sdecode_float(p + 5, num_format, &vreal);
569 | 1.48k | if (code < 0) {
570 | 4 | scan_bos_error(pstate, "invalid real number");
571 | 4 | return code;
572 | 4 | }
573 | 1.48k | }
574 | 1.50k | make_real(op, vreal);
575 | 1.50k | break;
576 | 1.52k | }
577 | 57 | case BS_TYPE_BOOLEAN:
578 | 57 | if (osize) { /* unused */
579 | 15 | scan_bos_error(pstate, "non-zero unused field");
580 | 15 | return_error(gs_error_syntaxerror);
581 | 15 | }
582 | 42 | make_bool(op, value != 0);
583 | 42 | break;
584 | 94 | case BS_TYPE_STRING:
/* Real strings get a_all; names that arrive here via "goto str" do not. */
585 | 94 | attrs |= a_all;
586 | 187 | str:
587 | 187 | if (osize == 0) {
588 | | /* For zero-length strings, the offset */
589 | | /* doesn't matter, and may be zero. */
590 | 50 | make_empty_string(op, attrs);
591 | 50 | break;
592 | 50 | }
593 | 137 | {
/* value is the string's byte offset within the sequence body; osize is its length. */
594 | 137 | const uint beg_ofs = (uint)value;
595 | 137 | const uint end_ofs = beg_ofs + osize;
596 | |
/* String data must start after the object entries known so far and lie within the sequence. */
597 | 137 | if (beg_ofs < max_array_index * SIZEOF_BIN_SEQ_OBJ || beg_ofs > size) {
598 | 113 | scan_bos_error(pstate, "invalid string offset");
599 | 113 | return_error(gs_error_syntaxerror);
600 | 113 | }
/* end_ofs < beg_ofs catches wrap-around of the unsigned sum. */
601 | 24 | if (end_ofs < beg_ofs || end_ofs > size) {
602 | 4 | scan_bos_error(pstate, "invalid string length");
603 | 4 | return_error(gs_error_syntaxerror);
604 | 4 | }
605 | 20 | if (beg_ofs < min_string_index) {
606 | | /* We have to (re)allocate the strings. */
607 | 20 | uint str_size = size - beg_ofs;
608 | 20 | byte *sbase;
609 | |
610 | 20 | if (pstate->s_da.is_dynamic)
611 | 0 | sbase = scan_bos_resize(i_ctx_p, pstate, str_size,
612 | 0 | index);
613 | 20 | else
614 | 20 | sbase = ialloc_string(str_size,
615 | 20 | "bos strings");
616 | 20 | if (sbase == 0)
617 | 0 | return_error(gs_error_VMerror);
618 | 20 | pstate->s_da.is_dynamic = true;
619 | 20 | pstate->s_da.base = pstate->s_da.next = sbase;
620 | 20 | pstate->s_da.limit = sbase + str_size;
621 | 20 | min_string_index = beg_ofs;
622 | 20 | }
/* The string area begins at offset min_string_index, so rebase the pointer. */
623 | 20 | make_string(op, attrs | icurrent_space, osize,
624 | 20 | pstate->s_da.base +
625 | 20 | (beg_ofs - min_string_index));
626 | 20 | }
627 | 0 | break;
628 | 48 | case BS_TYPE_EVAL_NAME:
/* NOTE(review): scan_bos_string_continue tests a_read to recognize BS_TYPE_EVAL_NAME entries. */
629 | 48 | attrs |= a_readonly; /* mark as executable for later */
630 | | /* falls through */
631 | 120 | case BS_TYPE_NAME:
/* osize 0: user name index; 0xffff: system name index; otherwise the name's text is stored like a string. */
632 | 120 | switch (osize) {
633 | 14 | case 0:
634 | 14 | code = scan_bin_get_name(pstate, imemory,
635 | 14 | user_names_p, value, op,
636 | 14 | "user");
637 | 14 | goto usn;
638 | 13 | case 0xffff:
639 | 13 | code = scan_bin_get_name(pstate, imemory,
640 | 13 | system_names_p, value, op,
641 | 13 | "system");
642 | 27 | usn:
643 | 27 | if (code < 0)
644 | 25 | return code;
645 | 2 | r_set_attrs(op, attrs);
646 | 2 | break;
647 | 93 | default:
648 | 93 | goto str;
649 | 120 | }
650 | 2 | break;
651 | 56 | case BS_TYPE_ARRAY:
652 | 56 | atype = t_array;
653 | 56 | {
/* value is the byte offset of the first element; osize is the element count. */
654 | 56 | const uint beg_ofs = (uint)value;
655 | 56 | const uint end_ofs = beg_ofs + osize * SIZEOF_BIN_SEQ_OBJ;
656 | 56 | const uint beg_idx = beg_ofs / SIZEOF_BIN_SEQ_OBJ;
657 | 56 | const uint end_idx = end_ofs / SIZEOF_BIN_SEQ_OBJ;
658 | |
/* Elements must be 8-byte aligned and must not start inside the string area. */
659 | 56 | if (beg_ofs > min_string_index || beg_ofs & (SIZEOF_BIN_SEQ_OBJ - 1)) {
660 | 22 | scan_bos_error(pstate, "bad array offset");
661 | 22 | return_error(gs_error_syntaxerror);
662 | 22 | }
663 | 34 | if (osize > (size / 8) || end_ofs < beg_ofs || end_ofs > min_string_index) {
664 | 7 | scan_bos_error(pstate, "bad array length");
665 | 7 | return_error(gs_error_syntaxerror);
666 | 7 | }
667 | |
/* Extend the loop bound so the nested array's elements are decoded too. */
668 | 27 | max_array_index = max(max_array_index, end_idx);
669 | 27 | make_tasv_new(op, atype,
670 | 27 | attrs | a_all | icurrent_space,
671 | 27 | osize, refs, abase + beg_idx);
672 | 27 | }
673 | 0 | break;
674 | 36 | case BS_TYPE_MARK:
675 | 36 | if (osize | value) { /* unused */
676 | 18 | scan_bos_error(pstate, "non-zero unused field");
677 | 18 | return_error(gs_error_syntaxerror);
678 | 18 | }
679 | 18 | make_mark(op);
680 | 18 | break;
681 | 14 | default:
682 | 14 | scan_bos_error(pstate, "invalid object type");
683 | 14 | return_error(gs_error_syntaxerror);
684 | 24.0k | }
685 | 24.0k | }
686 | 29.2k | s_end_inline(s, p, rlimit);
687 | | /* Shorten the objects to remove the space that turned out */
688 | | /* to be used for strings. */
689 | 29.2k | pbs->index = max_array_index;
690 | 29.2k | iresize_ref_array(&pbs->bin_array, max_array_index,
691 | 29.2k | "binary object sequence(objects)");
692 | 29.2k | code = scan_bos_string_continue(i_ctx_p, pref, pstate);
/* If the string data is incomplete, resume in the string phase next time. */
693 | 29.2k | if (code == scan_Refill)
694 | 5 | pbs->cont = scan_bos_string_continue;
695 | 29.2k | return code;
696 | 30.4k | }
697 | | |
698 | | /* Reallocate the strings for a binary object sequence, */ |
699 | | /* adjusting all the pointers to them from objects. */ |
700 | | static byte * |
701 | | scan_bos_resize(i_ctx_t *i_ctx_p, scanner_state * pstate, uint new_size, |
702 | | uint index) |
703 | 0 | { |
704 | 0 | scan_binary_state *const pbs = &pstate->s_ss.binary; |
705 | 0 | uint old_size = da_size(&pstate->s_da); |
706 | 0 | byte *old_base = pstate->s_da.base; |
707 | 0 | byte *new_base = iresize_string(old_base, old_size, new_size, |
708 | 0 | "scan_bos_resize"); |
709 | 0 | byte *relocated_base = new_base + (new_size - old_size); |
710 | 0 | uint i; |
711 | 0 | ref *aptr = pbs->bin_array.value.refs; |
712 | |
|
713 | 0 | if (new_base == 0) |
714 | 0 | return 0; |
715 | | /* Since the allocator normally extends strings downward, */ |
716 | | /* it's quite possible that new and old addresses are the same. */ |
717 | 0 | if (relocated_base != old_base) |
718 | 0 | for (i = index; i != 0; i--, aptr++) |
719 | 0 | if (r_has_type(aptr, t_string) && r_size(aptr) != 0) |
720 | 0 | aptr->value.bytes = |
721 | 0 | aptr->value.bytes - old_base + relocated_base; |
722 | 0 | return new_base; |
723 | 0 | } |
724 | | |
725 | | /* Continue reading the strings for a binary object sequence. */ |
726 | | static int |
727 | | scan_bos_string_continue(i_ctx_t *i_ctx_p, ref * pref, |
728 | | scanner_state * pstate) |
729 | 29.2k | { |
730 | 29.2k | scan_binary_state *const pbs = &pstate->s_ss.binary; |
731 | 29.2k | ref rstr; |
732 | 29.2k | ref *op; |
733 | 29.2k | int code = scan_bin_string_continue(i_ctx_p, &rstr, pstate); |
734 | 29.2k | uint space = ialloc_space(idmemory); |
735 | 29.2k | uint i; |
736 | | |
737 | 29.2k | if (code != 0) |
738 | 5 | return code; |
739 | | |
740 | | /* Fix up names. We must do this before creating dictionaries. */ |
741 | | |
742 | 29.2k | for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array); |
743 | 33.5k | i != 0; i--, op++ |
744 | 29.2k | ) |
745 | 4.35k | switch (r_type(op)) { |
746 | 18 | case t_string: |
747 | 18 | if (r_has_attr(op, a_write)) /* a real string */ |
748 | 18 | break; |
749 | | /* This is actually a name; look it up now. */ |
750 | 0 | { |
751 | 0 | uint attrs = r_type_attrs(op) & (a_read | a_executable); |
752 | |
|
753 | 0 | code = name_ref(imemory, op->value.bytes, r_size(op), op, 1); |
754 | 0 | if (code < 0) |
755 | 0 | return code; |
756 | 0 | r_set_attrs(op, attrs); |
757 | 0 | } |
758 | | /* falls through */ |
759 | 0 | case t_name: |
760 | 0 | if (r_has_attr(op, a_read)) { /* BS_TYPE_EVAL_NAME */ |
761 | 0 | ref *defp = dict_find_name(op); |
762 | |
|
763 | 0 | if (defp == 0) |
764 | 0 | return_error(gs_error_undefined); |
765 | 0 | store_check_space(space, defp); |
766 | 0 | ref_assign(op, defp); |
767 | 0 | } |
768 | 0 | break; |
769 | 4.35k | } |
770 | | |
771 | 29.2k | ref_assign(pref, &pbs->bin_array); |
772 | 29.2k | r_set_size(pref, pbs->top_size); |
773 | 29.2k | return scan_BOS; |
774 | 29.2k | } |
775 | | |
776 | | /* ---------------- Writing ---------------- */ |
777 | | |
778 | | /* |
779 | | * Encode a single object for a binary object sequence, for printobject and |
780 | | * write object. Note that this does not modify the always-unused byte (1), |
781 | | * but it always write bytes 0 and 2-7. |
782 | | */ |
783 | | int |
784 | | encode_binary_token(i_ctx_t *i_ctx_p, const ref *obj, ps_int *ref_offset, |
785 | | ps_int *char_offset, byte *str) |
786 | 38 | { |
787 | 38 | bin_seq_type_t type; |
788 | 38 | uint size = 0; |
789 | 38 | int format = (int)ref_binary_object_format.value.intval; |
790 | 38 | ps_int value = 0; |
791 | 38 | ref nstr; |
792 | | |
793 | 38 | switch (r_type(obj)) { |
794 | 0 | case t_null: |
795 | 0 | type = BS_TYPE_NULL; |
796 | 0 | break; /* always set all fields */ |
797 | 15 | case t_mark: |
798 | 15 | type = BS_TYPE_MARK; |
799 | 15 | break; /* always set all fields */ |
800 | 10 | case t_integer: |
801 | 10 | type = BS_TYPE_INTEGER; |
802 | 10 | value = obj->value.intval; |
803 | 10 | break; |
804 | 3 | case t_real: |
805 | 3 | type = BS_TYPE_REAL; |
806 | 3 | if (sizeof(obj->value.realval) != sizeof(int)) { |
807 | | /* The PLRM allocates exactly 4 bytes for reals. */ |
808 | 0 | return_error(gs_error_rangecheck); |
809 | 0 | } |
810 | 3 | value = *(const ps_int *)&obj->value.realval; |
811 | | #if !(ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS) |
812 | | if (format >= 3) { |
813 | | /* Never byte-swap native reals -- use native byte order. */ |
814 | | format = 4 - ARCH_IS_BIG_ENDIAN; |
815 | | } |
816 | | #endif |
817 | 3 | break; |
818 | 0 | case t_boolean: |
819 | 0 | type = BS_TYPE_BOOLEAN; |
820 | 0 | value = obj->value.boolval; |
821 | 0 | break; |
822 | 2 | case t_array: |
823 | 2 | type = BS_TYPE_ARRAY; |
824 | 2 | size = r_size(obj); |
825 | 2 | value = *ref_offset; |
826 | 2 | *ref_offset += size * SIZEOF_BIN_SEQ_OBJ; |
827 | 2 | break; |
828 | 2 | case t_string: |
829 | 2 | type = BS_TYPE_STRING; |
830 | 8 | nos: |
831 | 8 | size = r_size(obj); |
832 | 8 | value = *char_offset; |
833 | 8 | *char_offset += size; |
834 | 8 | break; |
835 | 6 | case t_name: |
836 | 6 | type = BS_TYPE_NAME; |
837 | 6 | name_string_ref(imemory, obj, &nstr); |
838 | 6 | r_copy_attrs(&nstr, a_executable, obj); |
839 | 6 | obj = &nstr; |
840 | 6 | goto nos; |
841 | 0 | default: |
842 | 0 | return_error(gs_error_rangecheck); |
843 | 38 | } |
844 | 38 | { |
845 | 38 | byte s0 = (byte) size, s1 = (byte) (size >> 8); |
846 | 38 | byte v0 = (byte) value, v1 = (byte) (value >> 8), |
847 | 38 | v2 = (byte) (value >> 16), v3 = (byte) (value >> 24); |
848 | | |
849 | 38 | if (format & 1) { |
850 | | /* Store big-endian */ |
851 | 0 | str[2] = s1, str[3] = s0; |
852 | 0 | str[4] = v3, str[5] = v2, str[6] = v1, str[7] = v0; |
853 | 38 | } else { |
854 | | /* Store little-endian */ |
855 | 38 | str[2] = s0, str[3] = s1; |
856 | 38 | str[4] = v0, str[5] = v1, str[6] = v2, str[7] = v3; |
857 | 38 | } |
858 | 38 | } |
859 | 38 | if (r_has_attr(obj, a_executable)) |
860 | 2 | type += BS_EXECUTABLE; |
861 | 38 | str[0] = (byte) type; |
862 | 38 | return 0; |
863 | 38 | } |