/src/ghostpdl/psi/iscannum.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2001-2021 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, |
13 | | CA 94945, U.S.A., +1(415)492-9861, for further information. |
14 | | */ |
15 | | |
16 | | |
17 | | /* Number scanner for Ghostscript interpreter */ |
18 | | #include "math_.h" |
19 | | #include "ghost.h" |
20 | | #include "ierrors.h" |
21 | | #include "scommon.h" |
22 | | #include "iscan.h" |
23 | | #include "iscannum.h" /* defines interface */ |
24 | | #include "scanchar.h" |
25 | | #include "store.h" |
26 | | |
27 | | /* |
28 | | * Warning: this file has a "spaghetti" control structure. But since this |
29 | | * code accounts for over 10% of the execution time of some PostScript |
30 | | * files, this is one of the few places we feel this is justified. |
31 | | */ |
32 | | |
33 | | /* |
34 | | * Scan a number. If the number consumes the entire string, return 0; |
35 | | * if not, set *psp to the first character beyond the number and return 1. |
36 | | */ |
37 | | int |
38 | | scan_number(const byte * str, const byte * end, int sign, |
39 | | ref * pref, const byte ** psp, int scanner_options) |
40 | 3.02G | { |
41 | 3.02G | const byte *sp = str; |
42 | 3.02G | #define GET_NEXT(cvar, sp, end_action)\ |
43 | 10.7G | if (sp >= end) { end_action; } else cvar = *sp++ |
44 | | |
45 | | /* |
46 | | * Powers of 10 up to 6 can be represented accurately as |
47 | | * a single-precision float. |
48 | | */ |
49 | 3.02G | #define NUM_POWERS_10 6 |
50 | 3.02G | static const float powers_10[NUM_POWERS_10 + 1] = { |
51 | 3.02G | 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6 |
52 | 3.02G | }; |
53 | 3.02G | static const double neg_powers_10[NUM_POWERS_10 + 1] = { |
54 | 3.02G | 1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6 |
55 | 3.02G | }; |
56 | | |
57 | 3.02G | ps_int ival; |
58 | 3.02G | double dval; |
59 | 3.02G | int exp10; |
60 | 3.02G | int code = 0; |
61 | 3.02G | int c, d; |
62 | 3.02G | ps_uint max_scan; /* max signed or unsigned int */ |
63 | 3.02G | ps_int max_ps_int_scan, min_ps_int_scan; |
64 | 3.02G | const byte *const decoder = scan_char_decoder; |
65 | 3.02G | #define IS_DIGIT(d, c)\ |
66 | 7.86G | ((d = decoder[c]) < 10) |
67 | 3.02G | #define WOULD_OVERFLOW(val, d, maxv)\ |
68 | 3.02G | (val >= maxv / 10 && (val > maxv / 10 || d > (int64_t)(maxv % 10))) |
69 | | |
70 | 3.02G | GET_NEXT(c, sp, return_error(gs_error_syntaxerror)); |
71 | 3.02G | if (!IS_DIGIT(d, c)) { |
72 | 546M | if (c != '.') |
73 | 1.27M | return_error(gs_error_syntaxerror); |
74 | | /* Might be a number starting with '.'. */ |
75 | 545M | GET_NEXT(c, sp, return_error(gs_error_syntaxerror)); |
76 | 545M | if (!IS_DIGIT(d, c)) |
77 | 544M | return_error(gs_error_syntaxerror); |
78 | 1.12M | ival = 0; |
79 | 1.12M | goto i2r; |
80 | 545M | } |
81 | | /* Accumulate an integer in ival. */ |
82 | | /* Do up to 4 digits without a loop, */ |
83 | | /* since we know this can't overflow and since */ |
84 | | /* most numbers have 4 (integer) digits or fewer. */ |
85 | 2.47G | ival = d; |
86 | 2.47G | if (end - sp >= 3) { /* just check once */ |
87 | 2.40G | if (!IS_DIGIT(d, (c = *sp))) { |
88 | 816M | sp++; |
89 | 816M | goto ind; |
90 | 816M | } |
91 | 1.58G | ival = ival * 10 + d; |
92 | 1.58G | if (!IS_DIGIT(d, (c = sp[1]))) { |
93 | 1.51G | sp += 2; |
94 | 1.51G | goto ind; |
95 | 1.51G | } |
96 | 66.3M | ival = ival * 10 + d; |
97 | 66.3M | sp += 3; |
98 | 66.3M | if (!IS_DIGIT(d, (c = sp[-1]))) |
99 | 40.1M | goto ind; |
100 | 26.1M | ival = ival * 10 + d; |
101 | 26.1M | } |
102 | | |
103 | 99.1M | max_ps_int_scan = scanner_options & SCAN_CPSI_MODE ? MAX_PS_INT32 : MAX_PS_INT; |
104 | 99.1M | min_ps_int_scan = scanner_options & SCAN_CPSI_MODE ? MIN_PS_INT32 : MIN_PS_INT; |
105 | | |
106 | 99.1M | max_scan = scanner_options & SCAN_PDF_UNSIGNED && sign >= 0 ? ~((ps_int)0) : max_ps_int_scan; |
107 | | |
108 | 142M | for (;; ival = ival * 10 + d) { |
109 | 142M | GET_NEXT(c, sp, goto iret); |
110 | 66.4M | if (!IS_DIGIT(d, c)) |
111 | 22.7M | break; |
112 | 43.7M | if (WOULD_OVERFLOW(((ps_uint)ival), d, max_scan)) { |
113 | 12.7k | if (ival == max_ps_int_scan / 10 && d == (max_ps_int_scan % 10) + 1 && sign < 0) { |
114 | 48 | GET_NEXT(c, sp, c = EOFC); |
115 | 48 | dval = -(double)min_ps_int_scan; |
116 | 48 | if (c == 'e' || c == 'E') { |
117 | 1 | exp10 = 0; |
118 | 1 | goto fs; |
119 | 47 | } else if (c == '.') { |
120 | 0 | GET_NEXT(c, sp, c = EOFC); |
121 | 0 | exp10 = 0; |
122 | 0 | goto fd; |
123 | 47 | } else if (!IS_DIGIT(d, c)) { |
124 | 43 | ival = min_ps_int_scan; |
125 | 43 | break; |
126 | 43 | } |
127 | 48 | } else |
128 | 12.6k | dval = (double)ival; |
129 | 12.6k | goto l2d; |
130 | 12.7k | } |
131 | 43.7M | } |
132 | 2.39G | ind: /* We saw a non-digit while accumulating an integer in ival. */ |
133 | 2.39G | switch (c) { |
134 | 21.2M | case '.': |
135 | 21.2M | GET_NEXT(c, sp, c = EOFC); |
136 | 21.2M | goto i2r; |
137 | 911M | default: |
138 | 911M | *psp = sp; |
139 | 911M | code = 1; |
140 | 911M | break; |
141 | 16 | case EOFC: |
142 | 16 | break; |
143 | 369k | case 'e': |
144 | 373k | case 'E': |
145 | 373k | if (sign < 0) |
146 | 243 | ival = -ival; |
147 | 373k | dval = (double)ival; |
148 | 373k | exp10 = 0; |
149 | 373k | goto fe; |
150 | 1.46G | case '#': |
151 | 1.46G | { |
152 | 1.46G | const int radix = ival; |
153 | 1.46G | ps_int uval = 0, imax; |
154 | | |
155 | 1.46G | if (sign || radix < min_radix || radix > max_radix) |
156 | 14.8k | return_error(gs_error_syntaxerror); |
157 | | /* Avoid multiplies for power-of-2 radix. */ |
158 | 1.46G | if (!(radix & (radix - 1))) { |
159 | 1.46G | int shift; |
160 | | |
161 | 1.46G | switch (radix) { |
162 | 94.8k | case 2: |
163 | 94.8k | shift = 1, imax = MAX_PS_UINT >> 1; |
164 | 94.8k | break; |
165 | 12.4k | case 4: |
166 | 12.4k | shift = 2, imax = MAX_PS_UINT >> 2; |
167 | 12.4k | break; |
168 | 1.71k | case 8: |
169 | 1.71k | shift = 3, imax = MAX_PS_UINT >> 3; |
170 | 1.71k | break; |
171 | 1.46G | case 16: |
172 | 1.46G | shift = 4, imax = MAX_PS_UINT >> 4; |
173 | 1.46G | break; |
174 | 319 | case 32: |
175 | 319 | shift = 5, imax = MAX_PS_UINT >> 5; |
176 | 319 | break; |
177 | 0 | default: /* can't happen */ |
178 | 0 | return_error(gs_error_rangecheck); |
179 | 1.46G | } |
180 | 6.98G | for (;; uval = (uval << shift) + d) { |
181 | 6.98G | GET_NEXT(c, sp, break); |
182 | 6.27G | d = decoder[c]; |
183 | 6.27G | if (d >= radix) { |
184 | 758M | *psp = sp; |
185 | 758M | code = 1; |
186 | 758M | break; |
187 | 758M | } |
188 | 5.51G | if (uval > imax) |
189 | 17 | return_error(gs_error_limitcheck); |
190 | 5.51G | } |
191 | 1.46G | } else { |
192 | 9.54k | ps_int irem = MAX_PS_UINT % radix; |
193 | | |
194 | 9.54k | imax = MAX_PS_UINT / radix; |
195 | 20.1k | for (;; uval = uval * radix + d) { |
196 | 20.1k | GET_NEXT(c, sp, break); |
197 | 15.7k | d = decoder[c]; |
198 | 15.7k | if (d >= radix) { |
199 | 5.07k | *psp = sp; |
200 | 5.07k | code = 1; |
201 | 5.07k | break; |
202 | 5.07k | } |
203 | 10.7k | if (uval >= imax && |
204 | 10.7k | (uval > imax || d > irem) |
205 | 10.7k | ) |
206 | 76 | return_error(gs_error_limitcheck); |
207 | 10.7k | } |
208 | 9.54k | } |
209 | 1.46G | if (scanner_options & SCAN_CPSI_MODE) { |
210 | 0 | ps_uint32 int1 = 0; |
211 | 0 | int1 |= (uval & 0xffffffff); |
212 | 0 | make_int(pref, (ps_int)((ps_int32)int1)); |
213 | 0 | } |
214 | 1.46G | else |
215 | 1.46G | make_int(pref, uval); |
216 | | |
217 | 1.46G | return code; |
218 | 1.46G | } |
219 | 2.39G | } |
220 | 988M | iret: |
221 | 988M | if (scanner_options & SCAN_CPSI_MODE) { |
222 | 0 | make_int(pref, (sign < 0 ? (ps_int32)-ival : (ps_int32)ival)); |
223 | 0 | } |
224 | 988M | else { |
225 | 988M | make_int(pref, (sign < 0 ? (ps_int)-ival : (ps_int)ival)); |
226 | 988M | } |
227 | 988M | return code; |
228 | | |
229 | | /* Accumulate a double in dval. */ |
230 | 12.6k | l2d: |
231 | 12.6k | exp10 = 0; |
232 | 175k | for (;;) { |
233 | 175k | dval = dval * 10 + d; |
234 | 175k | GET_NEXT(c, sp, c = EOFC); |
235 | 175k | if (!IS_DIGIT(d, c)) |
236 | 12.6k | break; |
237 | 175k | } |
238 | 12.6k | switch (c) { |
239 | 200 | case '.': |
240 | 200 | GET_NEXT(c, sp, c = EOFC); |
241 | 200 | exp10 = 0; |
242 | 200 | goto fd; |
243 | 6.71k | default: |
244 | 6.71k | *psp = sp; |
245 | 6.71k | code = 1; |
246 | | /* falls through */ |
247 | 10.1k | case EOFC: |
248 | 10.1k | if (sign < 0) |
249 | 2.05k | dval = -dval; |
250 | 10.1k | goto rret; |
251 | 155 | case 'e': |
252 | 258 | case 'E': |
253 | 258 | exp10 = 0; |
254 | 258 | goto fs; |
255 | 2.05k | case '#': |
256 | 2.05k | return_error(gs_error_syntaxerror); |
257 | 12.6k | } |
258 | | |
259 | | /* We saw a '.' while accumulating an integer in ival. */ |
260 | 22.3M | i2r: |
261 | 22.3M | exp10 = 0; |
262 | 88.1M | while (IS_DIGIT(d, c) || c == '-') { |
263 | | /* |
264 | | * PostScript gives an error on numbers with a '-' following a '.' |
265 | | * Adobe Acrobat Reader (PDF) apparently doesn't treat this as an |
266 | | * error. Experiments show that the numbers following the '-' are |
267 | | * ignored, so we swallow the fractional part. SCAN_PDF_INV_NUM |
268 | | * enables this compatibility kloodge. |
269 | | */ |
270 | 65.7M | if (c == '-') { |
271 | 144 | if ((SCAN_PDF_INV_NUM & scanner_options) == 0) |
272 | 144 | break; |
273 | 0 | do { |
274 | 0 | GET_NEXT(c, sp, c = EOFC); |
275 | 0 | } while (IS_DIGIT(d, c)); |
276 | 0 | break; |
277 | 144 | } |
278 | 65.7M | if (WOULD_OVERFLOW(ival, d, max_int)) { |
279 | 5.63k | dval = (double)ival; |
280 | 5.63k | goto fd; |
281 | 5.63k | } |
282 | 65.7M | ival = ival * 10 + d; |
283 | 65.7M | exp10--; |
284 | 65.7M | GET_NEXT(c, sp, c = EOFC); |
285 | 65.7M | } |
286 | 22.3M | if (sign < 0) |
287 | 903k | ival = -ival; |
288 | | /* Take a shortcut for the common case */ |
289 | 22.3M | if (!(c == 'e' || c == 'E' || exp10 < -NUM_POWERS_10)) { /* Check for trailing garbage */ |
290 | 21.1M | if (c != EOFC) |
291 | 19.0M | *psp = sp, code = 1; |
292 | 21.1M | make_real(pref, ival * neg_powers_10[-exp10]); |
293 | 21.1M | return code; |
294 | 21.1M | } |
295 | 1.16M | dval = (double)ival; |
296 | 1.16M | goto fe; |
297 | | |
298 | | /* Now we are accumulating a double in dval. */ |
299 | 5.83k | fd: |
300 | 119k | while (IS_DIGIT(d, c)) { |
301 | 113k | dval = dval * 10 + d; |
302 | 113k | exp10--; |
303 | 113k | GET_NEXT(c, sp, c = EOFC); |
304 | 113k | } |
305 | 6.09k | fs: |
306 | 6.09k | if (sign < 0) |
307 | 725 | dval = -dval; |
308 | 1.54M | fe: |
309 | | /* Now dval contains the value, negated if necessary. */ |
310 | 1.54M | switch (c) { |
311 | 370k | case 'e': |
312 | 376k | case 'E': |
313 | 376k | { /* Check for a following exponent. */ |
314 | 376k | int esign = 0; |
315 | 376k | int iexp; |
316 | | |
317 | 376k | GET_NEXT(c, sp, return_error(gs_error_syntaxerror)); |
318 | 284k | switch (c) { |
319 | 2.84k | case '-': |
320 | 2.84k | esign = 1; |
321 | | /* fall through */ |
322 | 3.39k | case '+': |
323 | 3.39k | GET_NEXT(c, sp, return_error(gs_error_syntaxerror)); |
324 | 284k | } |
325 | | /* Scan the exponent. We limit it arbitrarily to 999. */ |
326 | 284k | if (!IS_DIGIT(d, c)) |
327 | 97.7k | return_error(gs_error_syntaxerror); |
328 | 187k | iexp = d; |
329 | 376k | for (;; iexp = iexp * 10 + d) { |
330 | 376k | GET_NEXT(c, sp, break); |
331 | 285k | if (!IS_DIGIT(d, c)) { |
332 | 96.0k | *psp = sp; |
333 | 96.0k | code = 1; |
334 | 96.0k | break; |
335 | 96.0k | } |
336 | 189k | if (iexp > 99) |
337 | 85 | return_error(gs_error_limitcheck); |
338 | 189k | } |
339 | 186k | if (esign) |
340 | 2.74k | exp10 -= iexp; |
341 | 184k | else |
342 | 184k | exp10 += iexp; |
343 | 186k | break; |
344 | 187k | } |
345 | 987k | default: |
346 | 987k | *psp = sp; |
347 | 987k | code = 1; |
348 | 1.16M | case EOFC: |
349 | 1.16M | ; |
350 | 1.54M | } |
351 | | /* Compute dval * 10^exp10. */ |
352 | 1.35M | if (exp10 > 0) { |
353 | 372k | while (exp10 > NUM_POWERS_10) |
354 | 190k | dval *= powers_10[NUM_POWERS_10], |
355 | 190k | exp10 -= NUM_POWERS_10; |
356 | 182k | dval *= powers_10[exp10]; |
357 | 1.17M | } else if (exp10 < 0) { |
358 | 2.46M | while (exp10 < -NUM_POWERS_10) |
359 | 1.29M | dval /= powers_10[NUM_POWERS_10], |
360 | 1.29M | exp10 += NUM_POWERS_10; |
361 | 1.16M | dval /= powers_10[-exp10]; |
362 | 1.16M | } |
363 | | /* |
364 | | * Check for an out-of-range result. Currently we don't check for |
365 | | * absurdly large numbers of digits in the accumulation loops, |
366 | | * but we should. |
367 | | */ |
368 | 1.35M | if (dval >= 0) { |
369 | 1.35M | if (dval > MAX_FLOAT) |
370 | 179 | return_error(gs_error_limitcheck); |
371 | 1.35M | } else { |
372 | 1.23k | if (dval < -MAX_FLOAT) |
373 | 55 | return_error(gs_error_limitcheck); |
374 | 1.23k | } |
375 | 1.36M | rret: |
376 | 1.36M | make_real(pref, dval); |
377 | 1.36M | return code; |
378 | 1.35M | } |