/src/php-src/ext/standard/scanf.c
Line | Count | Source |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright © The PHP Group and Contributors. | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to the Modified BSD License that is | |
6 | | | bundled with this package in the file LICENSE, and is available | |
7 | | | through the World Wide Web at <https://www.php.net/license/>. | |
8 | | | | |
9 | | | SPDX-License-Identifier: BSD-3-Clause | |
10 | | +----------------------------------------------------------------------+ |
11 | | | Author: Clayton Collie <clcollie@mindspring.com> | |
12 | | +----------------------------------------------------------------------+ |
13 | | */ |
14 | | |
15 | | /* |
16 | | scanf.c -- |
17 | | |
18 | | This file contains the base code which implements sscanf and by extension |
19 | | fscanf. Original code is from TCL8.3.0 and bears the following copyright: |
20 | | |
21 | | This software is copyrighted by the Regents of the University of |
22 | | California, Sun Microsystems, Inc., Scriptics Corporation, |
23 | | and other parties. The following terms apply to all files associated |
24 | | with the software unless explicitly disclaimed in individual files. |
25 | | |
26 | | The authors hereby grant permission to use, copy, modify, distribute, |
27 | | and license this software and its documentation for any purpose, provided |
28 | | that existing copyright notices are retained in all copies and that this |
29 | | notice is included verbatim in any distributions. No written agreement, |
30 | | license, or royalty fee is required for any of the authorized uses. |
31 | | Modifications to this software may be copyrighted by their authors |
32 | | and need not follow the licensing terms described here, provided that |
33 | | the new terms are clearly indicated on the first page of each file where |
34 | | they apply. |
35 | | |
36 | | IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY |
37 | | FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
38 | | ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY |
39 | | DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE |
40 | | POSSIBILITY OF SUCH DAMAGE. |
41 | | |
42 | | THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, |
43 | | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, |
44 | | FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE |
45 | | IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE |
46 | | NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR |
47 | | MODIFICATIONS. |
48 | | |
49 | | GOVERNMENT USE: If you are acquiring this software on behalf of the |
50 | | U.S. government, the Government shall have only "Restricted Rights" |
51 | | in the software and related documentation as defined in the Federal |
52 | | Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you |
53 | | are acquiring the software on behalf of the Department of Defense, the |
54 | | software shall be classified as "Commercial Computer Software" and the |
55 | | Government shall have only "Restricted Rights" as defined in Clause |
56 | | 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the |
57 | | authors grant the U.S. Government and others acting in its behalf |
58 | | permission to use and distribute the software in accordance with the |
59 | | terms specified in this license. |
60 | | */ |
61 | | |
62 | | #include <stdio.h> |
63 | | #include <limits.h> |
64 | | #include <ctype.h> |
65 | | #include "php.h" |
66 | | #include "php_variables.h" |
67 | | #include <locale.h> |
68 | | #include "zend_execute.h" |
69 | | #include "zend_operators.h" |
70 | | #include "zend_strtod.h" |
71 | | #include "php_globals.h" |
72 | | #include "basic_functions.h" |
73 | | #include "scanf.h" |
74 | | |
75 | | /* |
76 | | * Flag values used internally by [f|s]canf. They are OR-ed into the per-conversion "flags" word. |
77 | | */ |
78 | 0 | #define SCAN_NOSKIP 0x1 /* Don't skip blanks. */ |
79 | 0 | #define SCAN_SUPPRESS 0x2 /* Suppress assignment. */ |
80 | 0 | #define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */ |
81 | 0 | #define SCAN_WIDTH 0x8 /* A width value was supplied. */ |
82 | | /* The flags below track state while the characters of a number are being accumulated. */
83 | 0 | #define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */ |
84 | 0 | #define SCAN_NODIGITS 0x20 /* No digits have been scanned. */ |
85 | 0 | #define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */ |
86 | 0 | #define SCAN_XOK 0x80 /* An 'x' is allowed. */ |
87 | 0 | #define SCAN_PTOK 0x100 /* Decimal point is allowed. */ |
88 | 0 | #define SCAN_EXPOK 0x200 /* An exponent is allowed. */ |
89 | | /* Cast to zend_uchar; used in this file before handing a char to isdigit(). */
90 | | #define UCHAR(x) (zend_uchar)(x) |
91 | | |
92 | | /* |
93 | | * The following structure contains the information associated with |
94 | | * a character set, i.e. the parsed form of a "%[...]" conversion. It is filled in by BuildCharSet(), queried by CharInSet() and freed by ReleaseCharSet(). |
95 | | */ |
96 | | typedef struct CharSet { |
97 | | int exclude; /* 1 if this is an exclusion set. */ |
98 | | int nchars; /* Number of entries used in chars. */ |
99 | | char *chars; /* Individual member characters (not NUL-terminated). */ |
100 | | int nranges; /* Number of entries used in ranges. */ |
101 | | struct Range { |
102 | | char start; /* Lower bound of the range (inclusive), as a plain char. */ |
103 | | char end; /* Upper bound of the range (inclusive), as a plain char. */ |
104 | | } *ranges; /* Character ranges; NULL when the set contains no '-'. */ |
105 | | } CharSet; |
106 | | |
107 | | typedef zend_long (*int_string_formater)(const char*, char**, int); /* strtol-style converter (text, end pointer, base) chosen per integer conversion */ |
108 | | 
109 | | /* |
110 | | * Declarations for functions used only in this file. |
111 | | */ |
112 | | static char *BuildCharSet(CharSet *cset, char *format); /* parse a "%[...]" set into cset */ |
113 | | static int CharInSet(CharSet *cset, int ch); /* membership test against a parsed set */ |
114 | | static void ReleaseCharSet(CharSet *cset); /* free the storage owned by cset */ |
115 | | static inline void scan_set_error_return(int numVars, zval *return_value); |
116 | | |
117 | | |
118 | | /* {{{ BuildCharSet |
119 | | *---------------------------------------------------------------------- |
120 | | * |
121 | | * BuildCharSet -- |
122 | | * |
123 | | * Parses the body of a "%[...]" conversion and fills in a CharSet with |
124 | | * the individual characters and character ranges it names. "format" |
125 | | * points just past the '['. The scan loops stop only at ']', so the caller must guarantee that the set is closed. |
126 | | * |
127 | | * Results: |
128 | | * Returns the format position just past the closing ']'. |
129 | | * |
130 | | * Side effects: |
131 | | * Zeroes *cset and allocates cset->chars (plus cset->ranges when the set contains a '-'); free them with ReleaseCharSet(). |
132 | | * |
133 | | *---------------------------------------------------------------------- |
134 | | */ |
135 | | static char * BuildCharSet(CharSet *cset, char *format) |
136 | 0 | { |
137 | 0 | char *ch, start; |
138 | 0 | int nranges; |
139 | 0 | char *end; |
140 | 

141 | 0 | memset(cset, 0, sizeof(CharSet)); |
142 | 

143 | 0 | ch = format; |
144 | 0 | if (*ch == '^') { /* a leading '^' turns the set into an exclusion set */ |
145 | 0 | cset->exclude = 1; |
146 | 0 | ch = ++format; |
147 | 0 | } |
148 | 0 | end = format + 1; /* "end" always runs one position ahead of "ch" in the scan below */ |
149 | | 
150 | | /* |
151 | | * Find the close bracket so we can overallocate the set. A ']' in first position is a literal member, so step over it before looking for the terminator. Every '-' is counted as a potential range. |
152 | | */ |
153 | 0 | if (*ch == ']') { |
154 | 0 | ch = end++; |
155 | 0 | } |
156 | 0 | nranges = 0; |
157 | 0 | while (*ch != ']') { |
158 | 0 | if (*ch == '-') { |
159 | 0 | nranges++; |
160 | 0 | } |
161 | 0 | ch = end++; |
162 | 0 | } |
163 | 

164 | 0 | cset->chars = (char *) safe_emalloc(sizeof(char), (end - format), 0); /* characters before the closing ']' plus one spare: for "--]" and "]-]" the leading character is stored by the special case below and stored again together with the trailing dash, so one more entry is written than there are characters */ |
165 | 0 | if (nranges > 0) { |
166 | 0 | cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0); |
167 | 0 | } else { |
168 | 0 | cset->ranges = NULL; |
169 | 0 | } |
170 | | 
171 | | /* |
172 | | * Now build the character set. Throughout the loop "ch" is the character being examined and "format" is the one after it. |
173 | | */ |
174 | 0 | cset->nchars = cset->nranges = 0; |
175 | 0 | ch = format++; |
176 | 0 | start = *ch; |
177 | 0 | if (*ch == ']' || *ch == '-') { /* a ']' or '-' in first position is always a literal member */ |
178 | 0 | cset->chars[cset->nchars++] = *ch; |
179 | 0 | ch = format++; |
180 | 0 | } |
181 | 0 | while (*ch != ']') { |
182 | 0 | if (*format == '-') { |
183 | | /* |
184 | | * This may be the first character of a range, so don't add |
185 | | * it yet. |
186 | | */ |
187 | 0 | start = *ch; |
188 | 0 | } else if (*ch == '-') { |
189 | | /* |
190 | | * Check to see if this is the last character in the set, in which |
191 | | * case it is not a range and we should add the previous character |
192 | | * as well as the dash. |
193 | | */ |
194 | 0 | if (*format == ']') { |
195 | 0 | cset->chars[cset->nchars++] = start; |
196 | 0 | cset->chars[cset->nchars++] = *ch; |
197 | 0 | } else { |
198 | 0 | ch = format++; /* consume the range's second endpoint */ |
199 | | 
200 | | /* |
201 | | * Check to see if the range is in reverse order; it is stored with start <= end (compared as plain char) either way. |
202 | | */ |
203 | 0 | if (start < *ch) { |
204 | 0 | cset->ranges[cset->nranges].start = start; |
205 | 0 | cset->ranges[cset->nranges].end = *ch; |
206 | 0 | } else { |
207 | 0 | cset->ranges[cset->nranges].start = *ch; |
208 | 0 | cset->ranges[cset->nranges].end = start; |
209 | 0 | } |
210 | 0 | cset->nranges++; |
211 | 0 | } |
212 | 0 | } else { |
213 | 0 | cset->chars[cset->nchars++] = *ch; |
214 | 0 | } |
215 | 0 | ch = format++; |
216 | 0 | } |
217 | 0 | return format; /* "ch" rests on the closing ']', so "format" is one past it */ |
218 | 0 | } |
219 | | /* }}} */ |
220 | | |
221 | | /* {{{ CharInSet |
222 | | *---------------------------------------------------------------------- |
223 | | * |
224 | | * CharInSet -- |
225 | | * |
226 | | * Check to see if a character matches the given set. The character is narrowed to plain char and looked up first among the individual members, then among the inclusive ranges. |
227 | | * |
228 | | * Results: |
229 | | * Returns non-zero if the character matches the given set; for an exclusion set the result of the lookup is inverted. |
230 | | * |
231 | | * Side effects: |
232 | | * None. |
233 | | * |
234 | | *---------------------------------------------------------------------- |
235 | | */ |
236 | | static int CharInSet(CharSet *cset, int c) |
237 | 0 | { |
238 | 0 | char ch = (char) c; /* compare in the same plain-char domain the set was stored in */ |
239 | 0 | int i, match = 0; |
240 | 

241 | 0 | for (i = 0; i < cset->nchars; i++) { /* individual members */ |
242 | 0 | if (cset->chars[i] == ch) { |
243 | 0 | match = 1; |
244 | 0 | break; |
245 | 0 | } |
246 | 0 | } |
247 | 0 | if (!match) { |
248 | 0 | for (i = 0; i < cset->nranges; i++) { /* inclusive ranges, only consulted when no single member matched */ |
249 | 0 | if ((cset->ranges[i].start <= ch) |
250 | 0 | && (ch <= cset->ranges[i].end)) { |
251 | 0 | match = 1; |
252 | 0 | break; |
253 | 0 | } |
254 | 0 | } |
255 | 0 | } |
256 | 0 | return (cset->exclude ? !match : match); |
257 | 0 | } |
258 | | /* }}} */ |
259 | | |
260 | | /* {{{ ReleaseCharSet |
261 | | *---------------------------------------------------------------------- |
262 | | * |
263 | | * ReleaseCharSet -- |
264 | | * |
265 | | * Free the storage associated with a character set. |
266 | | * |
267 | | * Results: |
268 | | * None. |
269 | | * |
270 | | * Side effects: |
271 | | * Frees cset->chars, and cset->ranges when it is non-NULL. The pointers inside *cset are not reset, and *cset itself is not freed. |
272 | | * |
273 | | *---------------------------------------------------------------------- |
274 | | */ |
275 | | static void ReleaseCharSet(CharSet *cset) |
276 | 0 | { |
277 | 0 | efree((char *)cset->chars); |
278 | 0 | if (cset->ranges) { /* ranges is NULL for sets without any '-' */ |
279 | 0 | efree((char *)cset->ranges); |
280 | 0 | } |
281 | 0 | } |
282 | | /* }}} */ |
283 | | |
284 | | /* {{{ ValidateFormat |
285 | | *---------------------------------------------------------------------- |
286 | | * |
287 | | * ValidateFormat -- |
288 | | * |
289 | | * Parse the format string and verify that it is properly formed |
290 | | * and that every supplied variable is assigned exactly once by it. |
291 | | * |
292 | | * Results: |
293 | | * SCAN_SUCCESS, or SCAN_ERROR_INVALID_FORMAT after an error has been raised. |
294 | | * |
295 | | * Side effects: |
296 | | * Raises a ValueError through zend_value_error() for every rejected format. |
297 | | * |
298 | | * Parameters : |
299 | | * format The format string. |
300 | | * numVars The number of variables passed to the scan command; 0 when none were passed. |
301 | | * totalSubs Out, may be NULL: receives numVars, or when numVars is 0 the number of result slots the format requires. |
302 | | * |
303 | | *---------------------------------------------------------------------- |
304 | | */ |
305 | | PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs) |
306 | 0 | { |
307 | 0 | #define STATIC_LIST_SIZE 16 |
308 | 0 | int gotXpg, gotSequential, value, i, flags; |
309 | 0 | char *end, *ch = NULL; |
310 | 0 | int staticAssign[STATIC_LIST_SIZE]; |
311 | 0 | int *nassign = staticAssign; |
312 | 0 | int objIndex, xpgSize, nspace = STATIC_LIST_SIZE; |
313 | | 
314 | | /* |
315 | | * Initialize an array that records the number of times a variable |
316 | | * is assigned to by the format string. We use this to detect if |
317 | | * a variable is multiply assigned or left unassigned. The on-stack array is used unless more than STATIC_LIST_SIZE variables were passed. |
318 | | */ |
319 | 0 | if (numVars > nspace) { |
320 | 0 | nassign = (int*)safe_emalloc(sizeof(int), numVars, 0); |
321 | 0 | nspace = numVars; |
322 | 0 | } |
323 | 0 | for (i = 0; i < nspace; i++) { |
324 | 0 | nassign[i] = 0; |
325 | 0 | } |
326 | 

327 | 0 | xpgSize = objIndex = gotXpg = gotSequential = 0; |
328 | 

329 | 0 | while (*format != '\0') { |
330 | 0 | ch = format++; |
331 | 0 | flags = 0; |
332 | 

333 | 0 | if (*ch != '%') { /* literal text needs no validation */ |
334 | 0 | continue; |
335 | 0 | } |
336 | 0 | ch = format++; |
337 | 0 | if (*ch == '%') { /* "%%" is a literal percent sign */ |
338 | 0 | continue; |
339 | 0 | } |
340 | 0 | if (*ch == '*') { |
341 | 0 | flags |= SCAN_SUPPRESS; |
342 | 0 | ch = format++; |
343 | 0 | goto xpgCheckDone; /* a suppressed field takes no variable, so it skips the XPG/sequential bookkeeping */ |
344 | 0 | } |
345 | | 
346 | 0 | if ( isdigit( (unsigned char)*ch ) ) { |
347 | | /* |
348 | | * Check for an XPG3-style %n$ specification. Note: there |
349 | | * must not be a mixture of XPG3 specs and non-XPG3 specs |
350 | | * in the same format string. |
351 | | */ |
352 | 0 | value = ZEND_STRTOUL(format-1, &end, 10); /* NOTE(review): the unsigned result is narrowed to int, so a very large index may wrap - confirm this is acceptable */ |
353 | 0 | if (*end != '$') { /* digits without '$' are a field width, handled below */ |
354 | 0 | goto notXpg; |
355 | 0 | } |
356 | 0 | format = end+1; |
357 | 0 | ch = format++; |
358 | 0 | gotXpg = 1; |
359 | 0 | if (gotSequential) { |
360 | 0 | goto mixedXPG; |
361 | 0 | } |
362 | 0 | if ((value < 1) || (numVars && (value > numVars))) { |
363 | 0 | goto badIndex; |
364 | 0 | } else if (numVars == 0) { |
365 | | /* |
366 | | * In the case where no vars are specified, the user can |
367 | | * specify %9999$ legally, so we have to consider special |
368 | | * rules for growing the assign array. 'value' is |
369 | | * guaranteed to be > 0. |
370 | | */ |
371 | | 
372 | | /* set a lower artificial limit on this |
373 | | * in the interest of security and resource friendliness |
374 | | * 255 arguments should be more than enough. - cc |
375 | | */ |
376 | 0 | if (value > SCAN_MAX_ARGS) { |
377 | 0 | goto badIndex; |
378 | 0 | } |
379 | | 
380 | 0 | xpgSize = (xpgSize > value) ? xpgSize : value; /* track the highest index seen so far */ |
381 | 0 | } |
382 | 0 | objIndex = value - 1; /* %n$ indexes are 1-based */ |
383 | 0 | goto xpgCheckDone; |
384 | 0 | } |
385 | | 
386 | 0 | notXpg: |
387 | 0 | gotSequential = 1; |
388 | 0 | if (gotXpg) { |
389 | 0 | mixedXPG: |
390 | 0 | zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers"); |
391 | 0 | goto error; |
392 | 0 | } |
393 | | 
394 | 0 | xpgCheckDone: |
395 | | /* |
396 | | * Parse any width specifier. Only its presence is recorded here; the parsed number is not used by the checks below. |
397 | | */ |
398 | 0 | if (isdigit(UCHAR(*ch))) { |
399 | 0 | value = ZEND_STRTOUL(format-1, &format, 10); |
400 | 0 | flags |= SCAN_WIDTH; |
401 | 0 | ch = format++; |
402 | 0 | } |
403 | | 
404 | | /* |
405 | | * Ignore size specifier. |
406 | | */ |
407 | 0 | if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) { |
408 | 0 | ch = format++; |
409 | 0 | } |
410 | 

411 | 0 | if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { /* more assigning specifiers than variables */ |
412 | 0 | goto badIndex; |
413 | 0 | } |
414 | | 
415 | | /* |
416 | | * Handle the various field types. |
417 | | */ |
418 | 0 | switch (*ch) { |
419 | 0 | case 'n': |
420 | 0 | case 'd': |
421 | 0 | case 'D': |
422 | 0 | case 'i': |
423 | 0 | case 'o': |
424 | 0 | case 'x': |
425 | 0 | case 'X': |
426 | 0 | case 'u': |
427 | 0 | case 'f': |
428 | 0 | case 'e': |
429 | 0 | case 'E': |
430 | 0 | case 'g': |
431 | 0 | case 's': |
432 | 0 | break; |
433 | | 
434 | 0 | case 'c': |
435 | | /* we differ here with the TCL implementation in allowing for */ |
436 | | /* a character width specification, to be more consistent with */ |
437 | | /* ANSI. since Zend auto allocates space for vars, this is no */ |
438 | | /* problem - cc */ |
439 | | /* |
440 | | if (flags & SCAN_WIDTH) { |
441 | | php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion"); |
442 | | goto error; |
443 | | } |
444 | | */ |
445 | 0 | break; |
446 | | 
447 | 0 | case '[': /* walk to the closing ']'; a ']' right after '[' or '[^' is a literal member */ |
448 | 0 | if (*format == '\0') { |
449 | 0 | goto badSet; |
450 | 0 | } |
451 | 0 | ch = format++; |
452 | 0 | if (*ch == '^') { |
453 | 0 | if (*format == '\0') { |
454 | 0 | goto badSet; |
455 | 0 | } |
456 | 0 | ch = format++; |
457 | 0 | } |
458 | 0 | if (*ch == ']') { |
459 | 0 | if (*format == '\0') { |
460 | 0 | goto badSet; |
461 | 0 | } |
462 | 0 | ch = format++; |
463 | 0 | } |
464 | 0 | while (*ch != ']') { |
465 | 0 | if (*format == '\0') { |
466 | 0 | goto badSet; |
467 | 0 | } |
468 | 0 | ch = format++; |
469 | 0 | } |
470 | 0 | break; |
471 | 0 | badSet: |
472 | 0 | zend_value_error("Unmatched [ in format string"); |
473 | 0 | goto error; |
474 | | 
475 | 0 | default: { |
476 | 0 | zend_value_error("Bad scan conversion character \"%c\"", *ch); |
477 | 0 | goto error; |
478 | 0 | } |
479 | 0 | } |
480 | | 
481 | 0 | if (!(flags & SCAN_SUPPRESS)) { |
482 | 0 | if (objIndex >= nspace) { |
483 | | /* |
484 | | * Expand the nassign buffer. If we are using XPG specifiers, |
485 | | * make sure that we grow to a large enough size. xpgSize is |
486 | | * guaranteed to be at least one larger than objIndex. |
487 | | */ |
488 | 0 | value = nspace; /* remember the old size so only the new tail is zeroed */ |
489 | 0 | if (xpgSize) { |
490 | 0 | nspace = xpgSize; |
491 | 0 | } else { |
492 | 0 | nspace += STATIC_LIST_SIZE; |
493 | 0 | } |
494 | 0 | if (nassign == staticAssign) { /* first growth: move from the stack array to the heap */ |
495 | 0 | nassign = (void *)safe_emalloc(nspace, sizeof(int), 0); |
496 | 0 | for (i = 0; i < STATIC_LIST_SIZE; ++i) { |
497 | 0 | nassign[i] = staticAssign[i]; |
498 | 0 | } |
499 | 0 | } else { |
500 | 0 | nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int)); |
501 | 0 | } |
502 | 0 | for (i = value; i < nspace; i++) { |
503 | 0 | nassign[i] = 0; |
504 | 0 | } |
505 | 0 | } |
506 | 0 | nassign[objIndex]++; |
507 | 0 | objIndex++; /* the next sequential specifier targets the following variable */ |
508 | 0 | } |
509 | 0 | } /* while (*format != '\0') */ |
510 | | 
511 | | /* |
512 | | * Verify that all of the variables were assigned exactly once. When no variables were passed, the count is derived from the format itself. |
513 | | */ |
514 | 0 | if (numVars == 0) { |
515 | 0 | if (xpgSize) { |
516 | 0 | numVars = xpgSize; |
517 | 0 | } else { |
518 | 0 | numVars = objIndex; |
519 | 0 | } |
520 | 0 | } |
521 | 0 | if (totalSubs) { |
522 | 0 | *totalSubs = numVars; |
523 | 0 | } |
524 | 0 | for (i = 0; i < numVars; i++) { |
525 | 0 | if (nassign[i] > 1) { |
526 | 0 | zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers"); |
527 | 0 | goto error; |
528 | 0 | } else if (!xpgSize && (nassign[i] == 0)) { |
529 | | /* |
530 | | * If the space is empty, and xpgSize is 0 (means XPG wasn't |
531 | | * used, and/or numVars != 0), then too many vars were given |
532 | | */ |
533 | 0 | zend_value_error("Variable is not assigned by any conversion specifiers"); |
534 | 0 | goto error; |
535 | 0 | } |
536 | 0 | } |
537 | | 
538 | 0 | if (nassign != staticAssign) { |
539 | 0 | efree((char *)nassign); |
540 | 0 | } |
541 | 0 | return SCAN_SUCCESS; |
542 | | 
543 | 0 | badIndex: |
544 | 0 | if (gotXpg) { |
545 | 0 | zend_value_error("%s", "\"%n$\" argument index out of range"); |
546 | 0 | } else { |
547 | 0 | zend_value_error("Different numbers of variable names and field specifiers"); |
548 | 0 | } |
549 | 

550 | 0 | error: /* common exit for every failure: release the heap copy of nassign, if any */ |
551 | 0 | if (nassign != staticAssign) { |
552 | 0 | efree((char *)nassign); |
553 | 0 | } |
554 | 0 | return SCAN_ERROR_INVALID_FORMAT; |
555 | 0 | #undef STATIC_LIST_SIZE |
556 | 0 | } |
557 | | /* }}} */ |
558 | | |
559 | | /* {{{ php_sscanf_internal |
560 | | * This is the internal function which does processing on behalf of |
561 | | * both sscanf() and fscanf() |
562 | | * |
563 | | * parameters : |
564 | | * string literal string to be processed |
565 | | * format format string |
566 | | * argCount total number of elements in the args array |
567 | | * args arguments passed in from user function (f|s)scanf |
568 | | * varStart offset (in args) of 1st variable passed in to (f|s)scanf |
569 | | * return_value set with the results of the scan |
570 | | */ |
571 | | |
572 | | PHPAPI int php_sscanf_internal( char *string, char *format, |
573 | | int argCount, zval *args, |
574 | | int varStart, zval *return_value) |
575 | 0 | { |
576 | 0 | int numVars, nconversions, totalVars = -1; |
577 | 0 | int i, result; |
578 | 0 | zend_long value; |
579 | 0 | int objIndex; |
580 | 0 | char *end, *baseString; |
581 | 0 | zval *current; |
582 | 0 | char op = 0; |
583 | 0 | int base = 0; |
584 | 0 | int underflow = 0; |
585 | 0 | size_t width; |
586 | 0 | int_string_formater fn = NULL; |
587 | 0 | char *ch, sch; |
588 | 0 | int flags; |
589 | 0 | char buf[64]; /* Temporary buffer to hold scanned number |
590 | | * strings before they are passed to strtoul() */ |
591 | | |
592 | | /* do some sanity checking */ |
593 | 0 | if ((varStart > argCount) || (varStart < 0)){ |
594 | 0 | varStart = SCAN_MAX_ARGS + 1; |
595 | 0 | } |
596 | 0 | numVars = argCount - varStart; |
597 | 0 | if (numVars < 0) { |
598 | 0 | numVars = 0; |
599 | 0 | } |
600 | | |
601 | | /* |
602 | | * Check for errors in the format string. |
603 | | */ |
604 | 0 | if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) { |
605 | 0 | scan_set_error_return( numVars, return_value ); |
606 | 0 | return SCAN_ERROR_INVALID_FORMAT; |
607 | 0 | } |
608 | | |
609 | 0 | objIndex = numVars ? varStart : 0; |
610 | | |
611 | | /* |
612 | | * If any variables are passed, make sure they are all passed by reference |
613 | | */ |
614 | 0 | if (numVars) { |
615 | 0 | for (i = varStart;i < argCount;i++){ |
616 | 0 | ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference"); |
617 | 0 | } |
618 | 0 | } |
619 | | |
620 | | /* |
621 | | * Allocate space for the result objects. Only happens when no variables |
622 | | * are specified |
623 | | */ |
624 | 0 | if (!numVars) { |
625 | 0 | zval tmp; |
626 | | |
627 | | /* allocate an array for return */ |
628 | 0 | array_init(return_value); |
629 | |
|
630 | 0 | for (i = 0; i < totalVars; i++) { |
631 | 0 | ZVAL_NULL(&tmp); |
632 | 0 | if (add_next_index_zval(return_value, &tmp) == FAILURE) { |
633 | 0 | scan_set_error_return(0, return_value); |
634 | 0 | return FAILURE; |
635 | 0 | } |
636 | 0 | } |
637 | 0 | varStart = 0; /* Array index starts from 0 */ |
638 | 0 | } |
639 | | |
640 | 0 | baseString = string; |
641 | | |
642 | | /* |
643 | | * Iterate over the format string filling in the result objects until |
644 | | * we reach the end of input, the end of the format string, or there |
645 | | * is a mismatch. |
646 | | */ |
647 | 0 | nconversions = 0; |
648 | | /* note ! - we need to limit the loop for objIndex to keep it in bounds */ |
649 | |
|
650 | 0 | while (*format != '\0') { |
651 | 0 | ch = format++; |
652 | 0 | flags = 0; |
653 | | |
654 | | /* |
655 | | * If we see whitespace in the format, skip whitespace in the string. |
656 | | */ |
657 | 0 | if ( isspace( (unsigned char)*ch ) ) { |
658 | 0 | sch = *string; |
659 | 0 | while ( isspace( (unsigned char)sch ) ) { |
660 | 0 | if (*string == '\0') { |
661 | 0 | goto done; |
662 | 0 | } |
663 | 0 | string++; |
664 | 0 | sch = *string; |
665 | 0 | } |
666 | 0 | continue; |
667 | 0 | } |
668 | | |
669 | 0 | if (*ch != '%') { |
670 | 0 | literal: |
671 | 0 | if (*string == '\0') { |
672 | 0 | underflow = 1; |
673 | 0 | goto done; |
674 | 0 | } |
675 | 0 | sch = *string; |
676 | 0 | string++; |
677 | 0 | if (*ch != sch) { |
678 | 0 | goto done; |
679 | 0 | } |
680 | 0 | continue; |
681 | 0 | } |
682 | | |
683 | 0 | ch = format++; |
684 | 0 | if (*ch == '%') { |
685 | 0 | goto literal; |
686 | 0 | } |
687 | | |
688 | | /* |
689 | | * Check for assignment suppression ('*') or an XPG3-style |
690 | | * assignment ('%n$'). |
691 | | */ |
692 | 0 | if (*ch == '*') { |
693 | 0 | flags |= SCAN_SUPPRESS; |
694 | 0 | ch = format++; |
695 | 0 | } else if ( isdigit(UCHAR(*ch))) { |
696 | 0 | value = ZEND_STRTOUL(format-1, &end, 10); |
697 | 0 | if (*end == '$') { |
698 | 0 | format = end+1; |
699 | 0 | ch = format++; |
700 | 0 | objIndex = varStart + value - 1; |
701 | 0 | } |
702 | 0 | } |
703 | | |
704 | | /* |
705 | | * Parse any width specifier. |
706 | | */ |
707 | 0 | if ( isdigit(UCHAR(*ch))) { |
708 | 0 | width = ZEND_STRTOUL(format-1, &format, 10); |
709 | 0 | ch = format++; |
710 | 0 | } else { |
711 | 0 | width = 0; |
712 | 0 | } |
713 | | |
714 | | /* |
715 | | * Ignore size specifier. |
716 | | */ |
717 | 0 | if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) { |
718 | 0 | ch = format++; |
719 | 0 | } |
720 | | |
721 | | /* |
722 | | * Handle the various field types. |
723 | | */ |
724 | 0 | switch (*ch) { |
725 | 0 | case 'n': |
726 | 0 | if (!(flags & SCAN_SUPPRESS)) { |
727 | 0 | if (numVars && objIndex >= argCount) { |
728 | 0 | break; |
729 | 0 | } else if (numVars) { |
730 | 0 | current = args + objIndex++; |
731 | 0 | ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString)); |
732 | 0 | } else { |
733 | 0 | add_index_long(return_value, objIndex++, string - baseString); |
734 | 0 | } |
735 | 0 | } |
736 | 0 | nconversions++; |
737 | 0 | continue; |
738 | | |
739 | 0 | case 'd': |
740 | 0 | case 'D': |
741 | 0 | op = 'i'; |
742 | 0 | base = 10; |
743 | 0 | fn = (int_string_formater)ZEND_STRTOL_PTR; |
744 | 0 | break; |
745 | 0 | case 'i': |
746 | 0 | op = 'i'; |
747 | 0 | base = 0; |
748 | 0 | fn = (int_string_formater)ZEND_STRTOL_PTR; |
749 | 0 | break; |
750 | 0 | case 'o': |
751 | 0 | op = 'i'; |
752 | 0 | base = 8; |
753 | 0 | fn = (int_string_formater)ZEND_STRTOL_PTR; |
754 | 0 | break; |
755 | 0 | case 'x': |
756 | 0 | case 'X': |
757 | 0 | op = 'i'; |
758 | 0 | base = 16; |
759 | 0 | fn = (int_string_formater)ZEND_STRTOL_PTR; |
760 | 0 | break; |
761 | 0 | case 'u': |
762 | 0 | op = 'i'; |
763 | 0 | base = 10; |
764 | 0 | flags |= SCAN_UNSIGNED; |
765 | 0 | fn = (int_string_formater)ZEND_STRTOUL_PTR; |
766 | 0 | break; |
767 | | |
768 | 0 | case 'f': |
769 | 0 | case 'e': |
770 | 0 | case 'E': |
771 | 0 | case 'g': |
772 | 0 | op = 'f'; |
773 | 0 | break; |
774 | | |
775 | 0 | case 's': |
776 | 0 | op = 's'; |
777 | 0 | break; |
778 | | |
779 | 0 | case 'c': |
780 | 0 | op = 's'; |
781 | 0 | flags |= SCAN_NOSKIP; |
782 | | /*-cc-*/ |
783 | 0 | if (0 == width) { |
784 | 0 | width = 1; |
785 | 0 | } |
786 | | /*-cc-*/ |
787 | 0 | break; |
788 | 0 | case '[': |
789 | 0 | op = '['; |
790 | 0 | flags |= SCAN_NOSKIP; |
791 | 0 | break; |
792 | 0 | } /* switch */ |
793 | | |
794 | | /* |
795 | | * At this point, we will need additional characters from the |
796 | | * string to proceed. |
797 | | */ |
798 | 0 | if (*string == '\0') { |
799 | 0 | underflow = 1; |
800 | 0 | goto done; |
801 | 0 | } |
802 | | |
803 | | /* |
804 | | * Skip any leading whitespace at the beginning of a field unless |
805 | | * the format suppresses this behavior. |
806 | | */ |
807 | 0 | if (!(flags & SCAN_NOSKIP)) { |
808 | 0 | while (*string != '\0') { |
809 | 0 | sch = *string; |
810 | 0 | if (! isspace((unsigned char)sch) ) { |
811 | 0 | break; |
812 | 0 | } |
813 | 0 | string++; |
814 | 0 | } |
815 | 0 | if (*string == '\0') { |
816 | 0 | underflow = 1; |
817 | 0 | goto done; |
818 | 0 | } |
819 | 0 | } |
820 | | |
821 | | /* |
822 | | * Perform the requested scanning operation. |
823 | | */ |
824 | 0 | switch (op) { |
825 | 0 | case 'c': |
826 | 0 | case 's': |
827 | | /* |
828 | | * Scan a string up to width characters or whitespace. |
829 | | */ |
830 | 0 | if (width == 0) { |
831 | 0 | width = (size_t) ~0; |
832 | 0 | } |
833 | 0 | end = string; |
834 | 0 | while (*end != '\0') { |
835 | 0 | sch = *end; |
836 | 0 | if ( isspace( (unsigned char)sch ) ) { |
837 | 0 | break; |
838 | 0 | } |
839 | 0 | end++; |
840 | 0 | if (--width == 0) { |
841 | 0 | break; |
842 | 0 | } |
843 | 0 | } |
844 | 0 | if (!(flags & SCAN_SUPPRESS)) { |
845 | 0 | if (numVars && objIndex >= argCount) { |
846 | 0 | break; |
847 | 0 | } else if (numVars) { |
848 | 0 | current = args + objIndex++; |
849 | 0 | ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string); |
850 | 0 | } else { |
851 | 0 | add_index_stringl(return_value, objIndex++, string, end-string); |
852 | 0 | } |
853 | 0 | } |
854 | 0 | string = end; |
855 | 0 | break; |
856 | | |
857 | 0 | case '[': { |
858 | 0 | CharSet cset; |
859 | |
|
860 | 0 | if (width == 0) { |
861 | 0 | width = (size_t) ~0; |
862 | 0 | } |
863 | 0 | end = string; |
864 | |
|
865 | 0 | format = BuildCharSet(&cset, format); |
866 | 0 | while (*end != '\0') { |
867 | 0 | sch = *end; |
868 | 0 | if (!CharInSet(&cset, (int)sch)) { |
869 | 0 | break; |
870 | 0 | } |
871 | 0 | end++; |
872 | 0 | if (--width == 0) { |
873 | 0 | break; |
874 | 0 | } |
875 | 0 | } |
876 | 0 | ReleaseCharSet(&cset); |
877 | |
|
878 | 0 | if (string == end) { |
879 | | /* |
880 | | * Nothing matched the range, stop processing |
881 | | */ |
882 | 0 | goto done; |
883 | 0 | } |
884 | 0 | if (!(flags & SCAN_SUPPRESS)) { |
885 | 0 | if (numVars && objIndex >= argCount) { |
886 | 0 | break; |
887 | 0 | } else if (numVars) { |
888 | 0 | current = args + objIndex++; |
889 | 0 | ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string); |
890 | 0 | } else { |
891 | 0 | add_index_stringl(return_value, objIndex++, string, end-string); |
892 | 0 | } |
893 | 0 | } |
894 | 0 | string = end; |
895 | 0 | break; |
896 | 0 | } |
897 | | /* |
898 | | case 'c': |
899 | | / Scan a single character./ |
900 | | |
901 | | sch = *string; |
902 | | string++; |
903 | | if (!(flags & SCAN_SUPPRESS)) { |
904 | | if (numVars) { |
905 | | char __buf[2]; |
906 | | __buf[0] = sch; |
907 | | __buf[1] = '\0'; |
908 | | current = args[objIndex++]; |
909 | | zval_ptr_dtor_nogc(*current); |
910 | | ZVAL_STRINGL( *current, __buf, 1); |
911 | | } else { |
912 | | add_index_stringl(return_value, objIndex++, &sch, 1); |
913 | | } |
914 | | } |
915 | | break; |
916 | | */ |
917 | 0 | case 'i': |
918 | | /* |
919 | | * Scan an unsigned or signed integer. |
920 | | */ |
921 | | /*-cc-*/ |
922 | 0 | buf[0] = '\0'; |
923 | | /*-cc-*/ |
924 | 0 | if ((width == 0) || (width > sizeof(buf) - 1)) { |
925 | 0 | width = sizeof(buf) - 1; |
926 | 0 | } |
927 | |
|
928 | 0 | flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO; |
929 | 0 | for (end = buf; width > 0; width--) { |
930 | 0 | switch (*string) { |
931 | | /* |
932 | | * The 0 digit has special meaning at the beginning of |
933 | | * a number. If we are unsure of the base, it |
934 | | * indicates that we are in base 8 or base 16 (if it is |
935 | | * followed by an 'x'). |
936 | | */ |
937 | 0 | case '0': |
938 | | /*-cc-*/ |
939 | 0 | if (base == 16) { |
940 | 0 | flags |= SCAN_XOK; |
941 | 0 | } |
942 | | /*-cc-*/ |
943 | 0 | if (base == 0) { |
944 | 0 | base = 8; |
945 | 0 | flags |= SCAN_XOK; |
946 | 0 | } |
947 | 0 | if (flags & SCAN_NOZERO) { |
948 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO); |
949 | 0 | } else { |
950 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
951 | 0 | } |
952 | 0 | goto addToInt; |
953 | | |
954 | 0 | case '1': case '2': case '3': case '4': |
955 | 0 | case '5': case '6': case '7': |
956 | 0 | if (base == 0) { |
957 | 0 | base = 10; |
958 | 0 | } |
959 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
960 | 0 | goto addToInt; |
961 | | |
962 | 0 | case '8': case '9': |
963 | 0 | if (base == 0) { |
964 | 0 | base = 10; |
965 | 0 | } |
966 | 0 | if (base <= 8) { |
967 | 0 | break; |
968 | 0 | } |
969 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
970 | 0 | goto addToInt; |
971 | | |
972 | 0 | case 'A': case 'B': case 'C': |
973 | 0 | case 'D': case 'E': case 'F': |
974 | 0 | case 'a': case 'b': case 'c': |
975 | 0 | case 'd': case 'e': case 'f': |
976 | 0 | if (base <= 10) { |
977 | 0 | break; |
978 | 0 | } |
979 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); |
980 | 0 | goto addToInt; |
981 | | |
982 | 0 | case '+': case '-': |
983 | 0 | if (flags & SCAN_SIGNOK) { |
984 | 0 | flags &= ~SCAN_SIGNOK; |
985 | 0 | goto addToInt; |
986 | 0 | } |
987 | 0 | break; |
988 | | |
989 | 0 | case 'x': case 'X': |
990 | 0 | if ((flags & SCAN_XOK) && (end == buf+1)) { |
991 | 0 | base = 16; |
992 | 0 | flags &= ~SCAN_XOK; |
993 | 0 | goto addToInt; |
994 | 0 | } |
995 | 0 | break; |
996 | 0 | } |
997 | | |
998 | | /* |
999 | | * We got an illegal character so we are done accumulating. |
1000 | | */ |
1001 | 0 | break; |
1002 | | |
1003 | 0 | addToInt: |
1004 | | /* |
1005 | | * Add the character to the temporary buffer. |
1006 | | */ |
1007 | 0 | *end++ = *string++; |
1008 | 0 | if (*string == '\0') { |
1009 | 0 | break; |
1010 | 0 | } |
1011 | 0 | } |
1012 | | |
1013 | | /* |
1014 | | * Check to see if we need to back up because we only got a |
1015 | | * sign or a trailing x after a 0. |
1016 | | */ |
1017 | 0 | if (flags & SCAN_NODIGITS) { |
1018 | 0 | if (*string == '\0') { |
1019 | 0 | underflow = 1; |
1020 | 0 | } |
1021 | 0 | goto done; |
1022 | 0 | } else if (end[-1] == 'x' || end[-1] == 'X') { |
1023 | 0 | end--; |
1024 | 0 | string--; |
1025 | 0 | } |
1026 | | |
1027 | | /* |
1028 | | * Scan the value from the temporary buffer. If we are |
1029 | | * returning a large unsigned value, we have to convert it back |
1030 | | * to a string since PHP only supports signed values. |
1031 | | */ |
1032 | 0 | if (!(flags & SCAN_SUPPRESS)) { |
1033 | 0 | *end = '\0'; |
1034 | 0 | value = (zend_long) (*fn)(buf, NULL, base); |
1035 | 0 | if ((flags & SCAN_UNSIGNED) && (value < 0)) { |
1036 | 0 | snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */ |
1037 | 0 | if (numVars && objIndex >= argCount) { |
1038 | 0 | break; |
1039 | 0 | } else if (numVars) { |
1040 | | /* change passed value type to string */ |
1041 | 0 | current = args + objIndex++; |
1042 | 0 | ZEND_TRY_ASSIGN_REF_STRING(current, buf); |
1043 | 0 | } else { |
1044 | 0 | add_index_string(return_value, objIndex++, buf); |
1045 | 0 | } |
1046 | 0 | } else { |
1047 | 0 | if (numVars && objIndex >= argCount) { |
1048 | 0 | break; |
1049 | 0 | } else if (numVars) { |
1050 | 0 | current = args + objIndex++; |
1051 | 0 | ZEND_TRY_ASSIGN_REF_LONG(current, value); |
1052 | 0 | } else { |
1053 | 0 | add_index_long(return_value, objIndex++, value); |
1054 | 0 | } |
1055 | 0 | } |
1056 | 0 | } |
1057 | 0 | break; |
1058 | | |
1059 | 0 | case 'f': |
1060 | | /* |
1061 | | * Scan a floating point number |
1062 | | */ |
1063 | 0 | buf[0] = '\0'; /* call me pedantic */ |
1064 | 0 | if ((width == 0) || (width > sizeof(buf) - 1)) { |
1065 | 0 | width = sizeof(buf) - 1; |
1066 | 0 | } |
1067 | 0 | flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK; |
1068 | 0 | for (end = buf; width > 0; width--) { |
1069 | 0 | switch (*string) { |
1070 | 0 | case '0': case '1': case '2': case '3': |
1071 | 0 | case '4': case '5': case '6': case '7': |
1072 | 0 | case '8': case '9': |
1073 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS); |
1074 | 0 | goto addToFloat; |
1075 | 0 | case '+': |
1076 | 0 | case '-': |
1077 | 0 | if (flags & SCAN_SIGNOK) { |
1078 | 0 | flags &= ~SCAN_SIGNOK; |
1079 | 0 | goto addToFloat; |
1080 | 0 | } |
1081 | 0 | break; |
1082 | 0 | case '.': |
1083 | 0 | if (flags & SCAN_PTOK) { |
1084 | 0 | flags &= ~(SCAN_SIGNOK | SCAN_PTOK); |
1085 | 0 | goto addToFloat; |
1086 | 0 | } |
1087 | 0 | break; |
1088 | 0 | case 'e': |
1089 | 0 | case 'E': |
1090 | | /* |
1091 | | * An exponent is not allowed until there has |
1092 | | * been at least one digit. |
1093 | | */ |
1094 | 0 | if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) { |
1095 | 0 | flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK)) |
1096 | 0 | | SCAN_SIGNOK | SCAN_NODIGITS; |
1097 | 0 | goto addToFloat; |
1098 | 0 | } |
1099 | 0 | break; |
1100 | 0 | } |
1101 | | |
1102 | | /* |
1103 | | * We got an illegal character so we are done accumulating. |
1104 | | */ |
1105 | 0 | break; |
1106 | | |
1107 | 0 | addToFloat: |
1108 | | /* |
1109 | | * Add the character to the temporary buffer. |
1110 | | */ |
1111 | 0 | *end++ = *string++; |
1112 | 0 | if (*string == '\0') { |
1113 | 0 | break; |
1114 | 0 | } |
1115 | 0 | } |
1116 | | |
1117 | | /* |
1118 | | * Check to see if we need to back up because we saw a |
1119 | | * trailing 'e' or sign. |
1120 | | */ |
1121 | 0 | if (flags & SCAN_NODIGITS) { |
1122 | 0 | if (flags & SCAN_EXPOK) { |
1123 | | /* |
1124 | | * There were no digits at all so scanning has |
1125 | | * failed and we are done. |
1126 | | */ |
1127 | 0 | if (*string == '\0') { |
1128 | 0 | underflow = 1; |
1129 | 0 | } |
1130 | 0 | goto done; |
1131 | 0 | } |
1132 | | |
1133 | | /* |
1134 | | * We got a bad exponent ('e' and maybe a sign). |
1135 | | */ |
1136 | 0 | end--; |
1137 | 0 | string--; |
1138 | 0 | if (*end != 'e' && *end != 'E') { |
1139 | 0 | end--; |
1140 | 0 | string--; |
1141 | 0 | } |
1142 | 0 | } |
1143 | | |
1144 | | /* |
1145 | | * Scan the value from the temporary buffer. |
1146 | | */ |
1147 | 0 | if (!(flags & SCAN_SUPPRESS)) { |
1148 | 0 | double dvalue; |
1149 | 0 | *end = '\0'; |
1150 | 0 | dvalue = zend_strtod(buf, NULL); |
1151 | 0 | if (numVars && objIndex >= argCount) { |
1152 | 0 | break; |
1153 | 0 | } else if (numVars) { |
1154 | 0 | current = args + objIndex++; |
1155 | 0 | ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue); |
1156 | 0 | } else { |
1157 | 0 | add_index_double(return_value, objIndex++, dvalue ); |
1158 | 0 | } |
1159 | 0 | } |
1160 | 0 | break; |
1161 | 0 | } /* switch (op) */ |
1162 | 0 | nconversions++; |
1163 | 0 | } /* while (*format != '\0') */ |
1164 | | |
1165 | 0 | done: |
1166 | 0 | result = SCAN_SUCCESS; |
1167 | |
|
1168 | 0 | if (underflow && (0==nconversions)) { |
1169 | 0 | scan_set_error_return( numVars, return_value ); |
1170 | 0 | result = SCAN_ERROR_EOF; |
1171 | 0 | } else if (numVars) { |
1172 | 0 | zval_ptr_dtor(return_value ); |
1173 | 0 | ZVAL_LONG(return_value, nconversions); |
1174 | 0 | } else if (nconversions < totalVars) { |
1175 | | /* TODO: not all elements converted. we need to prune the list - cc */ |
1176 | 0 | } |
1177 | 0 | return result; |
1178 | 0 | } |
1179 | | /* }}} */ |
1180 | | |
1181 | | /* the compiler choked when i tried to make this a macro */ |
1182 | | static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */ |
1183 | 0 | { |
1184 | 0 | if (numVars) { |
1185 | 0 | ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */ |
1186 | 0 | } else { |
1187 | | /* convert_to_null calls destructor */ |
1188 | 0 | convert_to_null(return_value); |
1189 | 0 | } |
1190 | 0 | } |
1191 | | /* }}} */ |