Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/standard/scanf.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | https://www.php.net/license/3_01.txt                                 |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Author: Clayton Collie <clcollie@mindspring.com>                     |
14
   +----------------------------------------------------------------------+
15
*/
16
17
/*
18
  scanf.c --
19
20
  This file contains the base code which implements sscanf and by extension
21
  fscanf. Original code is from TCL8.3.0 and bears the following copyright:
22
23
  This software is copyrighted by the Regents of the University of
24
  California, Sun Microsystems, Inc., Scriptics Corporation,
25
  and other parties.  The following terms apply to all files associated
26
  with the software unless explicitly disclaimed in individual files.
27
28
  The authors hereby grant permission to use, copy, modify, distribute,
29
  and license this software and its documentation for any purpose, provided
30
  that existing copyright notices are retained in all copies and that this
31
  notice is included verbatim in any distributions. No written agreement,
32
  license, or royalty fee is required for any of the authorized uses.
33
  Modifications to this software may be copyrighted by their authors
34
  and need not follow the licensing terms described here, provided that
35
  the new terms are clearly indicated on the first page of each file where
36
  they apply.
37
38
  IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
39
  FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
40
  ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
41
  DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
42
  POSSIBILITY OF SUCH DAMAGE.
43
44
  THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
45
  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
46
  FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
47
  IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
48
  NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
49
  MODIFICATIONS.
50
51
  GOVERNMENT USE: If you are acquiring this software on behalf of the
52
  U.S. government, the Government shall have only "Restricted Rights"
53
  in the software and related documentation as defined in the Federal
54
  Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
55
  are acquiring the software on behalf of the Department of Defense, the
56
  software shall be classified as "Commercial Computer Software" and the
57
  Government shall have only "Restricted Rights" as defined in Clause
58
  252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
59
  authors grant the U.S. Government and others acting in its behalf
60
  permission to use and distribute the software in accordance with the
61
  terms specified in this license.
62
*/
63
64
#include <stdio.h>
65
#include <limits.h>
66
#include <ctype.h>
67
#include "php.h"
68
#include "php_variables.h"
69
#include <locale.h>
70
#include "zend_execute.h"
71
#include "zend_operators.h"
72
#include "zend_strtod.h"
73
#include "php_globals.h"
74
#include "basic_functions.h"
75
#include "scanf.h"
76
77
/*
 * Flag values used internally by [f|s]canf.  Each flag is a distinct bit
 * so they can be OR-ed together in a single 'flags' word.
 */

/* Flags describing the conversion specification being processed. */
#define SCAN_NOSKIP     0x1     /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2     /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4     /* Read an unsigned value. */
#define SCAN_WIDTH      0x8     /* A width value was supplied. */

/* Flags tracking the state of a numeric scan. */
#define SCAN_SIGNOK     0x10    /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20    /* No digits have been scanned. */
#define SCAN_NOZERO     0x40    /* No zero digits have been scanned. */
#define SCAN_XOK        0x80    /* An 'x' is allowed. */
#define SCAN_PTOK       0x100   /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200   /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar, e.g. before handing it
 * to a <ctype.h> classifier.  The whole expansion is parenthesized so the
 * macro behaves as a single primary expression in any context
 * (sizeof UCHAR(c), UCHAR(c)[table], ...).
 */
#define UCHAR(x)        ((zend_uchar)(x))
93
94
/*
 * One inclusive character range ("a-z") inside a "%[...]" set.
 * BuildCharSet() stores the bounds so that start <= end.
 */
struct Range {
	char start;     /* lowest character of the range */
	char end;       /* highest character of the range */
};

/*
 * The following structure contains the information associated with
 * a "%[...]" character set: a list of individual characters plus a
 * list of character ranges.
 */
typedef struct CharSet {
	int exclude;            /* 1 if this is an exclusion set ("%[^...]"). */
	int nchars;             /* number of entries used in 'chars' */
	char *chars;            /* individual member characters */
	int nranges;            /* number of entries used in 'ranges' */
	struct Range *ranges;   /* member ranges, or NULL when there are none */
} CharSet;
108
109
typedef zend_long (*int_string_formater)(const char*, char**, int);
110
111
/*
112
 * Declarations for functions used only in this file.
113
 */
114
static char *BuildCharSet(CharSet *cset, char *format);
115
static int  CharInSet(CharSet *cset, int ch);
116
static void ReleaseCharSet(CharSet *cset);
117
static inline void scan_set_error_return(int numVars, zval *return_value);
118
119
120
/* {{{ BuildCharSet
121
 *----------------------------------------------------------------------
122
 *
123
 * BuildCharSet --
124
 *
125
 *  This function examines a character set format specification
126
 *  and builds a CharSet containing the individual characters and
127
 *  character ranges specified.
128
 *
129
 * Results:
130
 *  Returns the next format position.
131
 *
132
 * Side effects:
133
 *  Initializes the charset.
134
 *
135
 *----------------------------------------------------------------------
136
 */
137
static char * BuildCharSet(CharSet *cset, char *format)
138
0
{
139
0
  char *ch, start;
140
0
  int  nranges;
141
0
  char *end;
142
143
0
  memset(cset, 0, sizeof(CharSet));
144
145
0
  ch = format;
146
0
  if (*ch == '^') {
147
0
    cset->exclude = 1;
148
0
    ch = ++format;
149
0
  }
150
0
  end = format + 1; /* verify this - cc */
151
152
  /*
153
   * Find the close bracket so we can overallocate the set.
154
   */
155
0
  if (*ch == ']') {
156
0
    ch = end++;
157
0
  }
158
0
  nranges = 0;
159
0
  while (*ch != ']') {
160
0
    if (*ch == '-') {
161
0
      nranges++;
162
0
    }
163
0
    ch = end++;
164
0
  }
165
166
0
  cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
167
0
  if (nranges > 0) {
168
0
    cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
169
0
  } else {
170
0
    cset->ranges = NULL;
171
0
  }
172
173
  /*
174
   * Now build the character set.
175
   */
176
0
  cset->nchars = cset->nranges = 0;
177
0
  ch    = format++;
178
0
  start = *ch;
179
0
  if (*ch == ']' || *ch == '-') {
180
0
    cset->chars[cset->nchars++] = *ch;
181
0
    ch = format++;
182
0
  }
183
0
  while (*ch != ']') {
184
0
    if (*format == '-') {
185
      /*
186
       * This may be the first character of a range, so don't add
187
       * it yet.
188
       */
189
0
      start = *ch;
190
0
    } else if (*ch == '-') {
191
      /*
192
       * Check to see if this is the last character in the set, in which
193
       * case it is not a range and we should add the previous character
194
       * as well as the dash.
195
       */
196
0
      if (*format == ']') {
197
0
        cset->chars[cset->nchars++] = start;
198
0
        cset->chars[cset->nchars++] = *ch;
199
0
      } else {
200
0
        ch = format++;
201
202
        /*
203
         * Check to see if the range is in reverse order.
204
         */
205
0
        if (start < *ch) {
206
0
          cset->ranges[cset->nranges].start = start;
207
0
          cset->ranges[cset->nranges].end = *ch;
208
0
        } else {
209
0
          cset->ranges[cset->nranges].start = *ch;
210
0
          cset->ranges[cset->nranges].end = start;
211
0
        }
212
0
        cset->nranges++;
213
0
      }
214
0
    } else {
215
0
      cset->chars[cset->nchars++] = *ch;
216
0
    }
217
0
    ch = format++;
218
0
  }
219
0
  return format;
220
0
}
221
/* }}} */
222
223
/* {{{ CharInSet
224
 *----------------------------------------------------------------------
225
 *
226
 * CharInSet --
227
 *
228
 *  Check to see if a character matches the given set.
229
 *
230
 * Results:
231
 *  Returns non-zero if the character matches the given set.
232
 *
233
 * Side effects:
234
 *  None.
235
 *
236
 *----------------------------------------------------------------------
237
 */
238
static int CharInSet(CharSet *cset, int c)
239
0
{
240
0
  char ch = (char) c;
241
0
  int i, match = 0;
242
243
0
  for (i = 0; i < cset->nchars; i++) {
244
0
    if (cset->chars[i] == ch) {
245
0
      match = 1;
246
0
      break;
247
0
    }
248
0
  }
249
0
  if (!match) {
250
0
    for (i = 0; i < cset->nranges; i++) {
251
0
      if ((cset->ranges[i].start <= ch)
252
0
        && (ch <= cset->ranges[i].end)) {
253
0
        match = 1;
254
0
        break;
255
0
      }
256
0
    }
257
0
  }
258
0
  return (cset->exclude ? !match : match);
259
0
}
260
/* }}} */
261
262
/* {{{ ReleaseCharSet
263
 *----------------------------------------------------------------------
264
 *
265
 * ReleaseCharSet --
266
 *
267
 *  Free the storage associated with a character set.
268
 *
269
 * Results:
270
 *  None.
271
 *
272
 * Side effects:
273
 *  None.
274
 *
275
 *----------------------------------------------------------------------
276
 */
277
static void ReleaseCharSet(CharSet *cset)
278
0
{
279
0
  efree((char *)cset->chars);
280
0
  if (cset->ranges) {
281
0
    efree((char *)cset->ranges);
282
0
  }
283
0
}
284
/* }}} */
285
286
/* {{{ ValidateFormat
287
 *----------------------------------------------------------------------
288
 *
289
 * ValidateFormat --
290
 *
291
 *  Parse the format string and verify that it is properly formed
292
 *  and that there are exactly enough variables on the command line.
293
 *
294
 * Results:
295
 *    FAILURE or SUCCESS.
296
 *
297
 * Side effects:
298
 *     May set php_error based on abnormal conditions.
299
 *
300
 * Parameters :
301
 *     format     The format string.
302
 *     numVars    The number of variables passed to the scan command.
303
 *     totalSubs  The number of variables that will be required.
304
 *
305
 *----------------------------------------------------------------------
306
*/
307
PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
308
0
{
309
0
#define STATIC_LIST_SIZE 16
310
0
  int gotXpg, gotSequential, value, i, flags;
311
0
  char *end, *ch = NULL;
312
0
  int staticAssign[STATIC_LIST_SIZE];
313
0
  int *nassign = staticAssign;
314
0
  int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
315
316
  /*
317
   * Initialize an array that records the number of times a variable
318
   * is assigned to by the format string.  We use this to detect if
319
   * a variable is multiply assigned or left unassigned.
320
   */
321
0
  if (numVars > nspace) {
322
0
    nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
323
0
    nspace = numVars;
324
0
  }
325
0
  for (i = 0; i < nspace; i++) {
326
0
    nassign[i] = 0;
327
0
  }
328
329
0
  xpgSize = objIndex = gotXpg = gotSequential = 0;
330
331
0
  while (*format != '\0') {
332
0
    ch = format++;
333
0
    flags = 0;
334
335
0
    if (*ch != '%') {
336
0
      continue;
337
0
    }
338
0
    ch = format++;
339
0
    if (*ch == '%') {
340
0
      continue;
341
0
    }
342
0
    if (*ch == '*') {
343
0
      flags |= SCAN_SUPPRESS;
344
0
      ch = format++;
345
0
      goto xpgCheckDone;
346
0
    }
347
348
0
    if ( isdigit( (int)*ch ) ) {
349
      /*
350
       * Check for an XPG3-style %n$ specification.  Note: there
351
       * must not be a mixture of XPG3 specs and non-XPG3 specs
352
       * in the same format string.
353
       */
354
0
      value = ZEND_STRTOUL(format-1, &end, 10);
355
0
      if (*end != '$') {
356
0
        goto notXpg;
357
0
      }
358
0
      format = end+1;
359
0
      ch     = format++;
360
0
      gotXpg = 1;
361
0
      if (gotSequential) {
362
0
        goto mixedXPG;
363
0
      }
364
0
      if ((value < 1) || (numVars && (value > numVars))) {
365
0
        goto badIndex;
366
0
      } else if (numVars == 0) {
367
        /*
368
         * In the case where no vars are specified, the user can
369
         * specify %9999$ legally, so we have to consider special
370
         * rules for growing the assign array.  'value' is
371
         * guaranteed to be > 0.
372
         */
373
374
        /* set a lower artificial limit on this
375
         * in the interest of security and resource friendliness
376
         * 255 arguments should be more than enough. - cc
377
         */
378
0
        if (value > SCAN_MAX_ARGS) {
379
0
          goto badIndex;
380
0
        }
381
382
0
        xpgSize = (xpgSize > value) ? xpgSize : value;
383
0
      }
384
0
      objIndex = value - 1;
385
0
      goto xpgCheckDone;
386
0
    }
387
388
0
notXpg:
389
0
    gotSequential = 1;
390
0
    if (gotXpg) {
391
0
mixedXPG:
392
0
      zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
393
0
      goto error;
394
0
    }
395
396
0
xpgCheckDone:
397
    /*
398
     * Parse any width specifier.
399
     */
400
0
    if (isdigit(UCHAR(*ch))) {
401
0
      value = ZEND_STRTOUL(format-1, &format, 10);
402
0
      flags |= SCAN_WIDTH;
403
0
      ch = format++;
404
0
    }
405
406
    /*
407
     * Ignore size specifier.
408
     */
409
0
    if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
410
0
      ch = format++;
411
0
    }
412
413
0
    if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
414
0
      goto badIndex;
415
0
    }
416
417
    /*
418
     * Handle the various field types.
419
     */
420
0
    switch (*ch) {
421
0
      case 'n':
422
0
      case 'd':
423
0
      case 'D':
424
0
      case 'i':
425
0
      case 'o':
426
0
      case 'x':
427
0
      case 'X':
428
0
      case 'u':
429
0
      case 'f':
430
0
      case 'e':
431
0
      case 'E':
432
0
      case 'g':
433
0
      case 's':
434
0
        break;
435
436
0
      case 'c':
437
        /* we differ here with the TCL implementation in allowing for */
438
        /* a character width specification, to be more consistent with */
439
        /* ANSI. since Zend auto allocates space for vars, this is no */
440
        /* problem - cc                                               */
441
        /*
442
        if (flags & SCAN_WIDTH) {
443
          php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
444
          goto error;
445
        }
446
        */
447
0
        break;
448
449
0
      case '[':
450
0
        if (*format == '\0') {
451
0
          goto badSet;
452
0
        }
453
0
        ch = format++;
454
0
        if (*ch == '^') {
455
0
          if (*format == '\0') {
456
0
            goto badSet;
457
0
          }
458
0
          ch = format++;
459
0
        }
460
0
        if (*ch == ']') {
461
0
          if (*format == '\0') {
462
0
            goto badSet;
463
0
          }
464
0
          ch = format++;
465
0
        }
466
0
        while (*ch != ']') {
467
0
          if (*format == '\0') {
468
0
            goto badSet;
469
0
          }
470
0
          ch = format++;
471
0
        }
472
0
        break;
473
0
badSet:
474
0
        zend_value_error("Unmatched [ in format string");
475
0
        goto error;
476
477
0
      default: {
478
0
        zend_value_error("Bad scan conversion character \"%c\"", *ch);
479
0
        goto error;
480
0
      }
481
0
    }
482
483
0
    if (!(flags & SCAN_SUPPRESS)) {
484
0
      if (objIndex >= nspace) {
485
        /*
486
         * Expand the nassign buffer.  If we are using XPG specifiers,
487
         * make sure that we grow to a large enough size.  xpgSize is
488
         * guaranteed to be at least one larger than objIndex.
489
         */
490
0
        value = nspace;
491
0
        if (xpgSize) {
492
0
          nspace = xpgSize;
493
0
        } else {
494
0
          nspace += STATIC_LIST_SIZE;
495
0
        }
496
0
        if (nassign == staticAssign) {
497
0
          nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
498
0
          for (i = 0; i < STATIC_LIST_SIZE; ++i) {
499
0
            nassign[i] = staticAssign[i];
500
0
          }
501
0
        } else {
502
0
          nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
503
0
        }
504
0
        for (i = value; i < nspace; i++) {
505
0
          nassign[i] = 0;
506
0
        }
507
0
      }
508
0
      nassign[objIndex]++;
509
0
      objIndex++;
510
0
    }
511
0
  } /* while (*format != '\0') */
512
513
  /*
514
   * Verify that all of the variable were assigned exactly once.
515
   */
516
0
  if (numVars == 0) {
517
0
    if (xpgSize) {
518
0
      numVars = xpgSize;
519
0
    } else {
520
0
      numVars = objIndex;
521
0
    }
522
0
  }
523
0
  if (totalSubs) {
524
0
    *totalSubs = numVars;
525
0
  }
526
0
  for (i = 0; i < numVars; i++) {
527
0
    if (nassign[i] > 1) {
528
0
      zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
529
0
      goto error;
530
0
    } else if (!xpgSize && (nassign[i] == 0)) {
531
      /*
532
       * If the space is empty, and xpgSize is 0 (means XPG wasn't
533
       * used, and/or numVars != 0), then too many vars were given
534
       */
535
0
      zend_value_error("Variable is not assigned by any conversion specifiers");
536
0
      goto error;
537
0
    }
538
0
  }
539
540
0
  if (nassign != staticAssign) {
541
0
    efree((char *)nassign);
542
0
  }
543
0
  return SCAN_SUCCESS;
544
545
0
badIndex:
546
0
  if (gotXpg) {
547
0
    zend_value_error("%s", "\"%n$\" argument index out of range");
548
0
  } else {
549
0
    zend_value_error("Different numbers of variable names and field specifiers");
550
0
  }
551
552
0
error:
553
0
  if (nassign != staticAssign) {
554
0
    efree((char *)nassign);
555
0
  }
556
0
  return SCAN_ERROR_INVALID_FORMAT;
557
0
#undef STATIC_LIST_SIZE
558
0
}
559
/* }}} */
560
561
/* {{{ php_sscanf_internal
562
 * This is the internal function which does processing on behalf of
563
 * both sscanf() and fscanf()
564
 *
565
 * parameters :
566
 *    string    literal string to be processed
567
 *    format    format string
568
 *    argCount  total number of elements in the args array
569
 *    args    arguments passed in from user function (f|s)scanf
570
 *    varStart  offset (in args) of 1st variable passed in to (f|s)scanf
571
 *    return_value set with the results of the scan
572
 */
573
574
PHPAPI int php_sscanf_internal( char *string, char *format,
575
        int argCount, zval *args,
576
        int varStart, zval *return_value)
577
0
{
578
0
  int  numVars, nconversions, totalVars = -1;
579
0
  int  i, result;
580
0
  zend_long value;
581
0
  int  objIndex;
582
0
  char *end, *baseString;
583
0
  zval *current;
584
0
  char op   = 0;
585
0
  int  base = 0;
586
0
  int  underflow = 0;
587
0
  size_t width;
588
0
  int_string_formater fn = NULL;
589
0
  char *ch, sch;
590
0
  int  flags;
591
0
  char buf[64]; /* Temporary buffer to hold scanned number
592
           * strings before they are passed to strtoul() */
593
594
  /* do some sanity checking */
595
0
  if ((varStart > argCount) || (varStart < 0)){
596
0
    varStart = SCAN_MAX_ARGS + 1;
597
0
  }
598
0
  numVars = argCount - varStart;
599
0
  if (numVars < 0) {
600
0
    numVars = 0;
601
0
  }
602
603
  /*
604
   * Check for errors in the format string.
605
   */
606
0
  if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
607
0
    scan_set_error_return( numVars, return_value );
608
0
    return SCAN_ERROR_INVALID_FORMAT;
609
0
  }
610
611
0
  objIndex = numVars ? varStart : 0;
612
613
  /*
614
   * If any variables are passed, make sure they are all passed by reference
615
   */
616
0
  if (numVars) {
617
0
    for (i = varStart;i < argCount;i++){
618
0
      ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
619
0
    }
620
0
  }
621
622
  /*
623
   * Allocate space for the result objects. Only happens when no variables
624
   * are specified
625
   */
626
0
  if (!numVars) {
627
0
    zval tmp;
628
629
    /* allocate an array for return */
630
0
    array_init(return_value);
631
632
0
    for (i = 0; i < totalVars; i++) {
633
0
      ZVAL_NULL(&tmp);
634
0
      if (add_next_index_zval(return_value, &tmp) == FAILURE) {
635
0
        scan_set_error_return(0, return_value);
636
0
        return FAILURE;
637
0
      }
638
0
    }
639
0
    varStart = 0; /* Array index starts from 0 */
640
0
  }
641
642
0
  baseString = string;
643
644
  /*
645
   * Iterate over the format string filling in the result objects until
646
   * we reach the end of input, the end of the format string, or there
647
   * is a mismatch.
648
   */
649
0
  nconversions = 0;
650
  /* note ! - we need to limit the loop for objIndex to keep it in bounds */
651
652
0
  while (*format != '\0') {
653
0
    ch    = format++;
654
0
    flags = 0;
655
656
    /*
657
     * If we see whitespace in the format, skip whitespace in the string.
658
     */
659
0
    if ( isspace( (int)*ch ) ) {
660
0
      sch = *string;
661
0
      while ( isspace( (int)sch ) ) {
662
0
        if (*string == '\0') {
663
0
          goto done;
664
0
        }
665
0
        string++;
666
0
        sch = *string;
667
0
      }
668
0
      continue;
669
0
    }
670
671
0
    if (*ch != '%') {
672
0
literal:
673
0
      if (*string == '\0') {
674
0
        underflow = 1;
675
0
        goto done;
676
0
      }
677
0
      sch = *string;
678
0
      string++;
679
0
      if (*ch != sch) {
680
0
        goto done;
681
0
      }
682
0
      continue;
683
0
    }
684
685
0
    ch = format++;
686
0
    if (*ch == '%') {
687
0
      goto literal;
688
0
    }
689
690
    /*
691
     * Check for assignment suppression ('*') or an XPG3-style
692
     * assignment ('%n$').
693
     */
694
0
    if (*ch == '*') {
695
0
      flags |= SCAN_SUPPRESS;
696
0
      ch = format++;
697
0
    } else if ( isdigit(UCHAR(*ch))) {
698
0
      value = ZEND_STRTOUL(format-1, &end, 10);
699
0
      if (*end == '$') {
700
0
        format = end+1;
701
0
        ch = format++;
702
0
        objIndex = varStart + value - 1;
703
0
      }
704
0
    }
705
706
    /*
707
     * Parse any width specifier.
708
     */
709
0
    if ( isdigit(UCHAR(*ch))) {
710
0
      width = ZEND_STRTOUL(format-1, &format, 10);
711
0
      ch = format++;
712
0
    } else {
713
0
      width = 0;
714
0
    }
715
716
    /*
717
     * Ignore size specifier.
718
     */
719
0
    if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
720
0
      ch = format++;
721
0
    }
722
723
    /*
724
     * Handle the various field types.
725
     */
726
0
    switch (*ch) {
727
0
      case 'n':
728
0
        if (!(flags & SCAN_SUPPRESS)) {
729
0
          if (numVars && objIndex >= argCount) {
730
0
            break;
731
0
          } else if (numVars) {
732
0
            current = args + objIndex++;
733
0
            ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
734
0
          } else {
735
0
            add_index_long(return_value, objIndex++, string - baseString);
736
0
          }
737
0
        }
738
0
        nconversions++;
739
0
        continue;
740
741
0
      case 'd':
742
0
      case 'D':
743
0
        op = 'i';
744
0
        base = 10;
745
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
746
0
        break;
747
0
      case 'i':
748
0
        op = 'i';
749
0
        base = 0;
750
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
751
0
        break;
752
0
      case 'o':
753
0
        op = 'i';
754
0
        base = 8;
755
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
756
0
        break;
757
0
      case 'x':
758
0
      case 'X':
759
0
        op = 'i';
760
0
        base = 16;
761
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
762
0
        break;
763
0
      case 'u':
764
0
        op = 'i';
765
0
        base = 10;
766
0
        flags |= SCAN_UNSIGNED;
767
0
        fn = (int_string_formater)ZEND_STRTOUL_PTR;
768
0
        break;
769
770
0
      case 'f':
771
0
      case 'e':
772
0
      case 'E':
773
0
      case 'g':
774
0
        op = 'f';
775
0
        break;
776
777
0
      case 's':
778
0
        op = 's';
779
0
        break;
780
781
0
      case 'c':
782
0
        op = 's';
783
0
        flags |= SCAN_NOSKIP;
784
        /*-cc-*/
785
0
        if (0 == width) {
786
0
          width = 1;
787
0
        }
788
        /*-cc-*/
789
0
        break;
790
0
      case '[':
791
0
        op = '[';
792
0
        flags |= SCAN_NOSKIP;
793
0
        break;
794
0
    }   /* switch */
795
796
    /*
797
     * At this point, we will need additional characters from the
798
     * string to proceed.
799
     */
800
0
    if (*string == '\0') {
801
0
      underflow = 1;
802
0
      goto done;
803
0
    }
804
805
    /*
806
     * Skip any leading whitespace at the beginning of a field unless
807
     * the format suppresses this behavior.
808
     */
809
0
    if (!(flags & SCAN_NOSKIP)) {
810
0
      while (*string != '\0') {
811
0
        sch = *string;
812
0
        if (! isspace((int)sch) ) {
813
0
          break;
814
0
        }
815
0
        string++;
816
0
      }
817
0
      if (*string == '\0') {
818
0
        underflow = 1;
819
0
        goto done;
820
0
      }
821
0
    }
822
823
    /*
824
     * Perform the requested scanning operation.
825
     */
826
0
    switch (op) {
827
0
      case 'c':
828
0
      case 's':
829
        /*
830
         * Scan a string up to width characters or whitespace.
831
         */
832
0
        if (width == 0) {
833
0
          width = (size_t) ~0;
834
0
        }
835
0
        end = string;
836
0
        while (*end != '\0') {
837
0
          sch = *end;
838
0
          if ( isspace( (int)sch ) ) {
839
0
            break;
840
0
          }
841
0
          end++;
842
0
          if (--width == 0) {
843
0
             break;
844
0
          }
845
0
        }
846
0
        if (!(flags & SCAN_SUPPRESS)) {
847
0
          if (numVars && objIndex >= argCount) {
848
0
            break;
849
0
          } else if (numVars) {
850
0
            current = args + objIndex++;
851
0
            ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
852
0
          } else {
853
0
            add_index_stringl(return_value, objIndex++, string, end-string);
854
0
          }
855
0
        }
856
0
        string = end;
857
0
        break;
858
859
0
      case '[': {
860
0
        CharSet cset;
861
862
0
        if (width == 0) {
863
0
          width = (size_t) ~0;
864
0
        }
865
0
        end = string;
866
867
0
        format = BuildCharSet(&cset, format);
868
0
        while (*end != '\0') {
869
0
          sch = *end;
870
0
          if (!CharInSet(&cset, (int)sch)) {
871
0
            break;
872
0
          }
873
0
          end++;
874
0
          if (--width == 0) {
875
0
            break;
876
0
          }
877
0
        }
878
0
        ReleaseCharSet(&cset);
879
880
0
        if (string == end) {
881
          /*
882
           * Nothing matched the range, stop processing
883
           */
884
0
          goto done;
885
0
        }
886
0
        if (!(flags & SCAN_SUPPRESS)) {
887
0
          if (numVars && objIndex >= argCount) {
888
0
            break;
889
0
          } else if (numVars) {
890
0
            current = args + objIndex++;
891
0
            ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
892
0
          } else {
893
0
            add_index_stringl(return_value, objIndex++, string, end-string);
894
0
          }
895
0
        }
896
0
        string = end;
897
0
        break;
898
0
      }
899
/*
900
      case 'c':
901
         / Scan a single character./
902
903
        sch = *string;
904
        string++;
905
        if (!(flags & SCAN_SUPPRESS)) {
906
          if (numVars) {
907
            char __buf[2];
908
            __buf[0] = sch;
909
            __buf[1] = '\0';
910
            current = args[objIndex++];
911
            zval_ptr_dtor_nogc(*current);
912
            ZVAL_STRINGL( *current, __buf, 1);
913
          } else {
914
            add_index_stringl(return_value, objIndex++, &sch, 1);
915
          }
916
        }
917
        break;
918
*/
919
0
      case 'i':
920
        /*
921
         * Scan an unsigned or signed integer.
922
         */
923
        /*-cc-*/
924
0
        buf[0] = '\0';
925
        /*-cc-*/
926
0
        if ((width == 0) || (width > sizeof(buf) - 1)) {
927
0
          width = sizeof(buf) - 1;
928
0
        }
929
930
0
        flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
931
0
        for (end = buf; width > 0; width--) {
932
0
          switch (*string) {
933
            /*
934
             * The 0 digit has special meaning at the beginning of
935
             * a number.  If we are unsure of the base, it
936
             * indicates that we are in base 8 or base 16 (if it is
937
             * followed by an 'x').
938
             */
939
0
            case '0':
940
              /*-cc-*/
941
0
              if (base == 16) {
942
0
                flags |= SCAN_XOK;
943
0
              }
944
              /*-cc-*/
945
0
              if (base == 0) {
946
0
                base = 8;
947
0
                flags |= SCAN_XOK;
948
0
              }
949
0
              if (flags & SCAN_NOZERO) {
950
0
                flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
951
0
              } else {
952
0
                flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
953
0
              }
954
0
              goto addToInt;
955
956
0
            case '1': case '2': case '3': case '4':
957
0
            case '5': case '6': case '7':
958
0
              if (base == 0) {
959
0
                base = 10;
960
0
              }
961
0
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
962
0
              goto addToInt;
963
964
0
            case '8': case '9':
965
0
              if (base == 0) {
966
0
                base = 10;
967
0
              }
968
0
              if (base <= 8) {
969
0
                 break;
970
0
              }
971
0
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
972
0
              goto addToInt;
973
974
0
            case 'A': case 'B': case 'C':
975
0
            case 'D': case 'E': case 'F':
976
0
            case 'a': case 'b': case 'c':
977
0
            case 'd': case 'e': case 'f':
978
0
              if (base <= 10) {
979
0
                break;
980
0
              }
981
0
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
982
0
              goto addToInt;
983
984
0
            case '+': case '-':
985
0
              if (flags & SCAN_SIGNOK) {
986
0
                flags &= ~SCAN_SIGNOK;
987
0
                goto addToInt;
988
0
              }
989
0
              break;
990
991
0
            case 'x': case 'X':
992
0
              if ((flags & SCAN_XOK) && (end == buf+1)) {
993
0
                base = 16;
994
0
                flags &= ~SCAN_XOK;
995
0
                goto addToInt;
996
0
              }
997
0
              break;
998
0
          }
999
1000
          /*
1001
           * We got an illegal character so we are done accumulating.
1002
           */
1003
0
          break;
1004
1005
0
addToInt:
1006
          /*
1007
           * Add the character to the temporary buffer.
1008
           */
1009
0
          *end++ = *string++;
1010
0
          if (*string == '\0') {
1011
0
            break;
1012
0
          }
1013
0
        }
1014
1015
        /*
1016
         * Check to see if we need to back up because we only got a
1017
         * sign or a trailing x after a 0.
1018
         */
1019
0
        if (flags & SCAN_NODIGITS) {
1020
0
          if (*string == '\0') {
1021
0
            underflow = 1;
1022
0
          }
1023
0
          goto done;
1024
0
        } else if (end[-1] == 'x' || end[-1] == 'X') {
1025
0
          end--;
1026
0
          string--;
1027
0
        }
1028
1029
        /*
1030
         * Scan the value from the temporary buffer.  If we are
1031
         * returning a large unsigned value, we have to convert it back
1032
         * to a string since PHP only supports signed values.
1033
         */
1034
0
        if (!(flags & SCAN_SUPPRESS)) {
1035
0
          *end = '\0';
1036
0
          value = (zend_long) (*fn)(buf, NULL, base);
1037
0
          if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1038
0
            snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1039
0
            if (numVars && objIndex >= argCount) {
1040
0
              break;
1041
0
            } else if (numVars) {
1042
               /* change passed value type to string */
1043
0
              current = args + objIndex++;
1044
0
              ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1045
0
            } else {
1046
0
              add_index_string(return_value, objIndex++, buf);
1047
0
            }
1048
0
          } else {
1049
0
            if (numVars && objIndex >= argCount) {
1050
0
              break;
1051
0
            } else if (numVars) {
1052
0
              current = args + objIndex++;
1053
0
              ZEND_TRY_ASSIGN_REF_LONG(current, value);
1054
0
            } else {
1055
0
              add_index_long(return_value, objIndex++, value);
1056
0
            }
1057
0
          }
1058
0
        }
1059
0
        break;
1060
1061
0
      case 'f':
1062
        /*
1063
         * Scan a floating point number
1064
         */
1065
0
        buf[0] = '\0';     /* call me pedantic */
1066
0
        if ((width == 0) || (width > sizeof(buf) - 1)) {
1067
0
          width = sizeof(buf) - 1;
1068
0
        }
1069
0
        flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1070
0
        for (end = buf; width > 0; width--) {
1071
0
          switch (*string) {
1072
0
            case '0': case '1': case '2': case '3':
1073
0
            case '4': case '5': case '6': case '7':
1074
0
            case '8': case '9':
1075
0
              flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1076
0
              goto addToFloat;
1077
0
            case '+':
1078
0
            case '-':
1079
0
              if (flags & SCAN_SIGNOK) {
1080
0
                flags &= ~SCAN_SIGNOK;
1081
0
                goto addToFloat;
1082
0
              }
1083
0
              break;
1084
0
            case '.':
1085
0
              if (flags & SCAN_PTOK) {
1086
0
                flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1087
0
                goto addToFloat;
1088
0
              }
1089
0
              break;
1090
0
            case 'e':
1091
0
            case 'E':
1092
              /*
1093
               * An exponent is not allowed until there has
1094
               * been at least one digit.
1095
               */
1096
0
              if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1097
0
                flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1098
0
                  | SCAN_SIGNOK | SCAN_NODIGITS;
1099
0
                goto addToFloat;
1100
0
              }
1101
0
              break;
1102
0
          }
1103
1104
          /*
1105
           * We got an illegal character so we are done accumulating.
1106
           */
1107
0
          break;
1108
1109
0
addToFloat:
1110
          /*
1111
           * Add the character to the temporary buffer.
1112
           */
1113
0
          *end++ = *string++;
1114
0
          if (*string == '\0') {
1115
0
            break;
1116
0
          }
1117
0
        }
1118
1119
        /*
1120
         * Check to see if we need to back up because we saw a
1121
         * trailing 'e' or sign.
1122
         */
1123
0
        if (flags & SCAN_NODIGITS) {
1124
0
          if (flags & SCAN_EXPOK) {
1125
            /*
1126
             * There were no digits at all so scanning has
1127
             * failed and we are done.
1128
             */
1129
0
            if (*string == '\0') {
1130
0
              underflow = 1;
1131
0
            }
1132
0
            goto done;
1133
0
          }
1134
1135
          /*
1136
           * We got a bad exponent ('e' and maybe a sign).
1137
           */
1138
0
          end--;
1139
0
          string--;
1140
0
          if (*end != 'e' && *end != 'E') {
1141
0
            end--;
1142
0
            string--;
1143
0
          }
1144
0
        }
1145
1146
        /*
1147
         * Scan the value from the temporary buffer.
1148
         */
1149
0
        if (!(flags & SCAN_SUPPRESS)) {
1150
0
          double dvalue;
1151
0
          *end = '\0';
1152
0
          dvalue = zend_strtod(buf, NULL);
1153
0
          if (numVars && objIndex >= argCount) {
1154
0
            break;
1155
0
          } else if (numVars) {
1156
0
            current = args + objIndex++;
1157
0
            ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1158
0
          } else {
1159
0
            add_index_double(return_value, objIndex++, dvalue );
1160
0
          }
1161
0
        }
1162
0
        break;
1163
0
    } /* switch (op) */
1164
0
    nconversions++;
1165
0
  } /*  while (*format != '\0') */
1166
1167
0
done:
1168
0
  result = SCAN_SUCCESS;
1169
1170
0
  if (underflow && (0==nconversions)) {
1171
0
    scan_set_error_return( numVars, return_value );
1172
0
    result = SCAN_ERROR_EOF;
1173
0
  } else if (numVars) {
1174
0
    zval_ptr_dtor(return_value );
1175
0
    ZVAL_LONG(return_value, nconversions);
1176
0
  } else if (nconversions < totalVars) {
1177
    /* TODO: not all elements converted. we need to prune the list - cc */
1178
0
  }
1179
0
  return result;
1180
0
}
1181
/* }}} */
1182
1183
/* The compiler choked when I tried to make this a macro, so it stays a function. */
1184
static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
{
  /*
   * Store the "input exhausted" result of a scan into return_value.
   *
   * numVars      - non-zero when the caller supplied variables to assign
   *                into; zero when results are collected in return_value.
   * return_value - zval that receives the error result.
   */
  if (!numVars) {
    /* No output variables: the result becomes NULL.  convert_to_null
     * also runs the destructor on whatever return_value held before. */
    convert_to_null(return_value);
    return;
  }

  /* Output variables were supplied: hand back the EOF marker as a long. */
  ZVAL_LONG(return_value, SCAN_ERROR_EOF);
}
1193
/* }}} */