Coverage Report

Created: 2026-06-02 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/standard/scanf.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Clayton Collie <clcollie@mindspring.com>                     |
12
   +----------------------------------------------------------------------+
13
*/
14
15
/*
16
  scanf.c --
17
18
  This file contains the base code which implements sscanf and by extension
19
  fscanf. Original code is from TCL8.3.0 and bears the following copyright:
20
21
  This software is copyrighted by the Regents of the University of
22
  California, Sun Microsystems, Inc., Scriptics Corporation,
23
  and other parties.  The following terms apply to all files associated
24
  with the software unless explicitly disclaimed in individual files.
25
26
  The authors hereby grant permission to use, copy, modify, distribute,
27
  and license this software and its documentation for any purpose, provided
28
  that existing copyright notices are retained in all copies and that this
29
  notice is included verbatim in any distributions. No written agreement,
30
  license, or royalty fee is required for any of the authorized uses.
31
  Modifications to this software may be copyrighted by their authors
32
  and need not follow the licensing terms described here, provided that
33
  the new terms are clearly indicated on the first page of each file where
34
  they apply.
35
36
  IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
37
  FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
38
  ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
39
  DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
40
  POSSIBILITY OF SUCH DAMAGE.
41
42
  THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
43
  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
44
  FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
45
  IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
46
  NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
47
  MODIFICATIONS.
48
49
  GOVERNMENT USE: If you are acquiring this software on behalf of the
50
  U.S. government, the Government shall have only "Restricted Rights"
51
  in the software and related documentation as defined in the Federal
52
  Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
53
  are acquiring the software on behalf of the Department of Defense, the
54
  software shall be classified as "Commercial Computer Software" and the
55
  Government shall have only "Restricted Rights" as defined in Clause
56
  252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
57
  authors grant the U.S. Government and others acting in its behalf
58
  permission to use and distribute the software in accordance with the
59
  terms specified in this license.
60
*/
61
62
#include <stdio.h>
63
#include <limits.h>
64
#include <ctype.h>
65
#include "php.h"
66
#include "php_variables.h"
67
#include <locale.h>
68
#include "zend_execute.h"
69
#include "zend_operators.h"
70
#include "zend_strtod.h"
71
#include "php_globals.h"
72
#include "basic_functions.h"
73
#include "scanf.h"
74
75
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier itself; the second
 * group is scratch state used while accumulating the digits of a number.
 */
#define SCAN_NOSKIP     0x1     /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2     /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4     /* Read an unsigned value. */
#define SCAN_WIDTH      0x8     /* A width value was supplied. */

#define SCAN_SIGNOK     0x10    /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20    /* No digits have been scanned. */
#define SCAN_NOZERO     0x40    /* No zero digits have been scanned. */
#define SCAN_XOK        0x80    /* An 'x' is allowed. */
#define SCAN_PTOK       0x100   /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200   /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar, e.g. before handing it to
 * the <ctype.h> classifiers.  The whole expansion is parenthesized so the
 * cast cannot be split apart by a neighbouring higher-precedence operator
 * (sizeof, [], ->, ...).
 */
#define UCHAR(x)        ((zend_uchar)(x))
91
92
/*
93
 * The following structure contains the information associated with
94
 * a character set.
95
 */
96
typedef struct CharSet {
97
  int exclude;    /* 1 if this is an exclusion set. */
98
  int nchars;
99
  char *chars;
100
  int nranges;
101
  struct Range {
102
    char start;
103
    char end;
104
  } *ranges;
105
} CharSet;
106
107
typedef zend_long (*int_string_formater)(const char*, char**, int);
108
109
/*
110
 * Declarations for functions used only in this file.
111
 */
112
static char *BuildCharSet(CharSet *cset, char *format);
113
static int  CharInSet(CharSet *cset, int ch);
114
static void ReleaseCharSet(CharSet *cset);
115
static inline void scan_set_error_return(int numVars, zval *return_value);
116
117
118
/* {{{ BuildCharSet
119
 *----------------------------------------------------------------------
120
 *
121
 * BuildCharSet --
122
 *
123
 *  This function examines a character set format specification
124
 *  and builds a CharSet containing the individual characters and
125
 *  character ranges specified.
126
 *
127
 * Results:
128
 *  Returns the next format position.
129
 *
130
 * Side effects:
131
 *  Initializes the charset.
132
 *
133
 *----------------------------------------------------------------------
134
 */
135
static char * BuildCharSet(CharSet *cset, char *format)
136
0
{
137
0
  char *ch, start;
138
0
  int  nranges;
139
0
  char *end;
140
141
0
  memset(cset, 0, sizeof(CharSet));
142
143
0
  ch = format;
144
0
  if (*ch == '^') {
145
0
    cset->exclude = 1;
146
0
    ch = ++format;
147
0
  }
148
0
  end = format + 1; /* verify this - cc */
149
150
  /*
151
   * Find the close bracket so we can overallocate the set.
152
   */
153
0
  if (*ch == ']') {
154
0
    ch = end++;
155
0
  }
156
0
  nranges = 0;
157
0
  while (*ch != ']') {
158
0
    if (*ch == '-') {
159
0
      nranges++;
160
0
    }
161
0
    ch = end++;
162
0
  }
163
164
0
  cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
165
0
  if (nranges > 0) {
166
0
    cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
167
0
  } else {
168
0
    cset->ranges = NULL;
169
0
  }
170
171
  /*
172
   * Now build the character set.
173
   */
174
0
  cset->nchars = cset->nranges = 0;
175
0
  ch    = format++;
176
0
  start = *ch;
177
0
  if (*ch == ']' || *ch == '-') {
178
0
    cset->chars[cset->nchars++] = *ch;
179
0
    ch = format++;
180
0
  }
181
0
  while (*ch != ']') {
182
0
    if (*format == '-') {
183
      /*
184
       * This may be the first character of a range, so don't add
185
       * it yet.
186
       */
187
0
      start = *ch;
188
0
    } else if (*ch == '-') {
189
      /*
190
       * Check to see if this is the last character in the set, in which
191
       * case it is not a range and we should add the previous character
192
       * as well as the dash.
193
       */
194
0
      if (*format == ']') {
195
0
        cset->chars[cset->nchars++] = start;
196
0
        cset->chars[cset->nchars++] = *ch;
197
0
      } else {
198
0
        ch = format++;
199
200
        /*
201
         * Check to see if the range is in reverse order.
202
         */
203
0
        if (start < *ch) {
204
0
          cset->ranges[cset->nranges].start = start;
205
0
          cset->ranges[cset->nranges].end = *ch;
206
0
        } else {
207
0
          cset->ranges[cset->nranges].start = *ch;
208
0
          cset->ranges[cset->nranges].end = start;
209
0
        }
210
0
        cset->nranges++;
211
0
      }
212
0
    } else {
213
0
      cset->chars[cset->nchars++] = *ch;
214
0
    }
215
0
    ch = format++;
216
0
  }
217
0
  return format;
218
0
}
219
/* }}} */
220
221
/* {{{ CharInSet
222
 *----------------------------------------------------------------------
223
 *
224
 * CharInSet --
225
 *
226
 *  Check to see if a character matches the given set.
227
 *
228
 * Results:
229
 *  Returns non-zero if the character matches the given set.
230
 *
231
 * Side effects:
232
 *  None.
233
 *
234
 *----------------------------------------------------------------------
235
 */
236
static int CharInSet(CharSet *cset, int c)
237
0
{
238
0
  char ch = (char) c;
239
0
  int i, match = 0;
240
241
0
  for (i = 0; i < cset->nchars; i++) {
242
0
    if (cset->chars[i] == ch) {
243
0
      match = 1;
244
0
      break;
245
0
    }
246
0
  }
247
0
  if (!match) {
248
0
    for (i = 0; i < cset->nranges; i++) {
249
0
      if ((cset->ranges[i].start <= ch)
250
0
        && (ch <= cset->ranges[i].end)) {
251
0
        match = 1;
252
0
        break;
253
0
      }
254
0
    }
255
0
  }
256
0
  return (cset->exclude ? !match : match);
257
0
}
258
/* }}} */
259
260
/* {{{ ReleaseCharSet
261
 *----------------------------------------------------------------------
262
 *
263
 * ReleaseCharSet --
264
 *
265
 *  Free the storage associated with a character set.
266
 *
267
 * Results:
268
 *  None.
269
 *
270
 * Side effects:
271
 *  None.
272
 *
273
 *----------------------------------------------------------------------
274
 */
275
static void ReleaseCharSet(CharSet *cset)
276
0
{
277
0
  efree((char *)cset->chars);
278
0
  if (cset->ranges) {
279
0
    efree((char *)cset->ranges);
280
0
  }
281
0
}
282
/* }}} */
283
284
/* {{{ ValidateFormat
285
 *----------------------------------------------------------------------
286
 *
287
 * ValidateFormat --
288
 *
289
 *  Parse the format string and verify that it is properly formed
290
 *  and that there are exactly enough variables on the command line.
291
 *
292
 * Results:
293
 *    FAILURE or SUCCESS.
294
 *
295
 * Side effects:
296
 *     May set php_error based on abnormal conditions.
297
 *
298
 * Parameters :
299
 *     format     The format string.
300
 *     numVars    The number of variables passed to the scan command.
301
 *     totalSubs  The number of variables that will be required.
302
 *
303
 *----------------------------------------------------------------------
304
*/
305
PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
306
0
{
307
0
#define STATIC_LIST_SIZE 16
308
0
  int gotXpg, gotSequential, value, i, flags;
309
0
  char *end, *ch = NULL;
310
0
  int staticAssign[STATIC_LIST_SIZE];
311
0
  int *nassign = staticAssign;
312
0
  int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
313
314
  /*
315
   * Initialize an array that records the number of times a variable
316
   * is assigned to by the format string.  We use this to detect if
317
   * a variable is multiply assigned or left unassigned.
318
   */
319
0
  if (numVars > nspace) {
320
0
    nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
321
0
    nspace = numVars;
322
0
  }
323
0
  for (i = 0; i < nspace; i++) {
324
0
    nassign[i] = 0;
325
0
  }
326
327
0
  xpgSize = objIndex = gotXpg = gotSequential = 0;
328
329
0
  while (*format != '\0') {
330
0
    ch = format++;
331
0
    flags = 0;
332
333
0
    if (*ch != '%') {
334
0
      continue;
335
0
    }
336
0
    ch = format++;
337
0
    if (*ch == '%') {
338
0
      continue;
339
0
    }
340
0
    if (*ch == '*') {
341
0
      flags |= SCAN_SUPPRESS;
342
0
      ch = format++;
343
0
      goto xpgCheckDone;
344
0
    }
345
346
0
    if ( isdigit( (unsigned char)*ch ) ) {
347
      /*
348
       * Check for an XPG3-style %n$ specification.  Note: there
349
       * must not be a mixture of XPG3 specs and non-XPG3 specs
350
       * in the same format string.
351
       */
352
0
      value = ZEND_STRTOUL(format-1, &end, 10);
353
0
      if (*end != '$') {
354
0
        goto notXpg;
355
0
      }
356
0
      format = end+1;
357
0
      ch     = format++;
358
0
      gotXpg = 1;
359
0
      if (gotSequential) {
360
0
        goto mixedXPG;
361
0
      }
362
0
      if ((value < 1) || (numVars && (value > numVars))) {
363
0
        goto badIndex;
364
0
      } else if (numVars == 0) {
365
        /*
366
         * In the case where no vars are specified, the user can
367
         * specify %9999$ legally, so we have to consider special
368
         * rules for growing the assign array.  'value' is
369
         * guaranteed to be > 0.
370
         */
371
372
        /* set a lower artificial limit on this
373
         * in the interest of security and resource friendliness
374
         * 255 arguments should be more than enough. - cc
375
         */
376
0
        if (value > SCAN_MAX_ARGS) {
377
0
          goto badIndex;
378
0
        }
379
380
0
        xpgSize = (xpgSize > value) ? xpgSize : value;
381
0
      }
382
0
      objIndex = value - 1;
383
0
      goto xpgCheckDone;
384
0
    }
385
386
0
notXpg:
387
0
    gotSequential = 1;
388
0
    if (gotXpg) {
389
0
mixedXPG:
390
0
      zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
391
0
      goto error;
392
0
    }
393
394
0
xpgCheckDone:
395
    /*
396
     * Parse any width specifier.
397
     */
398
0
    if (isdigit(UCHAR(*ch))) {
399
0
      value = ZEND_STRTOUL(format-1, &format, 10);
400
0
      flags |= SCAN_WIDTH;
401
0
      ch = format++;
402
0
    }
403
404
    /*
405
     * Ignore size specifier.
406
     */
407
0
    if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
408
0
      ch = format++;
409
0
    }
410
411
0
    if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
412
0
      goto badIndex;
413
0
    }
414
415
    /*
416
     * Handle the various field types.
417
     */
418
0
    switch (*ch) {
419
0
      case 'n':
420
0
      case 'd':
421
0
      case 'D':
422
0
      case 'i':
423
0
      case 'o':
424
0
      case 'x':
425
0
      case 'X':
426
0
      case 'u':
427
0
      case 'f':
428
0
      case 'e':
429
0
      case 'E':
430
0
      case 'g':
431
0
      case 's':
432
0
        break;
433
434
0
      case 'c':
435
        /* we differ here with the TCL implementation in allowing for */
436
        /* a character width specification, to be more consistent with */
437
        /* ANSI. since Zend auto allocates space for vars, this is no */
438
        /* problem - cc                                               */
439
        /*
440
        if (flags & SCAN_WIDTH) {
441
          php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
442
          goto error;
443
        }
444
        */
445
0
        break;
446
447
0
      case '[':
448
0
        if (*format == '\0') {
449
0
          goto badSet;
450
0
        }
451
0
        ch = format++;
452
0
        if (*ch == '^') {
453
0
          if (*format == '\0') {
454
0
            goto badSet;
455
0
          }
456
0
          ch = format++;
457
0
        }
458
0
        if (*ch == ']') {
459
0
          if (*format == '\0') {
460
0
            goto badSet;
461
0
          }
462
0
          ch = format++;
463
0
        }
464
0
        while (*ch != ']') {
465
0
          if (*format == '\0') {
466
0
            goto badSet;
467
0
          }
468
0
          ch = format++;
469
0
        }
470
0
        break;
471
0
badSet:
472
0
        zend_value_error("Unmatched [ in format string");
473
0
        goto error;
474
475
0
      default: {
476
0
        zend_value_error("Bad scan conversion character \"%c\"", *ch);
477
0
        goto error;
478
0
      }
479
0
    }
480
481
0
    if (!(flags & SCAN_SUPPRESS)) {
482
0
      if (objIndex >= nspace) {
483
        /*
484
         * Expand the nassign buffer.  If we are using XPG specifiers,
485
         * make sure that we grow to a large enough size.  xpgSize is
486
         * guaranteed to be at least one larger than objIndex.
487
         */
488
0
        value = nspace;
489
0
        if (xpgSize) {
490
0
          nspace = xpgSize;
491
0
        } else {
492
0
          nspace += STATIC_LIST_SIZE;
493
0
        }
494
0
        if (nassign == staticAssign) {
495
0
          nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
496
0
          for (i = 0; i < STATIC_LIST_SIZE; ++i) {
497
0
            nassign[i] = staticAssign[i];
498
0
          }
499
0
        } else {
500
0
          nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
501
0
        }
502
0
        for (i = value; i < nspace; i++) {
503
0
          nassign[i] = 0;
504
0
        }
505
0
      }
506
0
      nassign[objIndex]++;
507
0
      objIndex++;
508
0
    }
509
0
  } /* while (*format != '\0') */
510
511
  /*
512
   * Verify that all of the variable were assigned exactly once.
513
   */
514
0
  if (numVars == 0) {
515
0
    if (xpgSize) {
516
0
      numVars = xpgSize;
517
0
    } else {
518
0
      numVars = objIndex;
519
0
    }
520
0
  }
521
0
  if (totalSubs) {
522
0
    *totalSubs = numVars;
523
0
  }
524
0
  for (i = 0; i < numVars; i++) {
525
0
    if (nassign[i] > 1) {
526
0
      zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
527
0
      goto error;
528
0
    } else if (!xpgSize && (nassign[i] == 0)) {
529
      /*
530
       * If the space is empty, and xpgSize is 0 (means XPG wasn't
531
       * used, and/or numVars != 0), then too many vars were given
532
       */
533
0
      zend_value_error("Variable is not assigned by any conversion specifiers");
534
0
      goto error;
535
0
    }
536
0
  }
537
538
0
  if (nassign != staticAssign) {
539
0
    efree((char *)nassign);
540
0
  }
541
0
  return SCAN_SUCCESS;
542
543
0
badIndex:
544
0
  if (gotXpg) {
545
0
    zend_value_error("%s", "\"%n$\" argument index out of range");
546
0
  } else {
547
0
    zend_value_error("Different numbers of variable names and field specifiers");
548
0
  }
549
550
0
error:
551
0
  if (nassign != staticAssign) {
552
0
    efree((char *)nassign);
553
0
  }
554
0
  return SCAN_ERROR_INVALID_FORMAT;
555
0
#undef STATIC_LIST_SIZE
556
0
}
557
/* }}} */
558
559
/* {{{ php_sscanf_internal
560
 * This is the internal function which does processing on behalf of
561
 * both sscanf() and fscanf()
562
 *
563
 * parameters :
564
 *    string    literal string to be processed
565
 *    format    format string
566
 *    argCount  total number of elements in the args array
567
 *    args    arguments passed in from user function (f|s)scanf
568
 *    varStart  offset (in args) of 1st variable passed in to (f|s)scanf
569
 *    return_value set with the results of the scan
570
 */
571
572
PHPAPI int php_sscanf_internal( char *string, char *format,
573
        int argCount, zval *args,
574
        int varStart, zval *return_value)
575
0
{
576
0
  int  numVars, nconversions, totalVars = -1;
577
0
  int  i, result;
578
0
  zend_long value;
579
0
  int  objIndex;
580
0
  char *end, *baseString;
581
0
  zval *current;
582
0
  char op   = 0;
583
0
  int  base = 0;
584
0
  int  underflow = 0;
585
0
  size_t width;
586
0
  int_string_formater fn = NULL;
587
0
  char *ch, sch;
588
0
  int  flags;
589
0
  char buf[64]; /* Temporary buffer to hold scanned number
590
           * strings before they are passed to strtoul() */
591
592
  /* do some sanity checking */
593
0
  if ((varStart > argCount) || (varStart < 0)){
594
0
    varStart = SCAN_MAX_ARGS + 1;
595
0
  }
596
0
  numVars = argCount - varStart;
597
0
  if (numVars < 0) {
598
0
    numVars = 0;
599
0
  }
600
601
  /*
602
   * Check for errors in the format string.
603
   */
604
0
  if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
605
0
    scan_set_error_return( numVars, return_value );
606
0
    return SCAN_ERROR_INVALID_FORMAT;
607
0
  }
608
609
0
  objIndex = numVars ? varStart : 0;
610
611
  /*
612
   * If any variables are passed, make sure they are all passed by reference
613
   */
614
0
  if (numVars) {
615
0
    for (i = varStart;i < argCount;i++){
616
0
      ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
617
0
    }
618
0
  }
619
620
  /*
621
   * Allocate space for the result objects. Only happens when no variables
622
   * are specified
623
   */
624
0
  if (!numVars) {
625
0
    zval tmp;
626
627
    /* allocate an array for return */
628
0
    array_init(return_value);
629
630
0
    for (i = 0; i < totalVars; i++) {
631
0
      ZVAL_NULL(&tmp);
632
0
      if (add_next_index_zval(return_value, &tmp) == FAILURE) {
633
0
        scan_set_error_return(0, return_value);
634
0
        return FAILURE;
635
0
      }
636
0
    }
637
0
    varStart = 0; /* Array index starts from 0 */
638
0
  }
639
640
0
  baseString = string;
641
642
  /*
643
   * Iterate over the format string filling in the result objects until
644
   * we reach the end of input, the end of the format string, or there
645
   * is a mismatch.
646
   */
647
0
  nconversions = 0;
648
  /* note ! - we need to limit the loop for objIndex to keep it in bounds */
649
650
0
  while (*format != '\0') {
651
0
    ch    = format++;
652
0
    flags = 0;
653
654
    /*
655
     * If we see whitespace in the format, skip whitespace in the string.
656
     */
657
0
    if ( isspace( (unsigned char)*ch ) ) {
658
0
      sch = *string;
659
0
      while ( isspace( (unsigned char)sch ) ) {
660
0
        if (*string == '\0') {
661
0
          goto done;
662
0
        }
663
0
        string++;
664
0
        sch = *string;
665
0
      }
666
0
      continue;
667
0
    }
668
669
0
    if (*ch != '%') {
670
0
literal:
671
0
      if (*string == '\0') {
672
0
        underflow = 1;
673
0
        goto done;
674
0
      }
675
0
      sch = *string;
676
0
      string++;
677
0
      if (*ch != sch) {
678
0
        goto done;
679
0
      }
680
0
      continue;
681
0
    }
682
683
0
    ch = format++;
684
0
    if (*ch == '%') {
685
0
      goto literal;
686
0
    }
687
688
    /*
689
     * Check for assignment suppression ('*') or an XPG3-style
690
     * assignment ('%n$').
691
     */
692
0
    if (*ch == '*') {
693
0
      flags |= SCAN_SUPPRESS;
694
0
      ch = format++;
695
0
    } else if ( isdigit(UCHAR(*ch))) {
696
0
      value = ZEND_STRTOUL(format-1, &end, 10);
697
0
      if (*end == '$') {
698
0
        format = end+1;
699
0
        ch = format++;
700
0
        objIndex = varStart + value - 1;
701
0
      }
702
0
    }
703
704
    /*
705
     * Parse any width specifier.
706
     */
707
0
    if ( isdigit(UCHAR(*ch))) {
708
0
      width = ZEND_STRTOUL(format-1, &format, 10);
709
0
      ch = format++;
710
0
    } else {
711
0
      width = 0;
712
0
    }
713
714
    /*
715
     * Ignore size specifier.
716
     */
717
0
    if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
718
0
      ch = format++;
719
0
    }
720
721
    /*
722
     * Handle the various field types.
723
     */
724
0
    switch (*ch) {
725
0
      case 'n':
726
0
        if (!(flags & SCAN_SUPPRESS)) {
727
0
          if (numVars && objIndex >= argCount) {
728
0
            break;
729
0
          } else if (numVars) {
730
0
            current = args + objIndex++;
731
0
            ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
732
0
          } else {
733
0
            add_index_long(return_value, objIndex++, string - baseString);
734
0
          }
735
0
        }
736
0
        nconversions++;
737
0
        continue;
738
739
0
      case 'd':
740
0
      case 'D':
741
0
        op = 'i';
742
0
        base = 10;
743
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
744
0
        break;
745
0
      case 'i':
746
0
        op = 'i';
747
0
        base = 0;
748
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
749
0
        break;
750
0
      case 'o':
751
0
        op = 'i';
752
0
        base = 8;
753
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
754
0
        break;
755
0
      case 'x':
756
0
      case 'X':
757
0
        op = 'i';
758
0
        base = 16;
759
0
        fn = (int_string_formater)ZEND_STRTOL_PTR;
760
0
        break;
761
0
      case 'u':
762
0
        op = 'i';
763
0
        base = 10;
764
0
        flags |= SCAN_UNSIGNED;
765
0
        fn = (int_string_formater)ZEND_STRTOUL_PTR;
766
0
        break;
767
768
0
      case 'f':
769
0
      case 'e':
770
0
      case 'E':
771
0
      case 'g':
772
0
        op = 'f';
773
0
        break;
774
775
0
      case 's':
776
0
        op = 's';
777
0
        break;
778
779
0
      case 'c':
780
0
        op = 's';
781
0
        flags |= SCAN_NOSKIP;
782
        /*-cc-*/
783
0
        if (0 == width) {
784
0
          width = 1;
785
0
        }
786
        /*-cc-*/
787
0
        break;
788
0
      case '[':
789
0
        op = '[';
790
0
        flags |= SCAN_NOSKIP;
791
0
        break;
792
0
    }   /* switch */
793
794
    /*
795
     * At this point, we will need additional characters from the
796
     * string to proceed.
797
     */
798
0
    if (*string == '\0') {
799
0
      underflow = 1;
800
0
      goto done;
801
0
    }
802
803
    /*
804
     * Skip any leading whitespace at the beginning of a field unless
805
     * the format suppresses this behavior.
806
     */
807
0
    if (!(flags & SCAN_NOSKIP)) {
808
0
      while (*string != '\0') {
809
0
        sch = *string;
810
0
        if (! isspace((unsigned char)sch) ) {
811
0
          break;
812
0
        }
813
0
        string++;
814
0
      }
815
0
      if (*string == '\0') {
816
0
        underflow = 1;
817
0
        goto done;
818
0
      }
819
0
    }
820
821
    /*
822
     * Perform the requested scanning operation.
823
     */
824
0
    switch (op) {
825
0
      case 'c':
826
0
      case 's':
827
        /*
828
         * Scan a string up to width characters or whitespace.
829
         */
830
0
        if (width == 0) {
831
0
          width = (size_t) ~0;
832
0
        }
833
0
        end = string;
834
0
        while (*end != '\0') {
835
0
          sch = *end;
836
0
          if ( isspace( (unsigned char)sch ) ) {
837
0
            break;
838
0
          }
839
0
          end++;
840
0
          if (--width == 0) {
841
0
             break;
842
0
          }
843
0
        }
844
0
        if (!(flags & SCAN_SUPPRESS)) {
845
0
          if (numVars && objIndex >= argCount) {
846
0
            break;
847
0
          } else if (numVars) {
848
0
            current = args + objIndex++;
849
0
            ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
850
0
          } else {
851
0
            add_index_stringl(return_value, objIndex++, string, end-string);
852
0
          }
853
0
        }
854
0
        string = end;
855
0
        break;
856
857
0
      case '[': {
858
0
        CharSet cset;
859
860
0
        if (width == 0) {
861
0
          width = (size_t) ~0;
862
0
        }
863
0
        end = string;
864
865
0
        format = BuildCharSet(&cset, format);
866
0
        while (*end != '\0') {
867
0
          sch = *end;
868
0
          if (!CharInSet(&cset, (int)sch)) {
869
0
            break;
870
0
          }
871
0
          end++;
872
0
          if (--width == 0) {
873
0
            break;
874
0
          }
875
0
        }
876
0
        ReleaseCharSet(&cset);
877
878
0
        if (string == end) {
879
          /*
880
           * Nothing matched the range, stop processing
881
           */
882
0
          goto done;
883
0
        }
884
0
        if (!(flags & SCAN_SUPPRESS)) {
885
0
          if (numVars && objIndex >= argCount) {
886
0
            break;
887
0
          } else if (numVars) {
888
0
            current = args + objIndex++;
889
0
            ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
890
0
          } else {
891
0
            add_index_stringl(return_value, objIndex++, string, end-string);
892
0
          }
893
0
        }
894
0
        string = end;
895
0
        break;
896
0
      }
897
/*
898
      case 'c':
899
         / Scan a single character./
900
901
        sch = *string;
902
        string++;
903
        if (!(flags & SCAN_SUPPRESS)) {
904
          if (numVars) {
905
            char __buf[2];
906
            __buf[0] = sch;
907
            __buf[1] = '\0';
908
            current = args[objIndex++];
909
            zval_ptr_dtor_nogc(*current);
910
            ZVAL_STRINGL( *current, __buf, 1);
911
          } else {
912
            add_index_stringl(return_value, objIndex++, &sch, 1);
913
          }
914
        }
915
        break;
916
*/
917
0
      case 'i':
918
        /*
919
         * Scan an unsigned or signed integer.
920
         */
921
        /*-cc-*/
922
0
        buf[0] = '\0';
923
        /*-cc-*/
924
0
        if ((width == 0) || (width > sizeof(buf) - 1)) {
925
0
          width = sizeof(buf) - 1;
926
0
        }
927
928
0
        flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
929
0
        for (end = buf; width > 0; width--) {
930
0
          switch (*string) {
931
            /*
932
             * The 0 digit has special meaning at the beginning of
933
             * a number.  If we are unsure of the base, it
934
             * indicates that we are in base 8 or base 16 (if it is
935
             * followed by an 'x').
936
             */
937
0
            case '0':
938
              /*-cc-*/
939
0
              if (base == 16) {
940
0
                flags |= SCAN_XOK;
941
0
              }
942
              /*-cc-*/
943
0
              if (base == 0) {
944
0
                base = 8;
945
0
                flags |= SCAN_XOK;
946
0
              }
947
0
              if (flags & SCAN_NOZERO) {
948
0
                flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
949
0
              } else {
950
0
                flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
951
0
              }
952
0
              goto addToInt;
953
954
0
            case '1': case '2': case '3': case '4':
955
0
            case '5': case '6': case '7':
956
0
              if (base == 0) {
957
0
                base = 10;
958
0
              }
959
0
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
960
0
              goto addToInt;
961
962
0
            case '8': case '9':
963
0
              if (base == 0) {
964
0
                base = 10;
965
0
              }
966
0
              if (base <= 8) {
967
0
                 break;
968
0
              }
969
0
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
970
0
              goto addToInt;
971
972
0
            case 'A': case 'B': case 'C':
973
0
            case 'D': case 'E': case 'F':
974
0
            case 'a': case 'b': case 'c':
975
0
            case 'd': case 'e': case 'f':
976
0
              if (base <= 10) {
977
0
                break;
978
0
              }
979
0
              flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980
0
              goto addToInt;
981
982
0
            case '+': case '-':
983
0
              if (flags & SCAN_SIGNOK) {
984
0
                flags &= ~SCAN_SIGNOK;
985
0
                goto addToInt;
986
0
              }
987
0
              break;
988
989
0
            case 'x': case 'X':
990
0
              if ((flags & SCAN_XOK) && (end == buf+1)) {
991
0
                base = 16;
992
0
                flags &= ~SCAN_XOK;
993
0
                goto addToInt;
994
0
              }
995
0
              break;
996
0
          }
997
998
          /*
999
           * We got an illegal character so we are done accumulating.
1000
           */
1001
0
          break;
1002
1003
0
addToInt:
1004
          /*
1005
           * Add the character to the temporary buffer.
1006
           */
1007
0
          *end++ = *string++;
1008
0
          if (*string == '\0') {
1009
0
            break;
1010
0
          }
1011
0
        }
1012
1013
        /*
1014
         * Check to see if we need to back up because we only got a
1015
         * sign or a trailing x after a 0.
1016
         */
1017
0
        if (flags & SCAN_NODIGITS) {
1018
0
          if (*string == '\0') {
1019
0
            underflow = 1;
1020
0
          }
1021
0
          goto done;
1022
0
        } else if (end[-1] == 'x' || end[-1] == 'X') {
1023
0
          end--;
1024
0
          string--;
1025
0
        }
1026
1027
        /*
1028
         * Scan the value from the temporary buffer.  If we are
1029
         * returning a large unsigned value, we have to convert it back
1030
         * to a string since PHP only supports signed values.
1031
         */
1032
0
        if (!(flags & SCAN_SUPPRESS)) {
1033
0
          *end = '\0';
1034
0
          value = (zend_long) (*fn)(buf, NULL, base);
1035
0
          if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1036
0
            snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1037
0
            if (numVars && objIndex >= argCount) {
1038
0
              break;
1039
0
            } else if (numVars) {
1040
               /* change passed value type to string */
1041
0
              current = args + objIndex++;
1042
0
              ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1043
0
            } else {
1044
0
              add_index_string(return_value, objIndex++, buf);
1045
0
            }
1046
0
          } else {
1047
0
            if (numVars && objIndex >= argCount) {
1048
0
              break;
1049
0
            } else if (numVars) {
1050
0
              current = args + objIndex++;
1051
0
              ZEND_TRY_ASSIGN_REF_LONG(current, value);
1052
0
            } else {
1053
0
              add_index_long(return_value, objIndex++, value);
1054
0
            }
1055
0
          }
1056
0
        }
1057
0
        break;
1058
1059
0
      case 'f':
1060
        /*
1061
         * Scan a floating point number
1062
         */
1063
0
        buf[0] = '\0';     /* call me pedantic */
1064
0
        if ((width == 0) || (width > sizeof(buf) - 1)) {
1065
0
          width = sizeof(buf) - 1;
1066
0
        }
1067
0
        flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1068
0
        for (end = buf; width > 0; width--) {
1069
0
          switch (*string) {
1070
0
            case '0': case '1': case '2': case '3':
1071
0
            case '4': case '5': case '6': case '7':
1072
0
            case '8': case '9':
1073
0
              flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1074
0
              goto addToFloat;
1075
0
            case '+':
1076
0
            case '-':
1077
0
              if (flags & SCAN_SIGNOK) {
1078
0
                flags &= ~SCAN_SIGNOK;
1079
0
                goto addToFloat;
1080
0
              }
1081
0
              break;
1082
0
            case '.':
1083
0
              if (flags & SCAN_PTOK) {
1084
0
                flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1085
0
                goto addToFloat;
1086
0
              }
1087
0
              break;
1088
0
            case 'e':
1089
0
            case 'E':
1090
              /*
1091
               * An exponent is not allowed until there has
1092
               * been at least one digit.
1093
               */
1094
0
              if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1095
0
                flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1096
0
                  | SCAN_SIGNOK | SCAN_NODIGITS;
1097
0
                goto addToFloat;
1098
0
              }
1099
0
              break;
1100
0
          }
1101
1102
          /*
1103
           * We got an illegal character so we are done accumulating.
1104
           */
1105
0
          break;
1106
1107
0
addToFloat:
1108
          /*
1109
           * Add the character to the temporary buffer.
1110
           */
1111
0
          *end++ = *string++;
1112
0
          if (*string == '\0') {
1113
0
            break;
1114
0
          }
1115
0
        }
1116
1117
        /*
1118
         * Check to see if we need to back up because we saw a
1119
         * trailing 'e' or sign.
1120
         */
1121
0
        if (flags & SCAN_NODIGITS) {
1122
0
          if (flags & SCAN_EXPOK) {
1123
            /*
1124
             * There were no digits at all so scanning has
1125
             * failed and we are done.
1126
             */
1127
0
            if (*string == '\0') {
1128
0
              underflow = 1;
1129
0
            }
1130
0
            goto done;
1131
0
          }
1132
1133
          /*
1134
           * We got a bad exponent ('e' and maybe a sign).
1135
           */
1136
0
          end--;
1137
0
          string--;
1138
0
          if (*end != 'e' && *end != 'E') {
1139
0
            end--;
1140
0
            string--;
1141
0
          }
1142
0
        }
1143
1144
        /*
1145
         * Scan the value from the temporary buffer.
1146
         */
1147
0
        if (!(flags & SCAN_SUPPRESS)) {
1148
0
          double dvalue;
1149
0
          *end = '\0';
1150
0
          dvalue = zend_strtod(buf, NULL);
1151
0
          if (numVars && objIndex >= argCount) {
1152
0
            break;
1153
0
          } else if (numVars) {
1154
0
            current = args + objIndex++;
1155
0
            ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1156
0
          } else {
1157
0
            add_index_double(return_value, objIndex++, dvalue );
1158
0
          }
1159
0
        }
1160
0
        break;
1161
0
    } /* switch (op) */
1162
0
    nconversions++;
1163
0
  } /*  while (*format != '\0') */
1164
1165
0
done:
1166
0
  result = SCAN_SUCCESS;
1167
1168
0
  if (underflow && (0==nconversions)) {
1169
0
    scan_set_error_return( numVars, return_value );
1170
0
    result = SCAN_ERROR_EOF;
1171
0
  } else if (numVars) {
1172
0
    zval_ptr_dtor(return_value );
1173
0
    ZVAL_LONG(return_value, nconversions);
1174
0
  } else if (nconversions < totalVars) {
1175
    /* TODO: not all elements converted. we need to prune the list - cc */
1176
0
  }
1177
0
  return result;
1178
0
}
1179
/* }}} */
1180
1181
/* The compiler choked when I tried to make this a macro. */
1182
/*
 * Store the "input ran out" result in return_value.
 *
 * numVars      non-zero: return_value receives the integer SCAN_ERROR_EOF;
 *              zero: return_value is converted to null instead.
 * return_value zval that is overwritten with the error result.
 */
static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
{
  if (!numVars) {
    /* convert_to_null calls the destructor on the value it replaces */
    convert_to_null(return_value);
    return;
  }

  /* EOF marker */
  ZVAL_LONG(return_value, SCAN_ERROR_EOF);
}
1191
/* }}} */