Coverage Report

Created: 2026-06-02 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/standard/pack.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Chris Schneider <cschneid@relog.ch>                          |
12
   +----------------------------------------------------------------------+
13
 */
14
15
#include "php.h"
16
17
#include <stdlib.h>
18
#include <errno.h>
19
#include <sys/types.h>
20
21
/* Advance outputpos by (a) items of (b) bytes each while sizing pack() output.
 *
 * The expansion refers to names that must exist at the point of use:
 * outputpos, code, formatcodes and formatargs. When (a) is negative or the
 * addition would push outputpos past INT_MAX, both format arrays are freed,
 * zend_value_error() is called and RETURN_THROWS() is invoked.
 *
 * The expansion is a bare statement sequence that already ends in ';', so
 * call sites omit the semicolon; do not use it as the unbraced body of an
 * if/else. Both arguments are evaluated more than once, so they must be free
 * of side effects. */
#define INC_OUTPUTPOS(a,b) \
  if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
    efree(formatcodes);  \
    efree(formatargs); \
    zend_value_error("Type %c: integer overflow in format string", code); \
    RETURN_THROWS(); \
  } \
  outputpos += (a)*(b);
29
30
typedef enum {
31
  PHP_LITTLE_ENDIAN,
32
  PHP_BIG_ENDIAN,
33
} php_pack_endianness;
34
35
#ifdef WORDS_BIGENDIAN
36
# define MACHINE_LITTLE_ENDIAN 0
37
# define PHP_MACHINE_ENDIAN PHP_BIG_ENDIAN
38
#else
39
0
# define MACHINE_LITTLE_ENDIAN 1
40
0
# define PHP_MACHINE_ENDIAN PHP_LITTLE_ENDIAN
41
#endif
42
43
#ifdef ZEND_ENABLE_ZVAL_LONG64
44
0
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP64(u)
45
#else
46
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP32(u)
47
#endif
48
49
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
50
typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
51
typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
52
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
53
typedef ZEND_SET_ALIGNED(1, int unaligned_int);
54
55
/* {{{ php_pack */
56
/* Write the low `size` bytes of the integer value of `val` to `output` in the
 * requested byte order.
 *
 * val        - zval converted with zval_get_long()
 * size       - number of bytes to emit
 * endianness - byte order of the emitted bytes
 * output     - destination buffer; `size` bytes are written */
static void php_pack(const zval *val, size_t size, php_pack_endianness endianness, char *output)
{
  /* Bits of a zend_ulong that lie outside the `size` bytes being emitted. */
  const size_t unused_bits = (sizeof(zend_ulong) - size) * 8;
  zend_ulong raw = zval_get_long(val);

  if ((endianness == PHP_LITTLE_ENDIAN) == MACHINE_LITTLE_ENDIAN) {
    /* Requested order is the host order. */
#if !MACHINE_LITTLE_ENDIAN
    /* Big-endian host: move the significant bytes to the lowest addresses. */
    raw <<= unused_bits;
#endif
  } else {
    /* Requested order differs from the host order: reverse all bytes. */
    raw = PHP_LONG_BSWAP(raw);
#if MACHINE_LITTLE_ENDIAN
    /* After the swap the wanted bytes sit at the high end; shift them down so
     * that they occupy the first `size` bytes in memory. */
    raw >>= unused_bits;
#endif
  }

  memcpy(output, &raw, size);
}
73
/* }}} */
74
75
/* Return `arg` with its two bytes exchanged. */
ZEND_ATTRIBUTE_CONST static inline uint16_t php_pack_reverse_int16(uint16_t arg)
{
  /* The operands are promoted to int, so the left shift cannot lose bits;
   * the cast back to uint16_t drops everything above bit 15. */
  return (uint16_t) ((arg << 8) | (arg >> 8));
}
79
80
/* {{{ php_pack_reverse_int32 */
81
/* Return `arg` after passing it through the ZEND_BYTES_SWAP32 macro. */
ZEND_ATTRIBUTE_CONST static inline uint32_t php_pack_reverse_int32(uint32_t arg)
{
  const uint32_t reversed = ZEND_BYTES_SWAP32(arg);

  return reversed;
}
85
/* }}} */
86
87
/* {{{ php_pack_reverse_int64 */
88
/* Return `arg` with its eight bytes in reverse order. Each 32-bit half is
 * reversed with php_pack_reverse_int32() and the two halves trade places. */
static inline uint64_t php_pack_reverse_int64(uint64_t arg)
{
  const uint32_t low_half = (uint32_t) arg;
  const uint32_t high_half = (uint32_t) (arg >> 32);

  /* The reversed low half becomes the new high half and vice versa. */
  const uint64_t new_high = php_pack_reverse_int32(low_half);
  const uint64_t new_low = php_pack_reverse_int32(high_half);

  return (new_high << 32) | new_low;
}
100
/* }}} */
101
102
/* {{{ php_pack_copy_float */
103
/* Store the bit pattern of `f` at `dst` in the requested byte order.
 *
 * is_little_endian - non-zero for little-endian output, zero for big-endian
 * dst              - destination; sizeof(float) bytes are written
 * f                - value to store
 *
 * float is assumed to be 32 bits wide. */
static void php_pack_copy_float(int is_little_endian, void * dst, float f)
{
  uint32_t bits;
  /* A swap is needed exactly when the requested order is not the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
  const int needs_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, &f, sizeof(bits));

  if (needs_swap) {
    bits = php_pack_reverse_int32(bits);
  }

  memcpy(dst, &bits, sizeof(float));
}
123
/* }}} */
124
125
/* {{{ php_pack_copy_double */
126
/* Store the bit pattern of `d` at `dst` in the requested byte order.
 *
 * is_little_endian - non-zero for little-endian output, zero for big-endian
 * dst              - destination; sizeof(double) bytes are written
 * d                - value to store
 *
 * double is assumed to be 64 bits wide. */
static void php_pack_copy_double(int is_little_endian, void * dst, double d)
{
  uint64_t bits;
  /* A swap is needed exactly when the requested order is not the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
  const int needs_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, &d, sizeof(bits));

  if (needs_swap) {
    bits = php_pack_reverse_int64(bits);
  }

  memcpy(dst, &bits, sizeof(double));
}
146
/* }}} */
147
148
/* {{{ php_pack_parse_float */
149
/* Read a float stored at `src` in the given byte order.
 *
 * is_little_endian - non-zero if the stored bytes are little-endian,
 *                    zero if they are big-endian
 * src              - source; sizeof(float) bytes are read
 *
 * Returns the decoded value. float is assumed to be 32 bits wide. */
static float php_pack_parse_float(int is_little_endian, void * src)
{
  uint32_t bits;
  float value;
  /* A swap is needed exactly when the stored order is not the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
  const int needs_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, src, sizeof(float));

  if (needs_swap) {
    bits = php_pack_reverse_int32(bits);
  }

  memcpy(&value, &bits, sizeof(value));
  return value;
}
169
/* }}} */
170
171
/* {{{ php_pack_parse_double */
172
/* Read a double stored at `src` in the given byte order.
 *
 * is_little_endian - non-zero if the stored bytes are little-endian,
 *                    zero if they are big-endian
 * src              - source; sizeof(double) bytes are read
 *
 * Returns the decoded value. double is assumed to be 64 bits wide. */
static double php_pack_parse_double(int is_little_endian, void * src)
{
  uint64_t bits;
  double value;
  /* A swap is needed exactly when the stored order is not the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
  const int needs_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, src, sizeof(double));

  if (needs_swap) {
    bits = php_pack_reverse_int64(bits);
  }

  memcpy(&value, &bits, sizeof(value));
  return value;
}
192
/* }}} */
193
194
/* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
195
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
196
 * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
197
 */
198
/* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
199
PHP_FUNCTION(pack)
200
0
{
201
0
  zval *argv = NULL;
202
0
  int num_args = 0;
203
0
  size_t i;
204
0
  int currentarg;
205
0
  char *format;
206
0
  size_t formatlen;
207
0
  char *formatcodes;
208
0
  int *formatargs;
209
0
  size_t formatcount = 0;
210
0
  int outputpos = 0, outputsize = 0;
211
0
  zend_string *output;
212
213
0
  ZEND_PARSE_PARAMETERS_START(1, -1)
214
0
    Z_PARAM_STRING(format, formatlen)
215
0
    Z_PARAM_VARIADIC('*', argv, num_args)
216
0
  ZEND_PARSE_PARAMETERS_END();
217
218
  /* We have a maximum of <formatlen> format codes to deal with */
219
0
  formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
220
0
  formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
221
0
  currentarg = 0;
222
223
  /* Preprocess format into formatcodes and formatargs */
224
0
  for (i = 0; i < formatlen; formatcount++) {
225
0
    char code = format[i++];
226
0
    int arg = 1;
227
228
    /* Handle format arguments if any */
229
0
    if (i < formatlen) {
230
0
      char c = format[i];
231
232
0
      if (c == '*') {
233
0
        arg = -1;
234
0
        i++;
235
0
      }
236
0
      else if (c >= '0' && c <= '9') {
237
0
        arg = atoi(&format[i]);
238
239
0
        while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
240
0
          i++;
241
0
        }
242
0
      }
243
0
    }
244
245
    /* Handle special arg '*' for all codes and check argv overflows */
246
0
    switch (code) {
247
      /* Never uses any args */
248
0
      case 'x':
249
0
      case 'X':
250
0
      case '@':
251
0
        if (arg < 0) {
252
0
          php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
253
0
          arg = 1;
254
0
        }
255
0
        break;
256
257
      /* Always uses one arg */
258
0
      case 'a':
259
0
      case 'A':
260
0
      case 'Z':
261
0
      case 'h':
262
0
      case 'H':
263
0
        if (currentarg >= num_args) {
264
0
          efree(formatcodes);
265
0
          efree(formatargs);
266
0
          zend_value_error("Type %c: not enough arguments", code);
267
0
          RETURN_THROWS();
268
0
        }
269
270
0
        if (arg < 0) {
271
0
          if (!try_convert_to_string(&argv[currentarg])) {
272
0
            efree(formatcodes);
273
0
            efree(formatargs);
274
0
            RETURN_THROWS();
275
0
          }
276
277
0
          arg = Z_STRLEN(argv[currentarg]);
278
0
          if (code == 'Z') {
279
            /* add one because Z is always NUL-terminated:
280
             * pack("Z*", "aa") === "aa\0"
281
             * pack("Z2", "aa") === "a\0" */
282
0
            arg++;
283
0
          }
284
0
        }
285
286
0
        currentarg++;
287
0
        break;
288
289
      /* Use as many args as specified */
290
0
      case 'q':
291
0
      case 'Q':
292
0
      case 'J':
293
0
      case 'P':
294
#if SIZEOF_ZEND_LONG < 8
295
          efree(formatcodes);
296
          efree(formatargs);
297
          zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
298
          RETURN_THROWS();
299
#endif
300
0
      case 'c':
301
0
      case 'C':
302
0
      case 's':
303
0
      case 'S':
304
0
      case 'i':
305
0
      case 'I':
306
0
      case 'l':
307
0
      case 'L':
308
0
      case 'n':
309
0
      case 'N':
310
0
      case 'v':
311
0
      case 'V':
312
0
      case 'f': /* float */
313
0
      case 'g': /* little endian float */
314
0
      case 'G': /* big endian float */
315
0
      case 'd': /* double */
316
0
      case 'e': /* little endian double */
317
0
      case 'E': /* big endian double */
318
0
        if (arg < 0) {
319
0
          arg = num_args - currentarg;
320
0
        }
321
0
        if (currentarg > INT_MAX - arg) {
322
0
          goto too_few_args;
323
0
        }
324
0
        currentarg += arg;
325
326
0
        if (currentarg > num_args) {
327
0
too_few_args:
328
0
          efree(formatcodes);
329
0
          efree(formatargs);
330
0
          zend_value_error("Type %c: too few arguments", code);
331
0
          RETURN_THROWS();
332
0
        }
333
0
        break;
334
335
0
      default:
336
0
        efree(formatcodes);
337
0
        efree(formatargs);
338
0
        zend_value_error("Type %c: unknown format code", code);
339
0
        RETURN_THROWS();
340
0
    }
341
342
0
    formatcodes[formatcount] = code;
343
0
    formatargs[formatcount] = arg;
344
0
  }
345
346
0
  if (currentarg < num_args) {
347
0
    php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
348
0
  }
349
350
  /* Calculate output length and upper bound while processing*/
351
0
  for (i = 0; i < formatcount; i++) {
352
0
    char code = formatcodes[i];
353
0
    int arg = formatargs[i];
354
355
0
    switch (code) {
356
0
      case 'h':
357
0
      case 'H':
358
0
        INC_OUTPUTPOS((arg / 2) + (arg % 2),1) /* 4 bit per arg */
359
0
        break;
360
361
0
      case 'a':
362
0
      case 'A':
363
0
      case 'Z':
364
0
      case 'c':
365
0
      case 'C':
366
0
      case 'x':
367
0
        INC_OUTPUTPOS(arg,1)   /* 8 bit per arg */
368
0
        break;
369
370
0
      case 's':
371
0
      case 'S':
372
0
      case 'n':
373
0
      case 'v':
374
0
        INC_OUTPUTPOS(arg,2)   /* 16 bit per arg */
375
0
        break;
376
377
0
      case 'i':
378
0
      case 'I':
379
0
        INC_OUTPUTPOS(arg,sizeof(int))
380
0
        break;
381
382
0
      case 'l':
383
0
      case 'L':
384
0
      case 'N':
385
0
      case 'V':
386
0
        INC_OUTPUTPOS(arg,4)   /* 32 bit per arg */
387
0
        break;
388
389
0
#if SIZEOF_ZEND_LONG > 4
390
0
      case 'q':
391
0
      case 'Q':
392
0
      case 'J':
393
0
      case 'P':
394
0
        INC_OUTPUTPOS(arg,8)   /* 32 bit per arg */
395
0
        break;
396
0
#endif
397
398
0
      case 'f': /* float */
399
0
      case 'g': /* little endian float */
400
0
      case 'G': /* big endian float */
401
0
        INC_OUTPUTPOS(arg,sizeof(float))
402
0
        break;
403
404
0
      case 'd': /* double */
405
0
      case 'e': /* little endian double */
406
0
      case 'E': /* big endian double */
407
0
        INC_OUTPUTPOS(arg,sizeof(double))
408
0
        break;
409
410
0
      case 'X':
411
0
        outputpos -= arg;
412
413
0
        if (outputpos < 0) {
414
0
          php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
415
0
          outputpos = 0;
416
0
        }
417
0
        break;
418
419
0
      case '@':
420
0
        outputpos = arg;
421
0
        break;
422
0
    }
423
424
0
    if (outputsize < outputpos) {
425
0
      outputsize = outputpos;
426
0
    }
427
0
  }
428
429
0
  output = zend_string_alloc(outputsize, 0);
430
0
  outputpos = 0;
431
0
  currentarg = 0;
432
433
  /* Do actual packing */
434
0
  for (i = 0; i < formatcount; i++) {
435
0
    char code = formatcodes[i];
436
0
    int arg = formatargs[i];
437
438
0
    switch (code) {
439
0
      case 'a':
440
0
      case 'A':
441
0
      case 'Z': {
442
0
        size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
443
0
        zend_string *tmp_str;
444
0
        zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
445
446
0
        memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
447
0
        memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
448
0
             (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
449
450
0
        outputpos += arg;
451
0
        zend_tmp_string_release(tmp_str);
452
0
        break;
453
0
      }
454
455
0
      case 'h':
456
0
      case 'H': {
457
0
        int nibbleshift = (code == 'h') ? 0 : 4;
458
0
        int first = 1;
459
0
        zend_string *tmp_str;
460
0
        zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
461
0
        char *v = ZSTR_VAL(str);
462
463
0
        outputpos--;
464
0
        if ((size_t)arg > ZSTR_LEN(str)) {
465
0
          php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
466
0
          arg = ZSTR_LEN(str);
467
0
        }
468
469
0
        while (arg-- > 0) {
470
0
          char n = *v++;
471
472
0
          if (n >= '0' && n <= '9') {
473
0
            n -= '0';
474
0
          } else if (n >= 'A' && n <= 'F') {
475
0
            n -= ('A' - 10);
476
0
          } else if (n >= 'a' && n <= 'f') {
477
0
            n -= ('a' - 10);
478
0
          } else {
479
0
            php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
480
0
            n = 0;
481
0
          }
482
483
0
          if (first--) {
484
0
            ZSTR_VAL(output)[++outputpos] = 0;
485
0
          } else {
486
0
            first = 1;
487
0
          }
488
489
0
          ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
490
0
          nibbleshift = (nibbleshift + 4) & 7;
491
0
        }
492
493
0
        outputpos++;
494
0
        zend_tmp_string_release(tmp_str);
495
0
        break;
496
0
      }
497
498
0
      case 'c':
499
0
      case 'C':
500
0
        while (arg-- > 0) {
501
0
          php_pack(&argv[currentarg++], 1, PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
502
0
          outputpos++;
503
0
        }
504
0
        break;
505
506
0
      case 's':
507
0
      case 'S':
508
0
      case 'n':
509
0
      case 'v': {
510
0
        php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
511
512
0
        if (code == 'n') {
513
0
          endianness = PHP_BIG_ENDIAN;
514
0
        } else if (code == 'v') {
515
0
          endianness = PHP_LITTLE_ENDIAN;
516
0
        }
517
518
0
        while (arg-- > 0) {
519
0
          php_pack(&argv[currentarg++], 2, endianness, &ZSTR_VAL(output)[outputpos]);
520
0
          outputpos += 2;
521
0
        }
522
0
        break;
523
0
      }
524
525
0
      case 'i':
526
0
      case 'I':
527
0
        while (arg-- > 0) {
528
0
          php_pack(&argv[currentarg++], sizeof(int), PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
529
0
          outputpos += sizeof(int);
530
0
        }
531
0
        break;
532
533
0
      case 'l':
534
0
      case 'L':
535
0
      case 'N':
536
0
      case 'V': {
537
0
        php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
538
539
0
        if (code == 'N') {
540
0
          endianness = PHP_BIG_ENDIAN;
541
0
        } else if (code == 'V') {
542
0
          endianness = PHP_LITTLE_ENDIAN;
543
0
        }
544
545
0
        while (arg-- > 0) {
546
0
          php_pack(&argv[currentarg++], 4, endianness, &ZSTR_VAL(output)[outputpos]);
547
0
          outputpos += 4;
548
0
        }
549
0
        break;
550
0
      }
551
552
0
#if SIZEOF_ZEND_LONG > 4
553
0
      case 'q':
554
0
      case 'Q':
555
0
      case 'J':
556
0
      case 'P': {
557
0
        php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
558
559
0
        if (code == 'J') {
560
0
          endianness = PHP_BIG_ENDIAN;
561
0
        } else if (code == 'P') {
562
0
          endianness = PHP_LITTLE_ENDIAN;
563
0
        }
564
565
0
        while (arg-- > 0) {
566
0
          php_pack(&argv[currentarg++], 8, endianness, &ZSTR_VAL(output)[outputpos]);
567
0
          outputpos += 8;
568
0
        }
569
0
        break;
570
0
      }
571
0
#endif
572
573
0
      case 'f': {
574
0
        while (arg-- > 0) {
575
0
          float v = (float) zval_get_double(&argv[currentarg++]);
576
0
          memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
577
0
          outputpos += sizeof(v);
578
0
        }
579
0
        break;
580
0
      }
581
582
0
      case 'g': {
583
        /* pack little endian float */
584
0
        while (arg-- > 0) {
585
0
          float v = (float) zval_get_double(&argv[currentarg++]);
586
0
          php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
587
0
          outputpos += sizeof(v);
588
0
        }
589
590
0
        break;
591
0
      }
592
0
      case 'G': {
593
        /* pack big endian float */
594
0
        while (arg-- > 0) {
595
0
          float v = (float) zval_get_double(&argv[currentarg++]);
596
0
          php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
597
0
          outputpos += sizeof(v);
598
0
        }
599
0
        break;
600
0
      }
601
602
0
      case 'd': {
603
0
        while (arg-- > 0) {
604
0
          double v = zval_get_double(&argv[currentarg++]);
605
0
          memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
606
0
          outputpos += sizeof(v);
607
0
        }
608
0
        break;
609
0
      }
610
611
0
      case 'e': {
612
        /* pack little endian double */
613
0
        while (arg-- > 0) {
614
0
          double v = zval_get_double(&argv[currentarg++]);
615
0
          php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
616
0
          outputpos += sizeof(v);
617
0
        }
618
0
        break;
619
0
      }
620
621
0
      case 'E': {
622
        /* pack big endian double */
623
0
        while (arg-- > 0) {
624
0
          double v = zval_get_double(&argv[currentarg++]);
625
0
          php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
626
0
          outputpos += sizeof(v);
627
0
        }
628
0
        break;
629
0
      }
630
631
0
      case 'x':
632
0
        memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
633
0
        outputpos += arg;
634
0
        break;
635
636
0
      case 'X':
637
0
        outputpos -= arg;
638
639
0
        if (outputpos < 0) {
640
0
          outputpos = 0;
641
0
        }
642
0
        break;
643
644
0
      case '@':
645
0
        if (arg > outputpos) {
646
0
          memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
647
0
        }
648
0
        outputpos = arg;
649
0
        break;
650
0
    }
651
0
  }
652
653
0
  efree(formatcodes);
654
0
  efree(formatargs);
655
0
  ZSTR_VAL(output)[outputpos] = '\0';
656
0
  ZSTR_LEN(output) = outputpos;
657
0
  RETURN_NEW_STR(output);
658
0
}
659
/* }}} */
660
661
/* unpack() is based on Perl's unpack(), but is modified a bit from there.
662
 * Rather than depending on error-prone ordered lists or syntactically
663
 * unpleasant pass-by-reference, we return an object with named parameters
664
 * (like *_fetch_object()). Syntax is "f[repeat]name/...", where "f" is the
665
 * formatter char (like pack()), "[repeat]" is the optional repeater argument,
666
 * and "name" is the name of the variable to use.
667
 * Example: "c2chars/nints" will return an object with fields
668
 * chars1, chars2, and ints.
669
 * Numeric pack types will return numbers, a and A will return strings,
670
 * f and d will return doubles.
671
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
672
 * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
673
 */
674
/* {{{ Unpack binary string into named array elements according to format argument */
675
PHP_FUNCTION(unpack)
676
0
{
677
0
  char *format, *input;
678
0
  zend_string *formatarg, *inputarg;
679
0
  zend_long formatlen, inputpos, inputlen;
680
0
  int i;
681
0
  zend_long offset = 0;
682
683
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
684
0
    Z_PARAM_STR(formatarg)
685
0
    Z_PARAM_STR(inputarg)
686
0
    Z_PARAM_OPTIONAL
687
0
    Z_PARAM_LONG(offset)
688
0
  ZEND_PARSE_PARAMETERS_END();
689
690
0
  format = ZSTR_VAL(formatarg);
691
0
  formatlen = ZSTR_LEN(formatarg);
692
0
  input = ZSTR_VAL(inputarg);
693
0
  inputlen = ZSTR_LEN(inputarg);
694
0
  inputpos = 0;
695
696
697
0
  if (offset < 0 || offset > inputlen) {
698
0
    zend_argument_value_error(3, "must be contained in argument #2 ($data)");
699
0
    RETURN_THROWS();
700
0
  }
701
702
0
  input += offset;
703
0
  inputlen -= offset;
704
705
0
  array_init(return_value);
706
707
0
  while (formatlen-- > 0) {
708
0
    char type = *(format++);
709
0
    int repetitions = 1, argb;
710
0
    char *name;
711
0
    int namelen;
712
0
    int size = 0;
713
714
    /* Handle format arguments if any */
715
0
    if (formatlen > 0) {
716
0
      char c = *format;
717
718
0
      if (c >= '0' && c <= '9') {
719
0
        errno = 0;
720
0
        long tmp = strtol(format, NULL, 10);
721
        /* There is not strtoi. We have to check the range ourselves.
722
         * With 32-bit long the INT_{MIN,MAX} are useless because long == int, but with 64-bit they do limit us to 32-bit. */
723
0
        if (errno || tmp < INT_MIN || tmp > INT_MAX) {
724
0
          php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
725
0
          zend_array_destroy(Z_ARR_P(return_value));
726
0
          RETURN_FALSE;
727
0
        }
728
0
        repetitions = tmp;
729
730
0
        while (formatlen > 0 && *format >= '0' && *format <= '9') {
731
0
          format++;
732
0
          formatlen--;
733
0
        }
734
0
      } else if (c == '*') {
735
0
        repetitions = -1;
736
0
        format++;
737
0
        formatlen--;
738
0
      }
739
0
    }
740
741
    /* Get of new value in array */
742
0
    name = format;
743
0
    argb = repetitions;
744
745
0
    while (formatlen > 0 && *format != '/') {
746
0
      formatlen--;
747
0
      format++;
748
0
    }
749
750
0
    namelen = format - name;
751
752
0
    if (namelen > 200)
753
0
      namelen = 200;
754
755
0
    switch (type) {
756
      /* Never use any input */
757
0
      case 'X':
758
0
        size = -1;
759
0
        if (repetitions < 0) {
760
0
          php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
761
0
          repetitions = 1;
762
0
        }
763
0
        break;
764
765
0
      case '@':
766
0
        size = 0;
767
0
        break;
768
769
0
      case 'a':
770
0
      case 'A':
771
0
      case 'Z':
772
0
        size = repetitions;
773
0
        repetitions = 1;
774
0
        break;
775
776
0
      case 'h':
777
0
      case 'H':
778
0
        size = (repetitions > 0) ? ((unsigned int) repetitions + 1) / 2 : repetitions;
779
0
        repetitions = 1;
780
0
        break;
781
782
      /* Use 1 byte of input */
783
0
      case 'c':
784
0
      case 'C':
785
0
      case 'x':
786
0
        size = 1;
787
0
        break;
788
789
      /* Use 2 bytes of input */
790
0
      case 's':
791
0
      case 'S':
792
0
      case 'n':
793
0
      case 'v':
794
0
        size = 2;
795
0
        break;
796
797
      /* Use sizeof(int) bytes of input */
798
0
      case 'i':
799
0
      case 'I':
800
0
        size = sizeof(int);
801
0
        break;
802
803
      /* Use 4 bytes of input */
804
0
      case 'l':
805
0
      case 'L':
806
0
      case 'N':
807
0
      case 'V':
808
0
        size = 4;
809
0
        break;
810
811
      /* Use 8 bytes of input */
812
0
      case 'q':
813
0
      case 'Q':
814
0
      case 'J':
815
0
      case 'P':
816
0
#if SIZEOF_ZEND_LONG > 4
817
0
        size = 8;
818
0
        break;
819
#else
820
        zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
821
        RETURN_THROWS();
822
#endif
823
824
      /* Use sizeof(float) bytes of input */
825
0
      case 'f':
826
0
      case 'g':
827
0
      case 'G':
828
0
        size = sizeof(float);
829
0
        break;
830
831
      /* Use sizeof(double) bytes of input */
832
0
      case 'd':
833
0
      case 'e':
834
0
      case 'E':
835
0
        size = sizeof(double);
836
0
        break;
837
838
0
      default:
839
0
        zend_value_error("Invalid format type %c", type);
840
0
        RETURN_THROWS();
841
0
    }
842
843
844
    /* Do actual unpacking */
845
0
    for (i = 0; i != repetitions; i++ ) {
846
847
0
      if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
848
0
        php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
849
0
        zend_array_destroy(Z_ARR_P(return_value));
850
0
        RETURN_FALSE;
851
0
      }
852
853
0
      if ((inputpos + size) <= inputlen) {
854
855
0
        zend_string* real_name;
856
0
        zend_long long_key = 0;
857
0
        zval val;
858
859
0
        if (namelen == 0) {
860
0
          real_name = NULL;
861
0
          long_key = i + 1;
862
0
        } else if (repetitions == 1) {
863
          /* Use a part of the formatarg argument directly as the name. */
864
0
          real_name = zend_string_init_fast(name, namelen);
865
0
        } else {
866
          /* Need to add the 1-based element number to the name */
867
0
          char buf[MAX_LENGTH_OF_LONG + 1];
868
0
          char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
869
0
          size_t digits = buf + sizeof(buf) - 1 - res;
870
0
          real_name = zend_string_concat2(name, namelen, res, digits);
871
0
        }
872
873
0
        switch (type) {
874
0
          case 'a': {
875
            /* a will not strip any trailing whitespace or null padding */
876
0
            zend_long len = inputlen - inputpos;  /* Remaining string */
877
878
            /* If size was given take minimum of len and size */
879
0
            if ((size >= 0) && (len > size)) {
880
0
              len = size;
881
0
            }
882
883
0
            size = len;
884
885
0
            ZVAL_STRINGL(&val, &input[inputpos], len);
886
0
            break;
887
0
          }
888
0
          case 'A': {
889
            /* A will strip any trailing whitespace */
890
0
            zend_long len = inputlen - inputpos;  /* Remaining string */
891
892
            /* If size was given take minimum of len and size */
893
0
            if ((size >= 0) && (len > size)) {
894
0
              len = size;
895
0
            }
896
897
0
            size = len;
898
899
            /* Remove trailing white space and nulls chars from unpacked data */
900
0
            while (--len >= 0) {
901
0
              if (input[inputpos + len] != '\0'
902
0
                && input[inputpos + len] != ' '
903
0
                && input[inputpos + len] != '\t'
904
0
                && input[inputpos + len] != '\r'
905
0
                && input[inputpos + len] != '\n'
906
0
              )
907
0
                break;
908
0
            }
909
910
0
            ZVAL_STRINGL(&val, &input[inputpos], len + 1);
911
0
            break;
912
0
          }
913
          /* New option added for Z to remain in-line with the Perl implementation */
914
0
          case 'Z': {
915
            /* Z will strip everything after the first null character */
916
0
            zend_long s,
917
0
               len = inputlen - inputpos; /* Remaining string */
918
919
            /* If size was given take minimum of len and size */
920
0
            if ((size >= 0) && (len > size)) {
921
0
              len = size;
922
0
            }
923
924
0
            size = len;
925
926
            /* Remove everything after the first null */
927
0
            for (s=0 ; s < len ; s++) {
928
0
              if (input[inputpos + s] == '\0')
929
0
                break;
930
0
            }
931
0
            len = s;
932
933
0
            ZVAL_STRINGL(&val, &input[inputpos], len);
934
0
            break;
935
0
          }
936
937
938
0
          case 'h':
939
0
          case 'H': {
940
0
            zend_long len = (inputlen - inputpos) * 2;  /* Remaining */
941
0
            int nibbleshift = (type == 'h') ? 0 : 4;
942
0
            int first = 1;
943
0
            zend_string *buf;
944
0
            zend_long ipos, opos;
945
946
947
0
            if (size > INT_MAX / 2) {
948
0
              if (real_name) {
949
0
                zend_string_release_ex(real_name, false);
950
0
              }
951
0
              zend_argument_value_error(1, "repeater must be less than or equal to %d", INT_MAX / 2);
952
0
              RETURN_THROWS();
953
0
            }
954
955
            /* If size was given take minimum of len and size */
956
0
            if (size >= 0 && len > (size * 2)) {
957
0
              len = size * 2;
958
0
            }
959
960
0
            if (len > 0 && argb > 0) {
961
0
              len -= argb % 2;
962
0
            }
963
964
0
            buf = zend_string_alloc(len, 0);
965
966
0
            for (ipos = opos = 0; opos < len; opos++) {
967
0
              char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
968
969
0
              if (cc < 10) {
970
0
                cc += '0';
971
0
              } else {
972
0
                cc += 'a' - 10;
973
0
              }
974
975
0
              ZSTR_VAL(buf)[opos] = cc;
976
0
              nibbleshift = (nibbleshift + 4) & 7;
977
978
0
              if (first-- == 0) {
979
0
                ipos++;
980
0
                first = 1;
981
0
              }
982
0
            }
983
984
0
            ZSTR_VAL(buf)[len] = '\0';
985
986
0
            ZVAL_STR(&val, buf);
987
0
            break;
988
0
          }
989
990
0
          case 'c':   /* signed */
991
0
          case 'C': { /* unsigned */
992
0
            uint8_t x = input[inputpos];
993
0
            zend_long v = (type == 'c') ? (int8_t) x : x;
994
995
0
            ZVAL_LONG(&val, v);
996
0
            break;
997
0
          }
998
999
0
          case 's':   /* signed machine endian   */
1000
0
          case 'S':   /* unsigned machine endian */
1001
0
          case 'n':   /* unsigned big endian     */
1002
0
          case 'v': { /* unsigned little endian  */
1003
0
            zend_long v = 0;
1004
0
            uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
1005
1006
0
            if (type == 's') {
1007
0
              v = (int16_t) x;
1008
0
            } else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
1009
0
              v = php_pack_reverse_int16(x);
1010
0
            } else {
1011
0
              v = x;
1012
0
            }
1013
1014
0
            ZVAL_LONG(&val, v);
1015
0
            break;
1016
0
          }
1017
1018
0
          case 'i':   /* signed integer, machine size, machine endian */
1019
0
          case 'I': { /* unsigned integer, machine size, machine endian */
1020
0
            zend_long v;
1021
0
            if (type == 'i') {
1022
0
              int x = *((unaligned_int*) &input[inputpos]);
1023
0
              v = x;
1024
0
            } else {
1025
0
              unsigned int x = *((unaligned_uint*) &input[inputpos]);
1026
0
              v = x;
1027
0
            }
1028
1029
0
            ZVAL_LONG(&val, v);
1030
0
            break;
1031
0
          }
1032
1033
0
          case 'l':   /* signed machine endian   */
1034
0
          case 'L':   /* unsigned machine endian */
1035
0
          case 'N':   /* unsigned big endian     */
1036
0
          case 'V': { /* unsigned little endian  */
1037
0
            zend_long v = 0;
1038
0
            uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
1039
1040
0
            if (type == 'l') {
1041
0
              v = (int32_t) x;
1042
0
            } else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
1043
0
              v = php_pack_reverse_int32(x);
1044
0
            } else {
1045
0
              v = x;
1046
0
            }
1047
1048
0
            ZVAL_LONG(&val, v);
1049
0
            break;
1050
0
          }
1051
1052
0
#if SIZEOF_ZEND_LONG > 4
1053
0
          case 'q':   /* signed machine endian   */
1054
0
          case 'Q':   /* unsigned machine endian */
1055
0
          case 'J':   /* unsigned big endian     */
1056
0
          case 'P': { /* unsigned little endian  */
1057
0
            zend_long v = 0;
1058
0
            uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
1059
1060
0
            if (type == 'q') {
1061
0
              v = (int64_t) x;
1062
0
            } else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
1063
0
              v = php_pack_reverse_int64(x);
1064
0
            } else {
1065
0
              v = x;
1066
0
            }
1067
1068
0
            ZVAL_LONG(&val, v);
1069
0
            break;
1070
0
          }
1071
0
#endif
1072
1073
0
          case 'f': /* float */
1074
0
          case 'g': /* little endian float*/
1075
0
          case 'G': /* big endian float*/
1076
0
          {
1077
0
            float v;
1078
1079
0
            if (type == 'g') {
1080
0
              v = php_pack_parse_float(1, &input[inputpos]);
1081
0
            } else if (type == 'G') {
1082
0
              v = php_pack_parse_float(0, &input[inputpos]);
1083
0
            } else {
1084
0
              memcpy(&v, &input[inputpos], sizeof(float));
1085
0
            }
1086
1087
0
            ZVAL_DOUBLE(&val, v);
1088
0
            break;
1089
0
          }
1090
1091
1092
0
          case 'd': /* double */
1093
0
          case 'e': /* little endian double */
1094
0
          case 'E': /* big endian double */
1095
0
          {
1096
0
            double v;
1097
0
            if (type == 'e') {
1098
0
              v = php_pack_parse_double(1, &input[inputpos]);
1099
0
            } else if (type == 'E') {
1100
0
              v = php_pack_parse_double(0, &input[inputpos]);
1101
0
            } else {
1102
0
              memcpy(&v, &input[inputpos], sizeof(double));
1103
0
            }
1104
1105
0
            ZVAL_DOUBLE(&val, v);
1106
0
            break;
1107
0
          }
1108
1109
0
          case 'x':
1110
            /* Do nothing with input, just skip it */
1111
0
            goto no_output;
1112
1113
0
          case 'X':
1114
0
            if (inputpos < size) {
1115
0
              inputpos = -size;
1116
0
              i = repetitions - 1;    /* Break out of for loop */
1117
1118
0
              if (repetitions >= 0) {
1119
0
                php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1120
0
              }
1121
0
            }
1122
0
            goto no_output;
1123
1124
0
          case '@':
1125
0
            if (repetitions <= inputlen) {
1126
0
              inputpos = repetitions;
1127
0
            } else {
1128
0
              php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1129
0
            }
1130
1131
0
            i = repetitions - 1;  /* Done, break out of for loop */
1132
0
            goto no_output;
1133
0
        }
1134
1135
0
        if (real_name) {
1136
0
          zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1137
0
        } else {
1138
0
          zend_hash_index_update(Z_ARRVAL_P(return_value), long_key, &val);
1139
0
        }
1140
1141
0
no_output:
1142
0
        if (real_name) {
1143
0
          zend_string_release_ex(real_name, false);
1144
0
        }
1145
1146
0
        inputpos += size;
1147
0
        if (inputpos < 0) {
1148
0
          if (size != -1) { /* only print warning if not working with * */
1149
0
            php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1150
0
          }
1151
0
          inputpos = 0;
1152
0
        }
1153
0
      } else if (repetitions < 0) {
1154
        /* Reached end of input for '*' repeater */
1155
0
        break;
1156
0
      } else {
1157
0
        php_error_docref(NULL, E_WARNING, "Type %c: not enough input values, need %d values but only " ZEND_LONG_FMT " %s provided", type, size, inputlen - inputpos, inputlen - inputpos == 1 ? "was" : "were");
1158
0
        zend_array_destroy(Z_ARR_P(return_value));
1159
0
        RETURN_FALSE;
1160
0
      }
1161
0
    }
1162
1163
0
    if (formatlen > 0) {
1164
0
      formatlen--;  /* Skip '/' separator, does no harm if inputlen == 0 */
1165
0
      format++;
1166
0
    }
1167
0
  }
1168
0
}
1169
/* }}} */