Coverage Report

Created: 2025-07-23 06:33

/src/php-src/ext/standard/pack.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | https://www.php.net/license/3_01.txt                                 |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Author: Chris Schneider <cschneid@relog.ch>                          |
14
   +----------------------------------------------------------------------+
15
 */
16
17
#include "php.h"
18
19
#include <stdlib.h>
20
#include <errno.h>
21
#include <sys/types.h>
22
23
/* Advance the enclosing function's `outputpos` by (a) * (b) bytes.
 *
 * a: element count (may be negative, which is rejected), b: bytes per element.
 * The macro relies on the following names being in scope at the expansion
 * site: `outputpos` (int), `formatcodes`, `formatargs` and `code`.
 * If (a) is negative or outputpos + (a)*(b) would exceed INT_MAX, both format
 * buffers are freed, a value error is raised and the calling function returns
 * through RETURN_THROWS().
 *
 * The expansion is a single brace-enclosed block so that the overflow check
 * and the increment always stay together (e.g. under an unbraced `if`); it is
 * meant to be used without a trailing semicolon. Both arguments are evaluated
 * more than once, so they must be free of side effects. */
#define INC_OUTPUTPOS(a,b) \
  { \
    if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
      efree(formatcodes);  \
      efree(formatargs); \
      zend_value_error("Type %c: integer overflow in format string", code); \
      RETURN_THROWS(); \
    } \
    outputpos += (a)*(b); \
  }
31
32
/* Byte order selector passed to php_pack() to choose the layout of the
 * bytes written to the output buffer. */
typedef enum {
33
  PHP_LITTLE_ENDIAN,
34
  PHP_BIG_ENDIAN,
35
} php_pack_endianness;
36
37
/* Host byte order, derived from WORDS_BIGENDIAN:
 *  - MACHINE_LITTLE_ENDIAN is 0 when WORDS_BIGENDIAN is defined, 1 otherwise,
 *    so it can be used both in #if and in ordinary expressions.
 *  - PHP_MACHINE_ENDIAN is the php_pack_endianness value matching the host. */
#ifdef WORDS_BIGENDIAN
38
# define MACHINE_LITTLE_ENDIAN 0
39
# define PHP_MACHINE_ENDIAN PHP_BIG_ENDIAN
40
#else
41
0
# define MACHINE_LITTLE_ENDIAN 1
42
0
# define PHP_MACHINE_ENDIAN PHP_LITTLE_ENDIAN
43
#endif
44
45
/* Byte-swap helper used by php_pack() on a zend_ulong: expands to the 64-bit
 * swap when ZEND_ENABLE_ZVAL_LONG64 is defined and to the 32-bit swap
 * otherwise. */
#ifdef ZEND_ENABLE_ZVAL_LONG64
46
0
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP64(u)
47
#else
48
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP32(u)
49
#endif
50
51
/* Integer types declared through ZEND_SET_ALIGNED(1, ...). unpack() casts
 * pointers into the input string to these types before dereferencing them.
 * NOTE(review): presumably ZEND_SET_ALIGNED(1, ...) lowers the required
 * alignment to one byte so such reads are valid at any offset - confirm
 * against the macro's definition. */
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
52
typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
53
typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
54
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
55
typedef ZEND_SET_ALIGNED(1, int unaligned_int);
56
57
/* {{{ php_pack */
58
/* Write the integer value of `val` into `output` as `size` bytes in the
 * requested byte order.
 *
 * val:        value to pack; converted with zval_get_long().
 * size:       number of bytes to emit (callers pass 1, 2, 4, 8 or sizeof(int)).
 * endianness: byte order of the emitted bytes.
 * output:     destination; exactly `size` bytes are written.
 *
 * The bytes that must be emitted are moved to the start of the in-memory
 * representation of the value, then copied out with a single memcpy(). */
static void php_pack(const zval *val, size_t size, php_pack_endianness endianness, char *output)
{
  zend_ulong raw = zval_get_long(val);
  const int want_little_endian = (endianness == PHP_LITTLE_ENDIAN);

  if (want_little_endian != MACHINE_LITTLE_ENDIAN) {
    /* Target order differs from the host: reverse all bytes first. */
    raw = PHP_LONG_BSWAP(raw);
#if MACHINE_LITTLE_ENDIAN
    /* After the swap the wanted bytes are the most significant ones;
     * shift them down so they come first in memory on this host. */
    raw >>= (sizeof(raw) - size) * 8;
#endif
  }
#if !MACHINE_LITTLE_ENDIAN
  else {
    /* Big endian host, big endian target: the wanted bytes are the least
     * significant ones, which are stored last; shift them to the front. */
    raw <<= (sizeof(raw) - size) * 8;
  }
#endif

  memcpy(output, (const char *) &raw, size);
}
75
/* }}} */
76
77
/* Return `arg` with its two bytes exchanged. */
ZEND_ATTRIBUTE_CONST static inline uint16_t php_pack_reverse_int16(uint16_t arg)
{
  const uint16_t low_byte = arg & 0xFF;
  const uint16_t high_byte = (arg >> 8) & 0xFF;

  /* The former low byte becomes the high byte and vice versa. */
  return (low_byte << 8) | high_byte;
}
81
82
/* {{{ php_pack_reverse_int32 */
83
/* Return `arg` passed through ZEND_BYTES_SWAP32(), i.e. with its four bytes
 * in reverse order. */
ZEND_ATTRIBUTE_CONST static inline uint32_t php_pack_reverse_int32(uint32_t arg)
{
  const uint32_t reversed = ZEND_BYTES_SWAP32(arg);

  return reversed;
}
87
/* }}} */
88
89
/* {{{ php_pack_reverse_int64 */
90
/* Return `arg` with its eight bytes in reverse order.
 *
 * Each 32-bit half is byte-reversed with php_pack_reverse_int32() and the two
 * halves then trade places. */
static inline uint64_t php_pack_reverse_int64(uint64_t arg)
{
  const uint32_t low_half = (uint32_t) (arg & 0xFFFFFFFFu);
  const uint32_t high_half = (uint32_t) (arg >> 32);

  return ((uint64_t) php_pack_reverse_int32(low_half) << 32)
    | (uint64_t) php_pack_reverse_int32(high_half);
}
102
/* }}} */
103
104
/* {{{ php_pack_copy_float */
105
/* Store `f` at `dst` as sizeof(float) bytes in the requested byte order.
 *
 * is_little_endian: non-zero to emit little endian bytes, zero for big endian.
 * dst:              destination buffer; sizeof(float) bytes are written.
 * f:                value to store. */
static void php_pack_copy_float(int is_little_endian, void * dst, float f)
{
  uint32_t bits;
  /* A swap is needed only when the requested order differs from the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = (is_little_endian != 0);
#else /* WORDS_BIGENDIAN */
  const int needs_swap = (is_little_endian == 0);
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, &f, sizeof(float));
  if (needs_swap) {
    bits = php_pack_reverse_int32(bits);
  }

  memcpy(dst, &bits, sizeof(float));
}
125
/* }}} */
126
127
/* {{{ php_pack_copy_double */
128
/* Store `d` at `dst` as sizeof(double) bytes in the requested byte order.
 *
 * is_little_endian: non-zero to emit little endian bytes, zero for big endian.
 * dst:              destination buffer; sizeof(double) bytes are written.
 * d:                value to store. */
static void php_pack_copy_double(int is_little_endian, void * dst, double d)
{
  uint64_t bits;
  /* A swap is needed only when the requested order differs from the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = (is_little_endian != 0);
#else /* WORDS_BIGENDIAN */
  const int needs_swap = (is_little_endian == 0);
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, &d, sizeof(double));
  if (needs_swap) {
    bits = php_pack_reverse_int64(bits);
  }

  memcpy(dst, &bits, sizeof(double));
}
148
/* }}} */
149
150
/* {{{ php_pack_parse_float */
151
/* Read sizeof(float) bytes from `src`, stored in the given byte order, and
 * return them as a float.
 *
 * is_little_endian: non-zero if the bytes at `src` are little endian,
 *                   zero if they are big endian.
 * src:              source buffer; no alignment is required because the
 *                   bytes are fetched with memcpy(). */
static float php_pack_parse_float(int is_little_endian, void * src)
{
  uint32_t bits;
  float value;
  /* A swap is needed only when the stored order differs from the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = (is_little_endian != 0);
#else /* WORDS_BIGENDIAN */
  const int needs_swap = (is_little_endian == 0);
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, src, sizeof(float));
  if (needs_swap) {
    bits = php_pack_reverse_int32(bits);
  }

  memcpy(&value, &bits, sizeof(float));
  return value;
}
171
/* }}} */
172
173
/* {{{ php_pack_parse_double */
174
/* Read sizeof(double) bytes from `src`, stored in the given byte order, and
 * return them as a double.
 *
 * is_little_endian: non-zero if the bytes at `src` are little endian,
 *                   zero if they are big endian.
 * src:              source buffer; no alignment is required because the
 *                   bytes are fetched with memcpy(). */
static double php_pack_parse_double(int is_little_endian, void * src)
{
  uint64_t bits;
  double value;
  /* A swap is needed only when the stored order differs from the host's. */
#ifdef WORDS_BIGENDIAN
  const int needs_swap = (is_little_endian != 0);
#else /* WORDS_BIGENDIAN */
  const int needs_swap = (is_little_endian == 0);
#endif /* WORDS_BIGENDIAN */

  memcpy(&bits, src, sizeof(double));
  if (needs_swap) {
    bits = php_pack_reverse_int64(bits);
  }

  memcpy(&value, &bits, sizeof(double));
  return value;
}
194
/* }}} */
195
196
/* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
197
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
198
 * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
199
 */
200
/* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
201
PHP_FUNCTION(pack)
202
0
{
203
0
  zval *argv = NULL;
204
0
  int num_args = 0;
205
0
  size_t i;
206
0
  int currentarg;
207
0
  char *format;
208
0
  size_t formatlen;
209
0
  char *formatcodes;
210
0
  int *formatargs;
211
0
  size_t formatcount = 0;
212
0
  int outputpos = 0, outputsize = 0;
213
0
  zend_string *output;
214
215
0
  ZEND_PARSE_PARAMETERS_START(1, -1)
216
0
    Z_PARAM_STRING(format, formatlen)
217
0
    Z_PARAM_VARIADIC('*', argv, num_args)
218
0
  ZEND_PARSE_PARAMETERS_END();
219
220
  /* We have a maximum of <formatlen> format codes to deal with */
221
0
  formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
222
0
  formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
223
0
  currentarg = 0;
224
225
  /* Preprocess format into formatcodes and formatargs */
226
0
  for (i = 0; i < formatlen; formatcount++) {
227
0
    char code = format[i++];
228
0
    int arg = 1;
229
230
    /* Handle format arguments if any */
231
0
    if (i < formatlen) {
232
0
      char c = format[i];
233
234
0
      if (c == '*') {
235
0
        arg = -1;
236
0
        i++;
237
0
      }
238
0
      else if (c >= '0' && c <= '9') {
239
0
        arg = atoi(&format[i]);
240
241
0
        while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
242
0
          i++;
243
0
        }
244
0
      }
245
0
    }
246
247
    /* Handle special arg '*' for all codes and check argv overflows */
248
0
    switch (code) {
249
      /* Never uses any args */
250
0
      case 'x':
251
0
      case 'X':
252
0
      case '@':
253
0
        if (arg < 0) {
254
0
          php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
255
0
          arg = 1;
256
0
        }
257
0
        break;
258
259
      /* Always uses one arg */
260
0
      case 'a':
261
0
      case 'A':
262
0
      case 'Z':
263
0
      case 'h':
264
0
      case 'H':
265
0
        if (currentarg >= num_args) {
266
0
          efree(formatcodes);
267
0
          efree(formatargs);
268
0
          zend_value_error("Type %c: not enough arguments", code);
269
0
          RETURN_THROWS();
270
0
        }
271
272
0
        if (arg < 0) {
273
0
          if (!try_convert_to_string(&argv[currentarg])) {
274
0
            efree(formatcodes);
275
0
            efree(formatargs);
276
0
            RETURN_THROWS();
277
0
          }
278
279
0
          arg = Z_STRLEN(argv[currentarg]);
280
0
          if (code == 'Z') {
281
            /* add one because Z is always NUL-terminated:
282
             * pack("Z*", "aa") === "aa\0"
283
             * pack("Z2", "aa") === "a\0" */
284
0
            arg++;
285
0
          }
286
0
        }
287
288
0
        currentarg++;
289
0
        break;
290
291
      /* Use as many args as specified */
292
0
      case 'q':
293
0
      case 'Q':
294
0
      case 'J':
295
0
      case 'P':
296
#if SIZEOF_ZEND_LONG < 8
297
          efree(formatcodes);
298
          efree(formatargs);
299
          zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
300
          RETURN_THROWS();
301
#endif
302
0
      case 'c':
303
0
      case 'C':
304
0
      case 's':
305
0
      case 'S':
306
0
      case 'i':
307
0
      case 'I':
308
0
      case 'l':
309
0
      case 'L':
310
0
      case 'n':
311
0
      case 'N':
312
0
      case 'v':
313
0
      case 'V':
314
0
      case 'f': /* float */
315
0
      case 'g': /* little endian float */
316
0
      case 'G': /* big endian float */
317
0
      case 'd': /* double */
318
0
      case 'e': /* little endian double */
319
0
      case 'E': /* big endian double */
320
0
        if (arg < 0) {
321
0
          arg = num_args - currentarg;
322
0
        }
323
0
        if (currentarg > INT_MAX - arg) {
324
0
          goto too_few_args;
325
0
        }
326
0
        currentarg += arg;
327
328
0
        if (currentarg > num_args) {
329
0
too_few_args:
330
0
          efree(formatcodes);
331
0
          efree(formatargs);
332
0
          zend_value_error("Type %c: too few arguments", code);
333
0
          RETURN_THROWS();
334
0
        }
335
0
        break;
336
337
0
      default:
338
0
        efree(formatcodes);
339
0
        efree(formatargs);
340
0
        zend_value_error("Type %c: unknown format code", code);
341
0
        RETURN_THROWS();
342
0
    }
343
344
0
    formatcodes[formatcount] = code;
345
0
    formatargs[formatcount] = arg;
346
0
  }
347
348
0
  if (currentarg < num_args) {
349
0
    php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
350
0
  }
351
352
  /* Calculate output length and upper bound while processing*/
353
0
  for (i = 0; i < formatcount; i++) {
354
0
    char code = formatcodes[i];
355
0
    int arg = formatargs[i];
356
357
0
    switch (code) {
358
0
      case 'h':
359
0
      case 'H':
360
0
        INC_OUTPUTPOS((arg / 2) + (arg % 2),1) /* 4 bit per arg */
361
0
        break;
362
363
0
      case 'a':
364
0
      case 'A':
365
0
      case 'Z':
366
0
      case 'c':
367
0
      case 'C':
368
0
      case 'x':
369
0
        INC_OUTPUTPOS(arg,1)   /* 8 bit per arg */
370
0
        break;
371
372
0
      case 's':
373
0
      case 'S':
374
0
      case 'n':
375
0
      case 'v':
376
0
        INC_OUTPUTPOS(arg,2)   /* 16 bit per arg */
377
0
        break;
378
379
0
      case 'i':
380
0
      case 'I':
381
0
        INC_OUTPUTPOS(arg,sizeof(int))
382
0
        break;
383
384
0
      case 'l':
385
0
      case 'L':
386
0
      case 'N':
387
0
      case 'V':
388
0
        INC_OUTPUTPOS(arg,4)   /* 32 bit per arg */
389
0
        break;
390
391
0
#if SIZEOF_ZEND_LONG > 4
392
0
      case 'q':
393
0
      case 'Q':
394
0
      case 'J':
395
0
      case 'P':
396
0
        INC_OUTPUTPOS(arg,8)   /* 32 bit per arg */
397
0
        break;
398
0
#endif
399
400
0
      case 'f': /* float */
401
0
      case 'g': /* little endian float */
402
0
      case 'G': /* big endian float */
403
0
        INC_OUTPUTPOS(arg,sizeof(float))
404
0
        break;
405
406
0
      case 'd': /* double */
407
0
      case 'e': /* little endian double */
408
0
      case 'E': /* big endian double */
409
0
        INC_OUTPUTPOS(arg,sizeof(double))
410
0
        break;
411
412
0
      case 'X':
413
0
        outputpos -= arg;
414
415
0
        if (outputpos < 0) {
416
0
          php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
417
0
          outputpos = 0;
418
0
        }
419
0
        break;
420
421
0
      case '@':
422
0
        outputpos = arg;
423
0
        break;
424
0
    }
425
426
0
    if (outputsize < outputpos) {
427
0
      outputsize = outputpos;
428
0
    }
429
0
  }
430
431
0
  output = zend_string_alloc(outputsize, 0);
432
0
  outputpos = 0;
433
0
  currentarg = 0;
434
435
  /* Do actual packing */
436
0
  for (i = 0; i < formatcount; i++) {
437
0
    char code = formatcodes[i];
438
0
    int arg = formatargs[i];
439
440
0
    switch (code) {
441
0
      case 'a':
442
0
      case 'A':
443
0
      case 'Z': {
444
0
        size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
445
0
        zend_string *tmp_str;
446
0
        zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
447
448
0
        memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
449
0
        memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
450
0
             (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
451
452
0
        outputpos += arg;
453
0
        zend_tmp_string_release(tmp_str);
454
0
        break;
455
0
      }
456
457
0
      case 'h':
458
0
      case 'H': {
459
0
        int nibbleshift = (code == 'h') ? 0 : 4;
460
0
        int first = 1;
461
0
        zend_string *tmp_str;
462
0
        zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
463
0
        char *v = ZSTR_VAL(str);
464
465
0
        outputpos--;
466
0
        if ((size_t)arg > ZSTR_LEN(str)) {
467
0
          php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
468
0
          arg = ZSTR_LEN(str);
469
0
        }
470
471
0
        while (arg-- > 0) {
472
0
          char n = *v++;
473
474
0
          if (n >= '0' && n <= '9') {
475
0
            n -= '0';
476
0
          } else if (n >= 'A' && n <= 'F') {
477
0
            n -= ('A' - 10);
478
0
          } else if (n >= 'a' && n <= 'f') {
479
0
            n -= ('a' - 10);
480
0
          } else {
481
0
            php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
482
0
            n = 0;
483
0
          }
484
485
0
          if (first--) {
486
0
            ZSTR_VAL(output)[++outputpos] = 0;
487
0
          } else {
488
0
            first = 1;
489
0
          }
490
491
0
          ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
492
0
          nibbleshift = (nibbleshift + 4) & 7;
493
0
        }
494
495
0
        outputpos++;
496
0
        zend_tmp_string_release(tmp_str);
497
0
        break;
498
0
      }
499
500
0
      case 'c':
501
0
      case 'C':
502
0
        while (arg-- > 0) {
503
0
          php_pack(&argv[currentarg++], 1, PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
504
0
          outputpos++;
505
0
        }
506
0
        break;
507
508
0
      case 's':
509
0
      case 'S':
510
0
      case 'n':
511
0
      case 'v': {
512
0
        php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
513
514
0
        if (code == 'n') {
515
0
          endianness = PHP_BIG_ENDIAN;
516
0
        } else if (code == 'v') {
517
0
          endianness = PHP_LITTLE_ENDIAN;
518
0
        }
519
520
0
        while (arg-- > 0) {
521
0
          php_pack(&argv[currentarg++], 2, endianness, &ZSTR_VAL(output)[outputpos]);
522
0
          outputpos += 2;
523
0
        }
524
0
        break;
525
0
      }
526
527
0
      case 'i':
528
0
      case 'I':
529
0
        while (arg-- > 0) {
530
0
          php_pack(&argv[currentarg++], sizeof(int), PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
531
0
          outputpos += sizeof(int);
532
0
        }
533
0
        break;
534
535
0
      case 'l':
536
0
      case 'L':
537
0
      case 'N':
538
0
      case 'V': {
539
0
        php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
540
541
0
        if (code == 'N') {
542
0
          endianness = PHP_BIG_ENDIAN;
543
0
        } else if (code == 'V') {
544
0
          endianness = PHP_LITTLE_ENDIAN;
545
0
        }
546
547
0
        while (arg-- > 0) {
548
0
          php_pack(&argv[currentarg++], 4, endianness, &ZSTR_VAL(output)[outputpos]);
549
0
          outputpos += 4;
550
0
        }
551
0
        break;
552
0
      }
553
554
0
#if SIZEOF_ZEND_LONG > 4
555
0
      case 'q':
556
0
      case 'Q':
557
0
      case 'J':
558
0
      case 'P': {
559
0
        php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
560
561
0
        if (code == 'J') {
562
0
          endianness = PHP_BIG_ENDIAN;
563
0
        } else if (code == 'P') {
564
0
          endianness = PHP_LITTLE_ENDIAN;
565
0
        }
566
567
0
        while (arg-- > 0) {
568
0
          php_pack(&argv[currentarg++], 8, endianness, &ZSTR_VAL(output)[outputpos]);
569
0
          outputpos += 8;
570
0
        }
571
0
        break;
572
0
      }
573
0
#endif
574
575
0
      case 'f': {
576
0
        while (arg-- > 0) {
577
0
          float v = (float) zval_get_double(&argv[currentarg++]);
578
0
          memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
579
0
          outputpos += sizeof(v);
580
0
        }
581
0
        break;
582
0
      }
583
584
0
      case 'g': {
585
        /* pack little endian float */
586
0
        while (arg-- > 0) {
587
0
          float v = (float) zval_get_double(&argv[currentarg++]);
588
0
          php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
589
0
          outputpos += sizeof(v);
590
0
        }
591
592
0
        break;
593
0
      }
594
0
      case 'G': {
595
        /* pack big endian float */
596
0
        while (arg-- > 0) {
597
0
          float v = (float) zval_get_double(&argv[currentarg++]);
598
0
          php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
599
0
          outputpos += sizeof(v);
600
0
        }
601
0
        break;
602
0
      }
603
604
0
      case 'd': {
605
0
        while (arg-- > 0) {
606
0
          double v = zval_get_double(&argv[currentarg++]);
607
0
          memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
608
0
          outputpos += sizeof(v);
609
0
        }
610
0
        break;
611
0
      }
612
613
0
      case 'e': {
614
        /* pack little endian double */
615
0
        while (arg-- > 0) {
616
0
          double v = zval_get_double(&argv[currentarg++]);
617
0
          php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
618
0
          outputpos += sizeof(v);
619
0
        }
620
0
        break;
621
0
      }
622
623
0
      case 'E': {
624
        /* pack big endian double */
625
0
        while (arg-- > 0) {
626
0
          double v = zval_get_double(&argv[currentarg++]);
627
0
          php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
628
0
          outputpos += sizeof(v);
629
0
        }
630
0
        break;
631
0
      }
632
633
0
      case 'x':
634
0
        memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
635
0
        outputpos += arg;
636
0
        break;
637
638
0
      case 'X':
639
0
        outputpos -= arg;
640
641
0
        if (outputpos < 0) {
642
0
          outputpos = 0;
643
0
        }
644
0
        break;
645
646
0
      case '@':
647
0
        if (arg > outputpos) {
648
0
          memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
649
0
        }
650
0
        outputpos = arg;
651
0
        break;
652
0
    }
653
0
  }
654
655
0
  efree(formatcodes);
656
0
  efree(formatargs);
657
0
  ZSTR_VAL(output)[outputpos] = '\0';
658
0
  ZSTR_LEN(output) = outputpos;
659
0
  RETURN_NEW_STR(output);
660
0
}
661
/* }}} */
662
663
/* unpack() is based on Perl's unpack(), but is modified a bit from there.
664
 * Rather than depending on error-prone ordered lists or syntactically
665
 * unpleasant pass-by-reference, we return an array with named elements
666
 * (like *_fetch_object()). Syntax is "f[repeat]name/...", where "f" is the
667
 * formatter char (like pack()), "[repeat]" is the optional repeater argument,
668
 * and "name" is the name of the variable to use.
669
 * Example: "c2chars/nints" will return an array with keys
670
 * chars1, chars2, and ints.
671
 * Numeric pack types will return numbers, a and A will return strings,
672
 * f and d will return doubles.
673
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
674
 * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
675
 */
676
/* {{{ Unpack binary string into named array elements according to format argument */
677
PHP_FUNCTION(unpack)
678
0
{
679
0
  char *format, *input;
680
0
  zend_string *formatarg, *inputarg;
681
0
  zend_long formatlen, inputpos, inputlen;
682
0
  int i;
683
0
  zend_long offset = 0;
684
685
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
686
0
    Z_PARAM_STR(formatarg)
687
0
    Z_PARAM_STR(inputarg)
688
0
    Z_PARAM_OPTIONAL
689
0
    Z_PARAM_LONG(offset)
690
0
  ZEND_PARSE_PARAMETERS_END();
691
692
0
  format = ZSTR_VAL(formatarg);
693
0
  formatlen = ZSTR_LEN(formatarg);
694
0
  input = ZSTR_VAL(inputarg);
695
0
  inputlen = ZSTR_LEN(inputarg);
696
0
  inputpos = 0;
697
698
699
0
  if (offset < 0 || offset > inputlen) {
700
0
    zend_argument_value_error(3, "must be contained in argument #2 ($data)");
701
0
    RETURN_THROWS();
702
0
  }
703
704
0
  input += offset;
705
0
  inputlen -= offset;
706
707
0
  array_init(return_value);
708
709
0
  while (formatlen-- > 0) {
710
0
    char type = *(format++);
711
0
    int repetitions = 1, argb;
712
0
    char *name;
713
0
    int namelen;
714
0
    int size = 0;
715
716
    /* Handle format arguments if any */
717
0
    if (formatlen > 0) {
718
0
      char c = *format;
719
720
0
      if (c >= '0' && c <= '9') {
721
0
        errno = 0;
722
0
        long tmp = strtol(format, NULL, 10);
723
        /* There is not strtoi. We have to check the range ourselves.
724
         * With 32-bit long the INT_{MIN,MAX} are useless because long == int, but with 64-bit they do limit us to 32-bit. */
725
0
        if (errno || tmp < INT_MIN || tmp > INT_MAX) {
726
0
          php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
727
0
          zend_array_destroy(Z_ARR_P(return_value));
728
0
          RETURN_FALSE;
729
0
        }
730
0
        repetitions = tmp;
731
732
0
        while (formatlen > 0 && *format >= '0' && *format <= '9') {
733
0
          format++;
734
0
          formatlen--;
735
0
        }
736
0
      } else if (c == '*') {
737
0
        repetitions = -1;
738
0
        format++;
739
0
        formatlen--;
740
0
      }
741
0
    }
742
743
    /* Get of new value in array */
744
0
    name = format;
745
0
    argb = repetitions;
746
747
0
    while (formatlen > 0 && *format != '/') {
748
0
      formatlen--;
749
0
      format++;
750
0
    }
751
752
0
    namelen = format - name;
753
754
0
    if (namelen > 200)
755
0
      namelen = 200;
756
757
0
    switch (type) {
758
      /* Never use any input */
759
0
      case 'X':
760
0
        size = -1;
761
0
        if (repetitions < 0) {
762
0
          php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
763
0
          repetitions = 1;
764
0
        }
765
0
        break;
766
767
0
      case '@':
768
0
        size = 0;
769
0
        break;
770
771
0
      case 'a':
772
0
      case 'A':
773
0
      case 'Z':
774
0
        size = repetitions;
775
0
        repetitions = 1;
776
0
        break;
777
778
0
      case 'h':
779
0
      case 'H':
780
0
        size = (repetitions > 0) ? ((unsigned int) repetitions + 1) / 2 : repetitions;
781
0
        repetitions = 1;
782
0
        break;
783
784
      /* Use 1 byte of input */
785
0
      case 'c':
786
0
      case 'C':
787
0
      case 'x':
788
0
        size = 1;
789
0
        break;
790
791
      /* Use 2 bytes of input */
792
0
      case 's':
793
0
      case 'S':
794
0
      case 'n':
795
0
      case 'v':
796
0
        size = 2;
797
0
        break;
798
799
      /* Use sizeof(int) bytes of input */
800
0
      case 'i':
801
0
      case 'I':
802
0
        size = sizeof(int);
803
0
        break;
804
805
      /* Use 4 bytes of input */
806
0
      case 'l':
807
0
      case 'L':
808
0
      case 'N':
809
0
      case 'V':
810
0
        size = 4;
811
0
        break;
812
813
      /* Use 8 bytes of input */
814
0
      case 'q':
815
0
      case 'Q':
816
0
      case 'J':
817
0
      case 'P':
818
0
#if SIZEOF_ZEND_LONG > 4
819
0
        size = 8;
820
0
        break;
821
#else
822
        zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
823
        RETURN_THROWS();
824
#endif
825
826
      /* Use sizeof(float) bytes of input */
827
0
      case 'f':
828
0
      case 'g':
829
0
      case 'G':
830
0
        size = sizeof(float);
831
0
        break;
832
833
      /* Use sizeof(double) bytes of input */
834
0
      case 'd':
835
0
      case 'e':
836
0
      case 'E':
837
0
        size = sizeof(double);
838
0
        break;
839
840
0
      default:
841
0
        zend_value_error("Invalid format type %c", type);
842
0
        RETURN_THROWS();
843
0
    }
844
845
846
    /* Do actual unpacking */
847
0
    for (i = 0; i != repetitions; i++ ) {
848
849
0
      if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
850
0
        php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
851
0
        zend_array_destroy(Z_ARR_P(return_value));
852
0
        RETURN_FALSE;
853
0
      }
854
855
0
      if ((inputpos + size) <= inputlen) {
856
857
0
        zend_string* real_name;
858
0
        zend_long long_key = 0;
859
0
        zval val;
860
861
0
        if (namelen == 0) {
862
0
          real_name = NULL;
863
0
          long_key = i + 1;
864
0
        } else if (repetitions == 1) {
865
          /* Use a part of the formatarg argument directly as the name. */
866
0
          real_name = zend_string_init_fast(name, namelen);
867
0
        } else {
868
          /* Need to add the 1-based element number to the name */
869
0
          char buf[MAX_LENGTH_OF_LONG + 1];
870
0
          char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
871
0
          size_t digits = buf + sizeof(buf) - 1 - res;
872
0
          real_name = zend_string_concat2(name, namelen, res, digits);
873
0
        }
874
875
0
        switch (type) {
876
0
          case 'a': {
877
            /* a will not strip any trailing whitespace or null padding */
878
0
            zend_long len = inputlen - inputpos;  /* Remaining string */
879
880
            /* If size was given take minimum of len and size */
881
0
            if ((size >= 0) && (len > size)) {
882
0
              len = size;
883
0
            }
884
885
0
            size = len;
886
887
0
            ZVAL_STRINGL(&val, &input[inputpos], len);
888
0
            break;
889
0
          }
890
0
          case 'A': {
891
            /* A will strip any trailing whitespace */
892
0
            zend_long len = inputlen - inputpos;  /* Remaining string */
893
894
            /* If size was given take minimum of len and size */
895
0
            if ((size >= 0) && (len > size)) {
896
0
              len = size;
897
0
            }
898
899
0
            size = len;
900
901
            /* Remove trailing white space and nulls chars from unpacked data */
902
0
            while (--len >= 0) {
903
0
              if (input[inputpos + len] != '\0'
904
0
                && input[inputpos + len] != ' '
905
0
                && input[inputpos + len] != '\t'
906
0
                && input[inputpos + len] != '\r'
907
0
                && input[inputpos + len] != '\n'
908
0
              )
909
0
                break;
910
0
            }
911
912
0
            ZVAL_STRINGL(&val, &input[inputpos], len + 1);
913
0
            break;
914
0
          }
915
          /* New option added for Z to remain in-line with the Perl implementation */
916
0
          case 'Z': {
917
            /* Z will strip everything after the first null character */
918
0
            zend_long s,
919
0
               len = inputlen - inputpos; /* Remaining string */
920
921
            /* If size was given take minimum of len and size */
922
0
            if ((size >= 0) && (len > size)) {
923
0
              len = size;
924
0
            }
925
926
0
            size = len;
927
928
            /* Remove everything after the first null */
929
0
            for (s=0 ; s < len ; s++) {
930
0
              if (input[inputpos + s] == '\0')
931
0
                break;
932
0
            }
933
0
            len = s;
934
935
0
            ZVAL_STRINGL(&val, &input[inputpos], len);
936
0
            break;
937
0
          }
938
939
940
0
          case 'h':
941
0
          case 'H': {
942
0
            zend_long len = (inputlen - inputpos) * 2;  /* Remaining */
943
0
            int nibbleshift = (type == 'h') ? 0 : 4;
944
0
            int first = 1;
945
0
            zend_string *buf;
946
0
            zend_long ipos, opos;
947
948
949
0
            if (size > INT_MAX / 2) {
950
0
              if (real_name) {
951
0
                zend_string_release_ex(real_name, false);
952
0
              }
953
0
              zend_argument_value_error(1, "repeater must be less than or equal to %d", INT_MAX / 2);
954
0
              RETURN_THROWS();
955
0
            }
956
957
            /* If size was given take minimum of len and size */
958
0
            if (size >= 0 && len > (size * 2)) {
959
0
              len = size * 2;
960
0
            }
961
962
0
            if (len > 0 && argb > 0) {
963
0
              len -= argb % 2;
964
0
            }
965
966
0
            buf = zend_string_alloc(len, 0);
967
968
0
            for (ipos = opos = 0; opos < len; opos++) {
969
0
              char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
970
971
0
              if (cc < 10) {
972
0
                cc += '0';
973
0
              } else {
974
0
                cc += 'a' - 10;
975
0
              }
976
977
0
              ZSTR_VAL(buf)[opos] = cc;
978
0
              nibbleshift = (nibbleshift + 4) & 7;
979
980
0
              if (first-- == 0) {
981
0
                ipos++;
982
0
                first = 1;
983
0
              }
984
0
            }
985
986
0
            ZSTR_VAL(buf)[len] = '\0';
987
988
0
            ZVAL_STR(&val, buf);
989
0
            break;
990
0
          }
991
992
0
          case 'c':   /* signed */
993
0
          case 'C': { /* unsigned */
994
0
            uint8_t x = input[inputpos];
995
0
            zend_long v = (type == 'c') ? (int8_t) x : x;
996
997
0
            ZVAL_LONG(&val, v);
998
0
            break;
999
0
          }
1000
1001
0
          case 's':   /* signed machine endian   */
1002
0
          case 'S':   /* unsigned machine endian */
1003
0
          case 'n':   /* unsigned big endian     */
1004
0
          case 'v': { /* unsigned little endian  */
1005
0
            zend_long v = 0;
1006
0
            uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
1007
1008
0
            if (type == 's') {
1009
0
              v = (int16_t) x;
1010
0
            } else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
1011
0
              v = php_pack_reverse_int16(x);
1012
0
            } else {
1013
0
              v = x;
1014
0
            }
1015
1016
0
            ZVAL_LONG(&val, v);
1017
0
            break;
1018
0
          }
1019
1020
0
          case 'i':   /* signed integer, machine size, machine endian */
1021
0
          case 'I': { /* unsigned integer, machine size, machine endian */
1022
0
            zend_long v;
1023
0
            if (type == 'i') {
1024
0
              int x = *((unaligned_int*) &input[inputpos]);
1025
0
              v = x;
1026
0
            } else {
1027
0
              unsigned int x = *((unaligned_uint*) &input[inputpos]);
1028
0
              v = x;
1029
0
            }
1030
1031
0
            ZVAL_LONG(&val, v);
1032
0
            break;
1033
0
          }
1034
1035
0
          case 'l':   /* signed machine endian   */
1036
0
          case 'L':   /* unsigned machine endian */
1037
0
          case 'N':   /* unsigned big endian     */
1038
0
          case 'V': { /* unsigned little endian  */
1039
0
            zend_long v = 0;
1040
0
            uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
1041
1042
0
            if (type == 'l') {
1043
0
              v = (int32_t) x;
1044
0
            } else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
1045
0
              v = php_pack_reverse_int32(x);
1046
0
            } else {
1047
0
              v = x;
1048
0
            }
1049
1050
0
            ZVAL_LONG(&val, v);
1051
0
            break;
1052
0
          }
1053
1054
0
#if SIZEOF_ZEND_LONG > 4
1055
0
          case 'q':   /* signed machine endian   */
1056
0
          case 'Q':   /* unsigned machine endian */
1057
0
          case 'J':   /* unsigned big endian     */
1058
0
          case 'P': { /* unsigned little endian  */
1059
0
            zend_long v = 0;
1060
0
            uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
1061
1062
0
            if (type == 'q') {
1063
0
              v = (int64_t) x;
1064
0
            } else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
1065
0
              v = php_pack_reverse_int64(x);
1066
0
            } else {
1067
0
              v = x;
1068
0
            }
1069
1070
0
            ZVAL_LONG(&val, v);
1071
0
            break;
1072
0
          }
1073
0
#endif
1074
1075
0
          case 'f': /* float */
1076
0
          case 'g': /* little endian float*/
1077
0
          case 'G': /* big endian float*/
1078
0
          {
1079
0
            float v;
1080
1081
0
            if (type == 'g') {
1082
0
              v = php_pack_parse_float(1, &input[inputpos]);
1083
0
            } else if (type == 'G') {
1084
0
              v = php_pack_parse_float(0, &input[inputpos]);
1085
0
            } else {
1086
0
              memcpy(&v, &input[inputpos], sizeof(float));
1087
0
            }
1088
1089
0
            ZVAL_DOUBLE(&val, v);
1090
0
            break;
1091
0
          }
1092
1093
1094
0
          case 'd': /* double */
1095
0
          case 'e': /* little endian double */
1096
0
          case 'E': /* big endian double */
1097
0
          {
1098
0
            double v;
1099
0
            if (type == 'e') {
1100
0
              v = php_pack_parse_double(1, &input[inputpos]);
1101
0
            } else if (type == 'E') {
1102
0
              v = php_pack_parse_double(0, &input[inputpos]);
1103
0
            } else {
1104
0
              memcpy(&v, &input[inputpos], sizeof(double));
1105
0
            }
1106
1107
0
            ZVAL_DOUBLE(&val, v);
1108
0
            break;
1109
0
          }
1110
1111
0
          case 'x':
1112
            /* Do nothing with input, just skip it */
1113
0
            goto no_output;
1114
1115
0
          case 'X':
1116
0
            if (inputpos < size) {
1117
0
              inputpos = -size;
1118
0
              i = repetitions - 1;    /* Break out of for loop */
1119
1120
0
              if (repetitions >= 0) {
1121
0
                php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1122
0
              }
1123
0
            }
1124
0
            goto no_output;
1125
1126
0
          case '@':
1127
0
            if (repetitions <= inputlen) {
1128
0
              inputpos = repetitions;
1129
0
            } else {
1130
0
              php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1131
0
            }
1132
1133
0
            i = repetitions - 1;  /* Done, break out of for loop */
1134
0
            goto no_output;
1135
0
        }
1136
1137
0
        if (real_name) {
1138
0
          zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1139
0
        } else {
1140
0
          zend_hash_index_update(Z_ARRVAL_P(return_value), long_key, &val);
1141
0
        }
1142
1143
0
no_output:
1144
0
        if (real_name) {
1145
0
          zend_string_release_ex(real_name, false);
1146
0
        }
1147
1148
0
        inputpos += size;
1149
0
        if (inputpos < 0) {
1150
0
          if (size != -1) { /* only print warning if not working with * */
1151
0
            php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1152
0
          }
1153
0
          inputpos = 0;
1154
0
        }
1155
0
      } else if (repetitions < 0) {
1156
        /* Reached end of input for '*' repeater */
1157
0
        break;
1158
0
      } else {
1159
0
        php_error_docref(NULL, E_WARNING, "Type %c: not enough input values, need %d values but only " ZEND_LONG_FMT " %s provided", type, size, inputlen - inputpos, inputlen - inputpos == 1 ? "was" : "were");
1160
0
        zend_array_destroy(Z_ARR_P(return_value));
1161
0
        RETURN_FALSE;
1162
0
      }
1163
0
    }
1164
1165
0
    if (formatlen > 0) {
1166
0
      formatlen--;  /* Skip '/' separator, does no harm if inputlen == 0 */
1167
0
      format++;
1168
0
    }
1169
0
  }
1170
0
}
1171
/* }}} */