Coverage Report

Created: 2022-02-19 20:31

/src/php-src/ext/mbstring/mbstring.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | http://www.php.net/license/3_01.txt                                  |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
14
   |         Rui Hirokawa <hirokawa@php.net>                              |
15
   |         Hironori Sato <satoh@jpnnet.com>                             |
16
   |         Shigeru Kanemoto <sgk@happysize.co.jp>                       |
17
   +----------------------------------------------------------------------+
18
 */
19
20
/* {{{ includes */
21
#ifdef HAVE_CONFIG_H
22
#include "config.h"
23
#endif
24
25
#include "php.h"
26
#include "php_ini.h"
27
#include "php_variables.h"
28
#include "mbstring.h"
29
#include "ext/standard/php_string.h"
30
#include "ext/standard/php_mail.h"
31
#include "ext/standard/exec.h"
32
#include "ext/standard/url.h"
33
#include "main/php_output.h"
34
#include "ext/standard/info.h"
35
36
#include "libmbfl/mbfl/mbfl_allocators.h"
37
#include "libmbfl/mbfl/mbfilter_8bit.h"
38
#include "libmbfl/mbfl/mbfilter_pass.h"
39
#include "libmbfl/mbfl/mbfilter_wchar.h"
40
#include "libmbfl/filters/mbfilter_ascii.h"
41
#include "libmbfl/filters/mbfilter_base64.h"
42
#include "libmbfl/filters/mbfilter_qprint.h"
43
#include "libmbfl/filters/mbfilter_ucs4.h"
44
#include "libmbfl/filters/mbfilter_utf8.h"
45
46
#include "php_variables.h"
47
#include "php_globals.h"
48
#include "rfc1867.h"
49
#include "php_content_types.h"
50
#include "SAPI.h"
51
#include "php_unicode.h"
52
#include "TSRM.h"
53
54
#include "mb_gpc.h"
55
56
#ifdef HAVE_MBREGEX
57
# include "php_mbregex.h"
58
# include "php_onig_compat.h"
59
# include <oniguruma.h>
60
# undef UChar
61
# if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800
62
typedef void OnigMatchParam;
63
#define onig_new_match_param() (NULL)
64
#define onig_initialize_match_param(x) (void)(x)
65
#define onig_set_match_stack_limit_size_of_match_param(x, y)
66
#define onig_set_retry_limit_in_match_of_match_param(x, y)
67
#define onig_free_match_param(x)
68
#define onig_search_with_param(reg, str, end, start, range, region, option, mp) \
69
onig_search(reg, str, end, start, range, region, option)
70
#define onig_match_with_param(re, str, end, at, region, option, mp) \
71
onig_match(re, str, end, at, region, option)
72
# endif
73
#else
74
# include "ext/pcre/php_pcre.h"
75
#endif
76
77
#include "zend_multibyte.h"
78
#include "mbstring_arginfo.h"
79
/* }}} */
80
81
#ifdef HAVE_MBSTRING
82
83
/* {{{ prototypes */
84
ZEND_DECLARE_MODULE_GLOBALS(mbstring)
85
86
static PHP_GINIT_FUNCTION(mbstring);
87
static PHP_GSHUTDOWN_FUNCTION(mbstring);
88
89
static void php_mb_populate_current_detect_order_list(void);
90
91
static int php_mb_encoding_translation(void);
92
93
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);
94
95
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
96
97
static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
98
99
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
100
/* }}} */
101
102
/* {{{ php_mb_default_identify_list */
103
typedef struct _php_mb_nls_ident_list {
104
  enum mbfl_no_language lang;
105
  const enum mbfl_no_encoding *list;
106
  size_t list_size;
107
} php_mb_nls_ident_list;
108
109
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
110
  mbfl_no_encoding_ascii,
111
  mbfl_no_encoding_jis,
112
  mbfl_no_encoding_utf8,
113
  mbfl_no_encoding_euc_jp,
114
  mbfl_no_encoding_sjis
115
};
116
117
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
118
  mbfl_no_encoding_ascii,
119
  mbfl_no_encoding_utf8,
120
  mbfl_no_encoding_euc_cn,
121
  mbfl_no_encoding_cp936
122
};
123
124
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
125
  mbfl_no_encoding_ascii,
126
  mbfl_no_encoding_utf8,
127
  mbfl_no_encoding_euc_tw,
128
  mbfl_no_encoding_big5
129
};
130
131
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
132
  mbfl_no_encoding_ascii,
133
  mbfl_no_encoding_utf8,
134
  mbfl_no_encoding_euc_kr,
135
  mbfl_no_encoding_uhc
136
};
137
138
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
139
  mbfl_no_encoding_ascii,
140
  mbfl_no_encoding_utf8,
141
  mbfl_no_encoding_koi8r,
142
  mbfl_no_encoding_cp1251,
143
  mbfl_no_encoding_cp866
144
};
145
146
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
147
  mbfl_no_encoding_ascii,
148
  mbfl_no_encoding_utf8,
149
  mbfl_no_encoding_armscii8
150
};
151
152
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
153
  mbfl_no_encoding_ascii,
154
  mbfl_no_encoding_utf8,
155
  mbfl_no_encoding_cp1254,
156
  mbfl_no_encoding_8859_9
157
};
158
159
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
160
  mbfl_no_encoding_ascii,
161
  mbfl_no_encoding_utf8,
162
  mbfl_no_encoding_koi8u
163
};
164
165
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
166
  mbfl_no_encoding_ascii,
167
  mbfl_no_encoding_utf8
168
};
169
170
171
static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
172
  { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
173
  { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
174
  { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
175
  { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
176
  { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
177
  { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
178
  { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
179
  { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
180
  { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
181
};
182
183
/* }}} */
184
185
/* {{{ zend_module_entry mbstring_module_entry */
186
zend_module_entry mbstring_module_entry = {
187
  STANDARD_MODULE_HEADER,
188
  "mbstring",
189
  ext_functions,
190
  PHP_MINIT(mbstring),
191
  PHP_MSHUTDOWN(mbstring),
192
  PHP_RINIT(mbstring),
193
  PHP_RSHUTDOWN(mbstring),
194
  PHP_MINFO(mbstring),
195
  PHP_MBSTRING_VERSION,
196
  PHP_MODULE_GLOBALS(mbstring),
197
  PHP_GINIT(mbstring),
198
  PHP_GSHUTDOWN(mbstring),
199
  NULL,
200
  STANDARD_MODULE_PROPERTIES_EX
201
};
202
/* }}} */
203
204
/* {{{ static sapi_post_entry php_post_entries[] */
205
static const sapi_post_entry php_post_entries[] = {
206
  { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
207
  { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
208
  { NULL, 0, NULL, NULL }
209
};
210
/* }}} */
211
212
#ifdef COMPILE_DL_MBSTRING
213
#ifdef ZTS
214
ZEND_TSRMLS_CACHE_DEFINE()
215
#endif
216
ZEND_GET_MODULE(mbstring)
217
#endif
218
219
/* {{{ allocators */
220
static void *_php_mb_allocators_malloc(size_t sz)
221
1.25k
{
222
1.25k
  return emalloc(sz);
223
1.25k
}
224
225
static void *_php_mb_allocators_realloc(void *ptr, size_t sz)
226
11.4k
{
227
11.4k
  return erealloc(ptr, sz);
228
11.4k
}
229
230
static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem)
231
0
{
232
0
  return ecalloc(nelems, szelem);
233
0
}
234
235
static void _php_mb_allocators_free(void *ptr)
236
8.67k
{
237
8.67k
  efree(ptr);
238
8.67k
}
239
240
static const mbfl_allocators _php_mb_allocators = {
241
  _php_mb_allocators_malloc,
242
  _php_mb_allocators_realloc,
243
  _php_mb_allocators_calloc,
244
  _php_mb_allocators_free,
245
};
246
/* }}} */
247
248
/* {{{ static sapi_post_entry mbstr_post_entries[] */
249
static const sapi_post_entry mbstr_post_entries[] = {
250
  { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
251
  { MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
252
  { NULL, 0, NULL, NULL }
253
};
254
/* }}} */
255
256
0
/* Resolve `encoding_name` to an encoding.
 *
 * A NULL name yields the current internal encoding. Otherwise the most
 * recently resolved name/encoding pair is consulted first (same pointer or
 * case-insensitive name match); on a miss the name is looked up and, if
 * found, stored as the new cached pair. An unknown name raises an argument
 * value error for argument `arg_num` and returns NULL. */
static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
  const mbfl_encoding *resolved;
  zend_string *cached_name;

  if (!encoding_name) {
    return MBSTRG(current_internal_encoding);
  }

  cached_name = MBSTRG(last_used_encoding_name);
  if (cached_name != NULL) {
    if (cached_name == encoding_name
        || strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(cached_name)) == 0) {
      return MBSTRG(last_used_encoding);
    }
  }

  resolved = mbfl_name2encoding(ZSTR_VAL(encoding_name));
  if (resolved == NULL) {
    zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
    return NULL;
  }

  /* Replace the cached pair with the one just resolved. */
  if (cached_name != NULL) {
    zend_string_release(cached_name);
  }
  MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
  MBSTRG(last_used_encoding) = resolved;
  return resolved;
}
281
282
54.8k
static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
283
54.8k
  if (strcmp(encoding_name, "pass") == 0) {
284
0
    return &mbfl_encoding_pass;
285
0
  }
286
287
54.8k
  return mbfl_name2encoding(encoding_name);
288
54.8k
}
289
290
42.6k
/* Returns how many ',' bytes occur in the half-open range [p, end). */
static size_t count_commas(const char *p, const char *end) {
  size_t commas = 0;
  const char *cursor;

  for (cursor = p; cursor < end; cursor++) {
    if (*cursor == ',') {
      commas++;
    }
  }
  return commas;
}
298
299
/* {{{ static zend_result php_mb_parse_encoding_list()
300
 *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
301
 *  Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
302
 */
303
static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
304
  const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
305
  zend_bool allow_pass_encoding)
306
48.7k
{
307
48.7k
  if (value == NULL || value_length == 0) {
308
6.08k
    *return_list = NULL;
309
6.08k
    *return_size = 0;
310
6.08k
    return SUCCESS;
311
42.6k
  } else {
312
42.6k
    zend_bool included_auto;
313
42.6k
    size_t n, size;
314
42.6k
    char *p1, *endp, *tmpstr;
315
42.6k
    const mbfl_encoding **entry, **list;
316
317
    /* copy the value string for work */
318
42.6k
    if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
319
0
      tmpstr = (char *)estrndup(value+1, value_length-2);
320
0
      value_length -= 2;
321
42.6k
    } else {
322
42.6k
      tmpstr = (char *)estrndup(value, value_length);
323
42.6k
    }
324
325
42.6k
    endp = tmpstr + value_length;
326
42.6k
    size = 1 + count_commas(tmpstr, endp) + MBSTRG(default_detect_order_list_size);
327
42.6k
    list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
328
42.6k
    entry = list;
329
42.6k
    n = 0;
330
42.6k
    included_auto = 0;
331
42.6k
    p1 = tmpstr;
332
42.6k
    while (1) {
333
42.6k
      char *comma = (char *) php_memnstr(p1, ",", 1, endp);
334
42.6k
      char *p = comma ? comma : endp;
335
42.6k
      *p = '\0';
336
      /* trim spaces */
337
42.6k
      while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
338
0
        p1++;
339
0
      }
340
42.6k
      p--;
341
42.6k
      while (p > p1 && (*p == ' ' || *p == '\t')) {
342
0
        *p = '\0';
343
0
        p--;
344
0
      }
345
      /* convert to the encoding number and check encoding */
346
42.6k
      if (strcasecmp(p1, "auto") == 0) {
347
0
        if (!included_auto) {
348
0
          const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
349
0
          const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
350
0
          size_t i;
351
0
          included_auto = 1;
352
0
          for (i = 0; i < identify_list_size; i++) {
353
0
            *entry++ = mbfl_no2encoding(*src++);
354
0
            n++;
355
0
          }
356
0
        }
357
42.6k
      } else {
358
42.6k
        const mbfl_encoding *encoding =
359
42.6k
          allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
360
42.6k
        if (!encoding) {
361
          /* Called from an INI setting modification */
362
0
          if (arg_num == 0) {
363
0
            php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
364
0
          } else {
365
0
            zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
366
0
          }
367
0
          efree(tmpstr);
368
0
          pefree(ZEND_VOIDP(list), persistent);
369
0
          return FAILURE;
370
0
        }
371
372
42.6k
        *entry++ = encoding;
373
42.6k
        n++;
374
42.6k
      }
375
42.6k
      if (n >= size || comma == NULL) {
376
42.6k
        break;
377
42.6k
      }
378
0
      p1 = comma + 1;
379
0
    }
380
42.6k
    *return_list = list;
381
42.6k
    *return_size = n;
382
42.6k
    efree(tmpstr);
383
42.6k
  }
384
385
42.6k
  return SUCCESS;
386
48.7k
}
387
/* }}} */
388
389
/* {{{ static int php_mb_parse_encoding_array()
390
 *  Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
391
 *  Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
392
 */
393
static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
394
  size_t *return_size, uint32_t arg_num)
395
0
{
396
  /* Allocate enough space to include the default detect order if "auto" is used. */
397
0
  size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
398
0
  const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
399
0
  const mbfl_encoding **entry = list;
400
0
  zend_bool included_auto = 0;
401
0
  size_t n = 0;
402
0
  zval *hash_entry;
403
0
  ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
404
0
    zend_string *encoding_str = zval_try_get_string(hash_entry);
405
0
    if (UNEXPECTED(!encoding_str)) {
406
0
      efree(ZEND_VOIDP(list));
407
0
      return FAILURE;
408
0
    }
409
410
0
    if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
411
0
      if (!included_auto) {
412
0
        const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
413
0
        const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
414
0
        size_t j;
415
416
0
        included_auto = 1;
417
0
        for (j = 0; j < identify_list_size; j++) {
418
0
          *entry++ = mbfl_no2encoding(*src++);
419
0
          n++;
420
0
        }
421
0
      }
422
0
    } else {
423
0
      const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
424
0
      if (encoding) {
425
0
        *entry++ = encoding;
426
0
        n++;
427
0
      } else {
428
0
        zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
429
0
        zend_string_release(encoding_str);
430
0
        efree(ZEND_VOIDP(list));
431
0
        return FAILURE;
432
0
      }
433
0
    }
434
0
    zend_string_release(encoding_str);
435
0
  } ZEND_HASH_FOREACH_END();
436
0
  *return_list = list;
437
0
  *return_size = n;
438
0
  return SUCCESS;
439
0
}
440
/* }}} */
441
442
/* {{{ zend_multibyte interface */
443
static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
444
30.4k
{
445
30.4k
  return (const zend_encoding*)mbfl_name2encoding(encoding_name);
446
30.4k
}
447
448
static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
449
0
{
450
0
  return ((const mbfl_encoding *)encoding)->name;
451
0
}
452
453
static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
454
0
{
455
0
  const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
456
0
  if (encoding->flag & MBFL_ENCTYPE_SBCS) {
457
0
    return 1;
458
0
  }
459
0
  if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
460
0
    return 1;
461
0
  }
462
0
  return 0;
463
0
}
464
465
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
466
0
{
467
0
  mbfl_string string;
468
469
0
  if (!list) {
470
0
    list = (const zend_encoding **)MBSTRG(current_detect_order_list);
471
0
    list_size = MBSTRG(current_detect_order_list_size);
472
0
  }
473
474
0
  mbfl_string_init(&string);
475
0
  string.val = (unsigned char *)arg_string;
476
0
  string.len = arg_length;
477
0
  return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
478
0
}
479
480
static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
481
0
{
482
0
  mbfl_string string, result;
483
0
  mbfl_buffer_converter *convd;
484
0
  int status;
485
0
  size_t loc;
486
487
  /* new encoding */
488
  /* initialize string */
489
0
  string.encoding = (const mbfl_encoding*)encoding_from;
490
0
  string.val = (unsigned char*)from;
491
0
  string.len = from_length;
492
493
  /* initialize converter */
494
0
  convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
495
0
  if (convd == NULL) {
496
0
    return (size_t) -1;
497
0
  }
498
499
0
  mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
500
0
  mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
501
502
  /* do it */
503
0
  status = mbfl_buffer_converter_feed2(convd, &string, &loc);
504
0
  if (status) {
505
0
    mbfl_buffer_converter_delete(convd);
506
0
    return (size_t)-1;
507
0
  }
508
509
0
  mbfl_buffer_converter_flush(convd);
510
0
  mbfl_string_init(&result);
511
0
  if (!mbfl_buffer_converter_result(convd, &result)) {
512
0
    mbfl_buffer_converter_delete(convd);
513
0
    return (size_t)-1;
514
0
  }
515
516
0
  *to = result.val;
517
0
  *to_length = result.len;
518
519
0
  mbfl_buffer_converter_delete(convd);
520
521
0
  return loc;
522
0
}
523
524
static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
525
36.5k
{
526
36.5k
  return php_mb_parse_encoding_list(
527
36.5k
    encoding_list, encoding_list_len,
528
36.5k
    (const mbfl_encoding ***)return_list, return_size,
529
36.5k
    persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
530
36.5k
}
531
532
static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
533
0
{
534
0
  return (const zend_encoding *)MBSTRG(internal_encoding);
535
0
}
536
537
static zend_result php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
538
721k
{
539
721k
  MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
540
721k
  return SUCCESS;
541
721k
}
542
543
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
544
  "mbstring",
545
  php_mb_zend_encoding_fetcher,
546
  php_mb_zend_encoding_name_getter,
547
  php_mb_zend_encoding_lexer_compatibility_checker,
548
  php_mb_zend_encoding_detector,
549
  php_mb_zend_encoding_converter,
550
  php_mb_zend_encoding_list_parser,
551
  php_mb_zend_internal_encoding_getter,
552
  php_mb_zend_internal_encoding_setter
553
};
554
/* }}} */
555
556
static void *_php_mb_compile_regex(const char *pattern);
557
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
558
static void _php_mb_free_regex(void *opaque);
559
560
#ifdef HAVE_MBREGEX
561
/* {{{ _php_mb_compile_regex */
562
static void *_php_mb_compile_regex(const char *pattern)
563
6.08k
{
564
6.08k
  php_mb_regex_t *retval;
565
6.08k
  OnigErrorInfo err_info;
566
6.08k
  int err_code;
567
568
6.08k
  if ((err_code = onig_new(&retval,
569
6.08k
      (const OnigUChar *)pattern,
570
6.08k
      (const OnigUChar *)pattern + strlen(pattern),
571
6.08k
      ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
572
6.08k
      ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
573
0
    OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
574
0
    onig_error_code_to_str(err_str, err_code, err_info);
575
0
    php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str);
576
0
    retval = NULL;
577
0
  }
578
6.08k
  return retval;
579
6.08k
}
580
/* }}} */
581
582
/* {{{ _php_mb_match_regex */
583
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
584
0
{
585
0
  OnigMatchParam *mp = onig_new_match_param();
586
0
  int err;
587
0
  onig_initialize_match_param(mp);
588
0
  if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) {
589
0
    onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit));
590
0
  }
591
0
  if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) {
592
0
    onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit));
593
0
  }
594
  /* search */
595
0
  err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str,
596
0
    (const OnigUChar*)str + str_len, (const OnigUChar *)str,
597
0
    (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp);
598
0
  onig_free_match_param(mp);
599
0
  return err >= 0;
600
0
}
601
/* }}} */
602
603
/* {{{ _php_mb_free_regex */
604
static void _php_mb_free_regex(void *opaque)
605
0
{
606
0
  onig_free((php_mb_regex_t *)opaque);
607
0
}
608
/* }}} */
609
#else
610
/* {{{ _php_mb_compile_regex */
611
static void *_php_mb_compile_regex(const char *pattern)
612
{
613
  pcre2_code *retval;
614
  PCRE2_SIZE err_offset;
615
  int errnum;
616
617
  if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
618
      PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
619
    PCRE2_UCHAR err_str[128];
620
    pcre2_get_error_message(errnum, err_str, sizeof(err_str));
621
    php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
622
  }
623
  return retval;
624
}
625
/* }}} */
626
627
/* {{{ _php_mb_match_regex */
628
static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
629
{
630
  int res;
631
632
  pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
633
  if (NULL == match_data) {
634
    pcre2_code_free(opaque);
635
    php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
636
    return FAILURE;
637
  }
638
  res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
639
  php_pcre_free_match_data(match_data);
640
641
  return res;
642
}
643
/* }}} */
644
645
/* {{{ _php_mb_free_regex */
646
static void _php_mb_free_regex(void *opaque)
647
{
648
  pcre2_code_free(opaque);
649
}
650
/* }}} */
651
#endif
652
653
/* {{{ php_mb_nls_get_default_detect_order_list */
654
static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
655
6.08k
{
656
6.08k
  size_t i;
657
658
6.08k
  *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
659
6.08k
  *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
660
661
54.8k
  for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
662
54.8k
    if (php_mb_default_identify_list[i].lang == lang) {
663
6.08k
      *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
664
6.08k
      *plist_size = php_mb_default_identify_list[i].list_size;
665
6.08k
      return 1;
666
6.08k
    }
667
54.8k
  }
668
0
  return 0;
669
6.08k
}
670
/* }}} */
671
672
static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
673
0
{
674
0
  char *result = emalloc(len + 2);
675
0
  char *resp = result;
676
0
  size_t i;
677
678
0
  for (i = 0; i < len && start[i] != quote; ++i) {
679
0
    if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
680
0
      *resp++ = start[++i];
681
0
    } else {
682
0
      size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
683
684
0
      while (j-- > 0 && i < len) {
685
0
        *resp++ = start[i++];
686
0
      }
687
0
      --i;
688
0
    }
689
0
  }
690
691
0
  *resp = '\0';
692
0
  return result;
693
0
}
694
695
static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
696
0
{
697
0
  char *pos = *line, quote;
698
0
  char *res;
699
700
0
  while (*pos && *pos != stop) {
701
0
    if ((quote = *pos) == '"' || quote == '\'') {
702
0
      ++pos;
703
0
      while (*pos && *pos != quote) {
704
0
        if (*pos == '\\' && pos[1] && pos[1] == quote) {
705
0
          pos += 2;
706
0
        } else {
707
0
          ++pos;
708
0
        }
709
0
      }
710
0
      if (*pos) {
711
0
        ++pos;
712
0
      }
713
0
    } else {
714
0
      pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
715
716
0
    }
717
0
  }
718
0
  if (*pos == '\0') {
719
0
    res = estrdup(*line);
720
0
    *line += strlen(*line);
721
0
    return res;
722
0
  }
723
724
0
  res = estrndup(*line, pos - *line);
725
726
0
  while (*pos == stop) {
727
0
    pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
728
0
  }
729
730
0
  *line = pos;
731
0
  return res;
732
0
}
733
/* }}} */
734
735
/* Return a newly allocated copy of the first word in `str`.
 *
 * Leading whitespace is skipped. If the word starts with ' or ", the rest of
 * the string is handed to php_mb_rfc1867_substring_conf() with that quote as
 * terminator; otherwise the word ends at the next whitespace byte. An
 * all-whitespace or empty input yields an empty string. */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
  char *word_end;

  for (; *str && isspace(*(unsigned char *)str); ++str) {
    /* skip leading whitespace */
  }

  switch (*str) {
    case '\0':
      return estrdup("");

    case '"':
    case '\'': {
      char quote = *str;

      str++;
      return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote);
    }

    default:
      break;
  }

  for (word_end = str; *word_end && !isspace(*(unsigned char *)word_end); ++word_end) {
    /* scan to the end of the unquoted word */
  }
  return php_mb_rfc1867_substring_conf(encoding, str, word_end - str, 0);
}
759
/* }}} */
760
761
static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
762
0
{
763
0
  char *s, *s2;
764
0
  const size_t filename_len = strlen(filename);
765
766
  /* The \ check should technically be needed for win32 systems only where
767
   * it is a valid path separator. However, IE in all it's wisdom always sends
768
   * the full path of the file on the user's filesystem, which means that unless
769
   * the user does basename() they get a bogus file name. Until IE's user base drops
770
   * to nill or problem is fixed this code must remain enabled for all systems. */
771
0
  s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
772
0
  s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
773
774
0
  if (s && s2) {
775
0
    if (s > s2) {
776
0
      return ++s;
777
0
    } else {
778
0
      return ++s2;
779
0
    }
780
0
  } else if (s) {
781
0
    return ++s;
782
0
  } else if (s2) {
783
0
    return ++s2;
784
0
  } else {
785
0
    return filename;
786
0
  }
787
0
}
788
/* }}} */
789
790
/* {{{ php.ini directive handler */
791
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
792
static PHP_INI_MH(OnUpdate_mbstring_language)
793
6.08k
{
794
6.08k
  enum mbfl_no_language no_language;
795
796
6.08k
  no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
797
6.08k
  if (no_language == mbfl_no_language_invalid) {
798
0
    MBSTRG(language) = mbfl_no_language_neutral;
799
0
    return FAILURE;
800
0
  }
801
6.08k
  MBSTRG(language) = no_language;
802
6.08k
  php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
803
6.08k
  return SUCCESS;
804
6.08k
}
805
/* }}} */
806
807
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
808
static PHP_INI_MH(OnUpdate_mbstring_detect_order)
809
6.08k
{
810
6.08k
  const mbfl_encoding **list;
811
6.08k
  size_t size;
812
813
6.08k
  if (!new_value) {
814
6.08k
    if (MBSTRG(detect_order_list)) {
815
0
      pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
816
0
    }
817
6.08k
    MBSTRG(detect_order_list) = NULL;
818
6.08k
    MBSTRG(detect_order_list_size) = 0;
819
6.08k
    return SUCCESS;
820
6.08k
  }
821
822
0
  if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
823
0
    return FAILURE;
824
0
  }
825
826
0
  if (MBSTRG(detect_order_list)) {
827
0
    pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
828
0
  }
829
0
  MBSTRG(detect_order_list) = list;
830
0
  MBSTRG(detect_order_list_size) = size;
831
0
  return SUCCESS;
832
0
}
833
/* }}} */
834
835
12.1k
static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
836
12.1k
  const mbfl_encoding **list;
837
12.1k
  size_t size;
838
12.1k
  if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
839
0
    return FAILURE;
840
0
  }
841
12.1k
  if (MBSTRG(http_input_list)) {
842
6.08k
    pefree(ZEND_VOIDP(MBSTRG(http_input_list)), 1);
843
6.08k
  }
844
12.1k
  MBSTRG(http_input_list) = list;
845
12.1k
  MBSTRG(http_input_list_size) = size;
846
12.1k
  return SUCCESS;
847
12.1k
}
848
849
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
850
static PHP_INI_MH(OnUpdate_mbstring_http_input)
851
6.08k
{
852
6.08k
  if (new_value) {
853
0
    php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
854
0
  }
855
856
6.08k
  if (!new_value || !ZSTR_VAL(new_value)) {
857
6.08k
    const char *encoding = php_get_input_encoding();
858
6.08k
    MBSTRG(http_input_set) = 0;
859
6.08k
    _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
860
6.08k
    return SUCCESS;
861
6.08k
  }
862
863
0
  MBSTRG(http_input_set) = 1;
864
0
  return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
865
0
}
866
/* }}} */
867
868
12.1k
static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
869
12.1k
  const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
870
12.1k
  if (!encoding) {
871
0
    return FAILURE;
872
0
  }
873
874
12.1k
  MBSTRG(http_output_encoding) = encoding;
875
12.1k
  MBSTRG(current_http_output_encoding) = encoding;
876
12.1k
  return SUCCESS;
877
12.1k
}
878
879
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
880
static PHP_INI_MH(OnUpdate_mbstring_http_output)
881
6.08k
{
882
6.08k
  if (new_value) {
883
0
    php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
884
0
  }
885
886
6.08k
  if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
887
6.08k
    MBSTRG(http_output_set) = 0;
888
6.08k
    _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
889
6.08k
    return SUCCESS;
890
6.08k
  }
891
892
0
  MBSTRG(http_output_set) = 1;
893
0
  return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
894
0
}
895
/* }}} */
896
897
/* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
898
static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
899
12.1k
{
900
12.1k
  const mbfl_encoding *encoding;
901
902
12.1k
  if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
903
    /* falls back to UTF-8 if an unknown encoding name is given */
904
0
    if (new_value) {
905
0
      php_error_docref("ref.mbstring", E_WARNING,
906
0
        "Unknown encoding \"%s\" in ini setting", new_value);
907
0
    }
908
0
    encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
909
0
  }
910
12.1k
  MBSTRG(internal_encoding) = encoding;
911
12.1k
  MBSTRG(current_internal_encoding) = encoding;
912
12.1k
#ifdef HAVE_MBREGEX
913
12.1k
  {
914
12.1k
    const char *enc_name = new_value;
915
12.1k
    if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
916
      /* falls back to UTF-8 if an unknown encoding name is given */
917
0
      enc_name = "UTF-8";
918
0
      php_mb_regex_set_default_mbctype(enc_name);
919
0
    }
920
12.1k
    php_mb_regex_set_mbctype(new_value);
921
12.1k
  }
922
12.1k
#endif
923
12.1k
  return SUCCESS;
924
12.1k
}
925
/* }}} */
926
927
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
928
static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
929
6.08k
{
930
6.08k
  if (new_value) {
931
0
    php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
932
0
  }
933
934
6.08k
  if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
935
0
    return FAILURE;
936
0
  }
937
938
6.08k
  if (new_value && ZSTR_LEN(new_value)) {
939
0
    MBSTRG(internal_encoding_set) = 1;
940
0
    return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
941
6.08k
  } else {
942
6.08k
    const char *encoding = php_get_internal_encoding();
943
6.08k
    MBSTRG(internal_encoding_set) = 0;
944
6.08k
    return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
945
6.08k
  }
946
6.08k
}
947
/* }}} */
948
949
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
950
static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
951
6.08k
{
952
6.08k
  int c;
953
6.08k
  char *endptr = NULL;
954
955
6.08k
  if (new_value != NULL) {
956
0
    if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
957
0
      MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
958
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
959
0
    } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
960
0
      MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
961
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
962
0
    } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
963
0
      MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
964
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
965
0
    } else {
966
0
      MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
967
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
968
0
      if (ZSTR_LEN(new_value) > 0) {
969
0
        c = strtol(ZSTR_VAL(new_value), &endptr, 0);
970
0
        if (*endptr == '\0') {
971
0
          MBSTRG(filter_illegal_substchar) = c;
972
0
          MBSTRG(current_filter_illegal_substchar) = c;
973
0
        }
974
0
      }
975
0
    }
976
6.08k
  } else {
977
6.08k
    MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
978
6.08k
    MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
979
6.08k
    MBSTRG(filter_illegal_substchar) = 0x3f;  /* '?' */
980
6.08k
    MBSTRG(current_filter_illegal_substchar) = 0x3f;  /* '?' */
981
6.08k
  }
982
983
6.08k
  return SUCCESS;
984
6.08k
}
985
/* }}} */
986
987
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
988
static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
989
6.08k
{
990
6.08k
  if (new_value == NULL) {
991
0
    return FAILURE;
992
0
  }
993
994
6.08k
  OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
995
996
6.08k
  if (MBSTRG(encoding_translation)) {
997
0
    sapi_unregister_post_entry(php_post_entries);
998
0
    sapi_register_post_entries(mbstr_post_entries);
999
6.08k
  } else {
1000
6.08k
    sapi_unregister_post_entry(mbstr_post_entries);
1001
6.08k
    sapi_register_post_entries(php_post_entries);
1002
6.08k
  }
1003
1004
6.08k
  return SUCCESS;
1005
6.08k
}
1006
/* }}} */
1007
1008
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
1009
static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1010
6.08k
{
1011
6.08k
  zend_string *tmp;
1012
6.08k
  void *re = NULL;
1013
1014
6.08k
  if (!new_value) {
1015
0
    new_value = entry->orig_value;
1016
0
  }
1017
6.08k
  tmp = php_trim(new_value, NULL, 0, 3);
1018
1019
6.08k
  if (ZSTR_LEN(tmp) > 0) {
1020
6.08k
    if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
1021
0
      zend_string_release_ex(tmp, 0);
1022
0
      return FAILURE;
1023
0
    }
1024
6.08k
  }
1025
1026
6.08k
  if (MBSTRG(http_output_conv_mimetypes)) {
1027
0
    _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1028
0
  }
1029
1030
6.08k
  MBSTRG(http_output_conv_mimetypes) = re;
1031
1032
6.08k
  zend_string_release_ex(tmp, 0);
1033
6.08k
  return SUCCESS;
1034
6.08k
}
1035
/* }}} */
1036
/* }}} */
1037
1038
/* {{{ php.ini directive registration */
1039
/* Table of the mbstring.* ini directives. Each row gives the directive name,
 * its default value (NULL where there is none), the contexts in which it may
 * be changed, and the modification handler. The STD_ rows additionally name
 * the zend_mbstring_globals member the value is stored in. The two regex
 * limit directives exist only when HAVE_MBREGEX is defined. */
PHP_INI_BEGIN()
1040
  PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1041
  PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1042
  PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
1043
  PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
1044
  STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1045
  PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1046
1047
  STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1048
    PHP_INI_SYSTEM | PHP_INI_PERDIR,
1049
    OnUpdate_mbstring_encoding_translation,
1050
    encoding_translation, zend_mbstring_globals, mbstring_globals)
1051
  PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1052
    "^(text/|application/xhtml\\+xml)",
1053
    PHP_INI_ALL,
1054
    OnUpdate_mbstring_http_output_conv_mimetypes)
1055
1056
  STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1057
    PHP_INI_ALL,
1058
    OnUpdateBool,
1059
    strict_detection, zend_mbstring_globals, mbstring_globals)
1060
#ifdef HAVE_MBREGEX
1061
  STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
1062
  STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
1063
#endif
1064
PHP_INI_END()
1065
/* }}} */
1066
1067
6.08k
static void mbstring_internal_encoding_changed_hook(void) {
1068
  /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
1069
6.08k
  if (!MBSTRG(internal_encoding_set)) {
1070
6.08k
    const char *encoding = php_get_internal_encoding();
1071
6.08k
    _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
1072
6.08k
  }
1073
1074
6.08k
  if (!MBSTRG(http_output_set)) {
1075
6.08k
    const char *encoding = php_get_output_encoding();
1076
6.08k
    _php_mb_ini_mbstring_http_output_set(encoding);
1077
6.08k
  }
1078
1079
6.08k
  if (!MBSTRG(http_input_set)) {
1080
6.08k
    const char *encoding = php_get_input_encoding();
1081
6.08k
    _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
1082
6.08k
  }
1083
6.08k
}
1084
1085
/* {{{ module global initialize handler */
1086
static PHP_GINIT_FUNCTION(mbstring)
1087
6.08k
{
1088
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1089
ZEND_TSRMLS_CACHE_UPDATE();
1090
#endif
1091
1092
6.08k
  mbstring_globals->language = mbfl_no_language_uni;
1093
6.08k
  mbstring_globals->internal_encoding = NULL;
1094
6.08k
  mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1095
6.08k
  mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1096
6.08k
  mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1097
6.08k
  mbstring_globals->http_input_identify = NULL;
1098
6.08k
  mbstring_globals->http_input_identify_get = NULL;
1099
6.08k
  mbstring_globals->http_input_identify_post = NULL;
1100
6.08k
  mbstring_globals->http_input_identify_cookie = NULL;
1101
6.08k
  mbstring_globals->http_input_identify_string = NULL;
1102
6.08k
  mbstring_globals->http_input_list = NULL;
1103
6.08k
  mbstring_globals->http_input_list_size = 0;
1104
6.08k
  mbstring_globals->detect_order_list = NULL;
1105
6.08k
  mbstring_globals->detect_order_list_size = 0;
1106
6.08k
  mbstring_globals->current_detect_order_list = NULL;
1107
6.08k
  mbstring_globals->current_detect_order_list_size = 0;
1108
6.08k
  mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1109
6.08k
  mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1110
6.08k
  mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1111
6.08k
  mbstring_globals->filter_illegal_substchar = 0x3f;  /* '?' */
1112
6.08k
  mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1113
6.08k
  mbstring_globals->current_filter_illegal_substchar = 0x3f;  /* '?' */
1114
6.08k
  mbstring_globals->illegalchars = 0;
1115
6.08k
  mbstring_globals->encoding_translation = 0;
1116
6.08k
  mbstring_globals->strict_detection = 0;
1117
6.08k
  mbstring_globals->outconv = NULL;
1118
6.08k
  mbstring_globals->http_output_conv_mimetypes = NULL;
1119
6.08k
#ifdef HAVE_MBREGEX
1120
6.08k
  mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1121
6.08k
#endif
1122
6.08k
  mbstring_globals->last_used_encoding_name = NULL;
1123
6.08k
  mbstring_globals->last_used_encoding = NULL;
1124
6.08k
  mbstring_globals->internal_encoding_set = 0;
1125
6.08k
  mbstring_globals->http_output_set = 0;
1126
6.08k
  mbstring_globals->http_input_set = 0;
1127
6.08k
}
1128
/* }}} */
1129
1130
/* {{{ PHP_GSHUTDOWN_FUNCTION */
1131
static PHP_GSHUTDOWN_FUNCTION(mbstring)
1132
0
{
1133
0
  if (mbstring_globals->http_input_list) {
1134
0
    free(ZEND_VOIDP(mbstring_globals->http_input_list));
1135
0
  }
1136
0
  if (mbstring_globals->detect_order_list) {
1137
0
    free(ZEND_VOIDP(mbstring_globals->detect_order_list));
1138
0
  }
1139
0
  if (mbstring_globals->http_output_conv_mimetypes) {
1140
0
    _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1141
0
  }
1142
0
#ifdef HAVE_MBREGEX
1143
0
  php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1144
0
#endif
1145
0
}
1146
/* }}} */
1147
1148
/* {{{ PHP_MINIT_FUNCTION(mbstring) */
1149
PHP_MINIT_FUNCTION(mbstring)
1150
6.08k
{
1151
#if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1152
ZEND_TSRMLS_CACHE_UPDATE();
1153
#endif
1154
6.08k
  __mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators;
1155
1156
6.08k
  REGISTER_INI_ENTRIES();
1157
1158
  /* We assume that we're the only user of the hook. */
1159
6.08k
  ZEND_ASSERT(php_internal_encoding_changed == NULL);
1160
6.08k
  php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
1161
6.08k
  mbstring_internal_encoding_changed_hook();
1162
1163
  /* This is a global handler. Should not be set in a per-request handler. */
1164
6.08k
  sapi_register_treat_data(mbstr_treat_data);
1165
1166
  /* Post handlers are stored in the thread-local context. */
1167
6.08k
  if (MBSTRG(encoding_translation)) {
1168
0
    sapi_register_post_entries(mbstr_post_entries);
1169
0
  }
1170
1171
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1172
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1173
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1174
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
1175
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1176
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1177
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
1178
6.08k
  REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
1179
1180
6.08k
#ifdef HAVE_MBREGEX
1181
6.08k
  PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1182
6.08k
#endif
1183
1184
6.08k
  if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1185
0
    return FAILURE;
1186
0
  }
1187
1188
6.08k
  php_rfc1867_set_multibyte_callbacks(
1189
6.08k
    php_mb_encoding_translation,
1190
6.08k
    php_mb_gpc_get_detect_order,
1191
6.08k
    php_mb_gpc_set_input_encoding,
1192
6.08k
    php_mb_rfc1867_getword,
1193
6.08k
    php_mb_rfc1867_getword_conf,
1194
6.08k
    php_mb_rfc1867_basename);
1195
1196
6.08k
  return SUCCESS;
1197
6.08k
}
1198
/* }}} */
1199
1200
/* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
1201
PHP_MSHUTDOWN_FUNCTION(mbstring)
1202
0
{
1203
0
  UNREGISTER_INI_ENTRIES();
1204
1205
0
  zend_multibyte_restore_functions();
1206
1207
0
#ifdef HAVE_MBREGEX
1208
0
  PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1209
0
#endif
1210
1211
0
  php_internal_encoding_changed = NULL;
1212
1213
0
  return SUCCESS;
1214
0
}
1215
/* }}} */
1216
1217
/* {{{ PHP_RINIT_FUNCTION(mbstring) */
1218
PHP_RINIT_FUNCTION(mbstring)
1219
721k
{
1220
721k
  MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1221
721k
  MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1222
721k
  MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1223
721k
  MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1224
1225
721k
  MBSTRG(illegalchars) = 0;
1226
1227
721k
  php_mb_populate_current_detect_order_list();
1228
1229
721k
#ifdef HAVE_MBREGEX
1230
721k
  PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1231
721k
#endif
1232
721k
  zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1233
1234
721k
  return SUCCESS;
1235
721k
}
1236
/* }}} */
1237
1238
/* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
1239
PHP_RSHUTDOWN_FUNCTION(mbstring)
1240
719k
{
1241
719k
  if (MBSTRG(current_detect_order_list) != NULL) {
1242
719k
    efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1243
719k
    MBSTRG(current_detect_order_list) = NULL;
1244
719k
    MBSTRG(current_detect_order_list_size) = 0;
1245
719k
  }
1246
719k
  if (MBSTRG(outconv) != NULL) {
1247
0
    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1248
0
    mbfl_buffer_converter_delete(MBSTRG(outconv));
1249
0
    MBSTRG(outconv) = NULL;
1250
0
  }
1251
1252
  /* clear http input identification. */
1253
719k
  MBSTRG(http_input_identify) = NULL;
1254
719k
  MBSTRG(http_input_identify_post) = NULL;
1255
719k
  MBSTRG(http_input_identify_get) = NULL;
1256
719k
  MBSTRG(http_input_identify_cookie) = NULL;
1257
719k
  MBSTRG(http_input_identify_string) = NULL;
1258
1259
719k
  if (MBSTRG(last_used_encoding_name)) {
1260
0
    zend_string_release(MBSTRG(last_used_encoding_name));
1261
0
    MBSTRG(last_used_encoding_name) = NULL;
1262
0
  }
1263
1264
719k
  MBSTRG(internal_encoding_set) = 0;
1265
719k
  MBSTRG(http_output_set) = 0;
1266
719k
  MBSTRG(http_input_set) = 0;
1267
1268
719k
#ifdef HAVE_MBREGEX
1269
719k
  PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1270
719k
#endif
1271
1272
719k
  return SUCCESS;
1273
719k
}
1274
/* }}} */
1275
1276
/* {{{ PHP_MINFO_FUNCTION(mbstring) */
1277
PHP_MINFO_FUNCTION(mbstring)
1278
19
{
1279
19
  php_info_print_table_start();
1280
19
  php_info_print_table_row(2, "Multibyte Support", "enabled");
1281
19
  php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1282
19
  php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1283
19
  {
1284
19
    char tmp[256];
1285
19
    snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1286
19
    php_info_print_table_row(2, "libmbfl version", tmp);
1287
19
  }
1288
19
  php_info_print_table_end();
1289
1290
19
  php_info_print_table_start();
1291
19
  php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1292
19
  php_info_print_table_end();
1293
1294
19
#ifdef HAVE_MBREGEX
1295
19
  PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1296
19
#endif
1297
1298
19
  DISPLAY_INI_ENTRIES();
1299
19
}
1300
/* }}} */
1301
1302
/* {{{ Sets the current language or Returns the current language as a string */
1303
PHP_FUNCTION(mb_language)
1304
0
{
1305
0
  zend_string *name = NULL;
1306
1307
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S!", &name) == FAILURE) {
1308
0
    RETURN_THROWS();
1309
0
  }
1310
0
  if (name == NULL) {
1311
0
    RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1312
0
  } else {
1313
0
    zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1314
0
    if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1315
0
      zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
1316
0
      zend_string_release_ex(ini_name, 0);
1317
0
      RETURN_THROWS();
1318
0
    }
1319
    // TODO Make return void
1320
0
    RETVAL_TRUE;
1321
0
    zend_string_release_ex(ini_name, 0);
1322
0
  }
1323
0
}
1324
/* }}} */
1325
1326
/* {{{ Sets the current internal encoding or Returns the current internal encoding as a string */
1327
PHP_FUNCTION(mb_internal_encoding)
1328
0
{
1329
0
  const char *name = NULL;
1330
0
  size_t name_len;
1331
0
  const mbfl_encoding *encoding;
1332
1333
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &name, &name_len) == FAILURE) {
1334
0
    RETURN_THROWS();
1335
0
  }
1336
0
  if (name == NULL) {
1337
0
    ZEND_ASSERT(MBSTRG(current_internal_encoding));
1338
0
    RETURN_STRING(MBSTRG(current_internal_encoding)->name);
1339
0
  } else {
1340
0
    encoding = mbfl_name2encoding(name);
1341
0
    if (!encoding) {
1342
0
      zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1343
0
      RETURN_THROWS();
1344
0
    } else {
1345
0
      MBSTRG(current_internal_encoding) = encoding;
1346
0
      MBSTRG(internal_encoding_set) = 1;
1347
      /* TODO Return old encoding */
1348
0
      RETURN_TRUE;
1349
0
    }
1350
0
  }
1351
0
}
1352
/* }}} */
1353
1354
/* {{{ Returns the input encoding */
1355
PHP_FUNCTION(mb_http_input)
1356
0
{
1357
0
  char *typ = NULL;
1358
0
  size_t typ_len = 0;
1359
0
  int retname;
1360
0
  char *list, *temp;
1361
0
  const mbfl_encoding *result = NULL;
1362
1363
0
  retname = 1;
1364
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &typ, &typ_len) == FAILURE) {
1365
0
    RETURN_THROWS();
1366
0
  }
1367
0
  if (typ == NULL) {
1368
0
    result = MBSTRG(http_input_identify);
1369
0
  } else {
1370
0
    switch (*typ) {
1371
0
    case 'G':
1372
0
    case 'g':
1373
0
      result = MBSTRG(http_input_identify_get);
1374
0
      break;
1375
0
    case 'P':
1376
0
    case 'p':
1377
0
      result = MBSTRG(http_input_identify_post);
1378
0
      break;
1379
0
    case 'C':
1380
0
    case 'c':
1381
0
      result = MBSTRG(http_input_identify_cookie);
1382
0
      break;
1383
0
    case 'S':
1384
0
    case 's':
1385
0
      result = MBSTRG(http_input_identify_string);
1386
0
      break;
1387
0
    case 'I':
1388
0
    case 'i':
1389
0
      {
1390
0
        const mbfl_encoding **entry = MBSTRG(http_input_list);
1391
0
        const size_t n = MBSTRG(http_input_list_size);
1392
0
        size_t i;
1393
0
        array_init(return_value);
1394
0
        for (i = 0; i < n; i++) {
1395
0
          add_next_index_string(return_value, (*entry)->name);
1396
0
          entry++;
1397
0
        }
1398
0
        retname = 0;
1399
0
      }
1400
0
      break;
1401
0
    case 'L':
1402
0
    case 'l':
1403
0
      {
1404
0
        const mbfl_encoding **entry = MBSTRG(http_input_list);
1405
0
        const size_t n = MBSTRG(http_input_list_size);
1406
0
        size_t i;
1407
0
        list = NULL;
1408
0
        for (i = 0; i < n; i++) {
1409
0
          if (list) {
1410
0
            temp = list;
1411
0
            spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1412
0
            efree(temp);
1413
0
            if (!list) {
1414
0
              break;
1415
0
            }
1416
0
          } else {
1417
0
            list = estrdup((*entry)->name);
1418
0
          }
1419
0
          entry++;
1420
0
        }
1421
0
      }
1422
0
      if (!list) {
1423
        // TODO should return empty string?
1424
0
        RETURN_FALSE;
1425
0
      }
1426
0
      RETVAL_STRING(list);
1427
0
      efree(list);
1428
0
      retname = 0;
1429
0
      break;
1430
0
    default:
1431
      // TODO ValueError
1432
0
      result = MBSTRG(http_input_identify);
1433
0
      break;
1434
0
    }
1435
0
  }
1436
1437
  // FIXME this bloc seems useless except for default switch case
1438
0
  if (retname) {
1439
0
    if (result) {
1440
0
      RETVAL_STRING(result->name);
1441
0
    } else {
1442
0
      RETVAL_FALSE;
1443
0
    }
1444
0
  }
1445
0
}
1446
/* }}} */
1447
1448
/* {{{ Sets the current output_encoding or returns the current output_encoding as a string */
1449
PHP_FUNCTION(mb_http_output)
1450
0
{
1451
0
  const char *name = NULL;
1452
0
  size_t name_len;
1453
0
  const mbfl_encoding *encoding;
1454
1455
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &name, &name_len) == FAILURE) {
1456
0
    RETURN_THROWS();
1457
0
  }
1458
1459
0
  if (name == NULL) {
1460
0
    ZEND_ASSERT(MBSTRG(current_http_output_encoding));
1461
0
    RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
1462
0
  } else {
1463
0
    encoding = php_mb_get_encoding_or_pass(name);
1464
0
    if (!encoding) {
1465
0
      zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1466
0
      RETURN_THROWS();
1467
0
    } else {
1468
0
      MBSTRG(http_output_set) = 1;
1469
0
      MBSTRG(current_http_output_encoding) = encoding;
1470
      /* TODO Return previous encoding? */
1471
0
      RETURN_TRUE;
1472
0
    }
1473
0
  }
1474
0
}
1475
/* }}} */
1476
1477
/* {{{ Sets the current detect_order or returns the current detect_order as an array */
1478
PHP_FUNCTION(mb_detect_order)
1479
0
{
1480
0
  zend_string *order_str = NULL;
1481
0
  HashTable *order_ht = NULL;
1482
1483
0
  ZEND_PARSE_PARAMETERS_START(0, 1)
1484
0
    Z_PARAM_OPTIONAL
1485
0
    Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(order_str, order_ht)
1486
0
  ZEND_PARSE_PARAMETERS_END();
1487
1488
0
  if (!order_str && !order_ht) {
1489
0
    size_t i;
1490
0
    size_t n = MBSTRG(current_detect_order_list_size);
1491
0
    const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1492
0
    array_init(return_value);
1493
0
    for (i = 0; i < n; i++) {
1494
0
      add_next_index_string(return_value, (*entry)->name);
1495
0
      entry++;
1496
0
    }
1497
0
  } else {
1498
0
    const mbfl_encoding **list;
1499
0
    size_t size;
1500
0
    if (order_ht) {
1501
0
      if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
1502
0
        RETURN_THROWS();
1503
0
      }
1504
0
    } else {
1505
0
      if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
1506
0
        RETURN_THROWS();
1507
0
      }
1508
0
    }
1509
1510
0
    if (size == 0) {
1511
0
      efree(ZEND_VOIDP(list));
1512
0
      zend_argument_value_error(1, "must specify at least one encoding");
1513
0
      RETURN_THROWS();
1514
0
    }
1515
1516
0
    if (MBSTRG(current_detect_order_list)) {
1517
0
      efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1518
0
    }
1519
0
    MBSTRG(current_detect_order_list) = list;
1520
0
    MBSTRG(current_detect_order_list_size) = size;
1521
0
    RETURN_TRUE;
1522
0
  }
1523
0
}
1524
/* }}} */
1525
1526
/* Returns 1 if cp may be used as a substitute character, 0 otherwise.
 *
 * Rejected: values outside the Unicode range [0, 0x10FFFF] and surrogate
 * code points (0xD800-0xDFFF), which are never valid on their own while only
 * a single substitute character is allowed.
 *
 * Since the target encoding of the conversion that will eventually use the
 * substitute character is not known here, whether the code point is actually
 * mapped in that encoding cannot be checked; everything else is accepted. */
static inline int php_mb_check_code_point(zend_long cp)
{
  int in_unicode_range = (cp >= 0 && cp < 0x110000);
  int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

  return (in_unicode_range && !is_surrogate) ? 1 : 0;
}
1544
1545
/* {{{ Sets the current substitute_character or returns the current substitute_character */
1546
PHP_FUNCTION(mb_substitute_character)
1547
0
{
1548
0
  zend_string *substitute_character = NULL;
1549
0
  zend_long substitute_codepoint;
1550
0
  zend_bool substitute_is_null = 1;
1551
1552
0
  ZEND_PARSE_PARAMETERS_START(0, 1)
1553
0
    Z_PARAM_OPTIONAL
1554
0
    Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
1555
0
  ZEND_PARSE_PARAMETERS_END();
1556
1557
0
  if (substitute_is_null) {
1558
0
    if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1559
0
      RETURN_STRING("none");
1560
0
    }
1561
0
    if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1562
0
      RETURN_STRING("long");
1563
0
    }
1564
0
    if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1565
0
      RETURN_STRING("entity");
1566
0
    }
1567
0
    RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1568
0
  }
1569
1570
0
  if (substitute_character != NULL) {
1571
0
    if (zend_string_equals_literal_ci(substitute_character, "none")) {
1572
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1573
0
      RETURN_TRUE;
1574
0
    }
1575
0
    if (zend_string_equals_literal_ci(substitute_character, "long")) {
1576
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1577
0
      RETURN_TRUE;
1578
0
    }
1579
0
    if (zend_string_equals_literal_ci(substitute_character, "entity")) {
1580
0
      MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1581
0
      RETURN_TRUE;
1582
0
    }
1583
    /* Invalid string value */
1584
0
    zend_argument_value_error(1, "must be \"none\", \"long\", \"entity\" or a valid codepoint");
1585
0
    RETURN_THROWS();
1586
0
  }
1587
  /* Integer codepoint passed */
1588
0
  if (!php_mb_check_code_point(substitute_codepoint)) {
1589
0
    zend_argument_value_error(1, "is not a valid codepoint");
1590
0
    RETURN_THROWS();
1591
0
  }
1592
1593
0
  MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1594
0
  MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
1595
0
  RETURN_TRUE;
1596
0
}
1597
/* }}} */
1598
1599
/* {{{ Return the preferred MIME name (charset) as a string */
1600
PHP_FUNCTION(mb_preferred_mime_name)
1601
0
{
1602
0
  enum mbfl_no_encoding no_encoding;
1603
0
  char *name = NULL;
1604
0
  size_t name_len;
1605
1606
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
1607
0
    RETURN_THROWS();
1608
0
  }
1609
1610
0
  no_encoding = mbfl_name2no_encoding(name);
1611
0
  if (no_encoding == mbfl_no_encoding_invalid) {
1612
0
    zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1613
0
    RETURN_THROWS();
1614
0
  }
1615
1616
0
  const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1617
0
  if (preferred_name == NULL || *preferred_name == '\0') {
1618
0
    php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1619
0
    RETVAL_FALSE;
1620
0
  } else {
1621
0
    RETVAL_STRING((char *)preferred_name);
1622
0
  }
1623
0
}
1624
/* }}} */
1625
1626
/* 1 if c lies in 0x81-0x9f or 0xe0-0xf5, else 0. Evaluates c more than once. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
1627
#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
1628
1629
/* {{{ Parses GET/POST/COOKIE data and sets global variables */
1630
PHP_FUNCTION(mb_parse_str)
1631
431
{
1632
431
  zval *track_vars_array;
1633
431
  char *encstr;
1634
431
  size_t encstr_len;
1635
431
  php_mb_encoding_handler_info_t info;
1636
431
  const mbfl_encoding *detected;
1637
1638
431
  track_vars_array = NULL;
1639
431
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
1640
6
    RETURN_THROWS();
1641
6
  }
1642
1643
425
  track_vars_array = zend_try_array_init(track_vars_array);
1644
425
  if (!track_vars_array) {
1645
0
    RETURN_THROWS();
1646
0
  }
1647
1648
425
  encstr = estrndup(encstr, encstr_len);
1649
1650
425
  info.data_type              = PARSE_STRING;
1651
425
  info.separator              = PG(arg_separator).input;
1652
425
  info.report_errors          = 1;
1653
425
  info.to_encoding            = MBSTRG(current_internal_encoding);
1654
425
  info.to_language            = MBSTRG(language);
1655
425
  info.from_encodings         = MBSTRG(http_input_list);
1656
425
  info.num_from_encodings     = MBSTRG(http_input_list_size);
1657
425
  info.from_language          = MBSTRG(language);
1658
1659
425
  detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
1660
1661
425
  MBSTRG(http_input_identify) = detected;
1662
1663
425
  RETVAL_BOOL(detected);
1664
1665
425
  if (encstr != NULL) efree(encstr);
1666
425
}
1667
/* }}} */
1668
1669
/* {{{ Returns string in output buffer converted to the http_output encoding */
1670
PHP_FUNCTION(mb_output_handler)
1671
0
{
1672
0
  char *arg_string;
1673
0
  size_t arg_string_len;
1674
0
  zend_long arg_status;
1675
0
  mbfl_string string, result;
1676
0
  const char *charset;
1677
0
  char *p;
1678
0
  const mbfl_encoding *encoding;
1679
0
  int last_feed;
1680
0
  size_t len;
1681
0
  unsigned char send_text_mimetype = 0;
1682
0
  char *s, *mimetype = NULL;
1683
1684
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
1685
0
    RETURN_THROWS();
1686
0
  }
1687
1688
0
  encoding = MBSTRG(current_http_output_encoding);
1689
1690
  /* start phase only */
1691
0
  if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1692
    /* delete the converter just in case. */
1693
0
    if (MBSTRG(outconv)) {
1694
0
      MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1695
0
      mbfl_buffer_converter_delete(MBSTRG(outconv));
1696
0
      MBSTRG(outconv) = NULL;
1697
0
      }
1698
0
    if (encoding == &mbfl_encoding_pass) {
1699
0
      RETURN_STRINGL(arg_string, arg_string_len);
1700
0
    }
1701
1702
    /* analyze mime type */
1703
0
    if (SG(sapi_headers).mimetype &&
1704
0
      _php_mb_match_regex(
1705
0
        MBSTRG(http_output_conv_mimetypes),
1706
0
        SG(sapi_headers).mimetype,
1707
0
        strlen(SG(sapi_headers).mimetype))) {
1708
0
      if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
1709
0
        mimetype = estrdup(SG(sapi_headers).mimetype);
1710
0
      } else {
1711
0
        mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1712
0
      }
1713
0
      send_text_mimetype = 1;
1714
0
    } else if (SG(sapi_headers).send_default_content_type) {
1715
0
      mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1716
0
    }
1717
1718
    /* if content-type is not yet set, set it and activate the converter */
1719
0
    if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1720
0
      charset = encoding->mime_name;
1721
0
      if (charset) {
1722
0
        len = spprintf( &p, 0, "Content-Type: %s; charset=%s",  mimetype, charset );
1723
0
        if (sapi_add_header(p, len, 0) != FAILURE) {
1724
0
          SG(sapi_headers).send_default_content_type = 0;
1725
0
        }
1726
0
      }
1727
      /* activate the converter */
1728
0
      MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1729
0
      if (send_text_mimetype){
1730
0
        efree(mimetype);
1731
0
      }
1732
0
    }
1733
0
    }
1734
1735
  /* just return if the converter is not activated. */
1736
0
  if (MBSTRG(outconv) == NULL) {
1737
0
    RETURN_STRINGL(arg_string, arg_string_len);
1738
0
  }
1739
1740
  /* flag */
1741
0
  last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1742
  /* mode */
1743
0
  mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1744
0
  mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1745
1746
  /* feed the string */
1747
0
  mbfl_string_init(&string);
1748
  /* these are not needed. convd has encoding info.
1749
  string.encoding = MBSTRG(current_internal_encoding);
1750
  */
1751
0
  string.val = (unsigned char *)arg_string;
1752
0
  string.len = arg_string_len;
1753
0
  mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
1754
0
  if (last_feed) {
1755
0
    mbfl_buffer_converter_flush(MBSTRG(outconv));
1756
0
  }
1757
  /* get the converter output, and return it */
1758
0
  mbfl_buffer_converter_result(MBSTRG(outconv), &result);
1759
  // TODO: avoid reallocation ???
1760
0
  RETVAL_STRINGL((char *)result.val, result.len);   /* the string is already strdup()'ed */
1761
0
  efree(result.val);
1762
1763
  /* delete the converter if it is the last feed. */
1764
0
  if (last_feed) {
1765
0
    MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1766
0
    mbfl_buffer_converter_delete(MBSTRG(outconv));
1767
0
    MBSTRG(outconv) = NULL;
1768
0
  }
1769
0
}
1770
/* }}} */
1771
1772
/* {{{ Convert a multibyte string to an array. If split_length is specified,
1773
 break the string down into chunks each split_length characters long. */
1774
1775
/* structure to pass split params to the callback */
1776
struct mbfl_split_params {
1777
    zval *return_value; /* php function return value structure pointer */
1778
    mbfl_string *result_string; /* string to store result chunk */
1779
    size_t mb_chunk_length; /* actual chunk length in chars */
1780
    size_t split_length; /* split length in chars */
1781
    mbfl_convert_filter *next_filter; /* widechar to encoding converter */
1782
};
1783
1784
/* callback function to fill split array */
1785
static int mbfl_split_output(int c, void *data)
1786
0
{
1787
0
    struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
1788
1789
0
    (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
1790
1791
0
    if(params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
1792
0
        mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
1793
0
        mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
1794
0
        mbfl_string *chunk = params->result_string;
1795
0
        mbfl_memory_device_result(device, chunk); /* make chunk */
1796
0
        add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
1797
0
        efree(chunk->val);
1798
0
        params->mb_chunk_length = 0; /* reset mb_chunk size */
1799
0
    }
1800
0
    return 0;
1801
0
}
1802
1803
/* TODO Document this function on php.net */
1804
PHP_FUNCTION(mb_str_split)
1805
0
{
1806
0
  zend_string *str, *encoding = NULL;
1807
0
  size_t mb_len, chunks, chunk_len;
1808
0
  const char *p, *last; /* pointer for the string cursor and last string char */
1809
0
  mbfl_string string, result_string;
1810
0
  const mbfl_encoding *mbfl_encoding;
1811
0
  zend_long split_length = 1;
1812
1813
0
  ZEND_PARSE_PARAMETERS_START(1, 3)
1814
0
    Z_PARAM_STR(str)
1815
0
    Z_PARAM_OPTIONAL
1816
0
    Z_PARAM_LONG(split_length)
1817
0
    Z_PARAM_STR_OR_NULL(encoding)
1818
0
  ZEND_PARSE_PARAMETERS_END();
1819
1820
0
  if (split_length <= 0) {
1821
0
    zend_argument_value_error(2, "must be greater than 0");
1822
0
    RETURN_THROWS();
1823
0
  }
1824
1825
  /* fill mbfl_string structure */
1826
0
  string.val = (unsigned char *) ZSTR_VAL(str);
1827
0
  string.len = ZSTR_LEN(str);
1828
0
  string.encoding = php_mb_get_encoding(encoding, 3);
1829
0
  if (!string.encoding) {
1830
0
    RETURN_THROWS();
1831
0
  }
1832
1833
0
  p = ZSTR_VAL(str); /* string cursor pointer */
1834
0
  last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
1835
1836
0
  mbfl_encoding = string.encoding;
1837
1838
  /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1839
0
  if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
1840
0
    mb_len = string.len;
1841
0
    chunk_len = (size_t)split_length; /* chunk length in bytes */
1842
0
  } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
1843
0
    mb_len = string.len / 2;
1844
0
    chunk_len = split_length * 2;
1845
0
  } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
1846
0
    mb_len = string.len / 4;
1847
0
    chunk_len = split_length * 4;
1848
0
  } else if (mbfl_encoding->mblen_table != NULL) {
1849
    /* second scenario: variable width encodings with length table */
1850
0
    char unsigned const *mbtab = mbfl_encoding->mblen_table;
1851
1852
    /* assume that we have 1-bytes characters */
1853
0
    array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1854
1855
0
    while (p < last) { /* split cycle work until the cursor has reached the last byte */
1856
0
      char const *chunk_p = p; /* chunk first byte pointer */
1857
0
      chunk_len = 0; /* chunk length in bytes */
1858
0
      zend_long char_count;
1859
1860
0
      for (char_count = 0; char_count < split_length && p < last; ++char_count) {
1861
0
        char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
1862
0
        chunk_len += m;
1863
0
        p += m;
1864
0
      }
1865
0
      if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
1866
0
      add_next_index_stringl(return_value, chunk_p, chunk_len);
1867
0
    }
1868
0
    return;
1869
0
  } else {
1870
    /* third scenario: other multibyte encodings */
1871
0
    mbfl_convert_filter *filter, *decoder;
1872
1873
    /* assume that we have 1-bytes characters */
1874
0
    array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1875
1876
    /* decoder filter to decode wchar to encoding */
1877
0
    mbfl_memory_device device;
1878
0
    mbfl_memory_device_init(&device, split_length + 1, 0);
1879
1880
0
    decoder = mbfl_convert_filter_new(
1881
0
        &mbfl_encoding_wchar,
1882
0
        string.encoding,
1883
0
        mbfl_memory_device_output,
1884
0
        NULL,
1885
0
        &device);
1886
    /* assert that nothing is wrong with the decoder */
1887
0
    ZEND_ASSERT(decoder != NULL);
1888
1889
    /* wchar filter */
1890
0
    mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
1891
0
    struct mbfl_split_params params = { /* init callback function params structure */
1892
0
      .return_value = return_value,
1893
0
      .result_string = &result_string,
1894
0
      .mb_chunk_length = 0,
1895
0
      .split_length = (size_t)split_length,
1896
0
      .next_filter = decoder,
1897
0
    };
1898
1899
0
    filter = mbfl_convert_filter_new(
1900
0
        string.encoding,
1901
0
        &mbfl_encoding_wchar,
1902
0
        mbfl_split_output,
1903
0
        NULL,
1904
0
        &params);
1905
    /* assert that nothing is wrong with the filter */
1906
0
    ZEND_ASSERT(filter != NULL);
1907
1908
0
    while (p < last - 1) { /* cycle each byte except last with callback function */
1909
0
      (*filter->filter_function)(*p++, filter);
1910
0
    }
1911
0
    params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
1912
0
    (*filter->filter_function)(*p++, filter); /*process last char */
1913
1914
0
    mbfl_convert_filter_delete(decoder);
1915
0
    mbfl_convert_filter_delete(filter);
1916
0
    mbfl_memory_device_clear(&device);
1917
0
    return;
1918
0
  }
1919
1920
  /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1921
0
  chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
1922
0
  array_init_size(return_value, chunks);
1923
0
  if (chunks != 0) {
1924
0
    zend_long i;
1925
1926
0
    for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
1927
0
      add_next_index_stringl(return_value, p, chunk_len);
1928
0
    }
1929
0
    add_next_index_stringl(return_value, p, last - p);
1930
0
  }
1931
0
}
1932
/* }}} */
1933
1934
/* {{{ Get character numbers of a string */
1935
PHP_FUNCTION(mb_strlen)
1936
0
{
1937
0
  size_t n;
1938
0
  mbfl_string string;
1939
0
  char *str;
1940
0
  size_t str_len;
1941
0
  zend_string *enc_name = NULL;
1942
1943
0
  ZEND_PARSE_PARAMETERS_START(1, 2)
1944
0
    Z_PARAM_STRING(str, str_len)
1945
0
    Z_PARAM_OPTIONAL
1946
0
    Z_PARAM_STR_OR_NULL(enc_name)
1947
0
  ZEND_PARSE_PARAMETERS_END();
1948
1949
0
  string.val = (unsigned char *) str;
1950
0
  string.len = str_len;
1951
0
  string.encoding = php_mb_get_encoding(enc_name, 2);
1952
0
  if (!string.encoding) {
1953
0
    RETURN_THROWS();
1954
0
  }
1955
1956
0
  n = mbfl_strlen(&string);
1957
  /* Only way this can fail is if the conversion creation fails
1958
   * this would imply some sort of memory allocation failure which is a bug */
1959
0
  ZEND_ASSERT(!mbfl_is_error(n));
1960
0
  RETVAL_LONG(n);
1961
0
}
1962
/* }}} */
1963
1964
0
static void handle_strpos_error(size_t error) {
1965
0
  switch (error) {
1966
0
  case MBFL_ERROR_NOT_FOUND:
1967
0
    break;
1968
0
  case MBFL_ERROR_ENCODING:
1969
0
    php_error_docref(NULL, E_WARNING, "Conversion error");
1970
0
    break;
1971
0
  case MBFL_ERROR_OFFSET:
1972
0
    zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1973
0
    break;
1974
0
  default:
1975
0
    zend_value_error("mb_strpos(): Unknown error");
1976
0
    break;
1977
0
  }
1978
0
}
1979
1980
/* {{{ Find position of first occurrence of a string within another */
1981
PHP_FUNCTION(mb_strpos)
1982
0
{
1983
0
  int reverse = 0;
1984
0
  zend_long offset = 0;
1985
0
  mbfl_string haystack, needle;
1986
0
  zend_string *enc_name = NULL;
1987
0
  size_t n;
1988
1989
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
1990
0
    RETURN_THROWS();
1991
0
  }
1992
1993
0
  haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1994
0
  if (!haystack.encoding) {
1995
0
    RETURN_THROWS();
1996
0
  }
1997
1998
0
  n = mbfl_strpos(&haystack, &needle, offset, reverse);
1999
0
  if (!mbfl_is_error(n)) {
2000
0
    RETVAL_LONG(n);
2001
0
  } else {
2002
0
    handle_strpos_error(n);
2003
0
    RETVAL_FALSE;
2004
0
  }
2005
0
}
2006
/* }}} */
2007
2008
/* {{{ Find position of last occurrence of a string within another */
2009
PHP_FUNCTION(mb_strrpos)
2010
0
{
2011
0
  mbfl_string haystack, needle;
2012
0
  zend_string *enc_name = NULL;
2013
0
  zend_long offset = 0, n;
2014
2015
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) {
2016
0
    RETURN_THROWS();
2017
0
  }
2018
2019
0
  haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
2020
0
  if (!haystack.encoding) {
2021
0
    RETURN_THROWS();
2022
0
  }
2023
2024
0
  n = mbfl_strpos(&haystack, &needle, offset, 1);
2025
0
  if (!mbfl_is_error(n)) {
2026
0
    RETVAL_LONG(n);
2027
0
  } else {
2028
0
    handle_strpos_error(n);
2029
0
    RETVAL_FALSE;
2030
0
  }
2031
0
}
2032
/* }}} */
2033
2034
/* {{{ Finds position of first occurrence of a string within another, case insensitive */
2035
PHP_FUNCTION(mb_stripos)
2036
0
{
2037
0
  size_t n = (size_t) -1;
2038
0
  zend_long offset = 0;
2039
0
  mbfl_string haystack, needle;
2040
0
  zend_string *from_encoding = NULL;
2041
0
  const mbfl_encoding *enc;
2042
2043
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
2044
0
    RETURN_THROWS();
2045
0
  }
2046
2047
0
  enc = php_mb_get_encoding(from_encoding, 4);
2048
0
  if (!enc) {
2049
0
    RETURN_THROWS();
2050
0
  }
2051
2052
0
  n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
2053
2054
0
  if (!mbfl_is_error(n)) {
2055
0
    RETVAL_LONG(n);
2056
0
  } else {
2057
0
    handle_strpos_error(n);
2058
0
    RETVAL_FALSE;
2059
0
  }
2060
0
}
2061
/* }}} */
2062
2063
/* {{{ Finds position of last occurrence of a string within another, case insensitive */
2064
PHP_FUNCTION(mb_strripos)
2065
0
{
2066
0
  size_t n = (size_t) -1;
2067
0
  zend_long offset = 0;
2068
0
  mbfl_string haystack, needle;
2069
0
  zend_string *from_encoding = NULL;
2070
0
  const mbfl_encoding *enc;
2071
2072
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) {
2073
0
    RETURN_THROWS();
2074
0
  }
2075
2076
0
  enc = php_mb_get_encoding(from_encoding, 4);
2077
0
  if (!enc) {
2078
0
    RETURN_THROWS();
2079
0
  }
2080
2081
0
  n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
2082
2083
0
  if (!mbfl_is_error(n)) {
2084
0
    RETVAL_LONG(n);
2085
0
  } else {
2086
0
    handle_strpos_error(n);
2087
0
    RETVAL_FALSE;
2088
0
  }
2089
0
}
2090
/* }}} */
2091
2092
0
#define MB_STRSTR 1
2093
0
#define MB_STRRCHR 2
2094
0
#define MB_STRISTR 3
2095
0
#define MB_STRRICHR 4
2096
/* {{{ php_mb_strstr_variants */
2097
static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant)
2098
0
{
2099
0
  int reverse_mode = 0;
2100
0
  size_t n;
2101
0
  mbfl_string haystack, needle, result, *ret = NULL;
2102
0
  zend_string *encoding_name = NULL;
2103
0
  zend_bool part = 0;
2104
2105
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS!",
2106
0
    (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len,
2107
0
    &part, &encoding_name) == FAILURE
2108
0
  ) {
2109
0
    RETURN_THROWS();
2110
0
  }
2111
2112
0
  haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4);
2113
0
  if (!haystack.encoding) {
2114
0
    RETURN_THROWS();
2115
0
  }
2116
2117
0
  if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; }
2118
2119
0
  if (variant == MB_STRISTR || variant == MB_STRRICHR) {
2120
0
    n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val,
2121
0
      needle.len, 0, needle.encoding);
2122
0
  } else {
2123
0
    n = mbfl_strpos(&haystack, &needle, 0, reverse_mode);
2124
0
  }
2125
2126
0
  if (!mbfl_is_error(n)) {
2127
0
    if (part) {
2128
0
      ret = mbfl_substr(&haystack, &result, 0, n);
2129
0
      ZEND_ASSERT(ret != NULL);
2130
      // TODO: avoid reallocation ???
2131
0
      RETVAL_STRINGL((char *)ret->val, ret->len);
2132
0
      efree(ret->val);
2133
0
    } else {
2134
0
      ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2135
0
      ZEND_ASSERT(ret != NULL);
2136
      // TODO: avoid reallocation ???
2137
0
      RETVAL_STRINGL((char *)ret->val, ret->len);
2138
0
      efree(ret->val);
2139
0
    }
2140
0
  } else {
2141
    // FIXME use handle_strpos_error(n)
2142
0
    RETVAL_FALSE;
2143
0
  }
2144
0
}
2145
2146
/* {{{ Finds first occurrence of a string within another */
2147
PHP_FUNCTION(mb_strstr)
2148
0
{
2149
0
  php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
2150
0
}
2151
/* }}} */
2152
2153
/* {{{ Finds the last occurrence of a character in a string within another */
2154
PHP_FUNCTION(mb_strrchr)
2155
0
{
2156
0
  php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
2157
0
}
2158
/* }}} */
2159
2160
/* {{{ Finds first occurrence of a string within another, case insensitive */
2161
PHP_FUNCTION(mb_stristr)
2162
0
{
2163
0
  php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
2164
0
}
2165
/* }}} */
2166
2167
/* {{{ Finds the last occurrence of a character in a string within another, case insensitive */
2168
PHP_FUNCTION(mb_strrichr)
2169
0
{
2170
0
  php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
2171
0
}
2172
/* }}} */
2173
2174
#undef MB_STRSTR
2175
#undef MB_STRRCHR
2176
#undef MB_STRISTR
2177
#undef MB_STRRICHR
2178
2179
/* {{{ Count the number of substring occurrences */
2180
PHP_FUNCTION(mb_substr_count)
2181
0
{
2182
0
  size_t n;
2183
0
  mbfl_string haystack, needle;
2184
0
  zend_string *enc_name = NULL;
2185
2186
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|S!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name) == FAILURE) {
2187
0
    RETURN_THROWS();
2188
0
  }
2189
2190
0
  if (needle.len == 0) {
2191
0
    zend_argument_value_error(2, "must not be empty");
2192
0
    RETURN_THROWS();
2193
0
  }
2194
2195
0
  haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
2196
0
  if (!haystack.encoding) {
2197
0
    RETURN_THROWS();
2198
0
  }
2199
2200
0
  n = mbfl_substr_count(&haystack, &needle);
2201
  /* An error can only occur if needle is empty,
2202
   * an encoding error happens (which should not happen at this stage and is a bug)
2203
   * or the haystack is more than sizeof(size_t) bytes
2204
   * If one of these things occur this is a bug and should be flagged as such */
2205
0
  ZEND_ASSERT(!mbfl_is_error(n));
2206
0
  RETVAL_LONG(n);
2207
0
}
2208
/* }}} */
2209
2210
/* {{{ Returns part of a string */
2211
PHP_FUNCTION(mb_substr)
2212
0
{
2213
0
  char *str;
2214
0
  zend_string *encoding = NULL;
2215
0
  zend_long from, len;
2216
0
  size_t mblen, real_from, real_len;
2217
0
  size_t str_len;
2218
0
  zend_bool len_is_null = 1;
2219
0
  mbfl_string string, result, *ret;
2220
2221
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S!", &str, &str_len, &from, &len, &len_is_null, &encoding) == FAILURE) {
2222
0
    RETURN_THROWS();
2223
0
  }
2224
2225
0
  string.encoding = php_mb_get_encoding(encoding, 4);
2226
0
  if (!string.encoding) {
2227
0
    RETURN_THROWS();
2228
0
  }
2229
2230
0
  string.val = (unsigned char *)str;
2231
0
  string.len = str_len;
2232
2233
  /* measures length */
2234
0
  mblen = 0;
2235
0
  if (from < 0 || (!len_is_null && len < 0)) {
2236
0
    mblen = mbfl_strlen(&string);
2237
0
  }
2238
2239
  /* if "from" position is negative, count start position from the end
2240
   * of the string
2241
   */
2242
0
  if (from >= 0) {
2243
0
    real_from = (size_t) from;
2244
0
  } else if (-from < mblen) {
2245
0
    real_from = mblen + from;
2246
0
  } else {
2247
0
    real_from = 0;
2248
0
  }
2249
2250
  /* if "length" position is negative, set it to the length
2251
   * needed to stop that many chars from the end of the string
2252
   */
2253
0
  if (len_is_null) {
2254
0
    real_len = MBFL_SUBSTR_UNTIL_END;
2255
0
  } else if (len >= 0) {
2256
0
    real_len = (size_t) len;
2257
0
  } else if (real_from < mblen && -len < mblen - real_from) {
2258
0
    real_len = (mblen - real_from) + len;
2259
0
  } else {
2260
0
    real_len = 0;
2261
0
  }
2262
2263
0
  ret = mbfl_substr(&string, &result, real_from, real_len);
2264
0
  ZEND_ASSERT(ret != NULL);
2265
2266
  // TODO: avoid reallocation ???
2267
0
  RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2268
0
  efree(ret->val);
2269
0
}
2270
/* }}} */
2271
2272
/* {{{ Returns part of a string */
2273
PHP_FUNCTION(mb_strcut)
2274
0
{
2275
0
  zend_string *encoding = NULL;
2276
0
  zend_long from, len;
2277
0
  zend_bool len_is_null = 1;
2278
0
  mbfl_string string, result, *ret;
2279
2280
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding) == FAILURE) {
2281
0
    RETURN_THROWS();
2282
0
  }
2283
2284
0
  string.encoding = php_mb_get_encoding(encoding, 4);
2285
0
  if (!string.encoding) {
2286
0
    RETURN_THROWS();
2287
0
  }
2288
2289
0
  if (len_is_null) {
2290
0
    len = string.len;
2291
0
  }
2292
2293
  /* if "from" position is negative, count start position from the end
2294
   * of the string
2295
   */
2296
0
  if (from < 0) {
2297
0
    from = string.len + from;
2298
0
    if (from < 0) {
2299
0
      from = 0;
2300
0
    }
2301
0
  }
2302
2303
  /* if "length" position is negative, set it to the length
2304
   * needed to stop that many chars from the end of the string
2305
   */
2306
0
  if (len < 0) {
2307
0
    len = (string.len - from) + len;
2308
0
    if (len < 0) {
2309
0
      len = 0;
2310
0
    }
2311
0
  }
2312
2313
0
  if (from > string.len) {
2314
    // TODO Out of bounds ValueError
2315
0
    RETURN_FALSE;
2316
0
  }
2317
2318
0
  ret = mbfl_strcut(&string, &result, from, len);
2319
0
  ZEND_ASSERT(ret != NULL);
2320
2321
  // TODO: avoid reallocation ???
2322
0
  RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2323
0
  efree(ret->val);
2324
0
}
2325
/* }}} */
2326
2327
/* {{{ Gets terminal width of a string */
2328
PHP_FUNCTION(mb_strwidth)
2329
0
{
2330
0
  size_t n;
2331
0
  mbfl_string string;
2332
0
  zend_string *enc_name = NULL;
2333
2334
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", (char **)&string.val, &string.len, &enc_name) == FAILURE) {
2335
0
    RETURN_THROWS();
2336
0
  }
2337
2338
0
  string.encoding = php_mb_get_encoding(enc_name, 2);
2339
0
  if (!string.encoding) {
2340
0
    RETURN_THROWS();
2341
0
  }
2342
2343
0
  n = mbfl_strwidth(&string);
2344
0
  ZEND_ASSERT(n != (size_t) -1);
2345
0
  RETVAL_LONG(n);
2346
0
}
2347
/* }}} */
2348
2349
/* {{{ Trim the string in terminal width */
2350
PHP_FUNCTION(mb_strimwidth)
2351
0
{
2352
0
  char *str, *trimmarker = NULL;
2353
0
  zend_string *encoding = NULL;
2354
0
  zend_long from, width, swidth = 0;
2355
0
  size_t str_len, trimmarker_len;
2356
0
  mbfl_string string, result, marker, *ret;
2357
2358
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|sS!", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding) == FAILURE) {
2359
0
    RETURN_THROWS();
2360
0
  }
2361
2362
0
  string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
2363
0
  if (!string.encoding) {
2364
0
    RETURN_THROWS();
2365
0
  }
2366
2367
0
  string.val = (unsigned char *)str;
2368
0
  string.len = str_len;
2369
0
  marker.val = NULL;
2370
0
  marker.len = 0;
2371
2372
0
  if ((from < 0) || (width < 0)) {
2373
0
    swidth = mbfl_strwidth(&string);
2374
0
  }
2375
2376
0
  if (from < 0) {
2377
0
    from += swidth;
2378
0
  }
2379
2380
0
  if (from < 0 || (size_t)from > str_len) {
2381
0
    zend_argument_value_error(2, "is out of range");
2382
0
    RETURN_THROWS();
2383
0
  }
2384
2385
0
  if (width < 0) {
2386
0
    width = swidth + width - from;
2387
0
  }
2388
2389
0
  if (width < 0) {
2390
0
    zend_argument_value_error(3, "is out of range");
2391
0
    RETURN_THROWS();
2392
0
  }
2393
2394
0
  if (trimmarker) {
2395
0
    marker.val = (unsigned char *)trimmarker;
2396
0
    marker.len = trimmarker_len;
2397
0
  }
2398
2399
0
  ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2400
0
  ZEND_ASSERT(ret != NULL);
2401
  // TODO: avoid reallocation ???
2402
0
  RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2403
0
  efree(ret->val);
2404
0
}
2405
/* }}} */
2406
2407
2408
/* See mbfl_no_encoding definition for list of unsupported encodings */
2409
static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
2410
0
{
2411
0
  return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
2412
0
      || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
2413
0
      || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
2414
0
      || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
2415
0
}
2416
2417
2418
/* See mbfl_no_encoding definition for list of UTF-8 encodings */
2419
static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
2420
0
{
2421
0
  return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
2422
0
}
2423
2424
/* Converts `length` bytes at `input` from `from_encoding` to `to_encoding`.
 *
 * Returns the converted buffer produced by the mbfl buffer converter, or NULL
 * when the converter yields no result. When `output_len` is non-NULL it
 * receives the length of the returned buffer (0 when NULL is returned).
 * Callers in this file release the returned buffer with efree().
 * The number of illegal characters reported by the converter is added to
 * MBSTRG(illegalchars). */
MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
{
  mbfl_string src, dst;
  mbfl_string *converted;
  mbfl_buffer_converter *convd;
  char *output;

  if (output_len != NULL) {
    *output_len = 0;
  }

  /* describe the input */
  src.encoding = from_encoding;
  src.val = (unsigned char *)input;
  src.len = length;

  /* set up the converter, using the session's illegal-character policy */
  convd = mbfl_buffer_converter_new(from_encoding, to_encoding, src.len);
  /* If this assertion fails this means some memory allocation failure which is a bug */
  ZEND_ASSERT(convd != NULL);

  mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
  mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));

  /* run the conversion */
  mbfl_string_init(&dst);
  converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
  if (converted == NULL) {
    output = NULL;
  } else {
    output = (char *)converted->val;
    if (output_len != NULL) {
      *output_len = converted->len;
    }
  }

  MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
  mbfl_buffer_converter_delete(convd);
  return output;
}
2461
/* }}} */
2462
2463
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
2464
/* Converts `input` to `to_encoding`, choosing the source encoding from the
 * `from_encodings` list.
 *
 * With exactly one candidate it is used directly; otherwise the source
 * encoding is detected with mbfl_identify_encoding(). When detection fails an
 * E_WARNING is raised and NULL is returned. `*output_len` (if non-NULL) is
 * reset to 0 up front and filled in by php_mb_convert_encoding_ex(). */
MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
{
  const mbfl_encoding *detected;
  mbfl_string probe;

  if (output_len != NULL) {
    *output_len = 0;
  }

  ZEND_ASSERT(num_from_encodings >= 1);

  /* single candidate: no detection needed */
  if (num_from_encodings == 1) {
    return php_mb_convert_encoding_ex(input, length, to_encoding, *from_encodings, output_len);
  }

  /* several candidates: auto detect */
  mbfl_string_init(&probe);
  probe.val = (unsigned char *)input;
  probe.len = length;
  detected = mbfl_identify_encoding(
    &probe, from_encodings, num_from_encodings, MBSTRG(strict_detection));
  if (detected == NULL) {
    php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
    return NULL;
  }

  return php_mb_convert_encoding_ex(input, length, to_encoding, detected, output_len);
}
2492
/* }}} */
2493
2494
/* Builds a new array holding the contents of `input` with every string key
 * and string value converted to `to_encoding`; nested arrays are converted
 * recursively.
 *
 * Returns NULL when `input` is NULL or when `input` is already marked as
 * being traversed (a recursive reference; an E_WARNING is raised). Values of
 * unsupported types (objects and anything not listed in the switch) are
 * skipped with an E_WARNING.
 *
 * php_mb_convert_encoding() returns NULL when it cannot produce a result
 * (e.g. encoding detection failed, in which case it has already raised a
 * warning). Such keys/values are skipped instead of being passed on as a
 * NULL pointer to the string constructors. */
MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
{
  HashTable *output, *chash;
  zend_long idx;
  zend_string *key;
  zval *entry, entry_tmp;
  size_t ckey_len, cval_len;
  char *ckey, *cval;

  if (!input) {
    return NULL;
  }

  if (GC_IS_RECURSIVE(input)) {
    GC_UNPROTECT_RECURSION(input);
    php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
    return NULL;
  }
  GC_TRY_PROTECT_RECURSION(input);
  output = zend_new_array(zend_hash_num_elements(input));
  ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
    /* convert key */
    if (key) {
      ckey = php_mb_convert_encoding(
        ZSTR_VAL(key), ZSTR_LEN(key),
        to_encoding, from_encodings, num_from_encodings, &ckey_len);
      if (!ckey) {
        /* Key could not be converted; `key` still refers to the string
         * owned by `input`, so there is nothing to release here. */
        continue;
      }
      /* from here on `key` is a new string owned by this iteration */
      key = zend_string_init(ckey, ckey_len, 0);
      efree(ckey);
    }
    /* convert value */
    ZEND_ASSERT(entry);
    switch(Z_TYPE_P(entry)) {
      case IS_STRING:
        cval = php_mb_convert_encoding(
          Z_STRVAL_P(entry), Z_STRLEN_P(entry),
          to_encoding, from_encodings, num_from_encodings, &cval_len);
        if (!cval) {
          /* Value could not be converted: drop the entry. */
          if (key) {
            zend_string_release(key);
          }
          continue;
        }
        ZVAL_STRINGL(&entry_tmp, cval, cval_len);
        efree(cval);
        break;
      case IS_NULL:
      case IS_TRUE:
      case IS_FALSE:
      case IS_LONG:
      case IS_DOUBLE:
        ZVAL_COPY(&entry_tmp, entry);
        break;
      case IS_ARRAY:
        chash = php_mb_convert_encoding_recursive(
          Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings);
        if (chash) {
          ZVAL_ARR(&entry_tmp, chash);
        } else {
          /* nested conversion refused (recursion): substitute an empty array */
          ZVAL_EMPTY_ARRAY(&entry_tmp);
        }
        break;
      case IS_OBJECT:
      default:
        if (key) {
          zend_string_release(key);
        }
        php_error_docref(NULL, E_WARNING, "Object is not supported");
        continue;
    }
    if (key) {
      /* zend_hash_add() refuses keys that already exist, which can happen
       * when two distinct source keys convert to the same bytes. Destroy
       * the converted value in that case so it is not leaked. */
      if (zend_hash_add(output, key, &entry_tmp) == NULL) {
        zval_ptr_dtor(&entry_tmp);
      }
      zend_string_release(key);
    } else {
      zend_hash_index_add(output, idx, &entry_tmp);
    }
  } ZEND_HASH_FOREACH_END();
  GC_TRY_UNPROTECT_RECURSION(input);

  return output;
}
2568
/* }}} */
2569
2570
2571
/* {{{ Returns converted string in desired encoding */
2572
PHP_FUNCTION(mb_convert_encoding)
2573
0
{
2574
0
  zend_string *to_encoding_name;
2575
0
  zend_string *input_str, *from_encodings_str = NULL;
2576
0
  HashTable *input_ht, *from_encodings_ht = NULL;
2577
0
  const mbfl_encoding *to_encoding;
2578
0
  const mbfl_encoding **from_encodings;
2579
0
  size_t num_from_encodings;
2580
0
  zend_bool free_from_encodings;
2581
2582
0
  ZEND_PARSE_PARAMETERS_START(2, 3)
2583
0
    Z_PARAM_STR_OR_ARRAY_HT(input_str, input_ht)
2584
0
    Z_PARAM_STR(to_encoding_name)
2585
0
    Z_PARAM_OPTIONAL
2586
0
    Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(from_encodings_str, from_encodings_ht)
2587
0
  ZEND_PARSE_PARAMETERS_END();
2588
2589
0
  to_encoding = php_mb_get_encoding(to_encoding_name, 2);
2590
0
  if (!to_encoding) {
2591
0
    RETURN_THROWS();
2592
0
  }
2593
2594
0
  if (from_encodings_ht) {
2595
0
    if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
2596
0
      RETURN_THROWS();
2597
0
    }
2598
0
    free_from_encodings = 1;
2599
0
  } else if (from_encodings_str) {
2600
0
    if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
2601
0
        &from_encodings, &num_from_encodings,
2602
0
        /* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
2603
0
      RETURN_THROWS();
2604
0
    }
2605
0
    free_from_encodings = 1;
2606
0
  } else {
2607
0
    from_encodings = &MBSTRG(current_internal_encoding);
2608
0
    num_from_encodings = 1;
2609
0
    free_from_encodings = 0;
2610
0
  }
2611
2612
0
  if (!num_from_encodings) {
2613
0
    efree(ZEND_VOIDP(from_encodings));
2614
0
    zend_argument_value_error(3, "must specify at least one encoding");
2615
0
    RETURN_THROWS();
2616
0
  }
2617
2618
0
  if (input_str) {
2619
    /* new encoding */
2620
0
    size_t size;
2621
0
    char *ret = php_mb_convert_encoding(
2622
0
      ZSTR_VAL(input_str), ZSTR_LEN(input_str),
2623
0
      to_encoding, from_encodings, num_from_encodings, &size);
2624
0
    if (ret != NULL) {
2625
      // TODO: avoid reallocation ???
2626
0
      RETVAL_STRINGL(ret, size);    /* the string is already strdup()'ed */
2627
0
      efree(ret);
2628
0
    } else {
2629
0
      RETVAL_FALSE;
2630
0
    }
2631
0
  } else {
2632
0
    HashTable *tmp;
2633
0
    tmp = php_mb_convert_encoding_recursive(
2634
0
      input_ht, to_encoding, from_encodings, num_from_encodings);
2635
0
    RETVAL_ARR(tmp);
2636
0
  }
2637
2638
0
  if (free_from_encodings) {
2639
0
    efree(ZEND_VOIDP(from_encodings));
2640
0
  }
2641
0
}
2642
/* }}} */
2643
2644
static char *mbstring_convert_case(
2645
    int case_mode, const char *str, size_t str_len, size_t *ret_len,
2646
0
    const mbfl_encoding *enc) {
2647
0
  return php_unicode_convert_case(
2648
0
    case_mode, str, str_len, ret_len, enc,
2649
0
    MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
2650
0
}
2651
2652
/* {{{ Returns a case-folded version of source_string */
2653
PHP_FUNCTION(mb_convert_case)
2654
0
{
2655
0
  zend_string *from_encoding = NULL;
2656
0
  char *str;
2657
0
  size_t str_len;
2658
0
  zend_long case_mode = 0;
2659
0
  char *newstr;
2660
0
  size_t ret_len;
2661
0
  const mbfl_encoding *enc;
2662
2663
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|S!", &str, &str_len, &case_mode, &from_encoding) == FAILURE) {
2664
0
    RETURN_THROWS();
2665
0
  }
2666
2667
0
  enc = php_mb_get_encoding(from_encoding, 3);
2668
0
  if (!enc) {
2669
0
    RETURN_THROWS();
2670
0
  }
2671
2672
0
  if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
2673
0
    zend_argument_value_error(2, "must be one of MB_CASE_UPPER, MB_CASE_LOWER, MB_CASE_TITLE, MB_CASE_FOLD,"
2674
0
      " MB_CASE_UPPER_SIMPLE, MB_CASE_LOWER_SIMPLE, MB_CASE_TITLE_SIMPLE, or MB_CASE_FOLD_SIMPLE");
2675
0
    RETURN_THROWS();
2676
0
  }
2677
2678
0
  newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
2679
  /* If newstr is NULL something went wrong in mbfl and this is a bug */
2680
0
  ZEND_ASSERT(newstr != NULL);
2681
2682
  // TODO: avoid reallocation ???
2683
0
  RETVAL_STRINGL(newstr, ret_len);
2684
0
  efree(newstr);
2685
0
}
2686
/* }}} */
2687
2688
/* {{{ Returns a upper cased version of source_string */
2689
PHP_FUNCTION(mb_strtoupper)
2690
0
{
2691
0
  zend_string *from_encoding = NULL;
2692
0
  char *str;
2693
0
  size_t str_len;
2694
0
  char *newstr;
2695
0
  size_t ret_len;
2696
0
  const mbfl_encoding *enc;
2697
2698
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len, &from_encoding) == FAILURE) {
2699
0
    RETURN_THROWS();
2700
0
  }
2701
2702
0
  enc = php_mb_get_encoding(from_encoding, 2);
2703
0
  if (!enc) {
2704
0
    RETURN_THROWS();
2705
0
  }
2706
2707
0
  newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
2708
  /* If newstr is NULL something went wrong in mbfl and this is a bug */
2709
0
  ZEND_ASSERT(newstr != NULL);
2710
2711
  // TODO: avoid reallocation ???
2712
0
  RETVAL_STRINGL(newstr, ret_len);
2713
0
  efree(newstr);
2714
0
}
2715
/* }}} */
2716
2717
/* {{{ Returns a lower cased version of source_string */
2718
PHP_FUNCTION(mb_strtolower)
2719
0
{
2720
0
  zend_string *from_encoding = NULL;
2721
0
  char *str;
2722
0
  size_t str_len;
2723
0
  char *newstr;
2724
0
  size_t ret_len;
2725
0
  const mbfl_encoding *enc;
2726
2727
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len, &from_encoding) == FAILURE) {
2728
0
    RETURN_THROWS();
2729
0
  }
2730
2731
0
  enc = php_mb_get_encoding(from_encoding, 2);
2732
0
  if (!enc) {
2733
0
    RETURN_THROWS();
2734
0
  }
2735
2736
0
  newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
2737
  /* If newstr is NULL something went wrong in mbfl and this is a bug */
2738
0
  ZEND_ASSERT(newstr != NULL);
2739
2740
  // TODO: avoid reallocation ???
2741
0
  RETVAL_STRINGL(newstr, ret_len);
2742
0
  efree(newstr);
2743
0
}
2744
/* }}} */
2745
2746
/* {{{ Encodings of the given string is returned (as a string) */
2747
PHP_FUNCTION(mb_detect_encoding)
2748
0
{
2749
0
  char *str;
2750
0
  size_t str_len;
2751
0
  zend_string *encoding_str = NULL;
2752
0
  HashTable *encoding_ht = NULL;
2753
0
  zend_bool strict = 0;
2754
2755
0
  mbfl_string string;
2756
0
  const mbfl_encoding *ret;
2757
0
  const mbfl_encoding **elist;
2758
0
  size_t size;
2759
0
  zend_bool free_elist;
2760
2761
0
  ZEND_PARSE_PARAMETERS_START(1, 3)
2762
0
    Z_PARAM_STRING(str, str_len)
2763
0
    Z_PARAM_OPTIONAL
2764
0
    Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(encoding_str, encoding_ht)
2765
0
    Z_PARAM_BOOL(strict)
2766
0
  ZEND_PARSE_PARAMETERS_END();
2767
2768
  /* make encoding list */
2769
0
  if (encoding_ht) {
2770
0
    if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
2771
0
      RETURN_THROWS();
2772
0
    }
2773
0
    free_elist = 1;
2774
0
  } else if (encoding_str) {
2775
0
    if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
2776
0
      RETURN_THROWS();
2777
0
    }
2778
0
    free_elist = 1;
2779
0
  } else {
2780
0
    elist = MBSTRG(current_detect_order_list);
2781
0
    size = MBSTRG(current_detect_order_list_size);
2782
0
    free_elist = 0;
2783
0
  }
2784
2785
0
  if (size == 0) {
2786
0
    efree(ZEND_VOIDP(elist));
2787
0
    zend_argument_value_error(2, "must specify at least one encoding");
2788
0
    RETURN_THROWS();
2789
0
  }
2790
2791
0
  if (ZEND_NUM_ARGS() < 3) {
2792
0
    strict = MBSTRG(strict_detection);
2793
0
  }
2794
2795
0
  mbfl_string_init(&string);
2796
0
  string.val = (unsigned char *)str;
2797
0
  string.len = str_len;
2798
0
  ret = mbfl_identify_encoding(&string, elist, size, strict);
2799
2800
0
  if (free_elist) {
2801
0
    efree(ZEND_VOIDP(elist));
2802
0
  }
2803
2804
0
  if (ret == NULL) {
2805
0
    RETURN_FALSE;
2806
0
  }
2807
2808
0
  RETVAL_STRING((char *)ret->name);
2809
0
}
2810
/* }}} */
2811
2812
/* {{{ Returns an array of all supported entity encodings */
2813
PHP_FUNCTION(mb_list_encodings)
2814
0
{
2815
0
  const mbfl_encoding **encodings;
2816
0
  const mbfl_encoding *encoding;
2817
0
  int i;
2818
2819
0
  if (zend_parse_parameters_none() == FAILURE) {
2820
0
    RETURN_THROWS();
2821
0
  }
2822
2823
0
  array_init(return_value);
2824
0
  i = 0;
2825
0
  encodings = mbfl_get_supported_encodings();
2826
0
  while ((encoding = encodings[i++]) != NULL) {
2827
0
    add_next_index_string(return_value, (char *) encoding->name);
2828
0
  }
2829
0
}
2830
/* }}} */
2831
2832
/* {{{ Returns an array of the aliases of a given encoding name */
2833
PHP_FUNCTION(mb_encoding_aliases)
2834
0
{
2835
0
  const mbfl_encoding *encoding;
2836
0
  zend_string *encoding_name = NULL;
2837
2838
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &encoding_name) == FAILURE) {
2839
0
    RETURN_THROWS();
2840
0
  }
2841
2842
0
  encoding = php_mb_get_encoding(encoding_name, 1);
2843
0
  if (!encoding) {
2844
0
    RETURN_THROWS();
2845
0
  }
2846
2847
0
  array_init(return_value);
2848
0
  if (encoding->aliases != NULL) {
2849
0
    const char **alias;
2850
0
    for (alias = *encoding->aliases; *alias; ++alias) {
2851
0
      add_next_index_string(return_value, (char *)*alias);
2852
0
    }
2853
0
  }
2854
0
}
2855
/* }}} */
2856
2857
/* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
2858
PHP_FUNCTION(mb_encode_mimeheader)
2859
0
{
2860
0
  const mbfl_encoding *charset, *transenc;
2861
0
  mbfl_string  string, result, *ret;
2862
0
  zend_string *charset_name = NULL;
2863
0
  char *trans_enc_name = NULL;
2864
0
  size_t trans_enc_name_len;
2865
0
  char *linefeed = "\r\n";
2866
0
  size_t linefeed_len;
2867
0
  zend_long indent = 0;
2868
2869
0
  string.encoding = MBSTRG(current_internal_encoding);
2870
2871
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!s!sl",
2872
0
    (char **)&string.val, &string.len, &charset_name, &trans_enc_name, &trans_enc_name_len,
2873
0
    &linefeed, &linefeed_len, &indent
2874
0
  ) == FAILURE) {
2875
0
    RETURN_THROWS();
2876
0
  }
2877
2878
0
  charset = &mbfl_encoding_pass;
2879
0
  transenc = &mbfl_encoding_base64;
2880
2881
0
  if (charset_name != NULL) {
2882
0
    charset = php_mb_get_encoding(charset_name, 2);
2883
0
    if (!charset) {
2884
0
      RETURN_THROWS();
2885
0
    }
2886
0
  } else {
2887
0
    const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
2888
0
    if (lang != NULL) {
2889
0
      charset = mbfl_no2encoding(lang->mail_charset);
2890
0
      transenc = mbfl_no2encoding(lang->mail_header_encoding);
2891
0
    }
2892
0
  }
2893
2894
0
  if (trans_enc_name != NULL) {
2895
0
    if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
2896
0
      transenc = &mbfl_encoding_base64;
2897
0
    } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
2898
0
      transenc = &mbfl_encoding_qprint;
2899
0
    }
2900
0
  }
2901
2902
0
  mbfl_string_init(&result);
2903
0
  ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
2904
0
  ZEND_ASSERT(ret != NULL);
2905
  // TODO: avoid reallocation ???
2906
0
  RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2907
0
  efree(ret->val);
2908
0
}
2909
/* }}} */
2910
2911
/* {{{ Decodes the MIME "encoded-word" in the string */
2912
PHP_FUNCTION(mb_decode_mimeheader)
2913
0
{
2914
0
  mbfl_string string, result, *ret;
2915
2916
0
  string.encoding = MBSTRG(current_internal_encoding);
2917
2918
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) {
2919
0
    RETURN_THROWS();
2920
0
  }
2921
2922
0
  mbfl_string_init(&result);
2923
0
  ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
2924
0
  ZEND_ASSERT(ret != NULL);
2925
  // TODO: avoid reallocation ???
2926
0
  RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2927
0
  efree(ret->val);
2928
0
}
2929
/* }}} */
2930
2931
/* {{{ Conversion between full-width character and half-width character (Japanese) */
2932
PHP_FUNCTION(mb_convert_kana)
2933
0
{
2934
0
  int opt;
2935
0
  mbfl_string string, result, *ret;
2936
0
  char *optstr = NULL;
2937
0
  size_t optstr_len;
2938
0
  zend_string *encname = NULL;
2939
2940
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!S!", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname) == FAILURE) {
2941
0
    RETURN_THROWS();
2942
0
  }
2943
2944
  /* option */
2945
0
  if (optstr != NULL) {
2946
0
    char *p = optstr;
2947
0
    size_t i = 0, n = optstr_len;
2948
0
    opt = 0;
2949
0
    while (i < n) {
2950
0
      i++;
2951
0
      switch (*p++) {
2952
0
      case 'A':
2953
0
        opt |= 0x1;
2954
0
        break;
2955
0
      case 'a':
2956
0
        opt |= 0x10;
2957
0
        break;
2958
0
      case 'R':
2959
0
        opt |= 0x2;
2960
0
        break;
2961
0
      case 'r':
2962
0
        opt |= 0x20;
2963
0
        break;
2964
0
      case 'N':
2965
0
        opt |= 0x4;
2966
0
        break;
2967
0
      case 'n':
2968
0
        opt |= 0x40;
2969
0
        break;
2970
0
      case 'S':
2971
0
        opt |= 0x8;
2972
0
        break;
2973
0
      case 's':
2974
0
        opt |= 0x80;
2975
0
        break;
2976
0
      case 'K':
2977
0
        opt |= 0x100;
2978
0
        break;
2979
0
      case 'k':
2980
0
        opt |= 0x1000;
2981
0
        break;
2982
0
      case 'H':
2983
0
        opt |= 0x200;
2984
0
        break;
2985
0
      case 'h':
2986
0
        opt |= 0x2000;
2987
0
        break;
2988
0
      case 'V':
2989
0
        opt |= 0x800;
2990
0
        break;
2991
0
      case 'C':
2992
0
        opt |= 0x10000;
2993
0
        break;
2994
0
      case 'c':
2995
0
        opt |= 0x20000;
2996
0
        break;
2997
0
      case 'M':
2998
0
        opt |= 0x100000;
2999
0
        break;
3000
0
      case 'm':
3001
0
        opt |= 0x200000;
3002
0
        break;
3003
0
      }
3004
0
    }
3005
0
  } else {
3006
0
    opt = 0x900;
3007
0
  }
3008
3009
  /* encoding */
3010
0
  string.encoding = php_mb_get_encoding(encname, 3);
3011
0
  if (!string.encoding) {
3012
0
    RETURN_THROWS();
3013
0
  }
3014
3015
0
  ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3016
0
  ZEND_ASSERT(ret != NULL);
3017
  // TODO: avoid reallocation ???
3018
0
  RETVAL_STRINGL((char *)ret->val, ret->len);   /* the string is already strdup()'ed */
3019
0
  efree(ret->val);
3020
0
}
3021
/* }}} */
3022
3023
/* Feeds every string reachable from `var` (directly, or through nested
 * arrays / object property tables) into the encoding detector.
 *
 * Returns 1 as soon as mbfl_encoding_detector_feed() reports that detection
 * is complete, 0 otherwise. When a container that is already being traversed
 * is met again, `*recursion_error` is set to 1 and 0 is returned. */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */
{
  mbfl_string string;
  HashTable *ht;
  zval *entry;
  int finished = 0;

  ZVAL_DEREF(var);

  if (Z_TYPE_P(var) == IS_STRING) {
    string.val = (unsigned char *)Z_STRVAL_P(var);
    string.len = Z_STRLEN_P(var);
    if (mbfl_encoding_detector_feed(identd, &string)) {
      return 1; /* complete detecting */
    }
    return 0;
  }

  if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
    /* other types carry nothing to feed */
    return 0;
  }

  if (Z_REFCOUNTED_P(var)) {
    if (Z_IS_RECURSIVE_P(var)) {
      *recursion_error = 1;
      return 0;
    }
    Z_PROTECT_RECURSION_P(var);
  }

  ht = HASH_OF(var);
  if (ht != NULL) {
    ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
      if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
        finished = 1;
        break;
      }
      if (*recursion_error) {
        break;
      }
    } ZEND_HASH_FOREACH_END();
  }

  /* single exit: the recursion guard is dropped on every path */
  if (Z_REFCOUNTED_P(var)) {
    Z_UNPROTECT_RECURSION_P(var);
  }
  return finished;
} /* }}} */
3068
3069
/* Converts, in place, every string reachable from `var` using `convd`.
 *
 * A string is replaced by its converted copy when the converter returns a
 * result. Arrays are separated before being modified; arrays and objects are
 * walked through their hash tables. Returns 1 when a container that is
 * already being traversed is encountered (recursion), 0 otherwise. */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */
{
  mbfl_string string, result, *ret;
  HashTable *ht;
  zval *entry, *orig_var;
  int recursion_hit = 0;

  /* remember the slot as passed in: that is what gets overwritten */
  orig_var = var;
  ZVAL_DEREF(var);

  if (Z_TYPE_P(var) == IS_STRING) {
    string.val = (unsigned char *)Z_STRVAL_P(var);
    string.len = Z_STRLEN_P(var);
    ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
    if (ret != NULL) {
      zval_ptr_dtor(orig_var);
      // TODO: avoid reallocation ???
      ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len);
      efree(ret->val);
    }
    return 0;
  }

  if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
    return 0;
  }

  if (Z_TYPE_P(var) == IS_ARRAY) {
    SEPARATE_ARRAY(var);
  }
  if (Z_REFCOUNTED_P(var)) {
    if (Z_IS_RECURSIVE_P(var)) {
      return 1;
    }
    Z_PROTECT_RECURSION_P(var);
  }

  ht = HASH_OF(var);
  if (ht != NULL) {
    ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
      if (mb_recursive_convert_variable(convd, entry)) {
        recursion_hit = 1;
        break;
      }
    } ZEND_HASH_FOREACH_END();
  }

  if (Z_REFCOUNTED_P(var)) {
    Z_UNPROTECT_RECURSION_P(var);
  }
  return recursion_hit;
} /* }}} */
3116
3117
/* {{{ Converts the string resource in variables to desired encoding */
3118
PHP_FUNCTION(mb_convert_variables)
3119
0
{
3120
0
  zval *args;
3121
0
  zend_string *to_enc_str;
3122
0
  zend_string *from_enc_str;
3123
0
  HashTable *from_enc_ht;
3124
0
  mbfl_string string, result;
3125
0
  const mbfl_encoding *from_encoding, *to_encoding;
3126
0
  mbfl_encoding_detector *identd;
3127
0
  mbfl_buffer_converter *convd;
3128
0
  int n, argc;
3129
0
  size_t elistsz;
3130
0
  const mbfl_encoding **elist;
3131
0
  int recursion_error = 0;
3132
3133
0
  ZEND_PARSE_PARAMETERS_START(3, -1)
3134
0
    Z_PARAM_STR(to_enc_str)
3135
0
    Z_PARAM_STR_OR_ARRAY_HT(from_enc_str, from_enc_ht)
3136
0
    Z_PARAM_VARIADIC('+', args, argc)
3137
0
  ZEND_PARSE_PARAMETERS_END();
3138
3139
  /* new encoding */
3140
0
  to_encoding = php_mb_get_encoding(to_enc_str, 1);
3141
0
  if (!to_encoding) {
3142
0
    RETURN_THROWS();
3143
0
  }
3144
3145
  /* initialize string */
3146
0
  mbfl_string_init(&string);
3147
0
  mbfl_string_init(&result);
3148
0
  from_encoding = MBSTRG(current_internal_encoding);
3149
0
  string.encoding = from_encoding;
3150
3151
  /* pre-conversion encoding */
3152
0
  if (from_enc_ht) {
3153
0
    if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
3154
0
      RETURN_THROWS();
3155
0
    }
3156
0
  } else {
3157
0
    if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
3158
0
      RETURN_THROWS();
3159
0
    }
3160
0
  }
3161
3162
0
  if (elistsz == 0) {
3163
0
    efree(ZEND_VOIDP(elist));
3164
0
    zend_argument_value_error(2, "must specify at least one encoding");
3165
0
    RETURN_THROWS();
3166
0
  }
3167
3168
0
  if (elistsz == 1) {
3169
0
    from_encoding = *elist;
3170
0
  } else {
3171
    /* auto detect */
3172
0
    from_encoding = NULL;
3173
0
    identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3174
0
    if (identd != NULL) {
3175
0
      n = 0;
3176
0
      while (n < argc) {
3177
0
        if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
3178
0
          break;
3179
0
        }
3180
0
        n++;
3181
0
      }
3182
0
      from_encoding = mbfl_encoding_detector_judge(identd);
3183
0
      mbfl_encoding_detector_delete(identd);
3184
0
      if (recursion_error) {
3185
0
        efree(ZEND_VOIDP(elist));
3186
0
        php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3187
0
        RETURN_FALSE;
3188
0
      }
3189
0
    }
3190
3191
0
    if (!from_encoding) {
3192
0
      php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3193
0
      efree(ZEND_VOIDP(elist));
3194
0
      RETURN_FALSE;
3195
0
    }
3196
0
  }
3197
3198
0
  efree(ZEND_VOIDP(elist));
3199
3200
0
  convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3201
  /* If this assertion fails this means some memory allocation failure which is a bug */
3202
0
  ZEND_ASSERT(convd != NULL);
3203
3204
0
  mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3205
0
  mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3206
3207
  /* convert */
3208
0
  n = 0;
3209
0
  while (n < argc) {
3210
0
    zval *zv = &args[n];
3211
3212
0
    ZVAL_DEREF(zv);
3213
0
    recursion_error = mb_recursive_convert_variable(convd, zv);
3214
0
    if (recursion_error) {
3215
0
      break;
3216
0
    }
3217
0
    n++;
3218
0
  }
3219
3220
0
  MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3221
0
  mbfl_buffer_converter_delete(convd);
3222
3223
0
  if (recursion_error) {
3224
0
    php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3225
0
    RETURN_FALSE;
3226
0
  }
3227
3228
0
  RETURN_STRING(from_encoding->name);
3229
0
}
3230
/* }}} */
3231
3232
/* {{{ HTML numeric entity */
3233
/* {{{ static void php_mb_numericentity_exec() */
3234
static void
3235
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3236
0
{
3237
0
  char *str = NULL;
3238
0
  size_t str_len;
3239
0
  zend_string *encoding = NULL;
3240
0
  zval *hash_entry;
3241
0
  HashTable *target_hash;
3242
0
  int i, *convmap, *mapelm, mapsize=0;
3243
0
  zend_bool is_hex = 0;
3244
0
  mbfl_string string, result, *ret;
3245
3246
0
  if (type == 0) {
3247
0
    if (zend_parse_parameters(ZEND_NUM_ARGS(), "sh|S!b", &str, &str_len, &target_hash, &encoding, &is_hex) == FAILURE) {
3248
0
      RETURN_THROWS();
3249
0
    }
3250
0
  } else {
3251
0
    if (zend_parse_parameters(ZEND_NUM_ARGS(), "sh|S!", &str, &str_len, &target_hash, &encoding) == FAILURE) {
3252
0
      RETURN_THROWS();
3253
0
    }
3254
0
  }
3255
3256
0
  string.val = (unsigned char *)str;
3257
0
  string.len = str_len;
3258
0
  string.encoding = php_mb_get_encoding(encoding, 3);
3259
0
  if (!string.encoding) {
3260
0
    RETURN_THROWS();
3261
0
  }
3262
3263
0
  if (type == 0 && is_hex) {
3264
0
    type = 2; /* output in hex format */
3265
0
  }
3266
3267
  /* conversion map */
3268
0
  i = zend_hash_num_elements(target_hash);
3269
0
  if (i % 4 != 0) {
3270
0
    zend_argument_value_error(2, "must have a multiple of 4 elements");
3271
0
    RETURN_THROWS();
3272
0
  }
3273
0
  convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3274
0
  mapelm = convmap;
3275
0
  mapsize = 0;
3276
0
  ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
3277
0
    *mapelm++ = zval_get_long(hash_entry);
3278
0
    mapsize++;
3279
0
  } ZEND_HASH_FOREACH_END();
3280
0
  mapsize /= 4;
3281
3282
0
  ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3283
0
  ZEND_ASSERT(ret != NULL);
3284
  // TODO: avoid reallocation ???
3285
0
  RETVAL_STRINGL((char *)ret->val, ret->len);
3286
0
  efree(ret->val);
3287
0
  efree((void *)convmap);
3288
0
}
3289
/* }}} */
3290
3291
/* {{{ Converts specified characters to HTML numeric entities */
3292
PHP_FUNCTION(mb_encode_numericentity)
3293
0
{
3294
0
  php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
3295
0
}
3296
/* }}} */
3297
3298
/* {{{ Converts HTML numeric entities to character code */
3299
PHP_FUNCTION(mb_decode_numericentity)
3300
0
{
3301
0
  php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
3302
0
}
3303
/* }}} */
3304
/* }}} */
3305
3306
/* {{{ Sends an email message with MIME scheme */
3307
3308
/*
 * If str[pos] starts a folded-header separator (CR LF followed by a space or
 * tab), advance pos by 2, keep advancing while the character after pos is a
 * space or tab, and then `continue` the enclosing loop.
 *
 * Because the expansion contains a bare `continue`, this macro may only be
 * used inside a loop body and cannot be wrapped in do { } while (0).
 * `str` and `pos` are evaluated several times; `pos` must be a modifiable
 * lvalue.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)                   \
3309
0
  if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
3310
0
    pos += 2;                     \
3311
0
    while (str[pos + 1] == ' ' || str[pos + 1] == '\t') {             \
3312
0
      pos++;                      \
3313
0
    }                        \
3314
0
    continue;                     \
3315
0
  }
3316
3317
/*
 * Overwrites every NUL byte found in the `len` bytes starting at `str` with
 * a space.
 *
 * The expansion assigns to two variables named `pp` and `ee`, which are not
 * declared by the macro and must already exist in the calling scope.
 * NOTE(review): the last line of the definition ends with a line
 * continuation, so the macro body extends onto whatever line follows it.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)      \
3318
0
  pp = str;         \
3319
0
  ee = pp + len;          \
3320
0
  while ((pp = memchr(pp, '\0', (ee - pp)))) { \
3321
0
    *pp = ' ';        \
3322
0
  }            \
3323
3324
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3325
0
{
3326
0
  const char *ps;
3327
0
  size_t icnt;
3328
0
  int state = 0;
3329
0
  int crlf_state = -1;
3330
0
  char *token = NULL;
3331
0
  size_t token_pos = 0;
3332
0
  zend_string *fld_name, *fld_val;
3333
3334
0
  ps = str;
3335
0
  icnt = str_len;
3336
0
  fld_name = fld_val = NULL;
3337
3338
  /*
3339
   *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3340
   *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3341
   *      state  0            1           2          3
3342
   *
3343
   *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
3344
   *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3345
   * crlf_state -1                       0                     1 -1
3346
   *
3347
   */
3348
3349
0
  while (icnt > 0) {
3350
0
    switch (*ps) {
3351
0
      case ':':
3352
0
        if (crlf_state == 1) {
3353
0
          token_pos++;
3354
0
        }
3355
3356
0
        if (state == 0 || state == 1) {
3357
0
          if(token && token_pos > 0) {
3358
0
            fld_name = zend_string_init(token, token_pos, 0);
3359
0
          }
3360
0
          state = 2;
3361
0
        } else {
3362
0
          token_pos++;
3363
0
        }
3364
3365
0
        crlf_state = 0;
3366
0
        break;
3367
3368
0
      case '\n':
3369
0
        if (crlf_state == -1) {
3370
0
          goto out;
3371
0
        }
3372
0
        crlf_state = -1;
3373
0
        break;
3374
3375
0
      case '\r':
3376
0
        if (crlf_state == 1) {
3377
0
          token_pos++;
3378
0
        } else {
3379
0
          crlf_state = 1;
3380
0
        }
3381
0
        break;
3382
3383
0
      case ' ': case '\t':
3384
0
        if (crlf_state == -1) {
3385
0
          if (state == 3) {
3386
            /* continuing from the previous line */
3387
0
            state = 4;
3388
0
          } else {
3389
            /* simply skipping this new line */
3390
0
            state = 5;
3391
0
          }
3392
0
        } else {
3393
0
          if (crlf_state == 1) {
3394
0
            token_pos++;
3395
0
          }
3396
0
          if (state == 1 || state == 3) {
3397
0
            token_pos++;
3398
0
          }
3399
0
        }
3400
0
        crlf_state = 0;
3401
0
        break;
3402
3403
0
      default:
3404
0
        switch (state) {
3405
0
          case 0:
3406
0
            token = (char*)ps;
3407
0
            token_pos = 0;
3408
0
            state = 1;
3409
0
            break;
3410
3411
0
          case 2:
3412
0
            if (crlf_state != -1) {
3413
0
              token = (char*)ps;
3414
0
              token_pos = 0;
3415
3416
0
              state = 3;
3417
0
              break;
3418
0
            }
3419
            /* break is missing intentionally */
3420
3421
0
          case 3:
3422
0
            if (crlf_state == -1) {
3423
0
              if(token && token_pos > 0) {
3424
0
                fld_val = zend_string_init(token, token_pos, 0);
3425
0
              }
3426
3427
0
              if (fld_name != NULL && fld_val != NULL) {
3428
0
                zval val;
3429
                /* FIXME: some locale free implementation is
3430
                 * really required here,,, */
3431
0
                php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3432
0
                ZVAL_STR(&val, fld_val);
3433
3434
0
                zend_hash_update(ht, fld_name, &val);
3435
3436
0
                zend_string_release_ex(fld_name, 0);
3437
0
              }
3438
3439
0
              fld_name = fld_val = NULL;
3440
0
              token = (char*)ps;
3441
0
              token_pos = 0;
3442
3443
0
              state = 1;
3444
0
            }
3445
0
            break;
3446
3447
0
          case 4:
3448
0
            token_pos++;
3449
0
            state = 3;
3450
0
            break;
3451
0
        }
3452
3453
0
        if (crlf_state == 1) {
3454
0
          token_pos++;
3455
0
        }
3456
3457
0
        token_pos++;
3458
3459
0
        crlf_state = 0;
3460
0
        break;
3461
0
    }
3462
0
    ps++, icnt--;
3463
0
  }
3464
0
out:
3465
0
  if (state == 2) {
3466
0
    token = "";
3467
0
    token_pos = 0;
3468
3469
0
    state = 3;
3470
0
  }
3471
0
  if (state == 3) {
3472
0
    if(token && token_pos > 0) {
3473
0
      fld_val = zend_string_init(token, token_pos, 0);
3474
0
    }
3475
0
    if (fld_name != NULL && fld_val != NULL) {
3476
0
      zval val;
3477
      /* FIXME: some locale free implementation is
3478
       * really required here,,, */
3479
0
      php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3480
0
      ZVAL_STR(&val, fld_val);
3481
3482
0
      zend_hash_update(ht, fld_name, &val);
3483
3484
0
      zend_string_release_ex(fld_name, 0);
3485
0
    }
3486
0
  }
3487
0
  return state;
3488
0
}
3489
3490
PHP_FUNCTION(mb_send_mail)
3491
0
{
3492
0
  char *to;
3493
0
  size_t to_len;
3494
0
  char *message;
3495
0
  size_t message_len;
3496
0
  char *subject;
3497
0
  size_t subject_len;
3498
0
  zval *headers = NULL;
3499
0
  zend_string *extra_cmd = NULL;
3500
0
  zend_string *str_headers = NULL, *tmp_headers;
3501
0
  size_t n, i;
3502
0
  char *to_r = NULL;
3503
0
  char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3504
0
  struct {
3505
0
    int cnt_type:1;
3506
0
    int cnt_trans_enc:1;
3507
0
  } suppressed_hdrs = { 0, 0 };
3508
3509
0
  char *message_buf = NULL, *subject_buf = NULL, *p;
3510
0
  mbfl_string orig_str, conv_str;
3511
0
  mbfl_string *pstr;  /* pointer to mbfl string for return value */
3512
0
  enum mbfl_no_encoding;
3513
0
  const mbfl_encoding *tran_cs, /* transfar text charset */
3514
0
            *head_enc,  /* header transfar encoding */
3515
0
            *body_enc;  /* body transfar encoding */
3516
0
  mbfl_memory_device device;  /* automatic allocateable buffer for additional header */
3517
0
  const mbfl_language *lang;
3518
0
  int err = 0;
3519
0
  HashTable ht_headers;
3520
0
  zval *s;
3521
0
  extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3522
0
  char *pp, *ee;
3523
3524
  /* initialize */
3525
0
  mbfl_memory_device_init(&device, 0, 0);
3526
0
  mbfl_string_init(&orig_str);
3527
0
  mbfl_string_init(&conv_str);
3528
3529
  /* character-set, transfer-encoding */
3530
0
  tran_cs = &mbfl_encoding_utf8;
3531
0
  head_enc = &mbfl_encoding_base64;
3532
0
  body_enc = &mbfl_encoding_base64;
3533
0
  lang = mbfl_no2language(MBSTRG(language));
3534
0
  if (lang != NULL) {
3535
0
    tran_cs = mbfl_no2encoding(lang->mail_charset);
3536
0
    head_enc = mbfl_no2encoding(lang->mail_header_encoding);
3537
0
    body_enc = mbfl_no2encoding(lang->mail_body_encoding);
3538
0
  }
3539
3540
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|z!S!", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
3541
0
    RETURN_THROWS();
3542
0
  }
3543
3544
  /* ASCIIZ check */
3545
0
  MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
3546
0
  MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
3547
0
  MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
3548
0
  if (headers) {
3549
0
    switch(Z_TYPE_P(headers)) {
3550
0
      case IS_STRING:
3551
0
        tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
3552
0
        MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
3553
0
        str_headers = php_trim(tmp_headers, NULL, 0, 2);
3554
0
        zend_string_release_ex(tmp_headers, 0);
3555
0
        break;
3556
0
      case IS_ARRAY:
3557
0
        str_headers = php_mail_build_headers(Z_ARRVAL_P(headers));
3558
0
        break;
3559
0
      default:
3560
0
        zend_argument_value_error(4, "must be of type string|array|null, %s given", zend_zval_type_name(headers));
3561
0
        RETURN_THROWS();
3562
0
    }
3563
0
  }
3564
0
  if (extra_cmd) {
3565
0
    MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
3566
0
  }
3567
3568
0
  zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
3569
3570
0
  if (str_headers != NULL) {
3571
0
    _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
3572
0
  }
3573
3574
0
  if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
3575
0
    char *tmp;
3576
0
    char *param_name;
3577
0
    char *charset = NULL;
3578
3579
0
    ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3580
0
    p = strchr(Z_STRVAL_P(s), ';');
3581
3582
0
    if (p != NULL) {
3583
      /* skipping the padded spaces */
3584
0
      do {
3585
0
        ++p;
3586
0
      } while (*p == ' ' || *p == '\t');
3587
3588
0
      if (*p != '\0') {
3589
0
        if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3590
0
          if (strcasecmp(param_name, "charset") == 0) {
3591
0
            const mbfl_encoding *_tran_cs = tran_cs;
3592
3593
0
            charset = php_strtok_r(NULL, "= \"", &tmp);
3594
0
            if (charset != NULL) {
3595
0
              _tran_cs = mbfl_name2encoding(charset);
3596
0
            }
3597
3598
0
            if (!_tran_cs) {
3599
0
              php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3600
0
              _tran_cs = &mbfl_encoding_ascii;
3601
0
            }
3602
0
            tran_cs = _tran_cs;
3603
0
          }
3604
0
        }
3605
0
      }
3606
0
    }
3607
0
    suppressed_hdrs.cnt_type = 1;
3608
0
  }
3609
3610
0
  if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
3611
0
    const mbfl_encoding *_body_enc;
3612
3613
0
    ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3614
0
    _body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
3615
0
    switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
3616
0
      case mbfl_no_encoding_base64:
3617
0
      case mbfl_no_encoding_7bit:
3618
0
      case mbfl_no_encoding_8bit:
3619
0
        body_enc = _body_enc;
3620
0
        break;
3621
3622
0
      default:
3623
0
        php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
3624
0
        body_enc =  &mbfl_encoding_8bit;
3625
0
        break;
3626
0
    }
3627
0
    suppressed_hdrs.cnt_trans_enc = 1;
3628
0
  }
3629
3630
  /* To: */
3631
0
  if (to_len > 0) {
3632
0
    to_r = estrndup(to, to_len);
3633
0
    for (; to_len; to_len--) {
3634
0
      if (!isspace((unsigned char) to_r[to_len - 1])) {
3635
0
        break;
3636
0
      }
3637
0
      to_r[to_len - 1] = '\0';
3638
0
    }
3639
0
    for (i = 0; to_r[i]; i++) {
3640
0
    if (iscntrl((unsigned char) to_r[i])) {
3641
      /* According to RFC 822, section 3.1.1 long headers may be separated into
3642
       * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3643
       * To prevent these separators from being replaced with a space, we use the
3644
       * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3645
       */
3646
0
      SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3647
0
      to_r[i] = ' ';
3648
0
    }
3649
0
    }
3650
0
  } else {
3651
0
    to_r = to;
3652
0
  }
3653
3654
  /* Subject: */
3655
0
  orig_str.val = (unsigned char *)subject;
3656
0
  orig_str.len = subject_len;
3657
0
  orig_str.encoding = MBSTRG(current_internal_encoding);
3658
0
  if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3659
0
      || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3660
0
    orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3661
0
  }
3662
0
  pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
3663
0
  if (pstr != NULL) {
3664
0
    subject_buf = subject = (char *)pstr->val;
3665
0
  }
3666
3667
  /* message body */
3668
0
  orig_str.val = (unsigned char *)message;
3669
0
  orig_str.len = message_len;
3670
0
  orig_str.encoding = MBSTRG(current_internal_encoding);
3671
3672
0
  if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3673
0
      || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3674
0
    orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3675
0
  }
3676
3677
0
  pstr = NULL;
3678
0
  {
3679
0
    mbfl_string tmpstr;
3680
3681
0
    if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
3682
0
      tmpstr.encoding = &mbfl_encoding_8bit;
3683
0
      pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
3684
0
      efree(tmpstr.val);
3685
0
    }
3686
0
  }
3687
0
  if (pstr != NULL) {
3688
0
    message_buf = message = (char *)pstr->val;
3689
0
  }
3690
3691
  /* other headers */
3692
0
#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
3693
0
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
3694
0
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
3695
0
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
3696
0
  if (str_headers != NULL) {
3697
0
    p = ZSTR_VAL(str_headers);
3698
0
    n = ZSTR_LEN(str_headers);
3699
0
    mbfl_memory_device_strncat(&device, p, n);
3700
0
    if (n > 0 && p[n - 1] != '\n') {
3701
0
      mbfl_memory_device_strncat(&device, "\n", 1);
3702
0
    }
3703
0
    zend_string_release_ex(str_headers, 0);
3704
0
  }
3705
3706
0
  if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
3707
0
    mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
3708
0
    mbfl_memory_device_strncat(&device, "\n", 1);
3709
0
  }
3710
3711
0
  if (!suppressed_hdrs.cnt_type) {
3712
0
    mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
3713
3714
0
    p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
3715
0
    if (p != NULL) {
3716
0
      mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
3717
0
      mbfl_memory_device_strcat(&device, p);
3718
0
    }
3719
0
    mbfl_memory_device_strncat(&device, "\n", 1);
3720
0
  }
3721
0
  if (!suppressed_hdrs.cnt_trans_enc) {
3722
0
    mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
3723
0
    p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
3724
0
    if (p == NULL) {
3725
0
      p = "7bit";
3726
0
    }
3727
0
    mbfl_memory_device_strcat(&device, p);
3728
0
    mbfl_memory_device_strncat(&device, "\n", 1);
3729
0
  }
3730
3731
0
  mbfl_memory_device_unput(&device);
3732
0
  mbfl_memory_device_output('\0', &device);
3733
0
  str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
3734
3735
0
  if (force_extra_parameters) {
3736
0
    extra_cmd = php_escape_shell_cmd(force_extra_parameters);
3737
0
  } else if (extra_cmd) {
3738
0
    extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
3739
0
  }
3740
3741
0
  if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
3742
0
    RETVAL_TRUE;
3743
0
  } else {
3744
0
    RETVAL_FALSE;
3745
0
  }
3746
3747
0
  if (extra_cmd) {
3748
0
    zend_string_release_ex(extra_cmd, 0);
3749
0
  }
3750
3751
0
  if (to_r != to) {
3752
0
    efree(to_r);
3753
0
  }
3754
0
  if (subject_buf) {
3755
0
    efree((void *)subject_buf);
3756
0
  }
3757
0
  if (message_buf) {
3758
0
    efree((void *)message_buf);
3759
0
  }
3760
0
  mbfl_memory_device_clear(&device);
3761
0
  zend_hash_destroy(&ht_headers);
3762
0
  if (str_headers) {
3763
0
    zend_string_release_ex(str_headers, 0);
3764
0
  }
3765
0
}
3766
3767
#undef SKIP_LONG_HEADER_SEP_MBSTRING
3768
#undef MAIL_ASCIIZ_CHECK_MBSTRING
3769
#undef PHP_MBSTR_MAIL_MIME_HEADER1
3770
#undef PHP_MBSTR_MAIL_MIME_HEADER2
3771
#undef PHP_MBSTR_MAIL_MIME_HEADER3
3772
#undef PHP_MBSTR_MAIL_MIME_HEADER4
3773
/* }}} */
3774
3775
/* {{{ Returns the current settings of mbstring */
3776
PHP_FUNCTION(mb_get_info)
3777
0
{
3778
0
  char *typ = NULL;
3779
0
  size_t typ_len;
3780
0
  size_t n;
3781
0
  char *name;
3782
0
  zval row;
3783
0
  const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3784
0
  const mbfl_encoding **entry;
3785
3786
0
  if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) {
3787
0
    RETURN_THROWS();
3788
0
  }
3789
3790
0
  if (!typ || !strcasecmp("all", typ)) {
3791
0
    array_init(return_value);
3792
0
    if (MBSTRG(current_internal_encoding)) {
3793
0
      add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
3794
0
    }
3795
0
    if (MBSTRG(http_input_identify)) {
3796
0
      add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
3797
0
    }
3798
0
    if (MBSTRG(current_http_output_encoding)) {
3799
0
      add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
3800
0
    }
3801
0
    if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3802
0
      add_assoc_string(return_value, "http_output_conv_mimetypes", name);
3803
0
    }
3804
0
    if (lang != NULL) {
3805
0
      if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3806
0
        add_assoc_string(return_value, "mail_charset", name);
3807
0
      }
3808
0
      if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3809
0
        add_assoc_string(return_value, "mail_header_encoding", name);
3810
0
      }
3811
0
      if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3812
0
        add_assoc_string(return_value, "mail_body_encoding", name);
3813
0
      }
3814
0
    }
3815
0
    add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
3816
0
    if (MBSTRG(encoding_translation)) {
3817
0
      add_assoc_string(return_value, "encoding_translation", "On");
3818
0
    } else {
3819
0
      add_assoc_string(return_value, "encoding_translation", "Off");
3820
0
    }
3821
0
    if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3822
0
      add_assoc_string(return_value, "language", name);
3823
0
    }
3824
0
    n = MBSTRG(current_detect_order_list_size);
3825
0
    entry = MBSTRG(current_detect_order_list);
3826
0
    if (n > 0) {
3827
0
      size_t i;
3828
0
      array_init(&row);
3829
0
      for (i = 0; i < n; i++) {
3830
0
        add_next_index_string(&row, (*entry)->name);
3831
0
        entry++;
3832
0
      }
3833
0
      add_assoc_zval(return_value, "detect_order", &row);
3834
0
    }
3835
0
    if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3836
0
      add_assoc_string(return_value, "substitute_character", "none");
3837
0
    } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3838
0
      add_assoc_string(return_value, "substitute_character", "long");
3839
0
    } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3840
0
      add_assoc_string(return_value, "substitute_character", "entity");
3841
0
    } else {
3842
0
      add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
3843
0
    }
3844
0
    if (MBSTRG(strict_detection)) {
3845
0
      add_assoc_string(return_value, "strict_detection", "On");
3846
0
    } else {
3847
0
      add_assoc_string(return_value, "strict_detection", "Off");
3848
0
    }
3849
0
  } else if (!strcasecmp("internal_encoding", typ)) {
3850
0
    if (MBSTRG(current_internal_encoding)) {
3851
0
      RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
3852
0
    }
3853
0
  } else if (!strcasecmp("http_input", typ)) {
3854
0
    if (MBSTRG(http_input_identify)) {
3855
0
      RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
3856
0
    }
3857
0
  } else if (!strcasecmp("http_output", typ)) {
3858
0
    if (MBSTRG(current_http_output_encoding)) {
3859
0
      RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
3860
0
    }
3861
0
  } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
3862
0
    if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3863
0
      RETVAL_STRING(name);
3864
0
    }
3865
0
  } else if (!strcasecmp("mail_charset", typ)) {
3866
0
    if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3867
0
      RETVAL_STRING(name);
3868
0
    }
3869
0
  } else if (!strcasecmp("mail_header_encoding", typ)) {
3870
0
    if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3871
0
      RETVAL_STRING(name);
3872
0
    }
3873
0
  } else if (!strcasecmp("mail_body_encoding", typ)) {
3874
0
    if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3875
0
      RETVAL_STRING(name);
3876
0
    }
3877
0
  } else if (!strcasecmp("illegal_chars", typ)) {
3878
0
    RETVAL_LONG(MBSTRG(illegalchars));
3879
0
  } else if (!strcasecmp("encoding_translation", typ)) {
3880
0
    if (MBSTRG(encoding_translation)) {
3881
0
      RETVAL_STRING("On");
3882
0
    } else {
3883
0
      RETVAL_STRING("Off");
3884
0
    }
3885
0
  } else if (!strcasecmp("language", typ)) {
3886
0
    if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3887
0
      RETVAL_STRING(name);
3888
0
    }
3889
0
  } else if (!strcasecmp("detect_order", typ)) {
3890
0
    n = MBSTRG(current_detect_order_list_size);
3891
0
    entry = MBSTRG(current_detect_order_list);
3892
0
    if (n > 0) {
3893
0
      size_t i;
3894
0
      array_init(return_value);
3895
0
      for (i = 0; i < n; i++) {
3896
0
        add_next_index_string(return_value, (*entry)->name);
3897
0
        entry++;
3898
0
      }
3899
0
    }
3900
0
  } else if (!strcasecmp("substitute_character", typ)) {
3901
0
    if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3902
0
      RETVAL_STRING("none");
3903
0
    } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3904
0
      RETVAL_STRING("long");
3905
0
    } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3906
0
      RETVAL_STRING("entity");
3907
0
    } else {
3908
0
      RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
3909
0
    }
3910
0
  } else if (!strcasecmp("strict_detection", typ)) {
3911
0
    if (MBSTRG(strict_detection)) {
3912
0
      RETVAL_STRING("On");
3913
0
    } else {
3914
0
      RETVAL_STRING("Off");
3915
0
    }
3916
0
  } else {
3917
    // TODO Convert to ValueError
3918
0
    RETURN_FALSE;
3919
0
  }
3920
0
}
3921
/* }}} */
3922
3923
3924
static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
3925
0
{
3926
0
  mbfl_buffer_converter *convd;
3927
3928
0
  convd = mbfl_buffer_converter_new(encoding, encoding, 0);
3929
0
  if (convd == NULL) {
3930
0
    return NULL;
3931
0
  }
3932
0
  mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
3933
0
  mbfl_buffer_converter_illegal_substchar(convd, 0);
3934
0
  return convd;
3935
0
}
3936
3937
3938
0
/*
 * Feeds `length` bytes of `input` through `convd` and reports whether the
 * data survived unchanged.
 *
 * Returns 1 when the converter produced a result, counted no illegal
 * characters, and the output is byte-for-byte identical to the input;
 * returns 0 in every other case.
 */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
  mbfl_string src, dst;
  mbfl_string *converted;
  size_t bad_chars;
  int unchanged;

  /* describe the input as an mbfl string in the requested encoding */
  mbfl_string_init_set(&src, encoding);
  mbfl_string_init(&dst);
  src.val = (unsigned char *) input;
  src.len = length;

  converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
  bad_chars = mbfl_buffer_illegalchars(convd);

  if (converted == NULL) {
    return 0;
  }

  unchanged = bad_chars == 0
    && src.len == dst.len
    && memcmp(src.val, dst.val, src.len) == 0;

  mbfl_string_clear(&dst);
  return unchanged;
}
3961
3962
MBSTRING_API int php_mb_check_encoding(
3963
    const char *input, size_t length, const mbfl_encoding *encoding)
3964
0
{
3965
0
  mbfl_buffer_converter *convd;
3966
3967
0
  convd = php_mb_init_convd(encoding);
3968
  /* If this assertion fails this means some memory allocation failure which is a bug */
3969
0
  ZEND_ASSERT(convd != NULL);
3970
3971
0
  if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
3972
0
    mbfl_buffer_converter_delete(convd);
3973
0
    return 1;
3974
0
  }
3975
0
  mbfl_buffer_converter_delete(convd);
3976
0
  return 0;
3977
0
}
3978
3979
/*
 * Checks every string key and every string value of `vars` (descending into
 * nested arrays) against `encoding`.
 *
 * Integer, float, null and boolean values are accepted as they are; any
 * other non-string, non-array value makes the array invalid. An array that
 * is already being visited emits a warning and counts as invalid.
 *
 * Returns 1 when everything is valid, 0 otherwise. The scan stops at the
 * first invalid element.
 */
static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
{
  mbfl_buffer_converter *convd;
  zend_string *key;
  zval *entry;
  int valid = 1;

  /* Guard first, so that no converter is allocated just to be thrown away. */
  if (GC_IS_RECURSIVE(vars)) {
    php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
    return 0;
  }

  convd = php_mb_init_convd(encoding);
  /* If this assertion fails this means some memory allocation failure which is a bug */
  ZEND_ASSERT(convd != NULL);

  GC_TRY_PROTECT_RECURSION(vars);
  ZEND_HASH_FOREACH_STR_KEY_VAL(vars, key, entry) {
    ZVAL_DEREF(entry);
    if (key && !php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
      valid = 0;
      break;
    }
    switch (Z_TYPE_P(entry)) {
      case IS_STRING:
        valid = php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding) ? 1 : 0;
        break;
      case IS_ARRAY:
        valid = php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding) ? 1 : 0;
        break;
      case IS_LONG:
      case IS_DOUBLE:
      case IS_NULL:
      case IS_TRUE:
      case IS_FALSE:
        break;
      default:
        /* Other types are error. */
        valid = 0;
        break;
    }
    /* a `break` inside the switch only leaves the switch, so leave the
     * loop explicitly once the result is known */
    if (!valid) {
      break;
    }
  } ZEND_HASH_FOREACH_END();
  GC_TRY_UNPROTECT_RECURSION(vars);
  mbfl_buffer_converter_delete(convd);
  return valid;
}
4036
4037
4038
/* {{{ Check if the string is valid for the specified encoding */
4039
PHP_FUNCTION(mb_check_encoding)
4040
0
{
4041
0
  zend_string *input_str = NULL, *enc = NULL;
4042
0
  HashTable *input_ht = NULL;
4043
0
  const mbfl_encoding *encoding;
4044
4045
0
  ZEND_PARSE_PARAMETERS_START(0, 2)
4046
0
    Z_PARAM_OPTIONAL
4047
0
    Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(input_str, input_ht)
4048
0
    Z_PARAM_STR_OR_NULL(enc)
4049
0
  ZEND_PARSE_PARAMETERS_END();
4050
4051
0
  encoding = php_mb_get_encoding(enc, 2);
4052
0
  if (!encoding) {
4053
0
    RETURN_THROWS();
4054
0
  }
4055
4056
0
  if (input_ht) {
4057
0
    if (!php_mb_check_encoding_recursive(input_ht, encoding)) {
4058
0
      RETURN_FALSE;
4059
0
    }
4060
0
    RETURN_TRUE;
4061
0
  } else if (input_str) {
4062
0
    if (!php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding)) {
4063
0
      RETURN_FALSE;
4064
0
    }
4065
0
    RETURN_TRUE;
4066
0
  } else {
4067
    /* FIXME: Actually check all inputs, except $_FILES file content. */
4068
0
    if (MBSTRG(illegalchars) == 0) {
4069
0
      RETURN_TRUE;
4070
0
    }
4071
0
    RETURN_FALSE;
4072
0
  }
4073
0
}
4074
/* }}} */
4075
4076
4077
/*
 * Decodes `str` (which must not be empty) from the encoding named by
 * `enc_name` and returns the first resulting code point.
 *
 * Returns -2 when the encoding could not be resolved or is not supported by
 * mb_ord() (a ValueError is raised here for the unsupported case), and -1
 * when decoding produced no character, hit an illegal character, or yielded
 * a value at or above MBFL_WCSGROUP_UCS4MAX.
 * `enc_name_arg_num` is passed through to php_mb_get_encoding().
 */
static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
  const uint32_t enc_name_arg_num)
{
  const mbfl_encoding *enc;
  mbfl_wchar_device dev;
  mbfl_convert_filter *filter;
  zend_long result = -1;

  ZEND_ASSERT(str_len > 0);

  enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
  if (enc == NULL) {
    return -2;
  }

  if (php_mb_is_unsupported_no_encoding(enc->no_encoding)) {
    zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name);
    return -2;
  }

  /* decode the whole input into wide characters */
  mbfl_wchar_device_init(&dev);
  filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
  /* If this assertion fails this means some memory allocation failure which is a bug */
  ZEND_ASSERT(filter != NULL);

  mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len);
  mbfl_convert_filter_flush(filter);

  /* accept the first character only when decoding was clean */
  if (dev.pos >= 1 && !filter->num_illegalchar && dev.buffer[0] < MBFL_WCSGROUP_UCS4MAX) {
    result = dev.buffer[0];
  }

  mbfl_convert_filter_delete(filter);
  mbfl_wchar_device_clear(&dev);
  return result;
}
4121
4122
4123
/* {{{ */
4124
PHP_FUNCTION(mb_ord)
4125
0
{
4126
0
  char *str;
4127
0
  size_t str_len;
4128
0
  zend_string *enc = NULL;
4129
0
  zend_long cp;
4130
4131
0
  ZEND_PARSE_PARAMETERS_START(1, 2)
4132
0
    Z_PARAM_STRING(str, str_len)
4133
0
    Z_PARAM_OPTIONAL
4134
0
    Z_PARAM_STR_OR_NULL(enc)
4135
0
  ZEND_PARSE_PARAMETERS_END();
4136
4137
0
  if (str_len == 0) {
4138
0
    zend_argument_value_error(1, "must not be empty");
4139
0
    RETURN_THROWS();
4140
0
  }
4141
4142
0
  cp = php_mb_ord(str, str_len, enc, 2);
4143
4144
0
  if (0 > cp) {
4145
0
    if (cp == -2) {
4146
0
      RETURN_THROWS();
4147
0
    }
4148
0
    RETURN_FALSE;
4149
0
  }
4150
4151
0
  RETURN_LONG(cp);
4152
0
}
4153
/* }}} */
4154
4155
4156
/* Build a string holding the single code point `cp` in the encoding named by
 * `enc_name` (`enc_name_arg_num` is forwarded to php_mb_get_encoding()).
 *
 * Returns a zend_string on success, or NULL when:
 *  - the encoding lookup fails,
 *  - the encoding is reported as unsupported (a value error is raised),
 *  - `cp` is outside 0..0x10ffff,
 *  - the target is UTF-8 and `cp` lies in 0xd800..0xdfff,
 *  - the conversion from UCS-4BE reports illegal characters.
 */
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
{
  const mbfl_encoding *target;
  enum mbfl_no_encoding target_id;
  zend_string *result;

  target = php_mb_get_encoding(enc_name, enc_name_arg_num);
  if (target == NULL) {
    return NULL;
  }

  target_id = target->no_encoding;
  if (php_mb_is_unsupported_no_encoding(target_id)) {
    zend_value_error("mb_chr() does not support the \"%s\" encoding", target->name);
    return NULL;
  }

  if (cp < 0 || cp > 0x10ffff) {
    return NULL;
  }

  if (php_mb_is_no_encoding_utf8(target_id)) {
    char *out;

    /* 0xd800..0xdfff is rejected for UTF-8 output. */
    if (cp >= 0xd800 && cp <= 0xdfff) {
      return NULL;
    }

    /* Fast path: emit the UTF-8 byte sequence directly. */
    if (cp < 0x80) {
      return ZSTR_CHAR(cp);
    }

    if (cp < 0x800) {
      result = zend_string_alloc(2, 0);
      out = ZSTR_VAL(result);
      out[0] = 0xc0 | (cp >> 6);
      out[1] = 0x80 | (cp & 0x3f);
      out[2] = 0;
    } else if (cp < 0x10000) {
      result = zend_string_alloc(3, 0);
      out = ZSTR_VAL(result);
      out[0] = 0xe0 | (cp >> 12);
      out[1] = 0x80 | ((cp >> 6) & 0x3f);
      out[2] = 0x80 | (cp & 0x3f);
      out[3] = 0;
    } else {
      result = zend_string_alloc(4, 0);
      out = ZSTR_VAL(result);
      out[0] = 0xf0 | (cp >> 18);
      out[1] = 0x80 | ((cp >> 12) & 0x3f);
      out[2] = 0x80 | ((cp >> 6) & 0x3f);
      out[3] = 0x80 | (cp & 0x3f);
      out[4] = 0;
    }

    return result;
  }

  /* Generic path: serialise the code point as 4 big-endian bytes and let the
   * converter translate that UCS-4BE buffer into the target encoding. */
  {
    size_t ucs4be_len = 4;
    char *ucs4be = (char *) emalloc(ucs4be_len + 1);
    char *converted;
    size_t converted_len;
    long saved_illegalchars;

    ucs4be[0] = (cp >> 24) & 0xff;
    ucs4be[1] = (cp >> 16) & 0xff;
    ucs4be[2] = (cp >>  8) & 0xff;
    ucs4be[3] = cp & 0xff;
    ucs4be[4] = 0;

    /* Zero the global illegal-char counter so that any non-zero value after
     * the call is attributable to this conversion; it is restored below. */
    saved_illegalchars = MBSTRG(illegalchars);
    MBSTRG(illegalchars) = 0;

    converted = php_mb_convert_encoding_ex(ucs4be, ucs4be_len, target, &mbfl_encoding_ucs4be, &converted_len);

    if (MBSTRG(illegalchars) == 0) {
      result = zend_string_init(converted, converted_len, 0);
    } else {
      result = NULL;
    }

    efree(converted);
    MBSTRG(illegalchars) = saved_illegalchars;
    efree(ucs4be);

    return result;
  }
}
4238
4239
4240
/* {{{ */
4241
PHP_FUNCTION(mb_chr)
4242
0
{
4243
0
  zend_long cp;
4244
0
  zend_string *enc = NULL;
4245
0
  zend_string* ret;
4246
4247
0
  ZEND_PARSE_PARAMETERS_START(1, 2)
4248
0
    Z_PARAM_LONG(cp)
4249
0
    Z_PARAM_OPTIONAL
4250
0
    Z_PARAM_STR_OR_NULL(enc)
4251
0
  ZEND_PARSE_PARAMETERS_END();
4252
4253
0
  ret = php_mb_chr(cp, enc, 2);
4254
0
  if (ret == NULL) {
4255
0
    RETURN_FALSE;
4256
0
  }
4257
4258
0
  RETURN_STR(ret);
4259
0
}
4260
/* }}} */
4261
4262
/* {{{ */
4263
PHP_FUNCTION(mb_scrub)
4264
0
{
4265
0
  const mbfl_encoding *enc;
4266
0
  char* str;
4267
0
  size_t str_len;
4268
0
  zend_string *enc_name = NULL;
4269
0
  char *ret;
4270
0
  size_t ret_len;
4271
4272
0
  ZEND_PARSE_PARAMETERS_START(1, 2)
4273
0
    Z_PARAM_STRING(str, str_len)
4274
0
    Z_PARAM_OPTIONAL
4275
0
    Z_PARAM_STR_OR_NULL(enc_name)
4276
0
  ZEND_PARSE_PARAMETERS_END();
4277
4278
0
  enc = php_mb_get_encoding(enc_name, 2);
4279
0
  if (!enc) {
4280
0
    RETURN_THROWS();
4281
0
  }
4282
4283
0
  ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
4284
4285
0
  RETVAL_STRINGL(ret, ret_len);
4286
0
  efree(ret);
4287
0
}
4288
/* }}} */
4289
4290
4291
/* {{{ php_mb_populate_current_detect_order_list */
4292
static void php_mb_populate_current_detect_order_list(void)
4293
721k
{
4294
721k
  const mbfl_encoding **entry = 0;
4295
721k
  size_t nentries;
4296
4297
721k
  if (MBSTRG(current_detect_order_list)) {
4298
0
    return;
4299
0
  }
4300
4301
721k
  if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4302
0
    nentries = MBSTRG(detect_order_list_size);
4303
0
    entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4304
0
    memcpy(ZEND_VOIDP(entry), MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4305
721k
  } else {
4306
721k
    const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4307
721k
    size_t i;
4308
721k
    nentries = MBSTRG(default_detect_order_list_size);
4309
721k
    entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4310
2.16M
    for (i = 0; i < nentries; i++) {
4311
1.44M
      entry[i] = mbfl_no2encoding(src[i]);
4312
1.44M
    }
4313
721k
  }
4314
721k
  MBSTRG(current_detect_order_list) = entry;
4315
721k
  MBSTRG(current_detect_order_list_size) = nentries;
4316
721k
}
4317
/* }}} */
4318
4319
/* {{{ static int php_mb_encoding_translation() */
4320
static int php_mb_encoding_translation(void)
4321
0
{
4322
0
  return MBSTRG(encoding_translation);
4323
0
}
4324
/* }}} */
4325
4326
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
4327
/* Return the byte width of the character starting at `s` under encoding `enc`.
 *
 * - MBFL_ENCTYPE_MBCS encodings: looked up in enc->mblen_table by the first
 *   byte of `s`, provided both the table and `s` are non-NULL.
 * - WCS2BE / WCS2LE encodings: 2.
 * - WCS4BE / WCS4LE encodings: 4.
 * - Everything else (including enc == NULL): 1.
 */
MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
{
  if (enc == NULL) {
    return 1;
  }

  if (enc->flag & MBFL_ENCTYPE_MBCS) {
    /* The MBCS flag takes precedence over the fixed-width flags below. */
    if (enc->mblen_table != NULL && s != NULL) {
      return enc->mblen_table[*(const unsigned char *)s];
    }
    return 1;
  }

  if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
    return 2;
  }

  if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
    return 4;
  }

  return 1;
}
4342
/* }}} */
4343
4344
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
4345
/* Same as php_mb_mbchar_bytes_ex(), using MBSTRG(internal_encoding) as the encoding. */
MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
{
  const mbfl_encoding *internal = MBSTRG(internal_encoding);

  return php_mb_mbchar_bytes_ex(s, internal);
}
4349
/* }}} */
4350
4351
/* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
4352
/* Find the last occurrence of the byte `c` in `s`, looking only at bytes that
 * begin a character according to php_mb_mbchar_bytes_ex(…, enc).
 *
 * s       Buffer to scan.
 * c       Byte to look for (only the low 8 bits are compared).
 * nbytes  Number of bytes to scan, or (size_t)-1 to scan up to the first NUL.
 * enc     Encoding used to compute each character's byte width.
 *
 * Returns a pointer to the last match, or NULL when there is no match, when a
 * character is reported to be zero bytes wide, or (bounded mode only) when the
 * last character would run past `nbytes`.
 */
MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
{
  const unsigned char wanted = (unsigned char)c;
  const char *p = s;
  char *last = NULL;

  if (nbytes == (size_t)-1) {
    /* NUL-terminated mode: `nb` counts the remaining bytes of the current
     * character, so the comparison runs only on a character's first byte. */
    size_t nb = 0;

    while (*p != '\0') {
      if (nb == 0) {
        if ((unsigned char)*p == wanted) {
          last = (char *)p;
        }
        nb = php_mb_mbchar_bytes_ex(p, enc);
        if (nb == 0) {
          return NULL; /* something is going wrong! */
        }
      }
      --nb;
      ++p;
    }
  } else {
    /* Bounded mode: hop from character start to character start. */
    size_t bcnt = nbytes;

    while (bcnt > 0) {
      size_t nbytes_char;

      if ((unsigned char)*p == wanted) {
        last = (char *)p;
      }
      nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
      if (nbytes_char == 0) {
        /* A zero width would never advance `p` and would spin forever;
         * bail out exactly like the NUL-terminated branch does. */
        return NULL;
      }
      if (bcnt < nbytes_char) {
        /* Truncated trailing character. */
        return NULL;
      }
      p += nbytes_char;
      bcnt -= nbytes_char;
    }
  }

  return last;
}
4390
/* }}} */
4391
4392
/* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
4393
MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
4394
0
{
4395
0
  return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4396
0
}
4397
/* }}} */
4398
4399
/* {{{ MBSTRING_API size_t php_mb_stripos() */
4400
/* Case-insensitive position search: both haystack and needle are case-folded
 * with PHP_UNICODE_CASE_FOLD_SIMPLE and the folded strings are handed to
 * mbfl_strpos() together with `offset` and `mode`.
 *
 * Returns whatever mbfl_strpos() returns, or (size_t)-1 when the folded
 * haystack is NULL or empty, or the folded needle is NULL. Both folded
 * buffers are freed before returning.
 */
MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc)
{
  mbfl_string folded_haystack, folded_needle;
  size_t pos = (size_t) -1;
  size_t folded_len = 0;

  mbfl_string_init(&folded_haystack);
  mbfl_string_init(&folded_needle);
  folded_haystack.encoding = enc;
  folded_needle.encoding = enc;

  /* We're using simple case-folding here, because we'd have to deal with remapping of
   * offsets otherwise. */
  folded_haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &folded_len, enc);
  folded_haystack.len = folded_len;

  /* The needle is folded only when the folded haystack is non-NULL and non-empty. */
  if (folded_haystack.val && folded_haystack.len != 0) {
    folded_needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &folded_len, enc);
    folded_needle.len = folded_len;

    if (folded_needle.val) {
      pos = mbfl_strpos(&folded_haystack, &folded_needle, offset, mode);
    }
  }

  if (folded_haystack.val) {
    efree(folded_haystack.val);
  }

  if (folded_needle.val) {
    efree(folded_needle.val);
  }

  return pos;
}
4446
/* }}} */
4447
4448
static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
4449
0
{
4450
0
  *list = (const zend_encoding **)MBSTRG(http_input_list);
4451
0
  *list_size = MBSTRG(http_input_list_size);
4452
0
}
4453
/* }}} */
4454
4455
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
4456
0
{
4457
0
  MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4458
0
}
4459
/* }}} */
4460
4461
#endif  /* HAVE_MBSTRING */