/src/php-src/ext/mbstring/mbstring.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | http://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> | |
14 | | | Rui Hirokawa <hirokawa@php.net> | |
15 | | | Hironori Sato <satoh@jpnnet.com> | |
16 | | | Shigeru Kanemoto <sgk@happysize.co.jp> | |
17 | | +----------------------------------------------------------------------+ |
18 | | */ |
19 | | |
20 | | /* {{{ includes */ |
21 | | #ifdef HAVE_CONFIG_H |
22 | | #include "config.h" |
23 | | #endif |
24 | | |
25 | | #include "php.h" |
26 | | #include "php_ini.h" |
27 | | #include "php_variables.h" |
28 | | #include "mbstring.h" |
29 | | #include "ext/standard/php_string.h" |
30 | | #include "ext/standard/php_mail.h" |
31 | | #include "ext/standard/exec.h" |
32 | | #include "ext/standard/url.h" |
33 | | #include "main/php_output.h" |
34 | | #include "ext/standard/info.h" |
35 | | |
36 | | #include "libmbfl/mbfl/mbfl_allocators.h" |
37 | | #include "libmbfl/mbfl/mbfilter_8bit.h" |
38 | | #include "libmbfl/mbfl/mbfilter_pass.h" |
39 | | #include "libmbfl/mbfl/mbfilter_wchar.h" |
40 | | #include "libmbfl/filters/mbfilter_ascii.h" |
41 | | #include "libmbfl/filters/mbfilter_base64.h" |
42 | | #include "libmbfl/filters/mbfilter_qprint.h" |
43 | | #include "libmbfl/filters/mbfilter_ucs4.h" |
44 | | #include "libmbfl/filters/mbfilter_utf8.h" |
45 | | |
46 | | #include "php_variables.h" |
47 | | #include "php_globals.h" |
48 | | #include "rfc1867.h" |
49 | | #include "php_content_types.h" |
50 | | #include "SAPI.h" |
51 | | #include "php_unicode.h" |
52 | | #include "TSRM.h" |
53 | | |
54 | | #include "mb_gpc.h" |
55 | | |
56 | | #ifdef HAVE_MBREGEX |
57 | | # include "php_mbregex.h" |
58 | | # include "php_onig_compat.h" |
59 | | # include <oniguruma.h> |
60 | | # undef UChar |
61 | | # if !defined(ONIGURUMA_VERSION_INT) || ONIGURUMA_VERSION_INT < 60800 |
62 | | typedef void OnigMatchParam; |
63 | | #define onig_new_match_param() (NULL) |
64 | | #define onig_initialize_match_param(x) (void)(x) |
65 | | #define onig_set_match_stack_limit_size_of_match_param(x, y) |
66 | | #define onig_set_retry_limit_in_match_of_match_param(x, y) |
67 | | #define onig_free_match_param(x) |
68 | | #define onig_search_with_param(reg, str, end, start, range, region, option, mp) \ |
69 | | onig_search(reg, str, end, start, range, region, option) |
70 | | #define onig_match_with_param(re, str, end, at, region, option, mp) \ |
71 | | onig_match(re, str, end, at, region, option) |
72 | | # endif |
73 | | #else |
74 | | # include "ext/pcre/php_pcre.h" |
75 | | #endif |
76 | | |
77 | | #include "zend_multibyte.h" |
78 | | #include "mbstring_arginfo.h" |
79 | | /* }}} */ |
80 | | |
81 | | #ifdef HAVE_MBSTRING |
82 | | |
83 | | /* {{{ prototypes */ |
84 | | ZEND_DECLARE_MODULE_GLOBALS(mbstring) |
85 | | |
86 | | static PHP_GINIT_FUNCTION(mbstring); |
87 | | static PHP_GSHUTDOWN_FUNCTION(mbstring); |
88 | | |
89 | | static void php_mb_populate_current_detect_order_list(void); |
90 | | |
91 | | static int php_mb_encoding_translation(void); |
92 | | |
93 | | static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size); |
94 | | |
95 | | static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding); |
96 | | |
97 | | static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc); |
98 | | |
99 | | static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); |
100 | | /* }}} */ |
101 | | |
102 | | /* {{{ php_mb_default_identify_list */ |
103 | | typedef struct _php_mb_nls_ident_list { |
104 | | enum mbfl_no_language lang; |
105 | | const enum mbfl_no_encoding *list; |
106 | | size_t list_size; |
107 | | } php_mb_nls_ident_list; |
108 | | |
109 | | static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = { |
110 | | mbfl_no_encoding_ascii, |
111 | | mbfl_no_encoding_jis, |
112 | | mbfl_no_encoding_utf8, |
113 | | mbfl_no_encoding_euc_jp, |
114 | | mbfl_no_encoding_sjis |
115 | | }; |
116 | | |
117 | | static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = { |
118 | | mbfl_no_encoding_ascii, |
119 | | mbfl_no_encoding_utf8, |
120 | | mbfl_no_encoding_euc_cn, |
121 | | mbfl_no_encoding_cp936 |
122 | | }; |
123 | | |
124 | | static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = { |
125 | | mbfl_no_encoding_ascii, |
126 | | mbfl_no_encoding_utf8, |
127 | | mbfl_no_encoding_euc_tw, |
128 | | mbfl_no_encoding_big5 |
129 | | }; |
130 | | |
131 | | static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = { |
132 | | mbfl_no_encoding_ascii, |
133 | | mbfl_no_encoding_utf8, |
134 | | mbfl_no_encoding_euc_kr, |
135 | | mbfl_no_encoding_uhc |
136 | | }; |
137 | | |
138 | | static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = { |
139 | | mbfl_no_encoding_ascii, |
140 | | mbfl_no_encoding_utf8, |
141 | | mbfl_no_encoding_koi8r, |
142 | | mbfl_no_encoding_cp1251, |
143 | | mbfl_no_encoding_cp866 |
144 | | }; |
145 | | |
146 | | static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = { |
147 | | mbfl_no_encoding_ascii, |
148 | | mbfl_no_encoding_utf8, |
149 | | mbfl_no_encoding_armscii8 |
150 | | }; |
151 | | |
152 | | static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = { |
153 | | mbfl_no_encoding_ascii, |
154 | | mbfl_no_encoding_utf8, |
155 | | mbfl_no_encoding_cp1254, |
156 | | mbfl_no_encoding_8859_9 |
157 | | }; |
158 | | |
159 | | static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = { |
160 | | mbfl_no_encoding_ascii, |
161 | | mbfl_no_encoding_utf8, |
162 | | mbfl_no_encoding_koi8u |
163 | | }; |
164 | | |
165 | | static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = { |
166 | | mbfl_no_encoding_ascii, |
167 | | mbfl_no_encoding_utf8 |
168 | | }; |
169 | | |
170 | | |
171 | | static const php_mb_nls_ident_list php_mb_default_identify_list[] = { |
172 | | { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) }, |
173 | | { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) }, |
174 | | { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) }, |
175 | | { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) }, |
176 | | { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) }, |
177 | | { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) }, |
178 | | { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) }, |
179 | | { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) }, |
180 | | { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) } |
181 | | }; |
182 | | |
183 | | /* }}} */ |
184 | | |
/* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor for the mbstring extension: wires the extension name, its
 * function table, the module/request lifecycle hooks, the info hook, the
 * version string, and the per-module globals with their init/shutdown hooks. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER,
	"mbstring",
	ext_functions,
	PHP_MINIT(mbstring),
	PHP_MSHUTDOWN(mbstring),
	PHP_RINIT(mbstring),
	PHP_RSHUTDOWN(mbstring),
	PHP_MINFO(mbstring),
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring),
	PHP_GINIT(mbstring),
	PHP_GSHUTDOWN(mbstring),
	NULL, /* NOTE(review): unused hook slot; its meaning is defined by zend_module_entry, not visible here */
	STANDARD_MODULE_PROPERTIES_EX
};
/* }}} */
203 | | |
/* {{{ static sapi_post_entry php_post_entries[] */
/* POST handler table that pairs each content type (and its length) with the
 * standard PHP form handler (php_std_post_handler) or the RFC 1867 upload
 * handler. The all-NULL row terminates the table. */
static const sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data,	php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
/* }}} */
211 | | |
212 | | #ifdef COMPILE_DL_MBSTRING |
213 | | #ifdef ZTS |
214 | | ZEND_TSRMLS_CACHE_DEFINE() |
215 | | #endif |
216 | | ZEND_GET_MODULE(mbstring) |
217 | | #endif |
218 | | |
219 | | /* {{{ allocators */ |
220 | | static void *_php_mb_allocators_malloc(size_t sz) |
221 | 15.6k | { |
222 | 15.6k | return emalloc(sz); |
223 | 15.6k | } |
224 | | |
225 | | static void *_php_mb_allocators_realloc(void *ptr, size_t sz) |
226 | 62.8k | { |
227 | 62.8k | return erealloc(ptr, sz); |
228 | 62.8k | } |
229 | | |
230 | | static void *_php_mb_allocators_calloc(size_t nelems, size_t szelem) |
231 | 0 | { |
232 | 0 | return ecalloc(nelems, szelem); |
233 | 0 | } |
234 | | |
235 | | static void _php_mb_allocators_free(void *ptr) |
236 | 58.1k | { |
237 | 58.1k | efree(ptr); |
238 | 58.1k | } |
239 | | |
240 | | static const mbfl_allocators _php_mb_allocators = { |
241 | | _php_mb_allocators_malloc, |
242 | | _php_mb_allocators_realloc, |
243 | | _php_mb_allocators_calloc, |
244 | | _php_mb_allocators_free, |
245 | | }; |
246 | | /* }}} */ |
247 | | |
/* {{{ static sapi_post_entry mbstr_post_entries[] */
/* POST handler table used by mbstring: same shape as the standard table, but
 * the default form content type is routed to php_mb_post_handler. The all-NULL
 * row terminates the table. */
static const sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE,    sizeof(MULTIPART_CONTENT_TYPE)-1,    NULL,                         rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
/* }}} */
255 | | |
256 | 0 | static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) { |
257 | 0 | if (encoding_name) { |
258 | 0 | const mbfl_encoding *encoding; |
259 | 0 | zend_string *last_encoding_name = MBSTRG(last_used_encoding_name); |
260 | 0 | if (last_encoding_name && (last_encoding_name == encoding_name |
261 | 0 | || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) { |
262 | 0 | return MBSTRG(last_used_encoding); |
263 | 0 | } |
264 | | |
265 | 0 | encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name)); |
266 | 0 | if (!encoding) { |
267 | 0 | zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name)); |
268 | 0 | return NULL; |
269 | 0 | } |
270 | | |
271 | 0 | if (last_encoding_name) { |
272 | 0 | zend_string_release(last_encoding_name); |
273 | 0 | } |
274 | 0 | MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name); |
275 | 0 | MBSTRG(last_used_encoding) = encoding; |
276 | 0 | return encoding; |
277 | 0 | } else { |
278 | 0 | return MBSTRG(current_internal_encoding); |
279 | 0 | } |
280 | 0 | } |
281 | | |
282 | 36.6k | static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) { |
283 | 36.6k | if (strcmp(encoding_name, "pass") == 0) { |
284 | 0 | return &mbfl_encoding_pass; |
285 | 0 | } |
286 | | |
287 | 36.6k | return mbfl_name2encoding(encoding_name); |
288 | 36.6k | } |
289 | | |
/* Count the ',' bytes in the half-open byte range [p, end). */
static size_t count_commas(const char *p, const char *end) {
	size_t total = 0;
	const char *hit = memchr(p, ',', end - p);

	while (hit != NULL) {
		total++;
		/* Resume the search just past the comma that was found. */
		hit++;
		hit = memchr(hit, ',', end - hit);
	}
	return total;
}
298 | | |
299 | | /* {{{ static zend_result php_mb_parse_encoding_list() |
300 | | * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS. |
301 | | * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0. |
302 | | */ |
303 | | static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length, |
304 | | const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num, |
305 | | zend_bool allow_pass_encoding) |
306 | 32.5k | { |
307 | 32.5k | if (value == NULL || value_length == 0) { |
308 | 4.06k | *return_list = NULL; |
309 | 4.06k | *return_size = 0; |
310 | 4.06k | return SUCCESS; |
311 | 28.4k | } else { |
312 | 28.4k | zend_bool included_auto; |
313 | 28.4k | size_t n, size; |
314 | 28.4k | char *p1, *endp, *tmpstr; |
315 | 28.4k | const mbfl_encoding **entry, **list; |
316 | | |
317 | | /* copy the value string for work */ |
318 | 28.4k | if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) { |
319 | 0 | tmpstr = (char *)estrndup(value+1, value_length-2); |
320 | 0 | value_length -= 2; |
321 | 28.4k | } else { |
322 | 28.4k | tmpstr = (char *)estrndup(value, value_length); |
323 | 28.4k | } |
324 | | |
325 | 28.4k | endp = tmpstr + value_length; |
326 | 28.4k | size = 1 + count_commas(tmpstr, endp) + MBSTRG(default_detect_order_list_size); |
327 | 28.4k | list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent); |
328 | 28.4k | entry = list; |
329 | 28.4k | n = 0; |
330 | 28.4k | included_auto = 0; |
331 | 28.4k | p1 = tmpstr; |
332 | 28.4k | while (1) { |
333 | 28.4k | char *comma = (char *) php_memnstr(p1, ",", 1, endp); |
334 | 28.4k | char *p = comma ? comma : endp; |
335 | 28.4k | *p = '\0'; |
336 | | /* trim spaces */ |
337 | 28.4k | while (p1 < p && (*p1 == ' ' || *p1 == '\t')) { |
338 | 0 | p1++; |
339 | 0 | } |
340 | 28.4k | p--; |
341 | 28.4k | while (p > p1 && (*p == ' ' || *p == '\t')) { |
342 | 0 | *p = '\0'; |
343 | 0 | p--; |
344 | 0 | } |
345 | | /* convert to the encoding number and check encoding */ |
346 | 28.4k | if (strcasecmp(p1, "auto") == 0) { |
347 | 0 | if (!included_auto) { |
348 | 0 | const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); |
349 | 0 | const size_t identify_list_size = MBSTRG(default_detect_order_list_size); |
350 | 0 | size_t i; |
351 | 0 | included_auto = 1; |
352 | 0 | for (i = 0; i < identify_list_size; i++) { |
353 | 0 | *entry++ = mbfl_no2encoding(*src++); |
354 | 0 | n++; |
355 | 0 | } |
356 | 0 | } |
357 | 28.4k | } else { |
358 | 28.4k | const mbfl_encoding *encoding = |
359 | 28.4k | allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1); |
360 | 28.4k | if (!encoding) { |
361 | | /* Called from an INI setting modification */ |
362 | 0 | if (arg_num == 0) { |
363 | 0 | php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1); |
364 | 0 | } else { |
365 | 0 | zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1); |
366 | 0 | } |
367 | 0 | efree(tmpstr); |
368 | 0 | pefree(ZEND_VOIDP(list), persistent); |
369 | 0 | return FAILURE; |
370 | 0 | } |
371 | | |
372 | 28.4k | *entry++ = encoding; |
373 | 28.4k | n++; |
374 | 28.4k | } |
375 | 28.4k | if (n >= size || comma == NULL) { |
376 | 28.4k | break; |
377 | 28.4k | } |
378 | 0 | p1 = comma + 1; |
379 | 0 | } |
380 | 28.4k | *return_list = list; |
381 | 28.4k | *return_size = n; |
382 | 28.4k | efree(tmpstr); |
383 | 28.4k | } |
384 | | |
385 | 28.4k | return SUCCESS; |
386 | 32.5k | } |
387 | | /* }}} */ |
388 | | |
389 | | /* {{{ static int php_mb_parse_encoding_array() |
390 | | * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS. |
391 | | * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0. |
392 | | */ |
393 | | static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list, |
394 | | size_t *return_size, uint32_t arg_num) |
395 | 0 | { |
396 | | /* Allocate enough space to include the default detect order if "auto" is used. */ |
397 | 0 | size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size); |
398 | 0 | const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*)); |
399 | 0 | const mbfl_encoding **entry = list; |
400 | 0 | zend_bool included_auto = 0; |
401 | 0 | size_t n = 0; |
402 | 0 | zval *hash_entry; |
403 | 0 | ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) { |
404 | 0 | zend_string *encoding_str = zval_try_get_string(hash_entry); |
405 | 0 | if (UNEXPECTED(!encoding_str)) { |
406 | 0 | efree(ZEND_VOIDP(list)); |
407 | 0 | return FAILURE; |
408 | 0 | } |
409 | | |
410 | 0 | if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) { |
411 | 0 | if (!included_auto) { |
412 | 0 | const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); |
413 | 0 | const size_t identify_list_size = MBSTRG(default_detect_order_list_size); |
414 | 0 | size_t j; |
415 | |
|
416 | 0 | included_auto = 1; |
417 | 0 | for (j = 0; j < identify_list_size; j++) { |
418 | 0 | *entry++ = mbfl_no2encoding(*src++); |
419 | 0 | n++; |
420 | 0 | } |
421 | 0 | } |
422 | 0 | } else { |
423 | 0 | const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str)); |
424 | 0 | if (encoding) { |
425 | 0 | *entry++ = encoding; |
426 | 0 | n++; |
427 | 0 | } else { |
428 | 0 | zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str)); |
429 | 0 | zend_string_release(encoding_str); |
430 | 0 | efree(ZEND_VOIDP(list)); |
431 | 0 | return FAILURE; |
432 | 0 | } |
433 | 0 | } |
434 | 0 | zend_string_release(encoding_str); |
435 | 0 | } ZEND_HASH_FOREACH_END(); |
436 | 0 | *return_list = list; |
437 | 0 | *return_size = n; |
438 | 0 | return SUCCESS; |
439 | 0 | } |
440 | | /* }}} */ |
441 | | |
442 | | /* {{{ zend_multibyte interface */ |
443 | | static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name) |
444 | 20.3k | { |
445 | 20.3k | return (const zend_encoding*)mbfl_name2encoding(encoding_name); |
446 | 20.3k | } |
447 | | |
448 | | static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding) |
449 | 0 | { |
450 | 0 | return ((const mbfl_encoding *)encoding)->name; |
451 | 0 | } |
452 | | |
453 | | static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding) |
454 | 0 | { |
455 | 0 | const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding; |
456 | 0 | if (encoding->flag & MBFL_ENCTYPE_SBCS) { |
457 | 0 | return 1; |
458 | 0 | } |
459 | 0 | if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) { |
460 | 0 | return 1; |
461 | 0 | } |
462 | 0 | return 0; |
463 | 0 | } |
464 | | |
465 | | static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size) |
466 | 0 | { |
467 | 0 | mbfl_string string; |
468 | |
|
469 | 0 | if (!list) { |
470 | 0 | list = (const zend_encoding **)MBSTRG(current_detect_order_list); |
471 | 0 | list_size = MBSTRG(current_detect_order_list_size); |
472 | 0 | } |
473 | |
|
474 | 0 | mbfl_string_init(&string); |
475 | 0 | string.val = (unsigned char *)arg_string; |
476 | 0 | string.len = arg_length; |
477 | 0 | return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0); |
478 | 0 | } |
479 | | |
480 | | static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from) |
481 | 0 | { |
482 | 0 | mbfl_string string, result; |
483 | 0 | mbfl_buffer_converter *convd; |
484 | 0 | int status; |
485 | 0 | size_t loc; |
486 | | |
487 | | /* new encoding */ |
488 | | /* initialize string */ |
489 | 0 | string.encoding = (const mbfl_encoding*)encoding_from; |
490 | 0 | string.val = (unsigned char*)from; |
491 | 0 | string.len = from_length; |
492 | | |
493 | | /* initialize converter */ |
494 | 0 | convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len); |
495 | 0 | if (convd == NULL) { |
496 | 0 | return (size_t) -1; |
497 | 0 | } |
498 | | |
499 | 0 | mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); |
500 | 0 | mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); |
501 | | |
502 | | /* do it */ |
503 | 0 | status = mbfl_buffer_converter_feed2(convd, &string, &loc); |
504 | 0 | if (status) { |
505 | 0 | mbfl_buffer_converter_delete(convd); |
506 | 0 | return (size_t)-1; |
507 | 0 | } |
508 | | |
509 | 0 | mbfl_buffer_converter_flush(convd); |
510 | 0 | mbfl_string_init(&result); |
511 | 0 | if (!mbfl_buffer_converter_result(convd, &result)) { |
512 | 0 | mbfl_buffer_converter_delete(convd); |
513 | 0 | return (size_t)-1; |
514 | 0 | } |
515 | | |
516 | 0 | *to = result.val; |
517 | 0 | *to_length = result.len; |
518 | |
|
519 | 0 | mbfl_buffer_converter_delete(convd); |
520 | |
|
521 | 0 | return loc; |
522 | 0 | } |
523 | | |
524 | | static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent) |
525 | 24.4k | { |
526 | 24.4k | return php_mb_parse_encoding_list( |
527 | 24.4k | encoding_list, encoding_list_len, |
528 | 24.4k | (const mbfl_encoding ***)return_list, return_size, |
529 | 24.4k | persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1); |
530 | 24.4k | } |
531 | | |
532 | | static const zend_encoding *php_mb_zend_internal_encoding_getter(void) |
533 | 0 | { |
534 | 0 | return (const zend_encoding *)MBSTRG(internal_encoding); |
535 | 0 | } |
536 | | |
537 | | static zend_result php_mb_zend_internal_encoding_setter(const zend_encoding *encoding) |
538 | 939k | { |
539 | 939k | MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding; |
540 | 939k | return SUCCESS; |
541 | 939k | } |
542 | | |
/* Table of the zend_multibyte hooks implemented by mbstring: the provider
 * name followed by the fetcher, name getter, lexer compatibility checker,
 * detector, converter, list parser, and internal encoding getter/setter
 * defined in this file. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
554 | | /* }}} */ |
555 | | |
556 | | static void *_php_mb_compile_regex(const char *pattern); |
557 | | static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len); |
558 | | static void _php_mb_free_regex(void *opaque); |
559 | | |
560 | | #ifdef HAVE_MBREGEX |
561 | | /* {{{ _php_mb_compile_regex */ |
562 | | static void *_php_mb_compile_regex(const char *pattern) |
563 | 4.06k | { |
564 | 4.06k | php_mb_regex_t *retval; |
565 | 4.06k | OnigErrorInfo err_info; |
566 | 4.06k | int err_code; |
567 | | |
568 | 4.06k | if ((err_code = onig_new(&retval, |
569 | 4.06k | (const OnigUChar *)pattern, |
570 | 4.06k | (const OnigUChar *)pattern + strlen(pattern), |
571 | 4.06k | ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP, |
572 | 4.06k | ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) { |
573 | 0 | OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN]; |
574 | 0 | onig_error_code_to_str(err_str, err_code, err_info); |
575 | 0 | php_error_docref(NULL, E_WARNING, "%s: %s", pattern, err_str); |
576 | 0 | retval = NULL; |
577 | 0 | } |
578 | 4.06k | return retval; |
579 | 4.06k | } |
580 | | /* }}} */ |
581 | | |
582 | | /* {{{ _php_mb_match_regex */ |
583 | | static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) |
584 | 0 | { |
585 | 0 | OnigMatchParam *mp = onig_new_match_param(); |
586 | 0 | int err; |
587 | 0 | onig_initialize_match_param(mp); |
588 | 0 | if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_stack_limit))) { |
589 | 0 | onig_set_match_stack_limit_size_of_match_param(mp, (unsigned int)MBSTRG(regex_stack_limit)); |
590 | 0 | } |
591 | 0 | if (!ZEND_LONG_UINT_OVFL(MBSTRG(regex_retry_limit))) { |
592 | 0 | onig_set_retry_limit_in_match_of_match_param(mp, (unsigned int)MBSTRG(regex_retry_limit)); |
593 | 0 | } |
594 | | /* search */ |
595 | 0 | err = onig_search_with_param((php_mb_regex_t *)opaque, (const OnigUChar *)str, |
596 | 0 | (const OnigUChar*)str + str_len, (const OnigUChar *)str, |
597 | 0 | (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE, mp); |
598 | 0 | onig_free_match_param(mp); |
599 | 0 | return err >= 0; |
600 | 0 | } |
601 | | /* }}} */ |
602 | | |
603 | | /* {{{ _php_mb_free_regex */ |
604 | | static void _php_mb_free_regex(void *opaque) |
605 | 0 | { |
606 | 0 | onig_free((php_mb_regex_t *)opaque); |
607 | 0 | } |
608 | | /* }}} */ |
609 | | #else |
610 | | /* {{{ _php_mb_compile_regex */ |
611 | | static void *_php_mb_compile_regex(const char *pattern) |
612 | | { |
613 | | pcre2_code *retval; |
614 | | PCRE2_SIZE err_offset; |
615 | | int errnum; |
616 | | |
617 | | if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, |
618 | | PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) { |
619 | | PCRE2_UCHAR err_str[128]; |
620 | | pcre2_get_error_message(errnum, err_str, sizeof(err_str)); |
621 | | php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str); |
622 | | } |
623 | | return retval; |
624 | | } |
625 | | /* }}} */ |
626 | | |
627 | | /* {{{ _php_mb_match_regex */ |
628 | | static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len) |
629 | | { |
630 | | int res; |
631 | | |
632 | | pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque); |
633 | | if (NULL == match_data) { |
634 | | pcre2_code_free(opaque); |
635 | | php_error_docref(NULL, E_WARNING, "Cannot allocate match data"); |
636 | | return FAILURE; |
637 | | } |
638 | | res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0; |
639 | | php_pcre_free_match_data(match_data); |
640 | | |
641 | | return res; |
642 | | } |
643 | | /* }}} */ |
644 | | |
645 | | /* {{{ _php_mb_free_regex */ |
646 | | static void _php_mb_free_regex(void *opaque) |
647 | | { |
648 | | pcre2_code_free(opaque); |
649 | | } |
650 | | /* }}} */ |
651 | | #endif |
652 | | |
653 | | /* {{{ php_mb_nls_get_default_detect_order_list */ |
654 | | static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size) |
655 | 4.06k | { |
656 | 4.06k | size_t i; |
657 | | |
658 | 4.06k | *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut; |
659 | 4.06k | *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); |
660 | | |
661 | 36.6k | for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) { |
662 | 36.6k | if (php_mb_default_identify_list[i].lang == lang) { |
663 | 4.06k | *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list; |
664 | 4.06k | *plist_size = php_mb_default_identify_list[i].list_size; |
665 | 4.06k | return 1; |
666 | 4.06k | } |
667 | 36.6k | } |
668 | 0 | return 0; |
669 | 4.06k | } |
670 | | /* }}} */ |
671 | | |
672 | | static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote) |
673 | 0 | { |
674 | 0 | char *result = emalloc(len + 2); |
675 | 0 | char *resp = result; |
676 | 0 | size_t i; |
677 | |
|
678 | 0 | for (i = 0; i < len && start[i] != quote; ++i) { |
679 | 0 | if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) { |
680 | 0 | *resp++ = start[++i]; |
681 | 0 | } else { |
682 | 0 | size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding); |
683 | |
|
684 | 0 | while (j-- > 0 && i < len) { |
685 | 0 | *resp++ = start[i++]; |
686 | 0 | } |
687 | 0 | --i; |
688 | 0 | } |
689 | 0 | } |
690 | |
|
691 | 0 | *resp = '\0'; |
692 | 0 | return result; |
693 | 0 | } |
694 | | |
695 | | static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */ |
696 | 0 | { |
697 | 0 | char *pos = *line, quote; |
698 | 0 | char *res; |
699 | |
|
700 | 0 | while (*pos && *pos != stop) { |
701 | 0 | if ((quote = *pos) == '"' || quote == '\'') { |
702 | 0 | ++pos; |
703 | 0 | while (*pos && *pos != quote) { |
704 | 0 | if (*pos == '\\' && pos[1] && pos[1] == quote) { |
705 | 0 | pos += 2; |
706 | 0 | } else { |
707 | 0 | ++pos; |
708 | 0 | } |
709 | 0 | } |
710 | 0 | if (*pos) { |
711 | 0 | ++pos; |
712 | 0 | } |
713 | 0 | } else { |
714 | 0 | pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding); |
715 | |
|
716 | 0 | } |
717 | 0 | } |
718 | 0 | if (*pos == '\0') { |
719 | 0 | res = estrdup(*line); |
720 | 0 | *line += strlen(*line); |
721 | 0 | return res; |
722 | 0 | } |
723 | | |
724 | 0 | res = estrndup(*line, pos - *line); |
725 | |
|
726 | 0 | while (*pos == stop) { |
727 | 0 | pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding); |
728 | 0 | } |
729 | |
|
730 | 0 | *line = pos; |
731 | 0 | return res; |
732 | 0 | } |
733 | | /* }}} */ |
734 | | |
735 | | static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */ |
736 | 0 | { |
737 | 0 | while (*str && isspace(*(unsigned char *)str)) { |
738 | 0 | ++str; |
739 | 0 | } |
740 | |
|
741 | 0 | if (!*str) { |
742 | 0 | return estrdup(""); |
743 | 0 | } |
744 | | |
745 | 0 | if (*str == '"' || *str == '\'') { |
746 | 0 | char quote = *str; |
747 | |
|
748 | 0 | str++; |
749 | 0 | return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote); |
750 | 0 | } else { |
751 | 0 | char *strend = str; |
752 | |
|
753 | 0 | while (*strend && !isspace(*(unsigned char *)strend)) { |
754 | 0 | ++strend; |
755 | 0 | } |
756 | 0 | return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0); |
757 | 0 | } |
758 | 0 | } |
759 | | /* }}} */ |
760 | | |
761 | | static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */ |
762 | 0 | { |
763 | 0 | char *s, *s2; |
764 | 0 | const size_t filename_len = strlen(filename); |
765 | | |
766 | | /* The \ check should technically be needed for win32 systems only where |
767 | | * it is a valid path separator. However, IE in all it's wisdom always sends |
768 | | * the full path of the file on the user's filesystem, which means that unless |
769 | | * the user does basename() they get a bogus file name. Until IE's user base drops |
770 | | * to nill or problem is fixed this code must remain enabled for all systems. */ |
771 | 0 | s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding); |
772 | 0 | s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding); |
773 | |
|
774 | 0 | if (s && s2) { |
775 | 0 | if (s > s2) { |
776 | 0 | return ++s; |
777 | 0 | } else { |
778 | 0 | return ++s2; |
779 | 0 | } |
780 | 0 | } else if (s) { |
781 | 0 | return ++s; |
782 | 0 | } else if (s2) { |
783 | 0 | return ++s2; |
784 | 0 | } else { |
785 | 0 | return filename; |
786 | 0 | } |
787 | 0 | } |
788 | | /* }}} */ |
789 | | |
790 | | /* {{{ php.ini directive handler */ |
791 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */ |
792 | | static PHP_INI_MH(OnUpdate_mbstring_language) |
793 | 4.06k | { |
794 | 4.06k | enum mbfl_no_language no_language; |
795 | | |
796 | 4.06k | no_language = mbfl_name2no_language(ZSTR_VAL(new_value)); |
797 | 4.06k | if (no_language == mbfl_no_language_invalid) { |
798 | 0 | MBSTRG(language) = mbfl_no_language_neutral; |
799 | 0 | return FAILURE; |
800 | 0 | } |
801 | 4.06k | MBSTRG(language) = no_language; |
802 | 4.06k | php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size)); |
803 | 4.06k | return SUCCESS; |
804 | 4.06k | } |
805 | | /* }}} */ |
806 | | |
807 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */ |
808 | | static PHP_INI_MH(OnUpdate_mbstring_detect_order) |
809 | 4.06k | { |
810 | 4.06k | const mbfl_encoding **list; |
811 | 4.06k | size_t size; |
812 | | |
813 | 4.06k | if (!new_value) { |
814 | 4.06k | if (MBSTRG(detect_order_list)) { |
815 | 0 | pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1); |
816 | 0 | } |
817 | 4.06k | MBSTRG(detect_order_list) = NULL; |
818 | 4.06k | MBSTRG(detect_order_list_size) = 0; |
819 | 4.06k | return SUCCESS; |
820 | 4.06k | } |
821 | | |
822 | 0 | if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) { |
823 | 0 | return FAILURE; |
824 | 0 | } |
825 | | |
826 | 0 | if (MBSTRG(detect_order_list)) { |
827 | 0 | pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1); |
828 | 0 | } |
829 | 0 | MBSTRG(detect_order_list) = list; |
830 | 0 | MBSTRG(detect_order_list_size) = size; |
831 | 0 | return SUCCESS; |
832 | 0 | } |
833 | | /* }}} */ |
834 | | |
835 | 8.13k | static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) { |
836 | 8.13k | const mbfl_encoding **list; |
837 | 8.13k | size_t size; |
838 | 8.13k | if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) { |
839 | 0 | return FAILURE; |
840 | 0 | } |
841 | 8.13k | if (MBSTRG(http_input_list)) { |
842 | 4.06k | pefree(ZEND_VOIDP(MBSTRG(http_input_list)), 1); |
843 | 4.06k | } |
844 | 8.13k | MBSTRG(http_input_list) = list; |
845 | 8.13k | MBSTRG(http_input_list_size) = size; |
846 | 8.13k | return SUCCESS; |
847 | 8.13k | } |
848 | | |
849 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ |
850 | | static PHP_INI_MH(OnUpdate_mbstring_http_input) |
851 | 4.06k | { |
852 | 4.06k | if (new_value) { |
853 | 0 | php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated"); |
854 | 0 | } |
855 | | |
856 | 4.06k | if (!new_value || !ZSTR_VAL(new_value)) { |
857 | 4.06k | const char *encoding = php_get_input_encoding(); |
858 | 4.06k | MBSTRG(http_input_set) = 0; |
859 | 4.06k | _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding)); |
860 | 4.06k | return SUCCESS; |
861 | 4.06k | } |
862 | | |
863 | 0 | MBSTRG(http_input_set) = 1; |
864 | 0 | return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); |
865 | 0 | } |
866 | | /* }}} */ |
867 | | |
868 | 8.13k | static int _php_mb_ini_mbstring_http_output_set(const char *new_value) { |
869 | 8.13k | const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value); |
870 | 8.13k | if (!encoding) { |
871 | 0 | return FAILURE; |
872 | 0 | } |
873 | | |
874 | 8.13k | MBSTRG(http_output_encoding) = encoding; |
875 | 8.13k | MBSTRG(current_http_output_encoding) = encoding; |
876 | 8.13k | return SUCCESS; |
877 | 8.13k | } |
878 | | |
879 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ |
880 | | static PHP_INI_MH(OnUpdate_mbstring_http_output) |
881 | 4.06k | { |
882 | 4.06k | if (new_value) { |
883 | 0 | php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated"); |
884 | 0 | } |
885 | | |
886 | 4.06k | if (new_value == NULL || ZSTR_LEN(new_value) == 0) { |
887 | 4.06k | MBSTRG(http_output_set) = 0; |
888 | 4.06k | _php_mb_ini_mbstring_http_output_set(php_get_output_encoding()); |
889 | 4.06k | return SUCCESS; |
890 | 4.06k | } |
891 | | |
892 | 0 | MBSTRG(http_output_set) = 1; |
893 | 0 | return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value)); |
894 | 0 | } |
895 | | /* }}} */ |
896 | | |
897 | | /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ |
898 | | static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length) |
899 | 8.13k | { |
900 | 8.13k | const mbfl_encoding *encoding; |
901 | | |
902 | 8.13k | if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) { |
903 | | /* falls back to UTF-8 if an unknown encoding name is given */ |
904 | 0 | if (new_value) { |
905 | 0 | php_error_docref("ref.mbstring", E_WARNING, |
906 | 0 | "Unknown encoding \"%s\" in ini setting", new_value); |
907 | 0 | } |
908 | 0 | encoding = mbfl_no2encoding(mbfl_no_encoding_utf8); |
909 | 0 | } |
910 | 8.13k | MBSTRG(internal_encoding) = encoding; |
911 | 8.13k | MBSTRG(current_internal_encoding) = encoding; |
912 | 8.13k | #ifdef HAVE_MBREGEX |
913 | 8.13k | { |
914 | 8.13k | const char *enc_name = new_value; |
915 | 8.13k | if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) { |
916 | | /* falls back to UTF-8 if an unknown encoding name is given */ |
917 | 0 | enc_name = "UTF-8"; |
918 | 0 | php_mb_regex_set_default_mbctype(enc_name); |
919 | 0 | } |
920 | 8.13k | php_mb_regex_set_mbctype(new_value); |
921 | 8.13k | } |
922 | 8.13k | #endif |
923 | 8.13k | return SUCCESS; |
924 | 8.13k | } |
925 | | /* }}} */ |
926 | | |
927 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */ |
928 | | static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) |
929 | 4.06k | { |
930 | 4.06k | if (new_value) { |
931 | 0 | php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated"); |
932 | 0 | } |
933 | | |
934 | 4.06k | if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) { |
935 | 0 | return FAILURE; |
936 | 0 | } |
937 | | |
938 | 4.06k | if (new_value && ZSTR_LEN(new_value)) { |
939 | 0 | MBSTRG(internal_encoding_set) = 1; |
940 | 0 | return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); |
941 | 4.06k | } else { |
942 | 4.06k | const char *encoding = php_get_internal_encoding(); |
943 | 4.06k | MBSTRG(internal_encoding_set) = 0; |
944 | 4.06k | return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding)); |
945 | 4.06k | } |
946 | 4.06k | } |
947 | | /* }}} */ |
948 | | |
949 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */ |
950 | | static PHP_INI_MH(OnUpdate_mbstring_substitute_character) |
951 | 4.06k | { |
952 | 4.06k | int c; |
953 | 4.06k | char *endptr = NULL; |
954 | | |
955 | 4.06k | if (new_value != NULL) { |
956 | 0 | if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) { |
957 | 0 | MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; |
958 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; |
959 | 0 | } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) { |
960 | 0 | MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; |
961 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; |
962 | 0 | } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) { |
963 | 0 | MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; |
964 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; |
965 | 0 | } else { |
966 | 0 | MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
967 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
968 | 0 | if (ZSTR_LEN(new_value) > 0) { |
969 | 0 | c = strtol(ZSTR_VAL(new_value), &endptr, 0); |
970 | 0 | if (*endptr == '\0') { |
971 | 0 | MBSTRG(filter_illegal_substchar) = c; |
972 | 0 | MBSTRG(current_filter_illegal_substchar) = c; |
973 | 0 | } |
974 | 0 | } |
975 | 0 | } |
976 | 4.06k | } else { |
977 | 4.06k | MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
978 | 4.06k | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
979 | 4.06k | MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */ |
980 | 4.06k | MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */ |
981 | 4.06k | } |
982 | | |
983 | 4.06k | return SUCCESS; |
984 | 4.06k | } |
985 | | /* }}} */ |
986 | | |
987 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */ |
988 | | static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) |
989 | 4.06k | { |
990 | 4.06k | if (new_value == NULL) { |
991 | 0 | return FAILURE; |
992 | 0 | } |
993 | | |
994 | 4.06k | OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage); |
995 | | |
996 | 4.06k | if (MBSTRG(encoding_translation)) { |
997 | 0 | sapi_unregister_post_entry(php_post_entries); |
998 | 0 | sapi_register_post_entries(mbstr_post_entries); |
999 | 4.06k | } else { |
1000 | 4.06k | sapi_unregister_post_entry(mbstr_post_entries); |
1001 | 4.06k | sapi_register_post_entries(php_post_entries); |
1002 | 4.06k | } |
1003 | | |
1004 | 4.06k | return SUCCESS; |
1005 | 4.06k | } |
1006 | | /* }}} */ |
1007 | | |
1008 | | /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes */ |
1009 | | static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) |
1010 | 4.06k | { |
1011 | 4.06k | zend_string *tmp; |
1012 | 4.06k | void *re = NULL; |
1013 | | |
1014 | 4.06k | if (!new_value) { |
1015 | 0 | new_value = entry->orig_value; |
1016 | 0 | } |
1017 | 4.06k | tmp = php_trim(new_value, NULL, 0, 3); |
1018 | | |
1019 | 4.06k | if (ZSTR_LEN(tmp) > 0) { |
1020 | 4.06k | if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) { |
1021 | 0 | zend_string_release_ex(tmp, 0); |
1022 | 0 | return FAILURE; |
1023 | 0 | } |
1024 | 4.06k | } |
1025 | | |
1026 | 4.06k | if (MBSTRG(http_output_conv_mimetypes)) { |
1027 | 0 | _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes)); |
1028 | 0 | } |
1029 | | |
1030 | 4.06k | MBSTRG(http_output_conv_mimetypes) = re; |
1031 | | |
1032 | 4.06k | zend_string_release_ex(tmp, 0); |
1033 | 4.06k | return SUCCESS; |
1034 | 4.06k | } |
1035 | | /* }}} */ |
1036 | | /* }}} */ |
1037 | | |
1038 | | /* {{{ php.ini directive registration */ |
1039 | | PHP_INI_BEGIN() |
1040 | | PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language) |
1041 | | PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order) |
1042 | | PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input) |
1043 | | PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output) |
1044 | | STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals) |
1045 | | PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) |
1046 | | |
1047 | | STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0", |
1048 | | PHP_INI_SYSTEM | PHP_INI_PERDIR, |
1049 | | OnUpdate_mbstring_encoding_translation, |
1050 | | encoding_translation, zend_mbstring_globals, mbstring_globals) |
1051 | | PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes", |
1052 | | "^(text/|application/xhtml\\+xml)", |
1053 | | PHP_INI_ALL, |
1054 | | OnUpdate_mbstring_http_output_conv_mimetypes) |
1055 | | |
1056 | | STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0", |
1057 | | PHP_INI_ALL, |
1058 | | OnUpdateBool, |
1059 | | strict_detection, zend_mbstring_globals, mbstring_globals) |
1060 | | #ifdef HAVE_MBREGEX |
1061 | | STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals) |
1062 | | STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals) |
1063 | | #endif |
1064 | | PHP_INI_END() |
1065 | | /* }}} */ |
1066 | | |
1067 | 4.06k | static void mbstring_internal_encoding_changed_hook(void) { |
1068 | | /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */ |
1069 | 4.06k | if (!MBSTRG(internal_encoding_set)) { |
1070 | 4.06k | const char *encoding = php_get_internal_encoding(); |
1071 | 4.06k | _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding)); |
1072 | 4.06k | } |
1073 | | |
1074 | 4.06k | if (!MBSTRG(http_output_set)) { |
1075 | 4.06k | const char *encoding = php_get_output_encoding(); |
1076 | 4.06k | _php_mb_ini_mbstring_http_output_set(encoding); |
1077 | 4.06k | } |
1078 | | |
1079 | 4.06k | if (!MBSTRG(http_input_set)) { |
1080 | 4.06k | const char *encoding = php_get_input_encoding(); |
1081 | 4.06k | _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding)); |
1082 | 4.06k | } |
1083 | 4.06k | } |
1084 | | |
1085 | | /* {{{ module global initialize handler */ |
1086 | | static PHP_GINIT_FUNCTION(mbstring) |
1087 | 4.06k | { |
1088 | | #if defined(COMPILE_DL_MBSTRING) && defined(ZTS) |
1089 | | ZEND_TSRMLS_CACHE_UPDATE(); |
1090 | | #endif |
1091 | | |
1092 | 4.06k | mbstring_globals->language = mbfl_no_language_uni; |
1093 | 4.06k | mbstring_globals->internal_encoding = NULL; |
1094 | 4.06k | mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding; |
1095 | 4.06k | mbstring_globals->http_output_encoding = &mbfl_encoding_pass; |
1096 | 4.06k | mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass; |
1097 | 4.06k | mbstring_globals->http_input_identify = NULL; |
1098 | 4.06k | mbstring_globals->http_input_identify_get = NULL; |
1099 | 4.06k | mbstring_globals->http_input_identify_post = NULL; |
1100 | 4.06k | mbstring_globals->http_input_identify_cookie = NULL; |
1101 | 4.06k | mbstring_globals->http_input_identify_string = NULL; |
1102 | 4.06k | mbstring_globals->http_input_list = NULL; |
1103 | 4.06k | mbstring_globals->http_input_list_size = 0; |
1104 | 4.06k | mbstring_globals->detect_order_list = NULL; |
1105 | 4.06k | mbstring_globals->detect_order_list_size = 0; |
1106 | 4.06k | mbstring_globals->current_detect_order_list = NULL; |
1107 | 4.06k | mbstring_globals->current_detect_order_list_size = 0; |
1108 | 4.06k | mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut; |
1109 | 4.06k | mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]); |
1110 | 4.06k | mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
1111 | 4.06k | mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */ |
1112 | 4.06k | mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
1113 | 4.06k | mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */ |
1114 | 4.06k | mbstring_globals->illegalchars = 0; |
1115 | 4.06k | mbstring_globals->encoding_translation = 0; |
1116 | 4.06k | mbstring_globals->strict_detection = 0; |
1117 | 4.06k | mbstring_globals->outconv = NULL; |
1118 | 4.06k | mbstring_globals->http_output_conv_mimetypes = NULL; |
1119 | 4.06k | #ifdef HAVE_MBREGEX |
1120 | 4.06k | mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(); |
1121 | 4.06k | #endif |
1122 | 4.06k | mbstring_globals->last_used_encoding_name = NULL; |
1123 | 4.06k | mbstring_globals->last_used_encoding = NULL; |
1124 | 4.06k | mbstring_globals->internal_encoding_set = 0; |
1125 | 4.06k | mbstring_globals->http_output_set = 0; |
1126 | 4.06k | mbstring_globals->http_input_set = 0; |
1127 | 4.06k | } |
1128 | | /* }}} */ |
1129 | | |
1130 | | /* {{{ PHP_GSHUTDOWN_FUNCTION */ |
1131 | | static PHP_GSHUTDOWN_FUNCTION(mbstring) |
1132 | 0 | { |
1133 | 0 | if (mbstring_globals->http_input_list) { |
1134 | 0 | free(ZEND_VOIDP(mbstring_globals->http_input_list)); |
1135 | 0 | } |
1136 | 0 | if (mbstring_globals->detect_order_list) { |
1137 | 0 | free(ZEND_VOIDP(mbstring_globals->detect_order_list)); |
1138 | 0 | } |
1139 | 0 | if (mbstring_globals->http_output_conv_mimetypes) { |
1140 | 0 | _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes); |
1141 | 0 | } |
1142 | 0 | #ifdef HAVE_MBREGEX |
1143 | 0 | php_mb_regex_globals_free(mbstring_globals->mb_regex_globals); |
1144 | 0 | #endif |
1145 | 0 | } |
1146 | | /* }}} */ |
1147 | | |
1148 | | /* {{{ PHP_MINIT_FUNCTION(mbstring) */ |
1149 | | PHP_MINIT_FUNCTION(mbstring) |
1150 | 4.06k | { |
1151 | | #if defined(COMPILE_DL_MBSTRING) && defined(ZTS) |
1152 | | ZEND_TSRMLS_CACHE_UPDATE(); |
1153 | | #endif |
1154 | 4.06k | __mbfl_allocators = (mbfl_allocators*)&_php_mb_allocators; |
1155 | | |
1156 | 4.06k | REGISTER_INI_ENTRIES(); |
1157 | | |
1158 | | /* We assume that we're the only user of the hook. */ |
1159 | 4.06k | ZEND_ASSERT(php_internal_encoding_changed == NULL); |
1160 | 4.06k | php_internal_encoding_changed = mbstring_internal_encoding_changed_hook; |
1161 | 4.06k | mbstring_internal_encoding_changed_hook(); |
1162 | | |
1163 | | /* This is a global handler. Should not be set in a per-request handler. */ |
1164 | 4.06k | sapi_register_treat_data(mbstr_treat_data); |
1165 | | |
1166 | | /* Post handlers are stored in the thread-local context. */ |
1167 | 4.06k | if (MBSTRG(encoding_translation)) { |
1168 | 0 | sapi_register_post_entries(mbstr_post_entries); |
1169 | 0 | } |
1170 | | |
1171 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT); |
1172 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT); |
1173 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT); |
1174 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT); |
1175 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT); |
1176 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT); |
1177 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT); |
1178 | 4.06k | REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT); |
1179 | | |
1180 | 4.06k | #ifdef HAVE_MBREGEX |
1181 | 4.06k | PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); |
1182 | 4.06k | #endif |
1183 | | |
1184 | 4.06k | if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) { |
1185 | 0 | return FAILURE; |
1186 | 0 | } |
1187 | | |
1188 | 4.06k | php_rfc1867_set_multibyte_callbacks( |
1189 | 4.06k | php_mb_encoding_translation, |
1190 | 4.06k | php_mb_gpc_get_detect_order, |
1191 | 4.06k | php_mb_gpc_set_input_encoding, |
1192 | 4.06k | php_mb_rfc1867_getword, |
1193 | 4.06k | php_mb_rfc1867_getword_conf, |
1194 | 4.06k | php_mb_rfc1867_basename); |
1195 | | |
1196 | 4.06k | return SUCCESS; |
1197 | 4.06k | } |
1198 | | /* }}} */ |
1199 | | |
1200 | | /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */ |
1201 | | PHP_MSHUTDOWN_FUNCTION(mbstring) |
1202 | 0 | { |
1203 | 0 | UNREGISTER_INI_ENTRIES(); |
1204 | |
|
1205 | 0 | zend_multibyte_restore_functions(); |
1206 | |
|
1207 | 0 | #ifdef HAVE_MBREGEX |
1208 | 0 | PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); |
1209 | 0 | #endif |
1210 | |
|
1211 | 0 | php_internal_encoding_changed = NULL; |
1212 | |
|
1213 | 0 | return SUCCESS; |
1214 | 0 | } |
1215 | | /* }}} */ |
1216 | | |
1217 | | /* {{{ PHP_RINIT_FUNCTION(mbstring) */ |
1218 | | PHP_RINIT_FUNCTION(mbstring) |
1219 | 939k | { |
1220 | 939k | MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding); |
1221 | 939k | MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding); |
1222 | 939k | MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode); |
1223 | 939k | MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar); |
1224 | | |
1225 | 939k | MBSTRG(illegalchars) = 0; |
1226 | | |
1227 | 939k | php_mb_populate_current_detect_order_list(); |
1228 | | |
1229 | 939k | #ifdef HAVE_MBREGEX |
1230 | 939k | PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); |
1231 | 939k | #endif |
1232 | 939k | zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding)); |
1233 | | |
1234 | 939k | return SUCCESS; |
1235 | 939k | } |
1236 | | /* }}} */ |
1237 | | |
1238 | | /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */ |
1239 | | PHP_RSHUTDOWN_FUNCTION(mbstring) |
1240 | 938k | { |
1241 | 938k | if (MBSTRG(current_detect_order_list) != NULL) { |
1242 | 938k | efree(ZEND_VOIDP(MBSTRG(current_detect_order_list))); |
1243 | 938k | MBSTRG(current_detect_order_list) = NULL; |
1244 | 938k | MBSTRG(current_detect_order_list_size) = 0; |
1245 | 938k | } |
1246 | 938k | if (MBSTRG(outconv) != NULL) { |
1247 | 0 | MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv)); |
1248 | 0 | mbfl_buffer_converter_delete(MBSTRG(outconv)); |
1249 | 0 | MBSTRG(outconv) = NULL; |
1250 | 0 | } |
1251 | | |
1252 | | /* clear http input identification. */ |
1253 | 938k | MBSTRG(http_input_identify) = NULL; |
1254 | 938k | MBSTRG(http_input_identify_post) = NULL; |
1255 | 938k | MBSTRG(http_input_identify_get) = NULL; |
1256 | 938k | MBSTRG(http_input_identify_cookie) = NULL; |
1257 | 938k | MBSTRG(http_input_identify_string) = NULL; |
1258 | | |
1259 | 938k | if (MBSTRG(last_used_encoding_name)) { |
1260 | 0 | zend_string_release(MBSTRG(last_used_encoding_name)); |
1261 | 0 | MBSTRG(last_used_encoding_name) = NULL; |
1262 | 0 | } |
1263 | | |
1264 | 938k | MBSTRG(internal_encoding_set) = 0; |
1265 | 938k | MBSTRG(http_output_set) = 0; |
1266 | 938k | MBSTRG(http_input_set) = 0; |
1267 | | |
1268 | 938k | #ifdef HAVE_MBREGEX |
1269 | 938k | PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); |
1270 | 938k | #endif |
1271 | | |
1272 | 938k | return SUCCESS; |
1273 | 938k | } |
1274 | | /* }}} */ |
1275 | | |
1276 | | /* {{{ PHP_MINFO_FUNCTION(mbstring) */ |
1277 | | PHP_MINFO_FUNCTION(mbstring) |
1278 | 25 | { |
1279 | 25 | php_info_print_table_start(); |
1280 | 25 | php_info_print_table_row(2, "Multibyte Support", "enabled"); |
1281 | 25 | php_info_print_table_row(2, "Multibyte string engine", "libmbfl"); |
1282 | 25 | php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled"); |
1283 | 25 | { |
1284 | 25 | char tmp[256]; |
1285 | 25 | snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY); |
1286 | 25 | php_info_print_table_row(2, "libmbfl version", tmp); |
1287 | 25 | } |
1288 | 25 | php_info_print_table_end(); |
1289 | | |
1290 | 25 | php_info_print_table_start(); |
1291 | 25 | php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1."); |
1292 | 25 | php_info_print_table_end(); |
1293 | | |
1294 | 25 | #ifdef HAVE_MBREGEX |
1295 | 25 | PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU); |
1296 | 25 | #endif |
1297 | | |
1298 | 25 | DISPLAY_INI_ENTRIES(); |
1299 | 25 | } |
1300 | | /* }}} */ |
1301 | | |
1302 | | /* {{{ Sets the current language or Returns the current language as a string */ |
1303 | | PHP_FUNCTION(mb_language) |
1304 | 0 | { |
1305 | 0 | zend_string *name = NULL; |
1306 | |
|
1307 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "|S!", &name) == FAILURE) { |
1308 | 0 | RETURN_THROWS(); |
1309 | 0 | } |
1310 | 0 | if (name == NULL) { |
1311 | 0 | RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language))); |
1312 | 0 | } else { |
1313 | 0 | zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0); |
1314 | 0 | if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) { |
1315 | 0 | zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name)); |
1316 | 0 | zend_string_release_ex(ini_name, 0); |
1317 | 0 | RETURN_THROWS(); |
1318 | 0 | } |
1319 | | // TODO Make return void |
1320 | 0 | RETVAL_TRUE; |
1321 | 0 | zend_string_release_ex(ini_name, 0); |
1322 | 0 | } |
1323 | 0 | } |
1324 | | /* }}} */ |
1325 | | |
1326 | | /* {{{ Sets the current internal encoding or Returns the current internal encoding as a string */ |
1327 | | PHP_FUNCTION(mb_internal_encoding) |
1328 | 0 | { |
1329 | 0 | const char *name = NULL; |
1330 | 0 | size_t name_len; |
1331 | 0 | const mbfl_encoding *encoding; |
1332 | |
|
1333 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &name, &name_len) == FAILURE) { |
1334 | 0 | RETURN_THROWS(); |
1335 | 0 | } |
1336 | 0 | if (name == NULL) { |
1337 | 0 | ZEND_ASSERT(MBSTRG(current_internal_encoding)); |
1338 | 0 | RETURN_STRING(MBSTRG(current_internal_encoding)->name); |
1339 | 0 | } else { |
1340 | 0 | encoding = mbfl_name2encoding(name); |
1341 | 0 | if (!encoding) { |
1342 | 0 | zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name); |
1343 | 0 | RETURN_THROWS(); |
1344 | 0 | } else { |
1345 | 0 | MBSTRG(current_internal_encoding) = encoding; |
1346 | 0 | MBSTRG(internal_encoding_set) = 1; |
1347 | | /* TODO Return old encoding */ |
1348 | 0 | RETURN_TRUE; |
1349 | 0 | } |
1350 | 0 | } |
1351 | 0 | } |
1352 | | /* }}} */ |
1353 | | |
1354 | | /* {{{ Returns the input encoding */ |
1355 | | PHP_FUNCTION(mb_http_input) |
1356 | 0 | { |
1357 | 0 | char *typ = NULL; |
1358 | 0 | size_t typ_len = 0; |
1359 | 0 | int retname; |
1360 | 0 | char *list, *temp; |
1361 | 0 | const mbfl_encoding *result = NULL; |
1362 | |
|
1363 | 0 | retname = 1; |
1364 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &typ, &typ_len) == FAILURE) { |
1365 | 0 | RETURN_THROWS(); |
1366 | 0 | } |
1367 | 0 | if (typ == NULL) { |
1368 | 0 | result = MBSTRG(http_input_identify); |
1369 | 0 | } else { |
1370 | 0 | switch (*typ) { |
1371 | 0 | case 'G': |
1372 | 0 | case 'g': |
1373 | 0 | result = MBSTRG(http_input_identify_get); |
1374 | 0 | break; |
1375 | 0 | case 'P': |
1376 | 0 | case 'p': |
1377 | 0 | result = MBSTRG(http_input_identify_post); |
1378 | 0 | break; |
1379 | 0 | case 'C': |
1380 | 0 | case 'c': |
1381 | 0 | result = MBSTRG(http_input_identify_cookie); |
1382 | 0 | break; |
1383 | 0 | case 'S': |
1384 | 0 | case 's': |
1385 | 0 | result = MBSTRG(http_input_identify_string); |
1386 | 0 | break; |
1387 | 0 | case 'I': |
1388 | 0 | case 'i': |
1389 | 0 | { |
1390 | 0 | const mbfl_encoding **entry = MBSTRG(http_input_list); |
1391 | 0 | const size_t n = MBSTRG(http_input_list_size); |
1392 | 0 | size_t i; |
1393 | 0 | array_init(return_value); |
1394 | 0 | for (i = 0; i < n; i++) { |
1395 | 0 | add_next_index_string(return_value, (*entry)->name); |
1396 | 0 | entry++; |
1397 | 0 | } |
1398 | 0 | retname = 0; |
1399 | 0 | } |
1400 | 0 | break; |
1401 | 0 | case 'L': |
1402 | 0 | case 'l': |
1403 | 0 | { |
1404 | 0 | const mbfl_encoding **entry = MBSTRG(http_input_list); |
1405 | 0 | const size_t n = MBSTRG(http_input_list_size); |
1406 | 0 | size_t i; |
1407 | 0 | list = NULL; |
1408 | 0 | for (i = 0; i < n; i++) { |
1409 | 0 | if (list) { |
1410 | 0 | temp = list; |
1411 | 0 | spprintf(&list, 0, "%s,%s", temp, (*entry)->name); |
1412 | 0 | efree(temp); |
1413 | 0 | if (!list) { |
1414 | 0 | break; |
1415 | 0 | } |
1416 | 0 | } else { |
1417 | 0 | list = estrdup((*entry)->name); |
1418 | 0 | } |
1419 | 0 | entry++; |
1420 | 0 | } |
1421 | 0 | } |
1422 | 0 | if (!list) { |
1423 | | // TODO should return empty string? |
1424 | 0 | RETURN_FALSE; |
1425 | 0 | } |
1426 | 0 | RETVAL_STRING(list); |
1427 | 0 | efree(list); |
1428 | 0 | retname = 0; |
1429 | 0 | break; |
1430 | 0 | default: |
1431 | | // TODO ValueError |
1432 | 0 | result = MBSTRG(http_input_identify); |
1433 | 0 | break; |
1434 | 0 | } |
1435 | 0 | } |
1436 | | |
1437 | | // FIXME this bloc seems useless except for default switch case |
1438 | 0 | if (retname) { |
1439 | 0 | if (result) { |
1440 | 0 | RETVAL_STRING(result->name); |
1441 | 0 | } else { |
1442 | 0 | RETVAL_FALSE; |
1443 | 0 | } |
1444 | 0 | } |
1445 | 0 | } |
1446 | | /* }}} */ |
1447 | | |
1448 | | /* {{{ Sets the current output_encoding or returns the current output_encoding as a string */ |
1449 | | PHP_FUNCTION(mb_http_output) |
1450 | 0 | { |
1451 | 0 | const char *name = NULL; |
1452 | 0 | size_t name_len; |
1453 | 0 | const mbfl_encoding *encoding; |
1454 | |
|
1455 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s!", &name, &name_len) == FAILURE) { |
1456 | 0 | RETURN_THROWS(); |
1457 | 0 | } |
1458 | |
|
1459 | 0 | if (name == NULL) { |
1460 | 0 | ZEND_ASSERT(MBSTRG(current_http_output_encoding)); |
1461 | 0 | RETURN_STRING(MBSTRG(current_http_output_encoding)->name); |
1462 | 0 | } else { |
1463 | 0 | encoding = php_mb_get_encoding_or_pass(name); |
1464 | 0 | if (!encoding) { |
1465 | 0 | zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name); |
1466 | 0 | RETURN_THROWS(); |
1467 | 0 | } else { |
1468 | 0 | MBSTRG(http_output_set) = 1; |
1469 | 0 | MBSTRG(current_http_output_encoding) = encoding; |
1470 | | /* TODO Return previous encoding? */ |
1471 | 0 | RETURN_TRUE; |
1472 | 0 | } |
1473 | 0 | } |
1474 | 0 | } |
1475 | | /* }}} */ |
1476 | | |
1477 | | /* {{{ Sets the current detect_order or Return the current detect_order as a array */ |
1478 | | PHP_FUNCTION(mb_detect_order) |
1479 | 0 | { |
1480 | 0 | zend_string *order_str = NULL; |
1481 | 0 | HashTable *order_ht = NULL; |
1482 | |
|
1483 | 0 | ZEND_PARSE_PARAMETERS_START(0, 1) |
1484 | 0 | Z_PARAM_OPTIONAL |
1485 | 0 | Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(order_str, order_ht) |
1486 | 0 | ZEND_PARSE_PARAMETERS_END(); |
1487 | |
|
1488 | 0 | if (!order_str && !order_ht) { |
1489 | 0 | size_t i; |
1490 | 0 | size_t n = MBSTRG(current_detect_order_list_size); |
1491 | 0 | const mbfl_encoding **entry = MBSTRG(current_detect_order_list); |
1492 | 0 | array_init(return_value); |
1493 | 0 | for (i = 0; i < n; i++) { |
1494 | 0 | add_next_index_string(return_value, (*entry)->name); |
1495 | 0 | entry++; |
1496 | 0 | } |
1497 | 0 | } else { |
1498 | 0 | const mbfl_encoding **list; |
1499 | 0 | size_t size; |
1500 | 0 | if (order_ht) { |
1501 | 0 | if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) { |
1502 | 0 | RETURN_THROWS(); |
1503 | 0 | } |
1504 | 0 | } else { |
1505 | 0 | if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) { |
1506 | 0 | RETURN_THROWS(); |
1507 | 0 | } |
1508 | 0 | } |
1509 | |
|
1510 | 0 | if (size == 0) { |
1511 | 0 | efree(ZEND_VOIDP(list)); |
1512 | 0 | zend_argument_value_error(1, "must specify at least one encoding"); |
1513 | 0 | RETURN_THROWS(); |
1514 | 0 | } |
1515 | |
|
1516 | 0 | if (MBSTRG(current_detect_order_list)) { |
1517 | 0 | efree(ZEND_VOIDP(MBSTRG(current_detect_order_list))); |
1518 | 0 | } |
1519 | 0 | MBSTRG(current_detect_order_list) = list; |
1520 | 0 | MBSTRG(current_detect_order_list_size) = size; |
1521 | 0 | RETURN_TRUE; |
1522 | 0 | } |
1523 | 0 | } |
1524 | | /* }}} */ |
1525 | | |
1526 | | static inline int php_mb_check_code_point(zend_long cp) |
1527 | 0 | { |
1528 | 0 | if (cp < 0 || cp >= 0x110000) { |
1529 | | /* Out of Unicode range */ |
1530 | 0 | return 0; |
1531 | 0 | } |
1532 | | |
1533 | 0 | if (cp >= 0xd800 && cp <= 0xdfff) { |
1534 | | /* Surrogate code-point. These are never valid on their own and we only allow a single |
1535 | | * substitute character. */ |
1536 | 0 | return 0; |
1537 | 0 | } |
1538 | | |
1539 | | /* As the we do not know the target encoding of the conversion operation that is going to |
1540 | | * use the substitution character, we cannot check whether the codepoint is actually mapped |
1541 | | * in the given encoding at this point. Thus we have to accept everything. */ |
1542 | 0 | return 1; |
1543 | 0 | } |
1544 | | |
1545 | | /* {{{ Sets the current substitute_character or returns the current substitute_character */ |
1546 | | PHP_FUNCTION(mb_substitute_character) |
1547 | 0 | { |
1548 | 0 | zend_string *substitute_character = NULL; |
1549 | 0 | zend_long substitute_codepoint; |
1550 | 0 | zend_bool substitute_is_null = 1; |
1551 | |
|
1552 | 0 | ZEND_PARSE_PARAMETERS_START(0, 1) |
1553 | 0 | Z_PARAM_OPTIONAL |
1554 | 0 | Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null) |
1555 | 0 | ZEND_PARSE_PARAMETERS_END(); |
1556 | |
|
1557 | 0 | if (substitute_is_null) { |
1558 | 0 | if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { |
1559 | 0 | RETURN_STRING("none"); |
1560 | 0 | } |
1561 | 0 | if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { |
1562 | 0 | RETURN_STRING("long"); |
1563 | 0 | } |
1564 | 0 | if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { |
1565 | 0 | RETURN_STRING("entity"); |
1566 | 0 | } |
1567 | 0 | RETURN_LONG(MBSTRG(current_filter_illegal_substchar)); |
1568 | 0 | } |
1569 | |
|
1570 | 0 | if (substitute_character != NULL) { |
1571 | 0 | if (zend_string_equals_literal_ci(substitute_character, "none")) { |
1572 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; |
1573 | 0 | RETURN_TRUE; |
1574 | 0 | } |
1575 | 0 | if (zend_string_equals_literal_ci(substitute_character, "long")) { |
1576 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG; |
1577 | 0 | RETURN_TRUE; |
1578 | 0 | } |
1579 | 0 | if (zend_string_equals_literal_ci(substitute_character, "entity")) { |
1580 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY; |
1581 | 0 | RETURN_TRUE; |
1582 | 0 | } |
1583 | | /* Invalid string value */ |
1584 | 0 | zend_argument_value_error(1, "must be \"none\", \"long\", \"entity\" or a valid codepoint"); |
1585 | 0 | RETURN_THROWS(); |
1586 | 0 | } |
1587 | | /* Integer codepoint passed */ |
1588 | 0 | if (!php_mb_check_code_point(substitute_codepoint)) { |
1589 | 0 | zend_argument_value_error(1, "is not a valid codepoint"); |
1590 | 0 | RETURN_THROWS(); |
1591 | 0 | } |
1592 | |
|
1593 | 0 | MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR; |
1594 | 0 | MBSTRG(current_filter_illegal_substchar) = substitute_codepoint; |
1595 | 0 | RETURN_TRUE; |
1596 | 0 | } |
1597 | | /* }}} */ |
1598 | | |
1599 | | /* {{{ Return the preferred MIME name (charset) as a string */ |
1600 | | PHP_FUNCTION(mb_preferred_mime_name) |
1601 | 0 | { |
1602 | 0 | enum mbfl_no_encoding no_encoding; |
1603 | 0 | char *name = NULL; |
1604 | 0 | size_t name_len; |
1605 | |
|
1606 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { |
1607 | 0 | RETURN_THROWS(); |
1608 | 0 | } |
1609 | |
|
1610 | 0 | no_encoding = mbfl_name2no_encoding(name); |
1611 | 0 | if (no_encoding == mbfl_no_encoding_invalid) { |
1612 | 0 | zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name); |
1613 | 0 | RETURN_THROWS(); |
1614 | 0 | } |
1615 | |
|
1616 | 0 | const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding); |
1617 | 0 | if (preferred_name == NULL || *preferred_name == '\0') { |
1618 | 0 | php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name); |
1619 | 0 | RETVAL_FALSE; |
1620 | 0 | } else { |
1621 | 0 | RETVAL_STRING((char *)preferred_name); |
1622 | 0 | } |
1623 | 0 | } |
1624 | | /* }}} */ |
1625 | | |
/* Byte-range predicates; each evaluates to 1 or 0 and evaluates its argument
 * more than once, so do not pass expressions with side effects.
 *   IS_SJIS1(c): c is in 0x81..0x9f or 0xe0..0xf5
 *   IS_SJIS2(c): c is in 0x40..0x7e or 0x80..0xfc
 * NOTE(review): judging by the names these look like Shift_JIS first/second
 * byte tests -- confirm against the users of these macros. */
#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
1628 | | |
1629 | | /* {{{ Parses GET/POST/COOKIE data and sets global variables */ |
1630 | | PHP_FUNCTION(mb_parse_str) |
1631 | 5.29k | { |
1632 | 5.29k | zval *track_vars_array; |
1633 | 5.29k | char *encstr; |
1634 | 5.29k | size_t encstr_len; |
1635 | 5.29k | php_mb_encoding_handler_info_t info; |
1636 | 5.29k | const mbfl_encoding *detected; |
1637 | | |
1638 | 5.29k | track_vars_array = NULL; |
1639 | 5.29k | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sz", &encstr, &encstr_len, &track_vars_array) == FAILURE) { |
1640 | 21 | RETURN_THROWS(); |
1641 | 21 | } |
1642 | | |
1643 | 5.27k | track_vars_array = zend_try_array_init(track_vars_array); |
1644 | 5.27k | if (!track_vars_array) { |
1645 | 0 | RETURN_THROWS(); |
1646 | 0 | } |
1647 | | |
1648 | 5.27k | encstr = estrndup(encstr, encstr_len); |
1649 | | |
1650 | 5.27k | info.data_type = PARSE_STRING; |
1651 | 5.27k | info.separator = PG(arg_separator).input; |
1652 | 5.27k | info.report_errors = 1; |
1653 | 5.27k | info.to_encoding = MBSTRG(current_internal_encoding); |
1654 | 5.27k | info.to_language = MBSTRG(language); |
1655 | 5.27k | info.from_encodings = MBSTRG(http_input_list); |
1656 | 5.27k | info.num_from_encodings = MBSTRG(http_input_list_size); |
1657 | 5.27k | info.from_language = MBSTRG(language); |
1658 | | |
1659 | 5.27k | detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr); |
1660 | | |
1661 | 5.27k | MBSTRG(http_input_identify) = detected; |
1662 | | |
1663 | 5.27k | RETVAL_BOOL(detected); |
1664 | | |
1665 | 5.27k | if (encstr != NULL) efree(encstr); |
1666 | 5.27k | } |
1667 | | /* }}} */ |
1668 | | |
/* {{{ Returns string in output buffer converted to the http_output encoding */
/* Output-buffer callback taking the buffered chunk and the handler status
 * bit mask. In the START phase it (re)creates the global output converter
 * and, when applicable, adds a "Content-Type: ...; charset=..." header. Every
 * call then feeds the chunk through the converter; the END phase flushes and
 * destroys it. The chunk is returned unchanged when the http output encoding
 * is the "pass" encoding or when no converter is active. */
PHP_FUNCTION(mb_output_handler)
{
	char *arg_string;
	size_t arg_string_len;
	zend_long arg_status;
	mbfl_string string, result;
	const char *charset;
	char *p;
	const mbfl_encoding *encoding;
	int last_feed;
	size_t len;
	unsigned char send_text_mimetype = 0; /* set when mimetype below is a heap copy that must be freed */
	char *s, *mimetype = NULL;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
		RETURN_THROWS();
	}

	encoding = MBSTRG(current_http_output_encoding);

	/* start phase only */
	if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
		/* delete the converter just in case. */
		if (MBSTRG(outconv)) {
			/* keep the illegal-character count of the converter being discarded */
			MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
			mbfl_buffer_converter_delete(MBSTRG(outconv));
			MBSTRG(outconv) = NULL;
		}
		/* "pass" output encoding: hand the chunk back untouched */
		if (encoding == &mbfl_encoding_pass) {
			RETURN_STRINGL(arg_string, arg_string_len);
		}

		/* analyze mime type */
		if (SG(sapi_headers).mimetype &&
			_php_mb_match_regex(
				MBSTRG(http_output_conv_mimetypes),
				SG(sapi_headers).mimetype,
				strlen(SG(sapi_headers).mimetype))) {
			/* copy the mime type, dropping everything from the first ';' on */
			if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
				mimetype = estrdup(SG(sapi_headers).mimetype);
			} else {
				mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
			}
			send_text_mimetype = 1;
		} else if (SG(sapi_headers).send_default_content_type) {
			/* not a heap copy: must not be freed (send_text_mimetype stays 0) */
			mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
		}

		/* if content-type is not yet set, set it and activate the converter */
		if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
			charset = encoding->mime_name;
			if (charset) {
				len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
				/* NOTE(review): p is not freed here; presumably sapi_add_header()
				 * takes ownership when its last argument is 0 -- confirm. */
				if (sapi_add_header(p, len, 0) != FAILURE) {
					SG(sapi_headers).send_default_content_type = 0;
				}
			}
			/* activate the converter */
			MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
			/* release the duplicated mime type (only the estrdup/estrndup copy) */
			if (send_text_mimetype){
				efree(mimetype);
			}
		}
	}

	/* just return if the converter is not activated. */
	if (MBSTRG(outconv) == NULL) {
		RETURN_STRINGL(arg_string, arg_string_len);
	}

	/* flag */
	last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
	/* mode */
	mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));

	/* feed the string */
	mbfl_string_init(&string);
	/* these are not needed. convd has encoding info.
	string.encoding = MBSTRG(current_internal_encoding);
	*/
	string.val = (unsigned char *)arg_string;
	string.len = arg_string_len;
	mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
	if (last_feed) {
		mbfl_buffer_converter_flush(MBSTRG(outconv));
	}
	/* get the converter output, and return it */
	mbfl_buffer_converter_result(MBSTRG(outconv), &result);
	// TODO: avoid reallocation ???
	/* RETVAL_STRINGL copies the bytes, so the converter's buffer is freed right after */
	RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
	efree(result.val);

	/* delete the converter if it is the last feed. */
	if (last_feed) {
		MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
		mbfl_buffer_converter_delete(MBSTRG(outconv));
		MBSTRG(outconv) = NULL;
	}
}
/* }}} */
1771 | | |
1772 | | /* {{{ Convert a multibyte string to an array. If split_length is specified, |
1773 | | break the string down into chunks each split_length characters long. */ |
1774 | | |
/* structure to pass split params to the callback
 *
 * One instance is shared between mb_str_split() (which fills it in) and
 * mbfl_split_output() (which receives it as its `data` argument and updates
 * mb_chunk_length / result_string while emitting chunks). */
struct mbfl_split_params {
	zval *return_value; /* php function return value structure pointer */
	mbfl_string *result_string; /* string to store result chunk */
	size_t mb_chunk_length; /* actual chunk length in chars */
	size_t split_length; /* split length in chars */
	mbfl_convert_filter *next_filter; /* widechar to encoding converter */
};
1783 | | |
1784 | | /* callback function to fill split array */ |
1785 | | static int mbfl_split_output(int c, void *data) |
1786 | 0 | { |
1787 | 0 | struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */ |
1788 | |
|
1789 | 0 | (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */ |
1790 | |
|
1791 | 0 | if(params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */ |
1792 | 0 | mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */ |
1793 | 0 | mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */ |
1794 | 0 | mbfl_string *chunk = params->result_string; |
1795 | 0 | mbfl_memory_device_result(device, chunk); /* make chunk */ |
1796 | 0 | add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */ |
1797 | 0 | efree(chunk->val); |
1798 | 0 | params->mb_chunk_length = 0; /* reset mb_chunk size */ |
1799 | 0 | } |
1800 | 0 | return 0; |
1801 | 0 | } |
1802 | | |
1803 | | /* TODO Document this function on php.net */ |
1804 | | PHP_FUNCTION(mb_str_split) |
1805 | 0 | { |
1806 | 0 | zend_string *str, *encoding = NULL; |
1807 | 0 | size_t mb_len, chunks, chunk_len; |
1808 | 0 | const char *p, *last; /* pointer for the string cursor and last string char */ |
1809 | 0 | mbfl_string string, result_string; |
1810 | 0 | const mbfl_encoding *mbfl_encoding; |
1811 | 0 | zend_long split_length = 1; |
1812 | |
|
1813 | 0 | ZEND_PARSE_PARAMETERS_START(1, 3) |
1814 | 0 | Z_PARAM_STR(str) |
1815 | 0 | Z_PARAM_OPTIONAL |
1816 | 0 | Z_PARAM_LONG(split_length) |
1817 | 0 | Z_PARAM_STR_OR_NULL(encoding) |
1818 | 0 | ZEND_PARSE_PARAMETERS_END(); |
1819 | |
|
1820 | 0 | if (split_length <= 0) { |
1821 | 0 | zend_argument_value_error(2, "must be greater than 0"); |
1822 | 0 | RETURN_THROWS(); |
1823 | 0 | } |
1824 | | |
1825 | | /* fill mbfl_string structure */ |
1826 | 0 | string.val = (unsigned char *) ZSTR_VAL(str); |
1827 | 0 | string.len = ZSTR_LEN(str); |
1828 | 0 | string.encoding = php_mb_get_encoding(encoding, 3); |
1829 | 0 | if (!string.encoding) { |
1830 | 0 | RETURN_THROWS(); |
1831 | 0 | } |
1832 | |
|
1833 | 0 | p = ZSTR_VAL(str); /* string cursor pointer */ |
1834 | 0 | last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */ |
1835 | |
|
1836 | 0 | mbfl_encoding = string.encoding; |
1837 | | |
1838 | | /* first scenario: 1,2,4-bytes fixed width encodings (head part) */ |
1839 | 0 | if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */ |
1840 | 0 | mb_len = string.len; |
1841 | 0 | chunk_len = (size_t)split_length; /* chunk length in bytes */ |
1842 | 0 | } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */ |
1843 | 0 | mb_len = string.len / 2; |
1844 | 0 | chunk_len = split_length * 2; |
1845 | 0 | } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */ |
1846 | 0 | mb_len = string.len / 4; |
1847 | 0 | chunk_len = split_length * 4; |
1848 | 0 | } else if (mbfl_encoding->mblen_table != NULL) { |
1849 | | /* second scenario: variable width encodings with length table */ |
1850 | 0 | char unsigned const *mbtab = mbfl_encoding->mblen_table; |
1851 | | |
1852 | | /* assume that we have 1-bytes characters */ |
1853 | 0 | array_init_size(return_value, (string.len + split_length) / split_length); /* round up */ |
1854 | |
|
1855 | 0 | while (p < last) { /* split cycle work until the cursor has reached the last byte */ |
1856 | 0 | char const *chunk_p = p; /* chunk first byte pointer */ |
1857 | 0 | chunk_len = 0; /* chunk length in bytes */ |
1858 | 0 | zend_long char_count; |
1859 | |
|
1860 | 0 | for (char_count = 0; char_count < split_length && p < last; ++char_count) { |
1861 | 0 | char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */ |
1862 | 0 | chunk_len += m; |
1863 | 0 | p += m; |
1864 | 0 | } |
1865 | 0 | if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */ |
1866 | 0 | add_next_index_stringl(return_value, chunk_p, chunk_len); |
1867 | 0 | } |
1868 | 0 | return; |
1869 | 0 | } else { |
1870 | | /* third scenario: other multibyte encodings */ |
1871 | 0 | mbfl_convert_filter *filter, *decoder; |
1872 | | |
1873 | | /* assume that we have 1-bytes characters */ |
1874 | 0 | array_init_size(return_value, (string.len + split_length) / split_length); /* round up */ |
1875 | | |
1876 | | /* decoder filter to decode wchar to encoding */ |
1877 | 0 | mbfl_memory_device device; |
1878 | 0 | mbfl_memory_device_init(&device, split_length + 1, 0); |
1879 | |
|
1880 | 0 | decoder = mbfl_convert_filter_new( |
1881 | 0 | &mbfl_encoding_wchar, |
1882 | 0 | string.encoding, |
1883 | 0 | mbfl_memory_device_output, |
1884 | 0 | NULL, |
1885 | 0 | &device); |
1886 | | /* assert that nothing is wrong with the decoder */ |
1887 | 0 | ZEND_ASSERT(decoder != NULL); |
1888 | | |
1889 | | /* wchar filter */ |
1890 | 0 | mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */ |
1891 | 0 | struct mbfl_split_params params = { /* init callback function params structure */ |
1892 | 0 | .return_value = return_value, |
1893 | 0 | .result_string = &result_string, |
1894 | 0 | .mb_chunk_length = 0, |
1895 | 0 | .split_length = (size_t)split_length, |
1896 | 0 | .next_filter = decoder, |
1897 | 0 | }; |
1898 | |
|
1899 | 0 | filter = mbfl_convert_filter_new( |
1900 | 0 | string.encoding, |
1901 | 0 | &mbfl_encoding_wchar, |
1902 | 0 | mbfl_split_output, |
1903 | 0 | NULL, |
1904 | 0 | ¶ms); |
1905 | | /* assert that nothing is wrong with the filter */ |
1906 | 0 | ZEND_ASSERT(filter != NULL); |
1907 | |
|
1908 | 0 | while (p < last - 1) { /* cycle each byte except last with callback function */ |
1909 | 0 | (*filter->filter_function)(*p++, filter); |
1910 | 0 | } |
1911 | 0 | params.mb_chunk_length = split_length - 1; /* force to finish current chunk */ |
1912 | 0 | (*filter->filter_function)(*p++, filter); /*process last char */ |
1913 | |
|
1914 | 0 | mbfl_convert_filter_delete(decoder); |
1915 | 0 | mbfl_convert_filter_delete(filter); |
1916 | 0 | mbfl_memory_device_clear(&device); |
1917 | 0 | return; |
1918 | 0 | } |
1919 | | |
1920 | | /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */ |
1921 | 0 | chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */ |
1922 | 0 | array_init_size(return_value, chunks); |
1923 | 0 | if (chunks != 0) { |
1924 | 0 | zend_long i; |
1925 | |
|
1926 | 0 | for (i = 0; i < chunks - 1; p += chunk_len, ++i) { |
1927 | 0 | add_next_index_stringl(return_value, p, chunk_len); |
1928 | 0 | } |
1929 | 0 | add_next_index_stringl(return_value, p, last - p); |
1930 | 0 | } |
1931 | 0 | } |
1932 | | /* }}} */ |
1933 | | |
1934 | | /* {{{ Get character numbers of a string */ |
1935 | | PHP_FUNCTION(mb_strlen) |
1936 | 0 | { |
1937 | 0 | size_t n; |
1938 | 0 | mbfl_string string; |
1939 | 0 | char *str; |
1940 | 0 | size_t str_len; |
1941 | 0 | zend_string *enc_name = NULL; |
1942 | |
|
1943 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
1944 | 0 | Z_PARAM_STRING(str, str_len) |
1945 | 0 | Z_PARAM_OPTIONAL |
1946 | 0 | Z_PARAM_STR_OR_NULL(enc_name) |
1947 | 0 | ZEND_PARSE_PARAMETERS_END(); |
1948 | |
|
1949 | 0 | string.val = (unsigned char *) str; |
1950 | 0 | string.len = str_len; |
1951 | 0 | string.encoding = php_mb_get_encoding(enc_name, 2); |
1952 | 0 | if (!string.encoding) { |
1953 | 0 | RETURN_THROWS(); |
1954 | 0 | } |
1955 | |
|
1956 | 0 | n = mbfl_strlen(&string); |
1957 | | /* Only way this can fail is if the conversion creation fails |
1958 | | * this would imply some sort of memory allocation failure which is a bug */ |
1959 | 0 | ZEND_ASSERT(!mbfl_is_error(n)); |
1960 | 0 | RETVAL_LONG(n); |
1961 | 0 | } |
1962 | | /* }}} */ |
1963 | | |
1964 | 0 | static void handle_strpos_error(size_t error) { |
1965 | 0 | switch (error) { |
1966 | 0 | case MBFL_ERROR_NOT_FOUND: |
1967 | 0 | break; |
1968 | 0 | case MBFL_ERROR_ENCODING: |
1969 | 0 | php_error_docref(NULL, E_WARNING, "Conversion error"); |
1970 | 0 | break; |
1971 | 0 | case MBFL_ERROR_OFFSET: |
1972 | 0 | zend_argument_value_error(3, "must be contained in argument #1 ($haystack)"); |
1973 | 0 | break; |
1974 | 0 | default: |
1975 | 0 | zend_value_error("mb_strpos(): Unknown error"); |
1976 | 0 | break; |
1977 | 0 | } |
1978 | 0 | } |
1979 | | |
1980 | | /* {{{ Find position of first occurrence of a string within another */ |
1981 | | PHP_FUNCTION(mb_strpos) |
1982 | 0 | { |
1983 | 0 | int reverse = 0; |
1984 | 0 | zend_long offset = 0; |
1985 | 0 | mbfl_string haystack, needle; |
1986 | 0 | zend_string *enc_name = NULL; |
1987 | 0 | size_t n; |
1988 | |
|
1989 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) { |
1990 | 0 | RETURN_THROWS(); |
1991 | 0 | } |
1992 | |
|
1993 | 0 | haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4); |
1994 | 0 | if (!haystack.encoding) { |
1995 | 0 | RETURN_THROWS(); |
1996 | 0 | } |
1997 | |
|
1998 | 0 | n = mbfl_strpos(&haystack, &needle, offset, reverse); |
1999 | 0 | if (!mbfl_is_error(n)) { |
2000 | 0 | RETVAL_LONG(n); |
2001 | 0 | } else { |
2002 | 0 | handle_strpos_error(n); |
2003 | 0 | RETVAL_FALSE; |
2004 | 0 | } |
2005 | 0 | } |
2006 | | /* }}} */ |
2007 | | |
2008 | | /* {{{ Find position of last occurrence of a string within another */ |
2009 | | PHP_FUNCTION(mb_strrpos) |
2010 | 0 | { |
2011 | 0 | mbfl_string haystack, needle; |
2012 | 0 | zend_string *enc_name = NULL; |
2013 | 0 | zend_long offset = 0, n; |
2014 | |
|
2015 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name) == FAILURE) { |
2016 | 0 | RETURN_THROWS(); |
2017 | 0 | } |
2018 | |
|
2019 | 0 | haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4); |
2020 | 0 | if (!haystack.encoding) { |
2021 | 0 | RETURN_THROWS(); |
2022 | 0 | } |
2023 | |
|
2024 | 0 | n = mbfl_strpos(&haystack, &needle, offset, 1); |
2025 | 0 | if (!mbfl_is_error(n)) { |
2026 | 0 | RETVAL_LONG(n); |
2027 | 0 | } else { |
2028 | 0 | handle_strpos_error(n); |
2029 | 0 | RETVAL_FALSE; |
2030 | 0 | } |
2031 | 0 | } |
2032 | | /* }}} */ |
2033 | | |
2034 | | /* {{{ Finds position of first occurrence of a string within another, case insensitive */ |
2035 | | PHP_FUNCTION(mb_stripos) |
2036 | 0 | { |
2037 | 0 | size_t n = (size_t) -1; |
2038 | 0 | zend_long offset = 0; |
2039 | 0 | mbfl_string haystack, needle; |
2040 | 0 | zend_string *from_encoding = NULL; |
2041 | 0 | const mbfl_encoding *enc; |
2042 | |
|
2043 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) { |
2044 | 0 | RETURN_THROWS(); |
2045 | 0 | } |
2046 | |
|
2047 | 0 | enc = php_mb_get_encoding(from_encoding, 4); |
2048 | 0 | if (!enc) { |
2049 | 0 | RETURN_THROWS(); |
2050 | 0 | } |
2051 | |
|
2052 | 0 | n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc); |
2053 | |
|
2054 | 0 | if (!mbfl_is_error(n)) { |
2055 | 0 | RETVAL_LONG(n); |
2056 | 0 | } else { |
2057 | 0 | handle_strpos_error(n); |
2058 | 0 | RETVAL_FALSE; |
2059 | 0 | } |
2060 | 0 | } |
2061 | | /* }}} */ |
2062 | | |
2063 | | /* {{{ Finds position of last occurrence of a string within another, case insensitive */ |
2064 | | PHP_FUNCTION(mb_strripos) |
2065 | 0 | { |
2066 | 0 | size_t n = (size_t) -1; |
2067 | 0 | zend_long offset = 0; |
2068 | 0 | mbfl_string haystack, needle; |
2069 | 0 | zend_string *from_encoding = NULL; |
2070 | 0 | const mbfl_encoding *enc; |
2071 | |
|
2072 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|lS!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &from_encoding) == FAILURE) { |
2073 | 0 | RETURN_THROWS(); |
2074 | 0 | } |
2075 | |
|
2076 | 0 | enc = php_mb_get_encoding(from_encoding, 4); |
2077 | 0 | if (!enc) { |
2078 | 0 | RETURN_THROWS(); |
2079 | 0 | } |
2080 | |
|
2081 | 0 | n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc); |
2082 | |
|
2083 | 0 | if (!mbfl_is_error(n)) { |
2084 | 0 | RETVAL_LONG(n); |
2085 | 0 | } else { |
2086 | 0 | handle_strpos_error(n); |
2087 | 0 | RETVAL_FALSE; |
2088 | 0 | } |
2089 | 0 | } |
2090 | | /* }}} */ |
2091 | | |
2092 | 0 | #define MB_STRSTR 1 |
2093 | 0 | #define MB_STRRCHR 2 |
2094 | 0 | #define MB_STRISTR 3 |
2095 | 0 | #define MB_STRRICHR 4 |
2096 | | /* {{{ php_mb_strstr_variants */ |
2097 | | static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant) |
2098 | 0 | { |
2099 | 0 | int reverse_mode = 0; |
2100 | 0 | size_t n; |
2101 | 0 | mbfl_string haystack, needle, result, *ret = NULL; |
2102 | 0 | zend_string *encoding_name = NULL; |
2103 | 0 | zend_bool part = 0; |
2104 | |
|
2105 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|bS!", |
2106 | 0 | (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, |
2107 | 0 | &part, &encoding_name) == FAILURE |
2108 | 0 | ) { |
2109 | 0 | RETURN_THROWS(); |
2110 | 0 | } |
2111 | |
|
2112 | 0 | haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4); |
2113 | 0 | if (!haystack.encoding) { |
2114 | 0 | RETURN_THROWS(); |
2115 | 0 | } |
2116 | |
|
2117 | 0 | if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; } |
2118 | |
|
2119 | 0 | if (variant == MB_STRISTR || variant == MB_STRRICHR) { |
2120 | 0 | n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val, |
2121 | 0 | needle.len, 0, needle.encoding); |
2122 | 0 | } else { |
2123 | 0 | n = mbfl_strpos(&haystack, &needle, 0, reverse_mode); |
2124 | 0 | } |
2125 | |
|
2126 | 0 | if (!mbfl_is_error(n)) { |
2127 | 0 | if (part) { |
2128 | 0 | ret = mbfl_substr(&haystack, &result, 0, n); |
2129 | 0 | ZEND_ASSERT(ret != NULL); |
2130 | | // TODO: avoid reallocation ??? |
2131 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); |
2132 | 0 | efree(ret->val); |
2133 | 0 | } else { |
2134 | 0 | ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END); |
2135 | 0 | ZEND_ASSERT(ret != NULL); |
2136 | | // TODO: avoid reallocation ??? |
2137 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); |
2138 | 0 | efree(ret->val); |
2139 | 0 | } |
2140 | 0 | } else { |
2141 | | // FIXME use handle_strpos_error(n) |
2142 | 0 | RETVAL_FALSE; |
2143 | 0 | } |
2144 | 0 | } |
2145 | | |
/* {{{ Finds first occurrence of a string within another */
/* Thin wrapper: forwards all arguments to php_mb_strstr_variants() with the
 * MB_STRSTR variant. */
PHP_FUNCTION(mb_strstr)
{
	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
}
/* }}} */
2152 | | |
/* {{{ Finds the last occurrence of a character in a string within another */
/* Thin wrapper: forwards all arguments to php_mb_strstr_variants() with the
 * MB_STRRCHR variant. */
PHP_FUNCTION(mb_strrchr)
{
	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
}
/* }}} */
2159 | | |
/* {{{ Finds first occurrence of a string within another, case insensitive */
/* Thin wrapper: forwards all arguments to php_mb_strstr_variants() with the
 * MB_STRISTR variant. */
PHP_FUNCTION(mb_stristr)
{
	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
}
/* }}} */
2166 | | |
/* {{{ Finds the last occurrence of a character in a string within another, case insensitive */
/* Thin wrapper: forwards all arguments to php_mb_strstr_variants() with the
 * MB_STRRICHR variant. */
PHP_FUNCTION(mb_strrichr)
{
	php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
}
/* }}} */
2173 | | |
/* The variant selectors are only needed by php_mb_strstr_variants() and its
 * four wrappers; drop them so they do not leak into the rest of the file. */
#undef MB_STRSTR
#undef MB_STRRCHR
#undef MB_STRISTR
#undef MB_STRRICHR
2178 | | |
2179 | | /* {{{ Count the number of substring occurrences */ |
2180 | | PHP_FUNCTION(mb_substr_count) |
2181 | 0 | { |
2182 | 0 | size_t n; |
2183 | 0 | mbfl_string haystack, needle; |
2184 | 0 | zend_string *enc_name = NULL; |
2185 | |
|
2186 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|S!", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name) == FAILURE) { |
2187 | 0 | RETURN_THROWS(); |
2188 | 0 | } |
2189 | |
|
2190 | 0 | if (needle.len == 0) { |
2191 | 0 | zend_argument_value_error(2, "must not be empty"); |
2192 | 0 | RETURN_THROWS(); |
2193 | 0 | } |
2194 | |
|
2195 | 0 | haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3); |
2196 | 0 | if (!haystack.encoding) { |
2197 | 0 | RETURN_THROWS(); |
2198 | 0 | } |
2199 | |
|
2200 | 0 | n = mbfl_substr_count(&haystack, &needle); |
2201 | | /* An error can only occur if needle is empty, |
2202 | | * an encoding error happens (which should not happen at this stage and is a bug) |
2203 | | * or the haystack is more than sizeof(size_t) bytes |
2204 | | * If one of these things occur this is a bug and should be flagged as such */ |
2205 | 0 | ZEND_ASSERT(!mbfl_is_error(n)); |
2206 | 0 | RETVAL_LONG(n); |
2207 | 0 | } |
2208 | | /* }}} */ |
2209 | | |
2210 | | /* {{{ Returns part of a string */ |
2211 | | PHP_FUNCTION(mb_substr) |
2212 | 0 | { |
2213 | 0 | char *str; |
2214 | 0 | zend_string *encoding = NULL; |
2215 | 0 | zend_long from, len; |
2216 | 0 | size_t mblen, real_from, real_len; |
2217 | 0 | size_t str_len; |
2218 | 0 | zend_bool len_is_null = 1; |
2219 | 0 | mbfl_string string, result, *ret; |
2220 | |
|
2221 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S!", &str, &str_len, &from, &len, &len_is_null, &encoding) == FAILURE) { |
2222 | 0 | RETURN_THROWS(); |
2223 | 0 | } |
2224 | |
|
2225 | 0 | string.encoding = php_mb_get_encoding(encoding, 4); |
2226 | 0 | if (!string.encoding) { |
2227 | 0 | RETURN_THROWS(); |
2228 | 0 | } |
2229 | |
|
2230 | 0 | string.val = (unsigned char *)str; |
2231 | 0 | string.len = str_len; |
2232 | | |
2233 | | /* measures length */ |
2234 | 0 | mblen = 0; |
2235 | 0 | if (from < 0 || (!len_is_null && len < 0)) { |
2236 | 0 | mblen = mbfl_strlen(&string); |
2237 | 0 | } |
2238 | | |
2239 | | /* if "from" position is negative, count start position from the end |
2240 | | * of the string |
2241 | | */ |
2242 | 0 | if (from >= 0) { |
2243 | 0 | real_from = (size_t) from; |
2244 | 0 | } else if (-from < mblen) { |
2245 | 0 | real_from = mblen + from; |
2246 | 0 | } else { |
2247 | 0 | real_from = 0; |
2248 | 0 | } |
2249 | | |
2250 | | /* if "length" position is negative, set it to the length |
2251 | | * needed to stop that many chars from the end of the string |
2252 | | */ |
2253 | 0 | if (len_is_null) { |
2254 | 0 | real_len = MBFL_SUBSTR_UNTIL_END; |
2255 | 0 | } else if (len >= 0) { |
2256 | 0 | real_len = (size_t) len; |
2257 | 0 | } else if (real_from < mblen && -len < mblen - real_from) { |
2258 | 0 | real_len = (mblen - real_from) + len; |
2259 | 0 | } else { |
2260 | 0 | real_len = 0; |
2261 | 0 | } |
2262 | |
|
2263 | 0 | ret = mbfl_substr(&string, &result, real_from, real_len); |
2264 | 0 | ZEND_ASSERT(ret != NULL); |
2265 | | |
2266 | | // TODO: avoid reallocation ??? |
2267 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ |
2268 | 0 | efree(ret->val); |
2269 | 0 | } |
2270 | | /* }}} */ |
2271 | | |
2272 | | /* {{{ Returns part of a string */ |
2273 | | PHP_FUNCTION(mb_strcut) |
2274 | 0 | { |
2275 | 0 | zend_string *encoding = NULL; |
2276 | 0 | zend_long from, len; |
2277 | 0 | zend_bool len_is_null = 1; |
2278 | 0 | mbfl_string string, result, *ret; |
2279 | |
|
2280 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|l!S", (char **)&string.val, &string.len, &from, &len, &len_is_null, &encoding) == FAILURE) { |
2281 | 0 | RETURN_THROWS(); |
2282 | 0 | } |
2283 | |
|
2284 | 0 | string.encoding = php_mb_get_encoding(encoding, 4); |
2285 | 0 | if (!string.encoding) { |
2286 | 0 | RETURN_THROWS(); |
2287 | 0 | } |
2288 | |
|
2289 | 0 | if (len_is_null) { |
2290 | 0 | len = string.len; |
2291 | 0 | } |
2292 | | |
2293 | | /* if "from" position is negative, count start position from the end |
2294 | | * of the string |
2295 | | */ |
2296 | 0 | if (from < 0) { |
2297 | 0 | from = string.len + from; |
2298 | 0 | if (from < 0) { |
2299 | 0 | from = 0; |
2300 | 0 | } |
2301 | 0 | } |
2302 | | |
2303 | | /* if "length" position is negative, set it to the length |
2304 | | * needed to stop that many chars from the end of the string |
2305 | | */ |
2306 | 0 | if (len < 0) { |
2307 | 0 | len = (string.len - from) + len; |
2308 | 0 | if (len < 0) { |
2309 | 0 | len = 0; |
2310 | 0 | } |
2311 | 0 | } |
2312 | |
|
2313 | 0 | if (from > string.len) { |
2314 | | // TODO Out of bounds ValueError |
2315 | 0 | RETURN_FALSE; |
2316 | 0 | } |
2317 | |
|
2318 | 0 | ret = mbfl_strcut(&string, &result, from, len); |
2319 | 0 | ZEND_ASSERT(ret != NULL); |
2320 | | |
2321 | | // TODO: avoid reallocation ??? |
2322 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ |
2323 | 0 | efree(ret->val); |
2324 | 0 | } |
2325 | | /* }}} */ |
2326 | | |
2327 | | /* {{{ Gets terminal width of a string */ |
2328 | | PHP_FUNCTION(mb_strwidth) |
2329 | 0 | { |
2330 | 0 | size_t n; |
2331 | 0 | mbfl_string string; |
2332 | 0 | zend_string *enc_name = NULL; |
2333 | |
|
2334 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", (char **)&string.val, &string.len, &enc_name) == FAILURE) { |
2335 | 0 | RETURN_THROWS(); |
2336 | 0 | } |
2337 | |
|
2338 | 0 | string.encoding = php_mb_get_encoding(enc_name, 2); |
2339 | 0 | if (!string.encoding) { |
2340 | 0 | RETURN_THROWS(); |
2341 | 0 | } |
2342 | |
|
2343 | 0 | n = mbfl_strwidth(&string); |
2344 | 0 | ZEND_ASSERT(n != (size_t) -1); |
2345 | 0 | RETVAL_LONG(n); |
2346 | 0 | } |
2347 | | /* }}} */ |
2348 | | |
2349 | | /* {{{ Trim the string in terminal width */ |
2350 | | PHP_FUNCTION(mb_strimwidth) |
2351 | 0 | { |
2352 | 0 | char *str, *trimmarker = NULL; |
2353 | 0 | zend_string *encoding = NULL; |
2354 | 0 | zend_long from, width, swidth = 0; |
2355 | 0 | size_t str_len, trimmarker_len; |
2356 | 0 | mbfl_string string, result, marker, *ret; |
2357 | |
|
2358 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sll|sS!", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding) == FAILURE) { |
2359 | 0 | RETURN_THROWS(); |
2360 | 0 | } |
2361 | |
|
2362 | 0 | string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5); |
2363 | 0 | if (!string.encoding) { |
2364 | 0 | RETURN_THROWS(); |
2365 | 0 | } |
2366 | |
|
2367 | 0 | string.val = (unsigned char *)str; |
2368 | 0 | string.len = str_len; |
2369 | 0 | marker.val = NULL; |
2370 | 0 | marker.len = 0; |
2371 | |
|
2372 | 0 | if ((from < 0) || (width < 0)) { |
2373 | 0 | swidth = mbfl_strwidth(&string); |
2374 | 0 | } |
2375 | |
|
2376 | 0 | if (from < 0) { |
2377 | 0 | from += swidth; |
2378 | 0 | } |
2379 | |
|
2380 | 0 | if (from < 0 || (size_t)from > str_len) { |
2381 | 0 | zend_argument_value_error(2, "is out of range"); |
2382 | 0 | RETURN_THROWS(); |
2383 | 0 | } |
2384 | |
|
2385 | 0 | if (width < 0) { |
2386 | 0 | width = swidth + width - from; |
2387 | 0 | } |
2388 | |
|
2389 | 0 | if (width < 0) { |
2390 | 0 | zend_argument_value_error(3, "is out of range"); |
2391 | 0 | RETURN_THROWS(); |
2392 | 0 | } |
2393 | |
|
2394 | 0 | if (trimmarker) { |
2395 | 0 | marker.val = (unsigned char *)trimmarker; |
2396 | 0 | marker.len = trimmarker_len; |
2397 | 0 | } |
2398 | |
|
2399 | 0 | ret = mbfl_strimwidth(&string, &marker, &result, from, width); |
2400 | 0 | ZEND_ASSERT(ret != NULL); |
2401 | | // TODO: avoid reallocation ??? |
2402 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ |
2403 | 0 | efree(ret->val); |
2404 | 0 | } |
2405 | | /* }}} */ |
2406 | | |
2407 | | |
2408 | | /* See mbfl_no_encoding definition for list of unsupported encodings */ |
2409 | | static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) |
2410 | 0 | { |
2411 | 0 | return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint) |
2412 | 0 | || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap) |
2413 | 0 | || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms) |
2414 | 0 | || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222)); |
2415 | 0 | } |
2416 | | |
2417 | | |
2418 | | /* See mbfl_no_encoding definition for list of UTF-8 encodings */ |
2419 | | static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) |
2420 | 0 | { |
2421 | 0 | return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb); |
2422 | 0 | } |
2423 | | |
2424 | | MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len) |
2425 | 0 | { |
2426 | 0 | mbfl_string string, result, *ret; |
2427 | 0 | mbfl_buffer_converter *convd; |
2428 | 0 | char *output = NULL; |
2429 | |
|
2430 | 0 | if (output_len) { |
2431 | 0 | *output_len = 0; |
2432 | 0 | } |
2433 | | |
2434 | | /* initialize string */ |
2435 | 0 | string.encoding = from_encoding; |
2436 | 0 | string.val = (unsigned char *)input; |
2437 | 0 | string.len = length; |
2438 | | |
2439 | | /* initialize converter */ |
2440 | 0 | convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len); |
2441 | | /* If this assertion fails this means some memory allocation failure which is a bug */ |
2442 | 0 | ZEND_ASSERT(convd != NULL); |
2443 | |
|
2444 | 0 | mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); |
2445 | 0 | mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); |
2446 | | |
2447 | | /* do it */ |
2448 | 0 | mbfl_string_init(&result); |
2449 | 0 | ret = mbfl_buffer_converter_feed_result(convd, &string, &result); |
2450 | 0 | if (ret) { |
2451 | 0 | if (output_len) { |
2452 | 0 | *output_len = ret->len; |
2453 | 0 | } |
2454 | 0 | output = (char *)ret->val; |
2455 | 0 | } |
2456 | |
|
2457 | 0 | MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); |
2458 | 0 | mbfl_buffer_converter_delete(convd); |
2459 | 0 | return output; |
2460 | 0 | } |
2461 | | /* }}} */ |
2462 | | |
2463 | | /* {{{ MBSTRING_API char *php_mb_convert_encoding() */ |
2464 | | MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len) |
2465 | 0 | { |
2466 | 0 | const mbfl_encoding *from_encoding; |
2467 | |
|
2468 | 0 | if (output_len) { |
2469 | 0 | *output_len = 0; |
2470 | 0 | } |
2471 | | |
2472 | | /* pre-conversion encoding */ |
2473 | 0 | ZEND_ASSERT(num_from_encodings >= 1); |
2474 | 0 | if (num_from_encodings == 1) { |
2475 | 0 | from_encoding = *from_encodings; |
2476 | 0 | } else { |
2477 | | /* auto detect */ |
2478 | 0 | mbfl_string string; |
2479 | 0 | mbfl_string_init(&string); |
2480 | 0 | string.val = (unsigned char *)input; |
2481 | 0 | string.len = length; |
2482 | 0 | from_encoding = mbfl_identify_encoding( |
2483 | 0 | &string, from_encodings, num_from_encodings, MBSTRG(strict_detection)); |
2484 | 0 | if (!from_encoding) { |
2485 | 0 | php_error_docref(NULL, E_WARNING, "Unable to detect character encoding"); |
2486 | 0 | return NULL; |
2487 | 0 | } |
2488 | 0 | } |
2489 | | |
2490 | 0 | return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len); |
2491 | 0 | } |
2492 | | /* }}} */ |
2493 | | |
2494 | | MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings) |
2495 | 0 | { |
2496 | 0 | HashTable *output, *chash; |
2497 | 0 | zend_long idx; |
2498 | 0 | zend_string *key; |
2499 | 0 | zval *entry, entry_tmp; |
2500 | 0 | size_t ckey_len, cval_len; |
2501 | 0 | char *ckey, *cval; |
2502 | |
|
2503 | 0 | if (!input) { |
2504 | 0 | return NULL; |
2505 | 0 | } |
2506 | | |
2507 | 0 | if (GC_IS_RECURSIVE(input)) { |
2508 | 0 | GC_UNPROTECT_RECURSION(input); |
2509 | 0 | php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values"); |
2510 | 0 | return NULL; |
2511 | 0 | } |
2512 | 0 | GC_TRY_PROTECT_RECURSION(input); |
2513 | 0 | output = zend_new_array(zend_hash_num_elements(input)); |
2514 | 0 | ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) { |
2515 | | /* convert key */ |
2516 | 0 | if (key) { |
2517 | 0 | ckey = php_mb_convert_encoding( |
2518 | 0 | ZSTR_VAL(key), ZSTR_LEN(key), |
2519 | 0 | to_encoding, from_encodings, num_from_encodings, &ckey_len); |
2520 | 0 | key = zend_string_init(ckey, ckey_len, 0); |
2521 | 0 | efree(ckey); |
2522 | 0 | } |
2523 | | /* convert value */ |
2524 | 0 | ZEND_ASSERT(entry); |
2525 | 0 | switch(Z_TYPE_P(entry)) { |
2526 | 0 | case IS_STRING: |
2527 | 0 | cval = php_mb_convert_encoding( |
2528 | 0 | Z_STRVAL_P(entry), Z_STRLEN_P(entry), |
2529 | 0 | to_encoding, from_encodings, num_from_encodings, &cval_len); |
2530 | 0 | ZVAL_STRINGL(&entry_tmp, cval, cval_len); |
2531 | 0 | efree(cval); |
2532 | 0 | break; |
2533 | 0 | case IS_NULL: |
2534 | 0 | case IS_TRUE: |
2535 | 0 | case IS_FALSE: |
2536 | 0 | case IS_LONG: |
2537 | 0 | case IS_DOUBLE: |
2538 | 0 | ZVAL_COPY(&entry_tmp, entry); |
2539 | 0 | break; |
2540 | 0 | case IS_ARRAY: |
2541 | 0 | chash = php_mb_convert_encoding_recursive( |
2542 | 0 | Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings); |
2543 | 0 | if (chash) { |
2544 | 0 | ZVAL_ARR(&entry_tmp, chash); |
2545 | 0 | } else { |
2546 | 0 | ZVAL_EMPTY_ARRAY(&entry_tmp); |
2547 | 0 | } |
2548 | 0 | break; |
2549 | 0 | case IS_OBJECT: |
2550 | 0 | default: |
2551 | 0 | if (key) { |
2552 | 0 | zend_string_release(key); |
2553 | 0 | } |
2554 | 0 | php_error_docref(NULL, E_WARNING, "Object is not supported"); |
2555 | 0 | continue; |
2556 | 0 | } |
2557 | 0 | if (key) { |
2558 | 0 | zend_hash_add(output, key, &entry_tmp); |
2559 | 0 | zend_string_release(key); |
2560 | 0 | } else { |
2561 | 0 | zend_hash_index_add(output, idx, &entry_tmp); |
2562 | 0 | } |
2563 | 0 | } ZEND_HASH_FOREACH_END(); |
2564 | 0 | GC_TRY_UNPROTECT_RECURSION(input); |
2565 | |
|
2566 | 0 | return output; |
2567 | 0 | } |
2568 | | /* }}} */ |
2569 | | |
2570 | | |
2571 | | /* {{{ Returns converted string in desired encoding */ |
2572 | | PHP_FUNCTION(mb_convert_encoding) |
2573 | 0 | { |
2574 | 0 | zend_string *to_encoding_name; |
2575 | 0 | zend_string *input_str, *from_encodings_str = NULL; |
2576 | 0 | HashTable *input_ht, *from_encodings_ht = NULL; |
2577 | 0 | const mbfl_encoding *to_encoding; |
2578 | 0 | const mbfl_encoding **from_encodings; |
2579 | 0 | size_t num_from_encodings; |
2580 | 0 | zend_bool free_from_encodings; |
2581 | |
|
2582 | 0 | ZEND_PARSE_PARAMETERS_START(2, 3) |
2583 | 0 | Z_PARAM_STR_OR_ARRAY_HT(input_str, input_ht) |
2584 | 0 | Z_PARAM_STR(to_encoding_name) |
2585 | 0 | Z_PARAM_OPTIONAL |
2586 | 0 | Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(from_encodings_str, from_encodings_ht) |
2587 | 0 | ZEND_PARSE_PARAMETERS_END(); |
2588 | |
|
2589 | 0 | to_encoding = php_mb_get_encoding(to_encoding_name, 2); |
2590 | 0 | if (!to_encoding) { |
2591 | 0 | RETURN_THROWS(); |
2592 | 0 | } |
2593 | |
|
2594 | 0 | if (from_encodings_ht) { |
2595 | 0 | if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) { |
2596 | 0 | RETURN_THROWS(); |
2597 | 0 | } |
2598 | 0 | free_from_encodings = 1; |
2599 | 0 | } else if (from_encodings_str) { |
2600 | 0 | if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str), |
2601 | 0 | &from_encodings, &num_from_encodings, |
2602 | 0 | /* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) { |
2603 | 0 | RETURN_THROWS(); |
2604 | 0 | } |
2605 | 0 | free_from_encodings = 1; |
2606 | 0 | } else { |
2607 | 0 | from_encodings = &MBSTRG(current_internal_encoding); |
2608 | 0 | num_from_encodings = 1; |
2609 | 0 | free_from_encodings = 0; |
2610 | 0 | } |
2611 | |
|
2612 | 0 | if (!num_from_encodings) { |
2613 | 0 | efree(ZEND_VOIDP(from_encodings)); |
2614 | 0 | zend_argument_value_error(3, "must specify at least one encoding"); |
2615 | 0 | RETURN_THROWS(); |
2616 | 0 | } |
2617 | |
|
2618 | 0 | if (input_str) { |
2619 | | /* new encoding */ |
2620 | 0 | size_t size; |
2621 | 0 | char *ret = php_mb_convert_encoding( |
2622 | 0 | ZSTR_VAL(input_str), ZSTR_LEN(input_str), |
2623 | 0 | to_encoding, from_encodings, num_from_encodings, &size); |
2624 | 0 | if (ret != NULL) { |
2625 | | // TODO: avoid reallocation ??? |
2626 | 0 | RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */ |
2627 | 0 | efree(ret); |
2628 | 0 | } else { |
2629 | 0 | RETVAL_FALSE; |
2630 | 0 | } |
2631 | 0 | } else { |
2632 | 0 | HashTable *tmp; |
2633 | 0 | tmp = php_mb_convert_encoding_recursive( |
2634 | 0 | input_ht, to_encoding, from_encodings, num_from_encodings); |
2635 | 0 | RETVAL_ARR(tmp); |
2636 | 0 | } |
2637 | |
|
2638 | 0 | if (free_from_encodings) { |
2639 | 0 | efree(ZEND_VOIDP(from_encodings)); |
2640 | 0 | } |
2641 | 0 | } |
2642 | | /* }}} */ |
2643 | | |
2644 | | static char *mbstring_convert_case( |
2645 | | int case_mode, const char *str, size_t str_len, size_t *ret_len, |
2646 | 0 | const mbfl_encoding *enc) { |
2647 | 0 | return php_unicode_convert_case( |
2648 | 0 | case_mode, str, str_len, ret_len, enc, |
2649 | 0 | MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar)); |
2650 | 0 | } |
2651 | | |
2652 | | /* {{{ Returns a case-folded version of source_string */ |
2653 | | PHP_FUNCTION(mb_convert_case) |
2654 | 0 | { |
2655 | 0 | zend_string *from_encoding = NULL; |
2656 | 0 | char *str; |
2657 | 0 | size_t str_len; |
2658 | 0 | zend_long case_mode = 0; |
2659 | 0 | char *newstr; |
2660 | 0 | size_t ret_len; |
2661 | 0 | const mbfl_encoding *enc; |
2662 | |
|
2663 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|S!", &str, &str_len, &case_mode, &from_encoding) == FAILURE) { |
2664 | 0 | RETURN_THROWS(); |
2665 | 0 | } |
2666 | |
|
2667 | 0 | enc = php_mb_get_encoding(from_encoding, 3); |
2668 | 0 | if (!enc) { |
2669 | 0 | RETURN_THROWS(); |
2670 | 0 | } |
2671 | |
|
2672 | 0 | if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) { |
2673 | 0 | zend_argument_value_error(2, "must be one of MB_CASE_UPPER, MB_CASE_LOWER, MB_CASE_TITLE, MB_CASE_FOLD," |
2674 | 0 | " MB_CASE_UPPER_SIMPLE, MB_CASE_LOWER_SIMPLE, MB_CASE_TITLE_SIMPLE, or MB_CASE_FOLD_SIMPLE"); |
2675 | 0 | RETURN_THROWS(); |
2676 | 0 | } |
2677 | |
|
2678 | 0 | newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc); |
2679 | | /* If newstr is NULL something went wrong in mbfl and this is a bug */ |
2680 | 0 | ZEND_ASSERT(newstr != NULL); |
2681 | | |
2682 | | // TODO: avoid reallocation ??? |
2683 | 0 | RETVAL_STRINGL(newstr, ret_len); |
2684 | 0 | efree(newstr); |
2685 | 0 | } |
2686 | | /* }}} */ |
2687 | | |
2688 | | /* {{{ Returns a upper cased version of source_string */ |
2689 | | PHP_FUNCTION(mb_strtoupper) |
2690 | 0 | { |
2691 | 0 | zend_string *from_encoding = NULL; |
2692 | 0 | char *str; |
2693 | 0 | size_t str_len; |
2694 | 0 | char *newstr; |
2695 | 0 | size_t ret_len; |
2696 | 0 | const mbfl_encoding *enc; |
2697 | |
|
2698 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len, &from_encoding) == FAILURE) { |
2699 | 0 | RETURN_THROWS(); |
2700 | 0 | } |
2701 | |
|
2702 | 0 | enc = php_mb_get_encoding(from_encoding, 2); |
2703 | 0 | if (!enc) { |
2704 | 0 | RETURN_THROWS(); |
2705 | 0 | } |
2706 | |
|
2707 | 0 | newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc); |
2708 | | /* If newstr is NULL something went wrong in mbfl and this is a bug */ |
2709 | 0 | ZEND_ASSERT(newstr != NULL); |
2710 | | |
2711 | | // TODO: avoid reallocation ??? |
2712 | 0 | RETVAL_STRINGL(newstr, ret_len); |
2713 | 0 | efree(newstr); |
2714 | 0 | } |
2715 | | /* }}} */ |
2716 | | |
2717 | | /* {{{ Returns a lower cased version of source_string */ |
2718 | | PHP_FUNCTION(mb_strtolower) |
2719 | 0 | { |
2720 | 0 | zend_string *from_encoding = NULL; |
2721 | 0 | char *str; |
2722 | 0 | size_t str_len; |
2723 | 0 | char *newstr; |
2724 | 0 | size_t ret_len; |
2725 | 0 | const mbfl_encoding *enc; |
2726 | |
|
2727 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!", &str, &str_len, &from_encoding) == FAILURE) { |
2728 | 0 | RETURN_THROWS(); |
2729 | 0 | } |
2730 | |
|
2731 | 0 | enc = php_mb_get_encoding(from_encoding, 2); |
2732 | 0 | if (!enc) { |
2733 | 0 | RETURN_THROWS(); |
2734 | 0 | } |
2735 | |
|
2736 | 0 | newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc); |
2737 | | /* If newstr is NULL something went wrong in mbfl and this is a bug */ |
2738 | 0 | ZEND_ASSERT(newstr != NULL); |
2739 | | |
2740 | | // TODO: avoid reallocation ??? |
2741 | 0 | RETVAL_STRINGL(newstr, ret_len); |
2742 | 0 | efree(newstr); |
2743 | 0 | } |
2744 | | /* }}} */ |
2745 | | |
2746 | | /* {{{ Encodings of the given string is returned (as a string) */ |
2747 | | PHP_FUNCTION(mb_detect_encoding) |
2748 | 0 | { |
2749 | 0 | char *str; |
2750 | 0 | size_t str_len; |
2751 | 0 | zend_string *encoding_str = NULL; |
2752 | 0 | HashTable *encoding_ht = NULL; |
2753 | 0 | zend_bool strict = 0; |
2754 | |
|
2755 | 0 | mbfl_string string; |
2756 | 0 | const mbfl_encoding *ret; |
2757 | 0 | const mbfl_encoding **elist; |
2758 | 0 | size_t size; |
2759 | 0 | zend_bool free_elist; |
2760 | |
|
2761 | 0 | ZEND_PARSE_PARAMETERS_START(1, 3) |
2762 | 0 | Z_PARAM_STRING(str, str_len) |
2763 | 0 | Z_PARAM_OPTIONAL |
2764 | 0 | Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(encoding_str, encoding_ht) |
2765 | 0 | Z_PARAM_BOOL(strict) |
2766 | 0 | ZEND_PARSE_PARAMETERS_END(); |
2767 | | |
2768 | | /* make encoding list */ |
2769 | 0 | if (encoding_ht) { |
2770 | 0 | if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) { |
2771 | 0 | RETURN_THROWS(); |
2772 | 0 | } |
2773 | 0 | free_elist = 1; |
2774 | 0 | } else if (encoding_str) { |
2775 | 0 | if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) { |
2776 | 0 | RETURN_THROWS(); |
2777 | 0 | } |
2778 | 0 | free_elist = 1; |
2779 | 0 | } else { |
2780 | 0 | elist = MBSTRG(current_detect_order_list); |
2781 | 0 | size = MBSTRG(current_detect_order_list_size); |
2782 | 0 | free_elist = 0; |
2783 | 0 | } |
2784 | |
|
2785 | 0 | if (size == 0) { |
2786 | 0 | efree(ZEND_VOIDP(elist)); |
2787 | 0 | zend_argument_value_error(2, "must specify at least one encoding"); |
2788 | 0 | RETURN_THROWS(); |
2789 | 0 | } |
2790 | |
|
2791 | 0 | if (ZEND_NUM_ARGS() < 3) { |
2792 | 0 | strict = MBSTRG(strict_detection); |
2793 | 0 | } |
2794 | |
|
2795 | 0 | mbfl_string_init(&string); |
2796 | 0 | string.val = (unsigned char *)str; |
2797 | 0 | string.len = str_len; |
2798 | 0 | ret = mbfl_identify_encoding(&string, elist, size, strict); |
2799 | |
|
2800 | 0 | if (free_elist) { |
2801 | 0 | efree(ZEND_VOIDP(elist)); |
2802 | 0 | } |
2803 | |
|
2804 | 0 | if (ret == NULL) { |
2805 | 0 | RETURN_FALSE; |
2806 | 0 | } |
2807 | |
|
2808 | 0 | RETVAL_STRING((char *)ret->name); |
2809 | 0 | } |
2810 | | /* }}} */ |
2811 | | |
2812 | | /* {{{ Returns an array of all supported entity encodings */ |
2813 | | PHP_FUNCTION(mb_list_encodings) |
2814 | 0 | { |
2815 | 0 | const mbfl_encoding **encodings; |
2816 | 0 | const mbfl_encoding *encoding; |
2817 | 0 | int i; |
2818 | |
|
2819 | 0 | if (zend_parse_parameters_none() == FAILURE) { |
2820 | 0 | RETURN_THROWS(); |
2821 | 0 | } |
2822 | |
|
2823 | 0 | array_init(return_value); |
2824 | 0 | i = 0; |
2825 | 0 | encodings = mbfl_get_supported_encodings(); |
2826 | 0 | while ((encoding = encodings[i++]) != NULL) { |
2827 | 0 | add_next_index_string(return_value, (char *) encoding->name); |
2828 | 0 | } |
2829 | 0 | } |
2830 | | /* }}} */ |
2831 | | |
2832 | | /* {{{ Returns an array of the aliases of a given encoding name */ |
2833 | | PHP_FUNCTION(mb_encoding_aliases) |
2834 | 0 | { |
2835 | 0 | const mbfl_encoding *encoding; |
2836 | 0 | zend_string *encoding_name = NULL; |
2837 | |
|
2838 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &encoding_name) == FAILURE) { |
2839 | 0 | RETURN_THROWS(); |
2840 | 0 | } |
2841 | |
|
2842 | 0 | encoding = php_mb_get_encoding(encoding_name, 1); |
2843 | 0 | if (!encoding) { |
2844 | 0 | RETURN_THROWS(); |
2845 | 0 | } |
2846 | |
|
2847 | 0 | array_init(return_value); |
2848 | 0 | if (encoding->aliases != NULL) { |
2849 | 0 | const char **alias; |
2850 | 0 | for (alias = *encoding->aliases; *alias; ++alias) { |
2851 | 0 | add_next_index_string(return_value, (char *)*alias); |
2852 | 0 | } |
2853 | 0 | } |
2854 | 0 | } |
2855 | | /* }}} */ |
2856 | | |
2857 | | /* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */ |
2858 | | PHP_FUNCTION(mb_encode_mimeheader) |
2859 | 0 | { |
2860 | 0 | const mbfl_encoding *charset, *transenc; |
2861 | 0 | mbfl_string string, result, *ret; |
2862 | 0 | zend_string *charset_name = NULL; |
2863 | 0 | char *trans_enc_name = NULL; |
2864 | 0 | size_t trans_enc_name_len; |
2865 | 0 | char *linefeed = "\r\n"; |
2866 | 0 | size_t linefeed_len; |
2867 | 0 | zend_long indent = 0; |
2868 | |
|
2869 | 0 | string.encoding = MBSTRG(current_internal_encoding); |
2870 | |
|
2871 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|S!s!sl", |
2872 | 0 | (char **)&string.val, &string.len, &charset_name, &trans_enc_name, &trans_enc_name_len, |
2873 | 0 | &linefeed, &linefeed_len, &indent |
2874 | 0 | ) == FAILURE) { |
2875 | 0 | RETURN_THROWS(); |
2876 | 0 | } |
2877 | |
|
2878 | 0 | charset = &mbfl_encoding_pass; |
2879 | 0 | transenc = &mbfl_encoding_base64; |
2880 | |
|
2881 | 0 | if (charset_name != NULL) { |
2882 | 0 | charset = php_mb_get_encoding(charset_name, 2); |
2883 | 0 | if (!charset) { |
2884 | 0 | RETURN_THROWS(); |
2885 | 0 | } |
2886 | 0 | } else { |
2887 | 0 | const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); |
2888 | 0 | if (lang != NULL) { |
2889 | 0 | charset = mbfl_no2encoding(lang->mail_charset); |
2890 | 0 | transenc = mbfl_no2encoding(lang->mail_header_encoding); |
2891 | 0 | } |
2892 | 0 | } |
2893 | |
|
2894 | 0 | if (trans_enc_name != NULL) { |
2895 | 0 | if (*trans_enc_name == 'B' || *trans_enc_name == 'b') { |
2896 | 0 | transenc = &mbfl_encoding_base64; |
2897 | 0 | } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') { |
2898 | 0 | transenc = &mbfl_encoding_qprint; |
2899 | 0 | } |
2900 | 0 | } |
2901 | |
|
2902 | 0 | mbfl_string_init(&result); |
2903 | 0 | ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent); |
2904 | 0 | ZEND_ASSERT(ret != NULL); |
2905 | | // TODO: avoid reallocation ??? |
2906 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ |
2907 | 0 | efree(ret->val); |
2908 | 0 | } |
2909 | | /* }}} */ |
2910 | | |
2911 | | /* {{{ Decodes the MIME "encoded-word" in the string */ |
2912 | | PHP_FUNCTION(mb_decode_mimeheader) |
2913 | 0 | { |
2914 | 0 | mbfl_string string, result, *ret; |
2915 | |
|
2916 | 0 | string.encoding = MBSTRG(current_internal_encoding); |
2917 | |
|
2918 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", (char **)&string.val, &string.len) == FAILURE) { |
2919 | 0 | RETURN_THROWS(); |
2920 | 0 | } |
2921 | |
|
2922 | 0 | mbfl_string_init(&result); |
2923 | 0 | ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)); |
2924 | 0 | ZEND_ASSERT(ret != NULL); |
2925 | | // TODO: avoid reallocation ??? |
2926 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ |
2927 | 0 | efree(ret->val); |
2928 | 0 | } |
2929 | | /* }}} */ |
2930 | | |
2931 | | /* {{{ Conversion between full-width character and half-width character (Japanese) */ |
2932 | | PHP_FUNCTION(mb_convert_kana) |
2933 | 0 | { |
2934 | 0 | int opt; |
2935 | 0 | mbfl_string string, result, *ret; |
2936 | 0 | char *optstr = NULL; |
2937 | 0 | size_t optstr_len; |
2938 | 0 | zend_string *encname = NULL; |
2939 | |
|
2940 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!S!", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname) == FAILURE) { |
2941 | 0 | RETURN_THROWS(); |
2942 | 0 | } |
2943 | | |
2944 | | /* option */ |
2945 | 0 | if (optstr != NULL) { |
2946 | 0 | char *p = optstr; |
2947 | 0 | size_t i = 0, n = optstr_len; |
2948 | 0 | opt = 0; |
2949 | 0 | while (i < n) { |
2950 | 0 | i++; |
2951 | 0 | switch (*p++) { |
2952 | 0 | case 'A': |
2953 | 0 | opt |= 0x1; |
2954 | 0 | break; |
2955 | 0 | case 'a': |
2956 | 0 | opt |= 0x10; |
2957 | 0 | break; |
2958 | 0 | case 'R': |
2959 | 0 | opt |= 0x2; |
2960 | 0 | break; |
2961 | 0 | case 'r': |
2962 | 0 | opt |= 0x20; |
2963 | 0 | break; |
2964 | 0 | case 'N': |
2965 | 0 | opt |= 0x4; |
2966 | 0 | break; |
2967 | 0 | case 'n': |
2968 | 0 | opt |= 0x40; |
2969 | 0 | break; |
2970 | 0 | case 'S': |
2971 | 0 | opt |= 0x8; |
2972 | 0 | break; |
2973 | 0 | case 's': |
2974 | 0 | opt |= 0x80; |
2975 | 0 | break; |
2976 | 0 | case 'K': |
2977 | 0 | opt |= 0x100; |
2978 | 0 | break; |
2979 | 0 | case 'k': |
2980 | 0 | opt |= 0x1000; |
2981 | 0 | break; |
2982 | 0 | case 'H': |
2983 | 0 | opt |= 0x200; |
2984 | 0 | break; |
2985 | 0 | case 'h': |
2986 | 0 | opt |= 0x2000; |
2987 | 0 | break; |
2988 | 0 | case 'V': |
2989 | 0 | opt |= 0x800; |
2990 | 0 | break; |
2991 | 0 | case 'C': |
2992 | 0 | opt |= 0x10000; |
2993 | 0 | break; |
2994 | 0 | case 'c': |
2995 | 0 | opt |= 0x20000; |
2996 | 0 | break; |
2997 | 0 | case 'M': |
2998 | 0 | opt |= 0x100000; |
2999 | 0 | break; |
3000 | 0 | case 'm': |
3001 | 0 | opt |= 0x200000; |
3002 | 0 | break; |
3003 | 0 | } |
3004 | 0 | } |
3005 | 0 | } else { |
3006 | 0 | opt = 0x900; |
3007 | 0 | } |
3008 | | |
3009 | | /* encoding */ |
3010 | 0 | string.encoding = php_mb_get_encoding(encname, 3); |
3011 | 0 | if (!string.encoding) { |
3012 | 0 | RETURN_THROWS(); |
3013 | 0 | } |
3014 | |
|
3015 | 0 | ret = mbfl_ja_jp_hantozen(&string, &result, opt); |
3016 | 0 | ZEND_ASSERT(ret != NULL); |
3017 | | // TODO: avoid reallocation ??? |
3018 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */ |
3019 | 0 | efree(ret->val); |
3020 | 0 | } |
3021 | | /* }}} */ |
3022 | | |
3023 | | static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error) /* {{{ */ |
3024 | 0 | { |
3025 | 0 | mbfl_string string; |
3026 | 0 | HashTable *ht; |
3027 | 0 | zval *entry; |
3028 | |
|
3029 | 0 | ZVAL_DEREF(var); |
3030 | 0 | if (Z_TYPE_P(var) == IS_STRING) { |
3031 | 0 | string.val = (unsigned char *)Z_STRVAL_P(var); |
3032 | 0 | string.len = Z_STRLEN_P(var); |
3033 | 0 | if (mbfl_encoding_detector_feed(identd, &string)) { |
3034 | 0 | return 1; /* complete detecting */ |
3035 | 0 | } |
3036 | 0 | } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { |
3037 | 0 | if (Z_REFCOUNTED_P(var)) { |
3038 | 0 | if (Z_IS_RECURSIVE_P(var)) { |
3039 | 0 | *recursion_error = 1; |
3040 | 0 | return 0; |
3041 | 0 | } |
3042 | 0 | Z_PROTECT_RECURSION_P(var); |
3043 | 0 | } |
3044 | |
|
3045 | 0 | ht = HASH_OF(var); |
3046 | 0 | if (ht != NULL) { |
3047 | 0 | ZEND_HASH_FOREACH_VAL_IND(ht, entry) { |
3048 | 0 | if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) { |
3049 | 0 | if (Z_REFCOUNTED_P(var)) { |
3050 | 0 | Z_UNPROTECT_RECURSION_P(var); |
3051 | 0 | } |
3052 | 0 | return 1; |
3053 | 0 | } else if (*recursion_error) { |
3054 | 0 | if (Z_REFCOUNTED_P(var)) { |
3055 | 0 | Z_UNPROTECT_RECURSION_P(var); |
3056 | 0 | } |
3057 | 0 | return 0; |
3058 | 0 | } |
3059 | 0 | } ZEND_HASH_FOREACH_END(); |
3060 | 0 | } |
3061 | |
|
3062 | 0 | if (Z_REFCOUNTED_P(var)) { |
3063 | 0 | Z_UNPROTECT_RECURSION_P(var); |
3064 | 0 | } |
3065 | 0 | } |
3066 | 0 | return 0; |
3067 | 0 | } /* }}} */ |
3068 | | |
3069 | | static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var) /* {{{ */ |
3070 | 0 | { |
3071 | 0 | mbfl_string string, result, *ret; |
3072 | 0 | HashTable *ht; |
3073 | 0 | zval *entry, *orig_var; |
3074 | |
|
3075 | 0 | orig_var = var; |
3076 | 0 | ZVAL_DEREF(var); |
3077 | 0 | if (Z_TYPE_P(var) == IS_STRING) { |
3078 | 0 | string.val = (unsigned char *)Z_STRVAL_P(var); |
3079 | 0 | string.len = Z_STRLEN_P(var); |
3080 | 0 | ret = mbfl_buffer_converter_feed_result(convd, &string, &result); |
3081 | 0 | if (ret != NULL) { |
3082 | 0 | zval_ptr_dtor(orig_var); |
3083 | | // TODO: avoid reallocation ??? |
3084 | 0 | ZVAL_STRINGL(orig_var, (char *)ret->val, ret->len); |
3085 | 0 | efree(ret->val); |
3086 | 0 | } |
3087 | 0 | } else if (Z_TYPE_P(var) == IS_ARRAY || Z_TYPE_P(var) == IS_OBJECT) { |
3088 | 0 | if (Z_TYPE_P(var) == IS_ARRAY) { |
3089 | 0 | SEPARATE_ARRAY(var); |
3090 | 0 | } |
3091 | 0 | if (Z_REFCOUNTED_P(var)) { |
3092 | 0 | if (Z_IS_RECURSIVE_P(var)) { |
3093 | 0 | return 1; |
3094 | 0 | } |
3095 | 0 | Z_PROTECT_RECURSION_P(var); |
3096 | 0 | } |
3097 | |
|
3098 | 0 | ht = HASH_OF(var); |
3099 | 0 | if (ht != NULL) { |
3100 | 0 | ZEND_HASH_FOREACH_VAL_IND(ht, entry) { |
3101 | 0 | if (mb_recursive_convert_variable(convd, entry)) { |
3102 | 0 | if (Z_REFCOUNTED_P(var)) { |
3103 | 0 | Z_UNPROTECT_RECURSION_P(var); |
3104 | 0 | } |
3105 | 0 | return 1; |
3106 | 0 | } |
3107 | 0 | } ZEND_HASH_FOREACH_END(); |
3108 | 0 | } |
3109 | |
|
3110 | 0 | if (Z_REFCOUNTED_P(var)) { |
3111 | 0 | Z_UNPROTECT_RECURSION_P(var); |
3112 | 0 | } |
3113 | 0 | } |
3114 | 0 | return 0; |
3115 | 0 | } /* }}} */ |
3116 | | |
3117 | | /* {{{ Converts the string resource in variables to desired encoding */ |
3118 | | PHP_FUNCTION(mb_convert_variables) |
3119 | 0 | { |
3120 | 0 | zval *args; |
3121 | 0 | zend_string *to_enc_str; |
3122 | 0 | zend_string *from_enc_str; |
3123 | 0 | HashTable *from_enc_ht; |
3124 | 0 | mbfl_string string, result; |
3125 | 0 | const mbfl_encoding *from_encoding, *to_encoding; |
3126 | 0 | mbfl_encoding_detector *identd; |
3127 | 0 | mbfl_buffer_converter *convd; |
3128 | 0 | int n, argc; |
3129 | 0 | size_t elistsz; |
3130 | 0 | const mbfl_encoding **elist; |
3131 | 0 | int recursion_error = 0; |
3132 | |
|
3133 | 0 | ZEND_PARSE_PARAMETERS_START(3, -1) |
3134 | 0 | Z_PARAM_STR(to_enc_str) |
3135 | 0 | Z_PARAM_STR_OR_ARRAY_HT(from_enc_str, from_enc_ht) |
3136 | 0 | Z_PARAM_VARIADIC('+', args, argc) |
3137 | 0 | ZEND_PARSE_PARAMETERS_END(); |
3138 | | |
3139 | | /* new encoding */ |
3140 | 0 | to_encoding = php_mb_get_encoding(to_enc_str, 1); |
3141 | 0 | if (!to_encoding) { |
3142 | 0 | RETURN_THROWS(); |
3143 | 0 | } |
3144 | | |
3145 | | /* initialize string */ |
3146 | 0 | mbfl_string_init(&string); |
3147 | 0 | mbfl_string_init(&result); |
3148 | 0 | from_encoding = MBSTRG(current_internal_encoding); |
3149 | 0 | string.encoding = from_encoding; |
3150 | | |
3151 | | /* pre-conversion encoding */ |
3152 | 0 | if (from_enc_ht) { |
3153 | 0 | if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) { |
3154 | 0 | RETURN_THROWS(); |
3155 | 0 | } |
3156 | 0 | } else { |
3157 | 0 | if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) { |
3158 | 0 | RETURN_THROWS(); |
3159 | 0 | } |
3160 | 0 | } |
3161 | |
|
3162 | 0 | if (elistsz == 0) { |
3163 | 0 | efree(ZEND_VOIDP(elist)); |
3164 | 0 | zend_argument_value_error(2, "must specify at least one encoding"); |
3165 | 0 | RETURN_THROWS(); |
3166 | 0 | } |
3167 | |
|
3168 | 0 | if (elistsz == 1) { |
3169 | 0 | from_encoding = *elist; |
3170 | 0 | } else { |
3171 | | /* auto detect */ |
3172 | 0 | from_encoding = NULL; |
3173 | 0 | identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection)); |
3174 | 0 | if (identd != NULL) { |
3175 | 0 | n = 0; |
3176 | 0 | while (n < argc) { |
3177 | 0 | if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) { |
3178 | 0 | break; |
3179 | 0 | } |
3180 | 0 | n++; |
3181 | 0 | } |
3182 | 0 | from_encoding = mbfl_encoding_detector_judge(identd); |
3183 | 0 | mbfl_encoding_detector_delete(identd); |
3184 | 0 | if (recursion_error) { |
3185 | 0 | efree(ZEND_VOIDP(elist)); |
3186 | 0 | php_error_docref(NULL, E_WARNING, "Cannot handle recursive references"); |
3187 | 0 | RETURN_FALSE; |
3188 | 0 | } |
3189 | 0 | } |
3190 | |
|
3191 | 0 | if (!from_encoding) { |
3192 | 0 | php_error_docref(NULL, E_WARNING, "Unable to detect encoding"); |
3193 | 0 | efree(ZEND_VOIDP(elist)); |
3194 | 0 | RETURN_FALSE; |
3195 | 0 | } |
3196 | 0 | } |
3197 | |
|
3198 | 0 | efree(ZEND_VOIDP(elist)); |
3199 | |
|
3200 | 0 | convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); |
3201 | | /* If this assertion fails this means some memory allocation failure which is a bug */ |
3202 | 0 | ZEND_ASSERT(convd != NULL); |
3203 | |
|
3204 | 0 | mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); |
3205 | 0 | mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); |
3206 | | |
3207 | | /* convert */ |
3208 | 0 | n = 0; |
3209 | 0 | while (n < argc) { |
3210 | 0 | zval *zv = &args[n]; |
3211 | |
|
3212 | 0 | ZVAL_DEREF(zv); |
3213 | 0 | recursion_error = mb_recursive_convert_variable(convd, zv); |
3214 | 0 | if (recursion_error) { |
3215 | 0 | break; |
3216 | 0 | } |
3217 | 0 | n++; |
3218 | 0 | } |
3219 | |
|
3220 | 0 | MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd); |
3221 | 0 | mbfl_buffer_converter_delete(convd); |
3222 | |
|
3223 | 0 | if (recursion_error) { |
3224 | 0 | php_error_docref(NULL, E_WARNING, "Cannot handle recursive references"); |
3225 | 0 | RETURN_FALSE; |
3226 | 0 | } |
3227 | |
|
3228 | 0 | RETURN_STRING(from_encoding->name); |
3229 | 0 | } |
3230 | | /* }}} */ |
3231 | | |
3232 | | /* {{{ HTML numeric entity */ |
3233 | | /* {{{ static void php_mb_numericentity_exec() */ |
3234 | | static void |
3235 | | php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type) |
3236 | 0 | { |
3237 | 0 | char *str = NULL; |
3238 | 0 | size_t str_len; |
3239 | 0 | zend_string *encoding = NULL; |
3240 | 0 | zval *hash_entry; |
3241 | 0 | HashTable *target_hash; |
3242 | 0 | int i, *convmap, *mapelm, mapsize=0; |
3243 | 0 | zend_bool is_hex = 0; |
3244 | 0 | mbfl_string string, result, *ret; |
3245 | |
|
3246 | 0 | if (type == 0) { |
3247 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sh|S!b", &str, &str_len, &target_hash, &encoding, &is_hex) == FAILURE) { |
3248 | 0 | RETURN_THROWS(); |
3249 | 0 | } |
3250 | 0 | } else { |
3251 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sh|S!", &str, &str_len, &target_hash, &encoding) == FAILURE) { |
3252 | 0 | RETURN_THROWS(); |
3253 | 0 | } |
3254 | 0 | } |
3255 | |
|
3256 | 0 | string.val = (unsigned char *)str; |
3257 | 0 | string.len = str_len; |
3258 | 0 | string.encoding = php_mb_get_encoding(encoding, 3); |
3259 | 0 | if (!string.encoding) { |
3260 | 0 | RETURN_THROWS(); |
3261 | 0 | } |
3262 | |
|
3263 | 0 | if (type == 0 && is_hex) { |
3264 | 0 | type = 2; /* output in hex format */ |
3265 | 0 | } |
3266 | | |
3267 | | /* conversion map */ |
3268 | 0 | i = zend_hash_num_elements(target_hash); |
3269 | 0 | if (i % 4 != 0) { |
3270 | 0 | zend_argument_value_error(2, "must have a multiple of 4 elements"); |
3271 | 0 | RETURN_THROWS(); |
3272 | 0 | } |
3273 | 0 | convmap = (int *)safe_emalloc(i, sizeof(int), 0); |
3274 | 0 | mapelm = convmap; |
3275 | 0 | mapsize = 0; |
3276 | 0 | ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) { |
3277 | 0 | *mapelm++ = zval_get_long(hash_entry); |
3278 | 0 | mapsize++; |
3279 | 0 | } ZEND_HASH_FOREACH_END(); |
3280 | 0 | mapsize /= 4; |
3281 | |
|
3282 | 0 | ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type); |
3283 | 0 | ZEND_ASSERT(ret != NULL); |
3284 | | // TODO: avoid reallocation ??? |
3285 | 0 | RETVAL_STRINGL((char *)ret->val, ret->len); |
3286 | 0 | efree(ret->val); |
3287 | 0 | efree((void *)convmap); |
3288 | 0 | } |
3289 | | /* }}} */ |
3290 | | |
3291 | | /* {{{ Converts specified characters to HTML numeric entities */ |
3292 | | PHP_FUNCTION(mb_encode_numericentity) |
3293 | 0 | { |
3294 | 0 | php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); |
3295 | 0 | } |
3296 | | /* }}} */ |
3297 | | |
3298 | | /* {{{ Converts HTML numeric entities to character code */ |
3299 | | PHP_FUNCTION(mb_decode_numericentity) |
3300 | 0 | { |
3301 | 0 | php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); |
3302 | 0 | } |
3303 | | /* }}} */ |
3304 | | /* }}} */ |
3305 | | |
3306 | | /* {{{ Sends an email message with MIME scheme */ |
3307 | | |
/*
 * Loop helper: when str[pos] starts a folded-header separator (CR LF followed
 * by a space or tab), advance pos to the last whitespace character of that
 * separator and `continue` the enclosing loop, so the separator is left
 * untouched by the caller.
 *
 * Must be expanded directly inside a loop body (it executes `continue`), which
 * is also why it cannot be wrapped in do { } while (0). `str` must be
 * NUL-terminated; `pos` must be a modifiable lvalue and is evaluated several
 * times, so pass a plain variable.
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) { \
		(pos) += 2; \
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') { \
			(pos)++; \
		} \
		continue; \
	}
3316 | | |
/*
 * Replaces every NUL byte in the len-byte buffer starting at str with a space,
 * in place.
 *
 * Relies on two `char *` scratch variables named `pp` and `ee` being declared
 * in the expanding scope. Wrapped in do { } while (0) so the expansion is a
 * single statement; every use must be followed by a semicolon.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	do { \
		pp = (str); \
		ee = pp + (len); \
		while ((pp = memchr(pp, '\0', (size_t)(ee - pp))) != NULL) { \
			*pp = ' '; \
		} \
	} while (0)
3323 | | |
3324 | | static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len) |
3325 | 0 | { |
3326 | 0 | const char *ps; |
3327 | 0 | size_t icnt; |
3328 | 0 | int state = 0; |
3329 | 0 | int crlf_state = -1; |
3330 | 0 | char *token = NULL; |
3331 | 0 | size_t token_pos = 0; |
3332 | 0 | zend_string *fld_name, *fld_val; |
3333 | |
|
3334 | 0 | ps = str; |
3335 | 0 | icnt = str_len; |
3336 | 0 | fld_name = fld_val = NULL; |
3337 | | |
3338 | | /* |
3339 | | * C o n t e n t - T y p e : t e x t / h t m l \r\n |
3340 | | * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^ |
3341 | | * state 0 1 2 3 |
3342 | | * |
3343 | | * C o n t e n t - T y p e : t e x t / h t m l \r\n |
3344 | | * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^ |
3345 | | * crlf_state -1 0 1 -1 |
3346 | | * |
3347 | | */ |
3348 | |
|
3349 | 0 | while (icnt > 0) { |
3350 | 0 | switch (*ps) { |
3351 | 0 | case ':': |
3352 | 0 | if (crlf_state == 1) { |
3353 | 0 | token_pos++; |
3354 | 0 | } |
3355 | |
|
3356 | 0 | if (state == 0 || state == 1) { |
3357 | 0 | if(token && token_pos > 0) { |
3358 | 0 | fld_name = zend_string_init(token, token_pos, 0); |
3359 | 0 | } |
3360 | 0 | state = 2; |
3361 | 0 | } else { |
3362 | 0 | token_pos++; |
3363 | 0 | } |
3364 | |
|
3365 | 0 | crlf_state = 0; |
3366 | 0 | break; |
3367 | | |
3368 | 0 | case '\n': |
3369 | 0 | if (crlf_state == -1) { |
3370 | 0 | goto out; |
3371 | 0 | } |
3372 | 0 | crlf_state = -1; |
3373 | 0 | break; |
3374 | |
|
3375 | 0 | case '\r': |
3376 | 0 | if (crlf_state == 1) { |
3377 | 0 | token_pos++; |
3378 | 0 | } else { |
3379 | 0 | crlf_state = 1; |
3380 | 0 | } |
3381 | 0 | break; |
3382 | |
|
3383 | 0 | case ' ': case '\t': |
3384 | 0 | if (crlf_state == -1) { |
3385 | 0 | if (state == 3) { |
3386 | | /* continuing from the previous line */ |
3387 | 0 | state = 4; |
3388 | 0 | } else { |
3389 | | /* simply skipping this new line */ |
3390 | 0 | state = 5; |
3391 | 0 | } |
3392 | 0 | } else { |
3393 | 0 | if (crlf_state == 1) { |
3394 | 0 | token_pos++; |
3395 | 0 | } |
3396 | 0 | if (state == 1 || state == 3) { |
3397 | 0 | token_pos++; |
3398 | 0 | } |
3399 | 0 | } |
3400 | 0 | crlf_state = 0; |
3401 | 0 | break; |
3402 | |
|
3403 | 0 | default: |
3404 | 0 | switch (state) { |
3405 | 0 | case 0: |
3406 | 0 | token = (char*)ps; |
3407 | 0 | token_pos = 0; |
3408 | 0 | state = 1; |
3409 | 0 | break; |
3410 | | |
3411 | 0 | case 2: |
3412 | 0 | if (crlf_state != -1) { |
3413 | 0 | token = (char*)ps; |
3414 | 0 | token_pos = 0; |
3415 | |
|
3416 | 0 | state = 3; |
3417 | 0 | break; |
3418 | 0 | } |
3419 | | /* break is missing intentionally */ |
3420 | | |
3421 | 0 | case 3: |
3422 | 0 | if (crlf_state == -1) { |
3423 | 0 | if(token && token_pos > 0) { |
3424 | 0 | fld_val = zend_string_init(token, token_pos, 0); |
3425 | 0 | } |
3426 | |
|
3427 | 0 | if (fld_name != NULL && fld_val != NULL) { |
3428 | 0 | zval val; |
3429 | | /* FIXME: some locale free implementation is |
3430 | | * really required here,,, */ |
3431 | 0 | php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name)); |
3432 | 0 | ZVAL_STR(&val, fld_val); |
3433 | |
|
3434 | 0 | zend_hash_update(ht, fld_name, &val); |
3435 | |
|
3436 | 0 | zend_string_release_ex(fld_name, 0); |
3437 | 0 | } |
3438 | |
|
3439 | 0 | fld_name = fld_val = NULL; |
3440 | 0 | token = (char*)ps; |
3441 | 0 | token_pos = 0; |
3442 | |
|
3443 | 0 | state = 1; |
3444 | 0 | } |
3445 | 0 | break; |
3446 | |
|
3447 | 0 | case 4: |
3448 | 0 | token_pos++; |
3449 | 0 | state = 3; |
3450 | 0 | break; |
3451 | 0 | } |
3452 | | |
3453 | 0 | if (crlf_state == 1) { |
3454 | 0 | token_pos++; |
3455 | 0 | } |
3456 | |
|
3457 | 0 | token_pos++; |
3458 | |
|
3459 | 0 | crlf_state = 0; |
3460 | 0 | break; |
3461 | 0 | } |
3462 | 0 | ps++, icnt--; |
3463 | 0 | } |
3464 | 0 | out: |
3465 | 0 | if (state == 2) { |
3466 | 0 | token = ""; |
3467 | 0 | token_pos = 0; |
3468 | |
|
3469 | 0 | state = 3; |
3470 | 0 | } |
3471 | 0 | if (state == 3) { |
3472 | 0 | if(token && token_pos > 0) { |
3473 | 0 | fld_val = zend_string_init(token, token_pos, 0); |
3474 | 0 | } |
3475 | 0 | if (fld_name != NULL && fld_val != NULL) { |
3476 | 0 | zval val; |
3477 | | /* FIXME: some locale free implementation is |
3478 | | * really required here,,, */ |
3479 | 0 | php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name)); |
3480 | 0 | ZVAL_STR(&val, fld_val); |
3481 | |
|
3482 | 0 | zend_hash_update(ht, fld_name, &val); |
3483 | |
|
3484 | 0 | zend_string_release_ex(fld_name, 0); |
3485 | 0 | } |
3486 | 0 | } |
3487 | 0 | return state; |
3488 | 0 | } |
3489 | | |
3490 | | PHP_FUNCTION(mb_send_mail) |
3491 | 0 | { |
3492 | 0 | char *to; |
3493 | 0 | size_t to_len; |
3494 | 0 | char *message; |
3495 | 0 | size_t message_len; |
3496 | 0 | char *subject; |
3497 | 0 | size_t subject_len; |
3498 | 0 | zval *headers = NULL; |
3499 | 0 | zend_string *extra_cmd = NULL; |
3500 | 0 | zend_string *str_headers = NULL, *tmp_headers; |
3501 | 0 | size_t n, i; |
3502 | 0 | char *to_r = NULL; |
3503 | 0 | char *force_extra_parameters = INI_STR("mail.force_extra_parameters"); |
3504 | 0 | struct { |
3505 | 0 | int cnt_type:1; |
3506 | 0 | int cnt_trans_enc:1; |
3507 | 0 | } suppressed_hdrs = { 0, 0 }; |
3508 | |
|
3509 | 0 | char *message_buf = NULL, *subject_buf = NULL, *p; |
3510 | 0 | mbfl_string orig_str, conv_str; |
3511 | 0 | mbfl_string *pstr; /* pointer to mbfl string for return value */ |
3512 | 0 | enum mbfl_no_encoding; |
3513 | 0 | const mbfl_encoding *tran_cs, /* transfar text charset */ |
3514 | 0 | *head_enc, /* header transfar encoding */ |
3515 | 0 | *body_enc; /* body transfar encoding */ |
3516 | 0 | mbfl_memory_device device; /* automatic allocateable buffer for additional header */ |
3517 | 0 | const mbfl_language *lang; |
3518 | 0 | int err = 0; |
3519 | 0 | HashTable ht_headers; |
3520 | 0 | zval *s; |
3521 | 0 | extern void mbfl_memory_device_unput(mbfl_memory_device *device); |
3522 | 0 | char *pp, *ee; |
3523 | | |
3524 | | /* initialize */ |
3525 | 0 | mbfl_memory_device_init(&device, 0, 0); |
3526 | 0 | mbfl_string_init(&orig_str); |
3527 | 0 | mbfl_string_init(&conv_str); |
3528 | | |
3529 | | /* character-set, transfer-encoding */ |
3530 | 0 | tran_cs = &mbfl_encoding_utf8; |
3531 | 0 | head_enc = &mbfl_encoding_base64; |
3532 | 0 | body_enc = &mbfl_encoding_base64; |
3533 | 0 | lang = mbfl_no2language(MBSTRG(language)); |
3534 | 0 | if (lang != NULL) { |
3535 | 0 | tran_cs = mbfl_no2encoding(lang->mail_charset); |
3536 | 0 | head_enc = mbfl_no2encoding(lang->mail_header_encoding); |
3537 | 0 | body_enc = mbfl_no2encoding(lang->mail_body_encoding); |
3538 | 0 | } |
3539 | |
|
3540 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|z!S!", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) { |
3541 | 0 | RETURN_THROWS(); |
3542 | 0 | } |
3543 | | |
3544 | | /* ASCIIZ check */ |
3545 | 0 | MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len); |
3546 | 0 | MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len); |
3547 | 0 | MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len); |
3548 | 0 | if (headers) { |
3549 | 0 | switch(Z_TYPE_P(headers)) { |
3550 | 0 | case IS_STRING: |
3551 | 0 | tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0); |
3552 | 0 | MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers)); |
3553 | 0 | str_headers = php_trim(tmp_headers, NULL, 0, 2); |
3554 | 0 | zend_string_release_ex(tmp_headers, 0); |
3555 | 0 | break; |
3556 | 0 | case IS_ARRAY: |
3557 | 0 | str_headers = php_mail_build_headers(Z_ARRVAL_P(headers)); |
3558 | 0 | break; |
3559 | 0 | default: |
3560 | 0 | zend_argument_value_error(4, "must be of type string|array|null, %s given", zend_zval_type_name(headers)); |
3561 | 0 | RETURN_THROWS(); |
3562 | 0 | } |
3563 | 0 | } |
3564 | 0 | if (extra_cmd) { |
3565 | 0 | MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd)); |
3566 | 0 | } |
3567 | |
|
3568 | 0 | zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0); |
3569 | |
|
3570 | 0 | if (str_headers != NULL) { |
3571 | 0 | _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers)); |
3572 | 0 | } |
3573 | |
|
3574 | 0 | if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) { |
3575 | 0 | char *tmp; |
3576 | 0 | char *param_name; |
3577 | 0 | char *charset = NULL; |
3578 | |
|
3579 | 0 | ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING); |
3580 | 0 | p = strchr(Z_STRVAL_P(s), ';'); |
3581 | |
|
3582 | 0 | if (p != NULL) { |
3583 | | /* skipping the padded spaces */ |
3584 | 0 | do { |
3585 | 0 | ++p; |
3586 | 0 | } while (*p == ' ' || *p == '\t'); |
3587 | |
|
3588 | 0 | if (*p != '\0') { |
3589 | 0 | if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) { |
3590 | 0 | if (strcasecmp(param_name, "charset") == 0) { |
3591 | 0 | const mbfl_encoding *_tran_cs = tran_cs; |
3592 | |
|
3593 | 0 | charset = php_strtok_r(NULL, "= \"", &tmp); |
3594 | 0 | if (charset != NULL) { |
3595 | 0 | _tran_cs = mbfl_name2encoding(charset); |
3596 | 0 | } |
3597 | |
|
3598 | 0 | if (!_tran_cs) { |
3599 | 0 | php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset); |
3600 | 0 | _tran_cs = &mbfl_encoding_ascii; |
3601 | 0 | } |
3602 | 0 | tran_cs = _tran_cs; |
3603 | 0 | } |
3604 | 0 | } |
3605 | 0 | } |
3606 | 0 | } |
3607 | 0 | suppressed_hdrs.cnt_type = 1; |
3608 | 0 | } |
3609 | |
|
3610 | 0 | if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) { |
3611 | 0 | const mbfl_encoding *_body_enc; |
3612 | |
|
3613 | 0 | ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING); |
3614 | 0 | _body_enc = mbfl_name2encoding(Z_STRVAL_P(s)); |
3615 | 0 | switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) { |
3616 | 0 | case mbfl_no_encoding_base64: |
3617 | 0 | case mbfl_no_encoding_7bit: |
3618 | 0 | case mbfl_no_encoding_8bit: |
3619 | 0 | body_enc = _body_enc; |
3620 | 0 | break; |
3621 | |
|
3622 | 0 | default: |
3623 | 0 | php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s)); |
3624 | 0 | body_enc = &mbfl_encoding_8bit; |
3625 | 0 | break; |
3626 | 0 | } |
3627 | 0 | suppressed_hdrs.cnt_trans_enc = 1; |
3628 | 0 | } |
3629 | | |
3630 | | /* To: */ |
3631 | 0 | if (to_len > 0) { |
3632 | 0 | to_r = estrndup(to, to_len); |
3633 | 0 | for (; to_len; to_len--) { |
3634 | 0 | if (!isspace((unsigned char) to_r[to_len - 1])) { |
3635 | 0 | break; |
3636 | 0 | } |
3637 | 0 | to_r[to_len - 1] = '\0'; |
3638 | 0 | } |
3639 | 0 | for (i = 0; to_r[i]; i++) { |
3640 | 0 | if (iscntrl((unsigned char) to_r[i])) { |
3641 | | /* According to RFC 822, section 3.1.1 long headers may be separated into |
3642 | | * parts using CRLF followed at least one linear-white-space character ('\t' or ' '). |
3643 | | * To prevent these separators from being replaced with a space, we use the |
3644 | | * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them. |
3645 | | */ |
3646 | 0 | SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i); |
3647 | 0 | to_r[i] = ' '; |
3648 | 0 | } |
3649 | 0 | } |
3650 | 0 | } else { |
3651 | 0 | to_r = to; |
3652 | 0 | } |
3653 | | |
3654 | | /* Subject: */ |
3655 | 0 | orig_str.val = (unsigned char *)subject; |
3656 | 0 | orig_str.len = subject_len; |
3657 | 0 | orig_str.encoding = MBSTRG(current_internal_encoding); |
3658 | 0 | if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid |
3659 | 0 | || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) { |
3660 | 0 | orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); |
3661 | 0 | } |
3662 | 0 | pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]")); |
3663 | 0 | if (pstr != NULL) { |
3664 | 0 | subject_buf = subject = (char *)pstr->val; |
3665 | 0 | } |
3666 | | |
3667 | | /* message body */ |
3668 | 0 | orig_str.val = (unsigned char *)message; |
3669 | 0 | orig_str.len = message_len; |
3670 | 0 | orig_str.encoding = MBSTRG(current_internal_encoding); |
3671 | |
|
3672 | 0 | if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid |
3673 | 0 | || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) { |
3674 | 0 | orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); |
3675 | 0 | } |
3676 | |
|
3677 | 0 | pstr = NULL; |
3678 | 0 | { |
3679 | 0 | mbfl_string tmpstr; |
3680 | |
|
3681 | 0 | if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) { |
3682 | 0 | tmpstr.encoding = &mbfl_encoding_8bit; |
3683 | 0 | pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc); |
3684 | 0 | efree(tmpstr.val); |
3685 | 0 | } |
3686 | 0 | } |
3687 | 0 | if (pstr != NULL) { |
3688 | 0 | message_buf = message = (char *)pstr->val; |
3689 | 0 | } |
3690 | | |
3691 | | /* other headers */ |
3692 | 0 | #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0" |
3693 | 0 | #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain" |
3694 | 0 | #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset=" |
3695 | 0 | #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: " |
3696 | 0 | if (str_headers != NULL) { |
3697 | 0 | p = ZSTR_VAL(str_headers); |
3698 | 0 | n = ZSTR_LEN(str_headers); |
3699 | 0 | mbfl_memory_device_strncat(&device, p, n); |
3700 | 0 | if (n > 0 && p[n - 1] != '\n') { |
3701 | 0 | mbfl_memory_device_strncat(&device, "\n", 1); |
3702 | 0 | } |
3703 | 0 | zend_string_release_ex(str_headers, 0); |
3704 | 0 | } |
3705 | |
|
3706 | 0 | if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) { |
3707 | 0 | mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1); |
3708 | 0 | mbfl_memory_device_strncat(&device, "\n", 1); |
3709 | 0 | } |
3710 | |
|
3711 | 0 | if (!suppressed_hdrs.cnt_type) { |
3712 | 0 | mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1); |
3713 | |
|
3714 | 0 | p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding); |
3715 | 0 | if (p != NULL) { |
3716 | 0 | mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1); |
3717 | 0 | mbfl_memory_device_strcat(&device, p); |
3718 | 0 | } |
3719 | 0 | mbfl_memory_device_strncat(&device, "\n", 1); |
3720 | 0 | } |
3721 | 0 | if (!suppressed_hdrs.cnt_trans_enc) { |
3722 | 0 | mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1); |
3723 | 0 | p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding); |
3724 | 0 | if (p == NULL) { |
3725 | 0 | p = "7bit"; |
3726 | 0 | } |
3727 | 0 | mbfl_memory_device_strcat(&device, p); |
3728 | 0 | mbfl_memory_device_strncat(&device, "\n", 1); |
3729 | 0 | } |
3730 | |
|
3731 | 0 | mbfl_memory_device_unput(&device); |
3732 | 0 | mbfl_memory_device_output('\0', &device); |
3733 | 0 | str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0); |
3734 | |
|
3735 | 0 | if (force_extra_parameters) { |
3736 | 0 | extra_cmd = php_escape_shell_cmd(force_extra_parameters); |
3737 | 0 | } else if (extra_cmd) { |
3738 | 0 | extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); |
3739 | 0 | } |
3740 | |
|
3741 | 0 | if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) { |
3742 | 0 | RETVAL_TRUE; |
3743 | 0 | } else { |
3744 | 0 | RETVAL_FALSE; |
3745 | 0 | } |
3746 | |
|
3747 | 0 | if (extra_cmd) { |
3748 | 0 | zend_string_release_ex(extra_cmd, 0); |
3749 | 0 | } |
3750 | |
|
3751 | 0 | if (to_r != to) { |
3752 | 0 | efree(to_r); |
3753 | 0 | } |
3754 | 0 | if (subject_buf) { |
3755 | 0 | efree((void *)subject_buf); |
3756 | 0 | } |
3757 | 0 | if (message_buf) { |
3758 | 0 | efree((void *)message_buf); |
3759 | 0 | } |
3760 | 0 | mbfl_memory_device_clear(&device); |
3761 | 0 | zend_hash_destroy(&ht_headers); |
3762 | 0 | if (str_headers) { |
3763 | 0 | zend_string_release_ex(str_headers, 0); |
3764 | 0 | } |
3765 | 0 | } |
3766 | | |
3767 | | #undef SKIP_LONG_HEADER_SEP_MBSTRING |
3768 | | #undef MAIL_ASCIIZ_CHECK_MBSTRING |
3769 | | #undef PHP_MBSTR_MAIL_MIME_HEADER1 |
3770 | | #undef PHP_MBSTR_MAIL_MIME_HEADER2 |
3771 | | #undef PHP_MBSTR_MAIL_MIME_HEADER3 |
3772 | | #undef PHP_MBSTR_MAIL_MIME_HEADER4 |
3773 | | /* }}} */ |
3774 | | |
3775 | | /* {{{ Returns the current settings of mbstring */ |
3776 | | PHP_FUNCTION(mb_get_info) |
3777 | 0 | { |
3778 | 0 | char *typ = NULL; |
3779 | 0 | size_t typ_len; |
3780 | 0 | size_t n; |
3781 | 0 | char *name; |
3782 | 0 | zval row; |
3783 | 0 | const mbfl_language *lang = mbfl_no2language(MBSTRG(language)); |
3784 | 0 | const mbfl_encoding **entry; |
3785 | |
|
3786 | 0 | if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &typ, &typ_len) == FAILURE) { |
3787 | 0 | RETURN_THROWS(); |
3788 | 0 | } |
3789 | |
|
3790 | 0 | if (!typ || !strcasecmp("all", typ)) { |
3791 | 0 | array_init(return_value); |
3792 | 0 | if (MBSTRG(current_internal_encoding)) { |
3793 | 0 | add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name); |
3794 | 0 | } |
3795 | 0 | if (MBSTRG(http_input_identify)) { |
3796 | 0 | add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name); |
3797 | 0 | } |
3798 | 0 | if (MBSTRG(current_http_output_encoding)) { |
3799 | 0 | add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name); |
3800 | 0 | } |
3801 | 0 | if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) { |
3802 | 0 | add_assoc_string(return_value, "http_output_conv_mimetypes", name); |
3803 | 0 | } |
3804 | 0 | if (lang != NULL) { |
3805 | 0 | if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) { |
3806 | 0 | add_assoc_string(return_value, "mail_charset", name); |
3807 | 0 | } |
3808 | 0 | if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) { |
3809 | 0 | add_assoc_string(return_value, "mail_header_encoding", name); |
3810 | 0 | } |
3811 | 0 | if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) { |
3812 | 0 | add_assoc_string(return_value, "mail_body_encoding", name); |
3813 | 0 | } |
3814 | 0 | } |
3815 | 0 | add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars)); |
3816 | 0 | if (MBSTRG(encoding_translation)) { |
3817 | 0 | add_assoc_string(return_value, "encoding_translation", "On"); |
3818 | 0 | } else { |
3819 | 0 | add_assoc_string(return_value, "encoding_translation", "Off"); |
3820 | 0 | } |
3821 | 0 | if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) { |
3822 | 0 | add_assoc_string(return_value, "language", name); |
3823 | 0 | } |
3824 | 0 | n = MBSTRG(current_detect_order_list_size); |
3825 | 0 | entry = MBSTRG(current_detect_order_list); |
3826 | 0 | if (n > 0) { |
3827 | 0 | size_t i; |
3828 | 0 | array_init(&row); |
3829 | 0 | for (i = 0; i < n; i++) { |
3830 | 0 | add_next_index_string(&row, (*entry)->name); |
3831 | 0 | entry++; |
3832 | 0 | } |
3833 | 0 | add_assoc_zval(return_value, "detect_order", &row); |
3834 | 0 | } |
3835 | 0 | if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { |
3836 | 0 | add_assoc_string(return_value, "substitute_character", "none"); |
3837 | 0 | } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { |
3838 | 0 | add_assoc_string(return_value, "substitute_character", "long"); |
3839 | 0 | } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { |
3840 | 0 | add_assoc_string(return_value, "substitute_character", "entity"); |
3841 | 0 | } else { |
3842 | 0 | add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar)); |
3843 | 0 | } |
3844 | 0 | if (MBSTRG(strict_detection)) { |
3845 | 0 | add_assoc_string(return_value, "strict_detection", "On"); |
3846 | 0 | } else { |
3847 | 0 | add_assoc_string(return_value, "strict_detection", "Off"); |
3848 | 0 | } |
3849 | 0 | } else if (!strcasecmp("internal_encoding", typ)) { |
3850 | 0 | if (MBSTRG(current_internal_encoding)) { |
3851 | 0 | RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name); |
3852 | 0 | } |
3853 | 0 | } else if (!strcasecmp("http_input", typ)) { |
3854 | 0 | if (MBSTRG(http_input_identify)) { |
3855 | 0 | RETVAL_STRING((char *)MBSTRG(http_input_identify)->name); |
3856 | 0 | } |
3857 | 0 | } else if (!strcasecmp("http_output", typ)) { |
3858 | 0 | if (MBSTRG(current_http_output_encoding)) { |
3859 | 0 | RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name); |
3860 | 0 | } |
3861 | 0 | } else if (!strcasecmp("http_output_conv_mimetypes", typ)) { |
3862 | 0 | if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) { |
3863 | 0 | RETVAL_STRING(name); |
3864 | 0 | } |
3865 | 0 | } else if (!strcasecmp("mail_charset", typ)) { |
3866 | 0 | if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) { |
3867 | 0 | RETVAL_STRING(name); |
3868 | 0 | } |
3869 | 0 | } else if (!strcasecmp("mail_header_encoding", typ)) { |
3870 | 0 | if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) { |
3871 | 0 | RETVAL_STRING(name); |
3872 | 0 | } |
3873 | 0 | } else if (!strcasecmp("mail_body_encoding", typ)) { |
3874 | 0 | if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) { |
3875 | 0 | RETVAL_STRING(name); |
3876 | 0 | } |
3877 | 0 | } else if (!strcasecmp("illegal_chars", typ)) { |
3878 | 0 | RETVAL_LONG(MBSTRG(illegalchars)); |
3879 | 0 | } else if (!strcasecmp("encoding_translation", typ)) { |
3880 | 0 | if (MBSTRG(encoding_translation)) { |
3881 | 0 | RETVAL_STRING("On"); |
3882 | 0 | } else { |
3883 | 0 | RETVAL_STRING("Off"); |
3884 | 0 | } |
3885 | 0 | } else if (!strcasecmp("language", typ)) { |
3886 | 0 | if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) { |
3887 | 0 | RETVAL_STRING(name); |
3888 | 0 | } |
3889 | 0 | } else if (!strcasecmp("detect_order", typ)) { |
3890 | 0 | n = MBSTRG(current_detect_order_list_size); |
3891 | 0 | entry = MBSTRG(current_detect_order_list); |
3892 | 0 | if (n > 0) { |
3893 | 0 | size_t i; |
3894 | 0 | array_init(return_value); |
3895 | 0 | for (i = 0; i < n; i++) { |
3896 | 0 | add_next_index_string(return_value, (*entry)->name); |
3897 | 0 | entry++; |
3898 | 0 | } |
3899 | 0 | } |
3900 | 0 | } else if (!strcasecmp("substitute_character", typ)) { |
3901 | 0 | if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { |
3902 | 0 | RETVAL_STRING("none"); |
3903 | 0 | } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { |
3904 | 0 | RETVAL_STRING("long"); |
3905 | 0 | } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { |
3906 | 0 | RETVAL_STRING("entity"); |
3907 | 0 | } else { |
3908 | 0 | RETVAL_LONG(MBSTRG(current_filter_illegal_substchar)); |
3909 | 0 | } |
3910 | 0 | } else if (!strcasecmp("strict_detection", typ)) { |
3911 | 0 | if (MBSTRG(strict_detection)) { |
3912 | 0 | RETVAL_STRING("On"); |
3913 | 0 | } else { |
3914 | 0 | RETVAL_STRING("Off"); |
3915 | 0 | } |
3916 | 0 | } else { |
3917 | | // TODO Convert to ValueError |
3918 | 0 | RETURN_FALSE; |
3919 | 0 | } |
3920 | 0 | } |
3921 | | /* }}} */ |
3922 | | |
3923 | | |
3924 | | static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding) |
3925 | 0 | { |
3926 | 0 | mbfl_buffer_converter *convd; |
3927 | |
|
3928 | 0 | convd = mbfl_buffer_converter_new(encoding, encoding, 0); |
3929 | 0 | if (convd == NULL) { |
3930 | 0 | return NULL; |
3931 | 0 | } |
3932 | 0 | mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); |
3933 | 0 | mbfl_buffer_converter_illegal_substchar(convd, 0); |
3934 | 0 | return convd; |
3935 | 0 | } |
3936 | | |
3937 | | |
3938 | 0 | static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) { |
3939 | 0 | mbfl_string string, result, *ret = NULL; |
3940 | 0 | size_t illegalchars = 0; |
3941 | | |
3942 | | /* initialize string */ |
3943 | 0 | mbfl_string_init_set(&string, encoding); |
3944 | 0 | mbfl_string_init(&result); |
3945 | |
|
3946 | 0 | string.val = (unsigned char *) input; |
3947 | 0 | string.len = length; |
3948 | |
|
3949 | 0 | ret = mbfl_buffer_converter_feed_result(convd, &string, &result); |
3950 | 0 | illegalchars = mbfl_buffer_illegalchars(convd); |
3951 | |
|
3952 | 0 | if (ret != NULL) { |
3953 | 0 | if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { |
3954 | 0 | mbfl_string_clear(&result); |
3955 | 0 | return 1; |
3956 | 0 | } |
3957 | 0 | mbfl_string_clear(&result); |
3958 | 0 | } |
3959 | 0 | return 0; |
3960 | 0 | } |
3961 | | |
3962 | | MBSTRING_API int php_mb_check_encoding( |
3963 | | const char *input, size_t length, const mbfl_encoding *encoding) |
3964 | 0 | { |
3965 | 0 | mbfl_buffer_converter *convd; |
3966 | |
|
3967 | 0 | convd = php_mb_init_convd(encoding); |
3968 | | /* If this assertion fails this means some memory allocation failure which is a bug */ |
3969 | 0 | ZEND_ASSERT(convd != NULL); |
3970 | |
|
3971 | 0 | if (php_mb_check_encoding_impl(convd, input, length, encoding)) { |
3972 | 0 | mbfl_buffer_converter_delete(convd); |
3973 | 0 | return 1; |
3974 | 0 | } |
3975 | 0 | mbfl_buffer_converter_delete(convd); |
3976 | 0 | return 0; |
3977 | 0 | } |
3978 | | |
3979 | | static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding) |
3980 | 0 | { |
3981 | 0 | mbfl_buffer_converter *convd; |
3982 | 0 | zend_long idx; |
3983 | 0 | zend_string *key; |
3984 | 0 | zval *entry; |
3985 | 0 | int valid = 1; |
3986 | |
|
3987 | 0 | (void)(idx); |
3988 | |
|
3989 | 0 | convd = php_mb_init_convd(encoding); |
3990 | | /* If this assertion fails this means some memory allocation failure which is a bug */ |
3991 | 0 | ZEND_ASSERT(convd != NULL); |
3992 | |
|
3993 | 0 | if (GC_IS_RECURSIVE(vars)) { |
3994 | 0 | mbfl_buffer_converter_delete(convd); |
3995 | 0 | php_error_docref(NULL, E_WARNING, "Cannot not handle circular references"); |
3996 | 0 | return 0; |
3997 | 0 | } |
3998 | 0 | GC_TRY_PROTECT_RECURSION(vars); |
3999 | 0 | ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) { |
4000 | 0 | ZVAL_DEREF(entry); |
4001 | 0 | if (key) { |
4002 | 0 | if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) { |
4003 | 0 | valid = 0; |
4004 | 0 | break; |
4005 | 0 | } |
4006 | 0 | } |
4007 | 0 | switch (Z_TYPE_P(entry)) { |
4008 | 0 | case IS_STRING: |
4009 | 0 | if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) { |
4010 | 0 | valid = 0; |
4011 | 0 | break; |
4012 | 0 | } |
4013 | 0 | break; |
4014 | 0 | case IS_ARRAY: |
4015 | 0 | if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding)) { |
4016 | 0 | valid = 0; |
4017 | 0 | break; |
4018 | 0 | } |
4019 | 0 | break; |
4020 | 0 | case IS_LONG: |
4021 | 0 | case IS_DOUBLE: |
4022 | 0 | case IS_NULL: |
4023 | 0 | case IS_TRUE: |
4024 | 0 | case IS_FALSE: |
4025 | 0 | break; |
4026 | 0 | default: |
4027 | | /* Other types are error. */ |
4028 | 0 | valid = 0; |
4029 | 0 | break; |
4030 | 0 | } |
4031 | 0 | } ZEND_HASH_FOREACH_END(); |
4032 | 0 | GC_TRY_UNPROTECT_RECURSION(vars); |
4033 | 0 | mbfl_buffer_converter_delete(convd); |
4034 | 0 | return valid; |
4035 | 0 | } |
4036 | | |
4037 | | |
4038 | | /* {{{ Check if the string is valid for the specified encoding */ |
4039 | | PHP_FUNCTION(mb_check_encoding) |
4040 | 0 | { |
4041 | 0 | zend_string *input_str = NULL, *enc = NULL; |
4042 | 0 | HashTable *input_ht = NULL; |
4043 | 0 | const mbfl_encoding *encoding; |
4044 | |
|
4045 | 0 | ZEND_PARSE_PARAMETERS_START(0, 2) |
4046 | 0 | Z_PARAM_OPTIONAL |
4047 | 0 | Z_PARAM_STR_OR_ARRAY_HT_OR_NULL(input_str, input_ht) |
4048 | 0 | Z_PARAM_STR_OR_NULL(enc) |
4049 | 0 | ZEND_PARSE_PARAMETERS_END(); |
4050 | |
|
4051 | 0 | encoding = php_mb_get_encoding(enc, 2); |
4052 | 0 | if (!encoding) { |
4053 | 0 | RETURN_THROWS(); |
4054 | 0 | } |
4055 | |
|
4056 | 0 | if (input_ht) { |
4057 | 0 | if (!php_mb_check_encoding_recursive(input_ht, encoding)) { |
4058 | 0 | RETURN_FALSE; |
4059 | 0 | } |
4060 | 0 | RETURN_TRUE; |
4061 | 0 | } else if (input_str) { |
4062 | 0 | if (!php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding)) { |
4063 | 0 | RETURN_FALSE; |
4064 | 0 | } |
4065 | 0 | RETURN_TRUE; |
4066 | 0 | } else { |
4067 | | /* FIXME: Actually check all inputs, except $_FILES file content. */ |
4068 | 0 | if (MBSTRG(illegalchars) == 0) { |
4069 | 0 | RETURN_TRUE; |
4070 | 0 | } |
4071 | 0 | RETURN_FALSE; |
4072 | 0 | } |
4073 | 0 | } |
4074 | | /* }}} */ |
4075 | | |
4076 | | |
4077 | | static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name, |
4078 | | const uint32_t enc_name_arg_num) |
4079 | 0 | { |
4080 | 0 | const mbfl_encoding *enc; |
4081 | 0 | enum mbfl_no_encoding no_enc; |
4082 | |
|
4083 | 0 | ZEND_ASSERT(str_len > 0); |
4084 | |
|
4085 | 0 | enc = php_mb_get_encoding(enc_name, enc_name_arg_num); |
4086 | 0 | if (!enc) { |
4087 | 0 | return -2; |
4088 | 0 | } |
4089 | | |
4090 | 0 | no_enc = enc->no_encoding; |
4091 | 0 | if (php_mb_is_unsupported_no_encoding(no_enc)) { |
4092 | 0 | zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name); |
4093 | 0 | return -2; |
4094 | 0 | } |
4095 | | |
4096 | 0 | { |
4097 | 0 | mbfl_wchar_device dev; |
4098 | 0 | mbfl_convert_filter *filter; |
4099 | 0 | zend_long cp; |
4100 | |
|
4101 | 0 | mbfl_wchar_device_init(&dev); |
4102 | 0 | filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev); |
4103 | | /* If this assertion fails this means some memory allocation failure which is a bug */ |
4104 | 0 | ZEND_ASSERT(filter != NULL); |
4105 | |
|
4106 | 0 | mbfl_convert_filter_feed_string(filter, (const unsigned char *) str, str_len); |
4107 | 0 | mbfl_convert_filter_flush(filter); |
4108 | |
|
4109 | 0 | if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) { |
4110 | 0 | mbfl_convert_filter_delete(filter); |
4111 | 0 | mbfl_wchar_device_clear(&dev); |
4112 | 0 | return -1; |
4113 | 0 | } |
4114 | | |
4115 | 0 | cp = dev.buffer[0]; |
4116 | 0 | mbfl_convert_filter_delete(filter); |
4117 | 0 | mbfl_wchar_device_clear(&dev); |
4118 | 0 | return cp; |
4119 | 0 | } |
4120 | 0 | } |
4121 | | |
4122 | | |
4123 | | /* {{{ */ |
4124 | | PHP_FUNCTION(mb_ord) |
4125 | 0 | { |
4126 | 0 | char *str; |
4127 | 0 | size_t str_len; |
4128 | 0 | zend_string *enc = NULL; |
4129 | 0 | zend_long cp; |
4130 | |
|
4131 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
4132 | 0 | Z_PARAM_STRING(str, str_len) |
4133 | 0 | Z_PARAM_OPTIONAL |
4134 | 0 | Z_PARAM_STR_OR_NULL(enc) |
4135 | 0 | ZEND_PARSE_PARAMETERS_END(); |
4136 | |
|
4137 | 0 | if (str_len == 0) { |
4138 | 0 | zend_argument_value_error(1, "must not be empty"); |
4139 | 0 | RETURN_THROWS(); |
4140 | 0 | } |
4141 | |
|
4142 | 0 | cp = php_mb_ord(str, str_len, enc, 2); |
4143 | |
|
4144 | 0 | if (0 > cp) { |
4145 | 0 | if (cp == -2) { |
4146 | 0 | RETURN_THROWS(); |
4147 | 0 | } |
4148 | 0 | RETURN_FALSE; |
4149 | 0 | } |
4150 | |
|
4151 | 0 | RETURN_LONG(cp); |
4152 | 0 | } |
4153 | | /* }}} */ |
4154 | | |
4155 | | |
4156 | | static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num) |
4157 | 0 | { |
4158 | 0 | const mbfl_encoding *enc; |
4159 | 0 | enum mbfl_no_encoding no_enc; |
4160 | 0 | zend_string *ret; |
4161 | 0 | char* buf; |
4162 | 0 | size_t buf_len; |
4163 | |
|
4164 | 0 | enc = php_mb_get_encoding(enc_name, enc_name_arg_num); |
4165 | 0 | if (!enc) { |
4166 | 0 | return NULL; |
4167 | 0 | } |
4168 | | |
4169 | 0 | no_enc = enc->no_encoding; |
4170 | 0 | if (php_mb_is_unsupported_no_encoding(no_enc)) { |
4171 | 0 | zend_value_error("mb_chr() does not support the \"%s\" encoding", enc->name); |
4172 | 0 | return NULL; |
4173 | 0 | } |
4174 | | |
4175 | 0 | if (cp < 0 || cp > 0x10ffff) { |
4176 | 0 | return NULL; |
4177 | 0 | } |
4178 | | |
4179 | 0 | if (php_mb_is_no_encoding_utf8(no_enc)) { |
4180 | 0 | if (cp > 0xd7ff && 0xe000 > cp) { |
4181 | 0 | return NULL; |
4182 | 0 | } |
4183 | | |
4184 | 0 | if (cp < 0x80) { |
4185 | 0 | ret = ZSTR_CHAR(cp); |
4186 | 0 | } else if (cp < 0x800) { |
4187 | 0 | ret = zend_string_alloc(2, 0); |
4188 | 0 | ZSTR_VAL(ret)[0] = 0xc0 | (cp >> 6); |
4189 | 0 | ZSTR_VAL(ret)[1] = 0x80 | (cp & 0x3f); |
4190 | 0 | ZSTR_VAL(ret)[2] = 0; |
4191 | 0 | } else if (cp < 0x10000) { |
4192 | 0 | ret = zend_string_alloc(3, 0); |
4193 | 0 | ZSTR_VAL(ret)[0] = 0xe0 | (cp >> 12); |
4194 | 0 | ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 6) & 0x3f); |
4195 | 0 | ZSTR_VAL(ret)[2] = 0x80 | (cp & 0x3f); |
4196 | 0 | ZSTR_VAL(ret)[3] = 0; |
4197 | 0 | } else { |
4198 | 0 | ret = zend_string_alloc(4, 0); |
4199 | 0 | ZSTR_VAL(ret)[0] = 0xf0 | (cp >> 18); |
4200 | 0 | ZSTR_VAL(ret)[1] = 0x80 | ((cp >> 12) & 0x3f); |
4201 | 0 | ZSTR_VAL(ret)[2] = 0x80 | ((cp >> 6) & 0x3f); |
4202 | 0 | ZSTR_VAL(ret)[3] = 0x80 | (cp & 0x3f); |
4203 | 0 | ZSTR_VAL(ret)[4] = 0; |
4204 | 0 | } |
4205 | |
|
4206 | 0 | return ret; |
4207 | 0 | } |
4208 | | |
4209 | 0 | buf_len = 4; |
4210 | 0 | buf = (char *) emalloc(buf_len + 1); |
4211 | 0 | buf[0] = (cp >> 24) & 0xff; |
4212 | 0 | buf[1] = (cp >> 16) & 0xff; |
4213 | 0 | buf[2] = (cp >> 8) & 0xff; |
4214 | 0 | buf[3] = cp & 0xff; |
4215 | 0 | buf[4] = 0; |
4216 | |
|
4217 | 0 | { |
4218 | 0 | char *ret_str; |
4219 | 0 | size_t ret_len; |
4220 | 0 | long orig_illegalchars = MBSTRG(illegalchars); |
4221 | 0 | MBSTRG(illegalchars) = 0; |
4222 | 0 | ret_str = php_mb_convert_encoding_ex(buf, buf_len, enc, &mbfl_encoding_ucs4be, &ret_len); |
4223 | 0 | if (MBSTRG(illegalchars) != 0) { |
4224 | 0 | efree(buf); |
4225 | 0 | efree(ret_str); |
4226 | 0 | MBSTRG(illegalchars) = orig_illegalchars; |
4227 | 0 | return NULL; |
4228 | 0 | } |
4229 | | |
4230 | 0 | ret = zend_string_init(ret_str, ret_len, 0); |
4231 | 0 | efree(ret_str); |
4232 | 0 | MBSTRG(illegalchars) = orig_illegalchars; |
4233 | 0 | } |
4234 | |
|
4235 | 0 | efree(buf); |
4236 | 0 | return ret; |
4237 | 0 | } |
4238 | | |
4239 | | |
4240 | | /* {{{ */ |
4241 | | PHP_FUNCTION(mb_chr) |
4242 | 0 | { |
4243 | 0 | zend_long cp; |
4244 | 0 | zend_string *enc = NULL; |
4245 | 0 | zend_string* ret; |
4246 | |
|
4247 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
4248 | 0 | Z_PARAM_LONG(cp) |
4249 | 0 | Z_PARAM_OPTIONAL |
4250 | 0 | Z_PARAM_STR_OR_NULL(enc) |
4251 | 0 | ZEND_PARSE_PARAMETERS_END(); |
4252 | |
|
4253 | 0 | ret = php_mb_chr(cp, enc, 2); |
4254 | 0 | if (ret == NULL) { |
4255 | 0 | RETURN_FALSE; |
4256 | 0 | } |
4257 | |
|
4258 | 0 | RETURN_STR(ret); |
4259 | 0 | } |
4260 | | /* }}} */ |
4261 | | |
4262 | | /* {{{ */ |
4263 | | PHP_FUNCTION(mb_scrub) |
4264 | 0 | { |
4265 | 0 | const mbfl_encoding *enc; |
4266 | 0 | char* str; |
4267 | 0 | size_t str_len; |
4268 | 0 | zend_string *enc_name = NULL; |
4269 | 0 | char *ret; |
4270 | 0 | size_t ret_len; |
4271 | |
|
4272 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
4273 | 0 | Z_PARAM_STRING(str, str_len) |
4274 | 0 | Z_PARAM_OPTIONAL |
4275 | 0 | Z_PARAM_STR_OR_NULL(enc_name) |
4276 | 0 | ZEND_PARSE_PARAMETERS_END(); |
4277 | |
|
4278 | 0 | enc = php_mb_get_encoding(enc_name, 2); |
4279 | 0 | if (!enc) { |
4280 | 0 | RETURN_THROWS(); |
4281 | 0 | } |
4282 | |
|
4283 | 0 | ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len); |
4284 | |
|
4285 | 0 | RETVAL_STRINGL(ret, ret_len); |
4286 | 0 | efree(ret); |
4287 | 0 | } |
4288 | | /* }}} */ |
4289 | | |
4290 | | |
4291 | | /* {{{ php_mb_populate_current_detect_order_list */ |
4292 | | static void php_mb_populate_current_detect_order_list(void) |
4293 | 939k | { |
4294 | 939k | const mbfl_encoding **entry = 0; |
4295 | 939k | size_t nentries; |
4296 | | |
4297 | 939k | if (MBSTRG(current_detect_order_list)) { |
4298 | 0 | return; |
4299 | 0 | } |
4300 | | |
4301 | 939k | if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) { |
4302 | 0 | nentries = MBSTRG(detect_order_list_size); |
4303 | 0 | entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); |
4304 | 0 | memcpy(ZEND_VOIDP(entry), MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries); |
4305 | 939k | } else { |
4306 | 939k | const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list); |
4307 | 939k | size_t i; |
4308 | 939k | nentries = MBSTRG(default_detect_order_list_size); |
4309 | 939k | entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0); |
4310 | 2.81M | for (i = 0; i < nentries; i++) { |
4311 | 1.87M | entry[i] = mbfl_no2encoding(src[i]); |
4312 | 1.87M | } |
4313 | 939k | } |
4314 | 939k | MBSTRG(current_detect_order_list) = entry; |
4315 | 939k | MBSTRG(current_detect_order_list_size) = nentries; |
4316 | 939k | } |
4317 | | /* }}} */ |
4318 | | |
4319 | | /* {{{ static int php_mb_encoding_translation() */ |
4320 | | static int php_mb_encoding_translation(void) |
4321 | 0 | { |
4322 | 0 | return MBSTRG(encoding_translation); |
4323 | 0 | } |
4324 | | /* }}} */ |
4325 | | |
4326 | | /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */ |
4327 | | MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc) |
4328 | 0 | { |
4329 | 0 | if (enc != NULL) { |
4330 | 0 | if (enc->flag & MBFL_ENCTYPE_MBCS) { |
4331 | 0 | if (enc->mblen_table != NULL) { |
4332 | 0 | if (s != NULL) return enc->mblen_table[*(unsigned char *)s]; |
4333 | 0 | } |
4334 | 0 | } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { |
4335 | 0 | return 2; |
4336 | 0 | } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { |
4337 | 0 | return 4; |
4338 | 0 | } |
4339 | 0 | } |
4340 | 0 | return 1; |
4341 | 0 | } |
4342 | | /* }}} */ |
4343 | | |
4344 | | /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */ |
4345 | | MBSTRING_API size_t php_mb_mbchar_bytes(const char *s) |
4346 | 0 | { |
4347 | 0 | return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding)); |
4348 | 0 | } |
4349 | | /* }}} */ |
4350 | | |
4351 | | /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */ |
4352 | | MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc) |
4353 | 0 | { |
4354 | 0 | register const char *p = s; |
4355 | 0 | char *last=NULL; |
4356 | |
|
4357 | 0 | if (nbytes == (size_t)-1) { |
4358 | 0 | size_t nb = 0; |
4359 | |
|
4360 | 0 | while (*p != '\0') { |
4361 | 0 | if (nb == 0) { |
4362 | 0 | if ((unsigned char)*p == (unsigned char)c) { |
4363 | 0 | last = (char *)p; |
4364 | 0 | } |
4365 | 0 | nb = php_mb_mbchar_bytes_ex(p, enc); |
4366 | 0 | if (nb == 0) { |
4367 | 0 | return NULL; /* something is going wrong! */ |
4368 | 0 | } |
4369 | 0 | } |
4370 | 0 | --nb; |
4371 | 0 | ++p; |
4372 | 0 | } |
4373 | 0 | } else { |
4374 | 0 | register size_t bcnt = nbytes; |
4375 | 0 | register size_t nbytes_char; |
4376 | 0 | while (bcnt > 0) { |
4377 | 0 | if ((unsigned char)*p == (unsigned char)c) { |
4378 | 0 | last = (char *)p; |
4379 | 0 | } |
4380 | 0 | nbytes_char = php_mb_mbchar_bytes_ex(p, enc); |
4381 | 0 | if (bcnt < nbytes_char) { |
4382 | 0 | return NULL; |
4383 | 0 | } |
4384 | 0 | p += nbytes_char; |
4385 | 0 | bcnt -= nbytes_char; |
4386 | 0 | } |
4387 | 0 | } |
4388 | 0 | return last; |
4389 | 0 | } |
4390 | | /* }}} */ |
4391 | | |
4392 | | /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */ |
4393 | | MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes) |
4394 | 0 | { |
4395 | 0 | return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding)); |
4396 | 0 | } |
4397 | | /* }}} */ |
4398 | | |
4399 | | /* {{{ MBSTRING_API int php_mb_stripos() */ |
4400 | | MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc) |
4401 | 0 | { |
4402 | 0 | size_t n = (size_t) -1; |
4403 | 0 | mbfl_string haystack, needle; |
4404 | |
|
4405 | 0 | mbfl_string_init(&haystack); |
4406 | 0 | mbfl_string_init(&needle); |
4407 | 0 | haystack.encoding = enc; |
4408 | 0 | needle.encoding = enc; |
4409 | |
|
4410 | 0 | do { |
4411 | | /* We're using simple case-folding here, because we'd have to deal with remapping of |
4412 | | * offsets otherwise. */ |
4413 | |
|
4414 | 0 | size_t len = 0; |
4415 | 0 | haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc); |
4416 | 0 | haystack.len = len; |
4417 | |
|
4418 | 0 | if (!haystack.val) { |
4419 | 0 | break; |
4420 | 0 | } |
4421 | | |
4422 | 0 | if (haystack.len == 0) { |
4423 | 0 | break; |
4424 | 0 | } |
4425 | | |
4426 | 0 | needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc); |
4427 | 0 | needle.len = len; |
4428 | |
|
4429 | 0 | if (!needle.val) { |
4430 | 0 | break; |
4431 | 0 | } |
4432 | | |
4433 | 0 | n = mbfl_strpos(&haystack, &needle, offset, mode); |
4434 | 0 | } while(0); |
4435 | |
|
4436 | 0 | if (haystack.val) { |
4437 | 0 | efree(haystack.val); |
4438 | 0 | } |
4439 | |
|
4440 | 0 | if (needle.val) { |
4441 | 0 | efree(needle.val); |
4442 | 0 | } |
4443 | |
|
4444 | 0 | return n; |
4445 | 0 | } |
4446 | | /* }}} */ |
4447 | | |
4448 | | static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */ |
4449 | 0 | { |
4450 | 0 | *list = (const zend_encoding **)MBSTRG(http_input_list); |
4451 | 0 | *list_size = MBSTRG(http_input_list_size); |
4452 | 0 | } |
4453 | | /* }}} */ |
4454 | | |
4455 | | static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */ |
4456 | 0 | { |
4457 | 0 | MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding; |
4458 | 0 | } |
4459 | | /* }}} */ |
4460 | | |
4461 | | #endif /* HAVE_MBSTRING */ |