/src/cpython/Modules/_localemodule.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*********************************************************** |
2 | | Copyright (C) 1997, 2002, 2003, 2007, 2008 Martin von Loewis |
3 | | |
4 | | Permission to use, copy, modify, and distribute this software and its |
5 | | documentation for any purpose and without fee is hereby granted, |
6 | | provided that the above copyright notice appear in all copies. |
7 | | |
8 | | This software comes with no warranty. Use at your own risk. |
9 | | |
10 | | ******************************************************************/ |
11 | | |
12 | | #include "Python.h" |
13 | | #include "pycore_fileutils.h" // _Py_GetLocaleconvNumeric() |
14 | | #include "pycore_pymem.h" // _PyMem_Strdup() |
15 | | |
16 | | #include <locale.h> // setlocale() |
17 | | #include <string.h> // strlen() |
18 | | #ifdef HAVE_ERRNO_H |
19 | | # include <errno.h> // errno |
20 | | #endif |
21 | | #ifdef HAVE_LANGINFO_H |
22 | | # include <langinfo.h> // nl_langinfo() |
23 | | #endif |
24 | | #ifdef HAVE_LIBINTL_H |
25 | | # include <libintl.h> |
26 | | #endif |
27 | | #ifdef MS_WINDOWS |
28 | | # ifndef WIN32_LEAN_AND_MEAN |
29 | | # define WIN32_LEAN_AND_MEAN |
30 | | # endif |
31 | | # include <windows.h> |
32 | | #endif |
33 | | |
34 | | PyDoc_STRVAR(locale__doc__, "Support for POSIX locales."); |
35 | | |
36 | | typedef struct _locale_state { |
37 | | PyObject *Error; |
38 | | } _locale_state; |
39 | | |
40 | | static inline _locale_state* |
41 | | get_locale_state(PyObject *m) |
42 | 0 | { |
43 | 0 | void *state = PyModule_GetState(m); |
44 | 0 | assert(state != NULL); |
45 | 0 | return (_locale_state *)state; |
46 | 0 | } |
47 | | |
48 | | #include "clinic/_localemodule.c.h" |
49 | | |
50 | | /*[clinic input] |
51 | | module _locale |
52 | | [clinic start generated code]*/ |
53 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=ed98569b726feada]*/ |
54 | | |
55 | | /* support functions for formatting floating-point numbers */ |
56 | | |
57 | | /* the grouping is terminated by either 0 or CHAR_MAX */ |
58 | | static PyObject* |
59 | | copy_grouping(const char* s) |
60 | 0 | { |
61 | 0 | int i; |
62 | 0 | PyObject *result, *val = NULL; |
63 | |
|
64 | 0 | if (s[0] == '\0') { |
65 | | /* empty string: no grouping at all */ |
66 | 0 | return PyList_New(0); |
67 | 0 | } |
68 | | |
69 | 0 | for (i = 0; s[i] != '\0' && s[i] != CHAR_MAX; i++) |
70 | 0 | ; /* nothing */ |
71 | |
|
72 | 0 | result = PyList_New(i+1); |
73 | 0 | if (!result) |
74 | 0 | return NULL; |
75 | | |
76 | 0 | i = -1; |
77 | 0 | do { |
78 | 0 | i++; |
79 | 0 | val = PyLong_FromLong(s[i]); |
80 | 0 | if (val == NULL) { |
81 | 0 | Py_DECREF(result); |
82 | 0 | return NULL; |
83 | 0 | } |
84 | 0 | PyList_SET_ITEM(result, i, val); |
85 | 0 | } while (s[i] != '\0' && s[i] != CHAR_MAX); |
86 | | |
87 | 0 | return result; |
88 | 0 | } |
89 | | |
90 | | /*[clinic input] |
91 | | _locale.setlocale |
92 | | |
93 | | category: int |
94 | | locale: str(accept={str, NoneType}) = NULL |
95 | | / |
96 | | |
97 | | Activates/queries locale processing. |
98 | | [clinic start generated code]*/ |
99 | | |
100 | | static PyObject * |
101 | | _locale_setlocale_impl(PyObject *module, int category, const char *locale) |
102 | | /*[clinic end generated code: output=a0e777ae5d2ff117 input=dbe18f1d66c57a6a]*/ |
103 | 0 | { |
104 | 0 | char *result; |
105 | 0 | PyObject *result_object; |
106 | |
|
107 | | #if defined(MS_WINDOWS) |
108 | | if (category < LC_MIN || category > LC_MAX) |
109 | | { |
110 | | PyErr_SetString(get_locale_state(module)->Error, |
111 | | "invalid locale category"); |
112 | | return NULL; |
113 | | } |
114 | | #endif |
115 | |
|
116 | 0 | if (locale) { |
117 | | /* set locale */ |
118 | 0 | result = setlocale(category, locale); |
119 | 0 | if (!result) { |
120 | | /* operation failed, no setting was changed */ |
121 | 0 | PyErr_SetString(get_locale_state(module)->Error, |
122 | 0 | "unsupported locale setting"); |
123 | 0 | return NULL; |
124 | 0 | } |
125 | 0 | result_object = PyUnicode_DecodeLocale(result, NULL); |
126 | 0 | if (!result_object) |
127 | 0 | return NULL; |
128 | 0 | } else { |
129 | | /* get locale */ |
130 | 0 | result = setlocale(category, NULL); |
131 | 0 | if (!result) { |
132 | 0 | PyErr_SetString(get_locale_state(module)->Error, |
133 | 0 | "locale query failed"); |
134 | 0 | return NULL; |
135 | 0 | } |
136 | 0 | result_object = PyUnicode_DecodeLocale(result, NULL); |
137 | 0 | } |
138 | 0 | return result_object; |
139 | 0 | } |
140 | | |
/* Return 1 when `str` is exactly one byte long and that byte is in the
   ASCII range (<= 127); return 0 otherwise (including for ""). */
static int
locale_is_ascii(const char *str)
{
    if (str[0] == '\0' || str[1] != '\0') {
        return 0;
    }
    return (unsigned char)str[0] < 128;
}
146 | | |
/* Return 1 when every byte of the NUL-terminated `str` is <= 127
   (trivially true for ""), 0 as soon as a byte above 127 is found. */
static int
is_all_ascii(const char *str)
{
    const unsigned char *p = (const unsigned char *)str;
    while (*p != '\0') {
        if (*p++ >= 0x80) {
            return 0;
        }
    }
    return 1;
}
157 | | |
/* Store the four monetary strings of `lc` (int_curr_symbol,
   currency_symbol, mon_decimal_point, mon_thousands_sep) into `dict`
   under keys of the same names.

   On non-Windows builds the strings are decoded with
   PyUnicode_DecodeLocale(); LC_CTYPE is temporarily switched to the
   LC_MONETARY locale while decoding if the two differ and at least one
   string fails locale_is_ascii().  On Windows the wide-character _W_*
   fields are used instead and no locale switch takes place.

   Returns 0 on success, -1 with an exception set on failure. */
static int
locale_decode_monetary(PyObject *dict, struct lconv *lc)
{
#ifndef MS_WINDOWS
    int change_locale;
    /* locale_is_ascii() accepts only single-byte ASCII strings, so any
       other value (empty, multi-byte, non-ASCII) requests the switch. */
    change_locale = (!locale_is_ascii(lc->int_curr_symbol)
                     || !locale_is_ascii(lc->currency_symbol)
                     || !locale_is_ascii(lc->mon_decimal_point)
                     || !locale_is_ascii(lc->mon_thousands_sep));

    /* Keep a copy of the LC_CTYPE locale */
    char *oldloc = NULL, *loc = NULL;
    if (change_locale) {
        oldloc = setlocale(LC_CTYPE, NULL);
        if (!oldloc) {
            /* NOTE(review): this raises RuntimeWarning, whereas
               change_locale() in this file raises RuntimeError for the
               same message -- confirm which one is intended. */
            PyErr_SetString(PyExc_RuntimeWarning,
                            "failed to get LC_CTYPE locale");
            return -1;
        }

        /* Duplicate the name: the pointer returned by setlocale() is
           replaced by our own copy before further setlocale() calls. */
        oldloc = _PyMem_Strdup(oldloc);
        if (!oldloc) {
            PyErr_NoMemory();
            return -1;
        }

        loc = setlocale(LC_MONETARY, NULL);
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
            /* Same locale already active for LC_CTYPE: nothing to do. */
            loc = NULL;
        }

        if (loc != NULL) {
            /* Temporarily set the LC_CTYPE locale to the LC_MONETARY
               locale; this is only reached when the two locales differ
               and at least one string failed locale_is_ascii(). */
            setlocale(LC_CTYPE, loc);
        }
    }

#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
#else  /* MS_WINDOWS */
/* Use _W_* fields of Windows struct lconv */
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
#endif /* MS_WINDOWS */

    int res = -1;

/* Decode lc->ATTR and store it in dict under the key "ATTR"; jump to
   `done` (leaving res == -1) on any failure. */
#define RESULT_STRING(ATTR) \
    do { \
        PyObject *obj; \
        obj = GET_LOCALE_STRING(ATTR); \
        if (obj == NULL) { \
            goto done; \
        } \
        if (PyDict_SetItemString(dict, Py_STRINGIFY(ATTR), obj) < 0) { \
            Py_DECREF(obj); \
            goto done; \
        } \
        Py_DECREF(obj); \
    } while (0)

    RESULT_STRING(int_curr_symbol);
    RESULT_STRING(currency_symbol);
    RESULT_STRING(mon_decimal_point);
    RESULT_STRING(mon_thousands_sep);
#undef RESULT_STRING
#undef GET_LOCALE_STRING

    res = 0;

done:
#ifndef MS_WINDOWS
    /* loc is non-NULL only if LC_CTYPE was switched above. */
    if (loc != NULL) {
        setlocale(LC_CTYPE, oldloc);
    }
    PyMem_Free(oldloc);
#endif
    return res;
}
237 | | |
238 | | /*[clinic input] |
239 | | _locale.localeconv |
240 | | |
241 | | Returns numeric and monetary locale-specific parameters. |
242 | | [clinic start generated code]*/ |
243 | | |
static PyObject *
_locale_localeconv_impl(PyObject *module)
/*[clinic end generated code: output=43a54515e0a2aef5 input=f1132d15accf4444]*/
{
    /* Build and return a new dict holding the fields of the C library's
       struct lconv.  Returns NULL with an exception set on failure. */
    PyObject* result;
    struct lconv *lc;
    PyObject *x;

    result = PyDict_New();
    if (!result) {
        return NULL;
    }

    /* if LC_NUMERIC is different in the C library, use saved value */
    /* NOTE(review): no saved value is consulted in the visible code; the
       comment above may be historical -- confirm. */
    lc = localeconv();

    /* hopefully, the localeconv result survives the C library calls
       involved herein */

/* Store obj in result under key and drop our reference to obj.
   A NULL obj or a failed insertion jumps to `failed`. */
#define RESULT(key, obj)\
    do { \
        if (obj == NULL) \
            goto failed; \
        if (PyDict_SetItemString(result, key, obj) < 0) { \
            Py_DECREF(obj); \
            goto failed; \
        } \
        Py_DECREF(obj); \
    } while (0)

#ifdef MS_WINDOWS
/* Use _W_* fields of Windows struct lconv */
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
#else
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
#endif
/* Decode the string field lc->s and store it under the key "s". */
#define RESULT_STRING(s)\
    do { \
        x = GET_LOCALE_STRING(s); \
        RESULT(#s, x); \
    } while (0)

/* Convert the integer field lc->i and store it under the key "i". */
#define RESULT_INT(i)\
    do { \
        x = PyLong_FromLong(lc->i); \
        RESULT(#i, x); \
    } while (0)

    /* Monetary information: LC_MONETARY encoding */
    if (locale_decode_monetary(result, lc) < 0) {
        goto failed;
    }
    x = copy_grouping(lc->mon_grouping);
    RESULT("mon_grouping", x);

    RESULT_STRING(positive_sign);
    RESULT_STRING(negative_sign);
    RESULT_INT(int_frac_digits);
    RESULT_INT(frac_digits);
    RESULT_INT(p_cs_precedes);
    RESULT_INT(p_sep_by_space);
    RESULT_INT(n_cs_precedes);
    RESULT_INT(n_sep_by_space);
    RESULT_INT(p_sign_posn);
    RESULT_INT(n_sign_posn);

    /* Numeric information: LC_NUMERIC encoding */
    PyObject *decimal_point = NULL, *thousands_sep = NULL;
    if (_Py_GetLocaleconvNumeric(lc, &decimal_point, &thousands_sep) < 0) {
        /* Either output may or may not have been filled in; release
           whatever is there. */
        Py_XDECREF(decimal_point);
        Py_XDECREF(thousands_sep);
        goto failed;
    }

    if (PyDict_SetItemString(result, "decimal_point", decimal_point) < 0) {
        Py_DECREF(decimal_point);
        Py_DECREF(thousands_sep);
        goto failed;
    }
    Py_DECREF(decimal_point);

    if (PyDict_SetItemString(result, "thousands_sep", thousands_sep) < 0) {
        Py_DECREF(thousands_sep);
        goto failed;
    }
    Py_DECREF(thousands_sep);

    x = copy_grouping(lc->grouping);
    RESULT("grouping", x);

    return result;

  failed:
    /* Dropping the dict also releases every value stored so far. */
    Py_DECREF(result);
    return NULL;

#undef RESULT
#undef RESULT_STRING
#undef RESULT_INT
#undef GET_LOCALE_STRING
}
345 | | |
346 | | #if defined(HAVE_WCSCOLL) |
347 | | |
348 | | /*[clinic input] |
349 | | _locale.strcoll |
350 | | |
351 | | os1: unicode |
352 | | os2: unicode |
353 | | / |
354 | | |
355 | | Compares two strings according to the locale. |
356 | | [clinic start generated code]*/ |
357 | | |
358 | | static PyObject * |
359 | | _locale_strcoll_impl(PyObject *module, PyObject *os1, PyObject *os2) |
360 | | /*[clinic end generated code: output=82ddc6d62c76d618 input=693cd02bcbf38dd8]*/ |
361 | 0 | { |
362 | 0 | PyObject *result = NULL; |
363 | 0 | wchar_t *ws1 = NULL, *ws2 = NULL; |
364 | | |
365 | | /* Convert the unicode strings to wchar[]. */ |
366 | 0 | ws1 = PyUnicode_AsWideCharString(os1, NULL); |
367 | 0 | if (ws1 == NULL) |
368 | 0 | goto done; |
369 | 0 | ws2 = PyUnicode_AsWideCharString(os2, NULL); |
370 | 0 | if (ws2 == NULL) |
371 | 0 | goto done; |
372 | | /* Collate the strings. */ |
373 | 0 | result = PyLong_FromLong(wcscoll(ws1, ws2)); |
374 | 0 | done: |
375 | | /* Deallocate everything. */ |
376 | 0 | if (ws1) PyMem_Free(ws1); |
377 | 0 | if (ws2) PyMem_Free(ws2); |
378 | 0 | return result; |
379 | 0 | } |
380 | | #endif |
381 | | |
382 | | #ifdef HAVE_WCSXFRM |
383 | | |
384 | | /*[clinic input] |
385 | | _locale.strxfrm |
386 | | |
387 | | string as str: unicode |
388 | | / |
389 | | |
390 | | Return a string that can be used as a key for locale-aware comparisons. |
391 | | [clinic start generated code]*/ |
392 | | |
393 | | static PyObject * |
394 | | _locale_strxfrm_impl(PyObject *module, PyObject *str) |
395 | | /*[clinic end generated code: output=3081866ebffc01af input=1378bbe6a88b4780]*/ |
396 | 0 | { |
397 | 0 | Py_ssize_t n1; |
398 | 0 | wchar_t *s = NULL, *buf = NULL; |
399 | 0 | size_t n2; |
400 | 0 | PyObject *result = NULL; |
401 | |
|
402 | 0 | s = PyUnicode_AsWideCharString(str, &n1); |
403 | 0 | if (s == NULL) |
404 | 0 | goto exit; |
405 | 0 | if (wcslen(s) != (size_t)n1) { |
406 | 0 | PyErr_SetString(PyExc_ValueError, |
407 | 0 | "embedded null character"); |
408 | 0 | goto exit; |
409 | 0 | } |
410 | | |
411 | | /* assume no change in size, first */ |
412 | 0 | n1 = n1 + 1; |
413 | 0 | buf = PyMem_New(wchar_t, n1); |
414 | 0 | if (!buf) { |
415 | 0 | PyErr_NoMemory(); |
416 | 0 | goto exit; |
417 | 0 | } |
418 | 0 | errno = 0; |
419 | 0 | n2 = wcsxfrm(buf, s, n1); |
420 | 0 | if (errno && errno != ERANGE) { |
421 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
422 | 0 | goto exit; |
423 | 0 | } |
424 | 0 | if (n2 >= (size_t)n1) { |
425 | | /* more space needed */ |
426 | 0 | wchar_t * new_buf = PyMem_Realloc(buf, (n2+1)*sizeof(wchar_t)); |
427 | 0 | if (!new_buf) { |
428 | 0 | PyErr_NoMemory(); |
429 | 0 | goto exit; |
430 | 0 | } |
431 | 0 | buf = new_buf; |
432 | 0 | errno = 0; |
433 | 0 | n2 = wcsxfrm(buf, s, n2+1); |
434 | 0 | if (errno) { |
435 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
436 | 0 | goto exit; |
437 | 0 | } |
438 | 0 | } |
439 | 0 | result = PyUnicode_FromWideChar(buf, n2); |
440 | 0 | exit: |
441 | 0 | PyMem_Free(buf); |
442 | 0 | PyMem_Free(s); |
443 | 0 | return result; |
444 | 0 | } |
445 | | #endif |
446 | | |
447 | | #if defined(MS_WINDOWS) |
448 | | |
449 | | /*[clinic input] |
450 | | _locale._getdefaultlocale |
451 | | |
452 | | [clinic start generated code]*/ |
453 | | |
454 | | static PyObject * |
455 | | _locale__getdefaultlocale_impl(PyObject *module) |
456 | | /*[clinic end generated code: output=e6254088579534c2 input=003ea41acd17f7c7]*/ |
457 | | { |
458 | | char encoding[20]; |
459 | | char locale[100]; |
460 | | |
461 | | PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP()); |
462 | | |
463 | | if (GetLocaleInfoA(LOCALE_USER_DEFAULT, |
464 | | LOCALE_SISO639LANGNAME, |
465 | | locale, sizeof(locale))) { |
466 | | Py_ssize_t i = strlen(locale); |
467 | | locale[i++] = '_'; |
468 | | if (GetLocaleInfoA(LOCALE_USER_DEFAULT, |
469 | | LOCALE_SISO3166CTRYNAME, |
470 | | locale+i, (int)(sizeof(locale)-i))) |
471 | | return Py_BuildValue("ss", locale, encoding); |
472 | | } |
473 | | |
474 | | /* If we end up here, this windows version didn't know about |
475 | | ISO639/ISO3166 names (it's probably Windows 95). Return the |
476 | | Windows language identifier instead (a hexadecimal number) */ |
477 | | |
478 | | locale[0] = '0'; |
479 | | locale[1] = 'x'; |
480 | | if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_IDEFAULTLANGUAGE, |
481 | | locale+2, sizeof(locale)-2)) { |
482 | | return Py_BuildValue("ss", locale, encoding); |
483 | | } |
484 | | |
485 | | /* cannot determine the language code (very unlikely) */ |
486 | | Py_INCREF(Py_None); |
487 | | return Py_BuildValue("Os", Py_None, encoding); |
488 | | } |
489 | | #endif |
490 | | |
491 | | #ifdef HAVE_LANGINFO_H |
/* LANGINFO(X, Y) expands to one table entry: the constant's name as a
   string (#X), its value (X) and the locale category Y. */
#define LANGINFO(X, Y) {#X, X, Y}
/* Table of the nl_langinfo() constants known to this module.  Each entry
   records the name, the integer value and the LC_* category given for it
   below.  The table ends with an all-zero sentinel entry (name == NULL). */
static struct langinfo_constant{
    const char *name;   /* stringified constant name */
    int value;          /* value of the constant */
    int category;       /* LC_* category listed for the constant */
} langinfo_constants[] =
{
    /* These constants should exist on any langinfo implementation */
    LANGINFO(DAY_1, LC_TIME),
    LANGINFO(DAY_2, LC_TIME),
    LANGINFO(DAY_3, LC_TIME),
    LANGINFO(DAY_4, LC_TIME),
    LANGINFO(DAY_5, LC_TIME),
    LANGINFO(DAY_6, LC_TIME),
    LANGINFO(DAY_7, LC_TIME),

    LANGINFO(ABDAY_1, LC_TIME),
    LANGINFO(ABDAY_2, LC_TIME),
    LANGINFO(ABDAY_3, LC_TIME),
    LANGINFO(ABDAY_4, LC_TIME),
    LANGINFO(ABDAY_5, LC_TIME),
    LANGINFO(ABDAY_6, LC_TIME),
    LANGINFO(ABDAY_7, LC_TIME),

    LANGINFO(MON_1, LC_TIME),
    LANGINFO(MON_2, LC_TIME),
    LANGINFO(MON_3, LC_TIME),
    LANGINFO(MON_4, LC_TIME),
    LANGINFO(MON_5, LC_TIME),
    LANGINFO(MON_6, LC_TIME),
    LANGINFO(MON_7, LC_TIME),
    LANGINFO(MON_8, LC_TIME),
    LANGINFO(MON_9, LC_TIME),
    LANGINFO(MON_10, LC_TIME),
    LANGINFO(MON_11, LC_TIME),
    LANGINFO(MON_12, LC_TIME),

    LANGINFO(ABMON_1, LC_TIME),
    LANGINFO(ABMON_2, LC_TIME),
    LANGINFO(ABMON_3, LC_TIME),
    LANGINFO(ABMON_4, LC_TIME),
    LANGINFO(ABMON_5, LC_TIME),
    LANGINFO(ABMON_6, LC_TIME),
    LANGINFO(ABMON_7, LC_TIME),
    LANGINFO(ABMON_8, LC_TIME),
    LANGINFO(ABMON_9, LC_TIME),
    LANGINFO(ABMON_10, LC_TIME),
    LANGINFO(ABMON_11, LC_TIME),
    LANGINFO(ABMON_12, LC_TIME),

#ifdef RADIXCHAR
    /* The following are not available with glibc 2.0 */
    LANGINFO(RADIXCHAR, LC_NUMERIC),
    LANGINFO(THOUSEP, LC_NUMERIC),
    /* YESSTR and NOSTR are deprecated in glibc, since they are
       a special case of message translation, which should be rather
       done using gettext. So we don't expose it to Python in the
       first place.
    LANGINFO(YESSTR, LC_MESSAGES),
    LANGINFO(NOSTR, LC_MESSAGES),
    */
    LANGINFO(CRNCYSTR, LC_MONETARY),
#endif

    LANGINFO(D_T_FMT, LC_TIME),
    LANGINFO(D_FMT, LC_TIME),
    LANGINFO(T_FMT, LC_TIME),
    LANGINFO(AM_STR, LC_TIME),
    LANGINFO(PM_STR, LC_TIME),

    /* The following constants are available only with XPG4, but...
       OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have
       a few of the others.
       Solution: ifdef-test them all. */
#ifdef CODESET
    LANGINFO(CODESET, LC_CTYPE),
#endif
#ifdef T_FMT_AMPM
    LANGINFO(T_FMT_AMPM, LC_TIME),
#endif
#ifdef ERA
    LANGINFO(ERA, LC_TIME),
#endif
#ifdef ERA_D_FMT
    LANGINFO(ERA_D_FMT, LC_TIME),
#endif
#ifdef ERA_D_T_FMT
    LANGINFO(ERA_D_T_FMT, LC_TIME),
#endif
#ifdef ERA_T_FMT
    LANGINFO(ERA_T_FMT, LC_TIME),
#endif
#ifdef ALT_DIGITS
    LANGINFO(ALT_DIGITS, LC_TIME),
#endif
#ifdef YESEXPR
    LANGINFO(YESEXPR, LC_MESSAGES),
#endif
#ifdef NOEXPR
    LANGINFO(NOEXPR, LC_MESSAGES),
#endif
#ifdef _DATE_FMT
    /* This is not available in all glibc versions that have CODESET. */
    LANGINFO(_DATE_FMT, LC_TIME),
#endif
    /* Sentinel: loops over this table stop at the NULL name. */
    {0, 0, 0}
};
599 | | |
600 | | /* Temporary make the LC_CTYPE locale to be the same as |
601 | | * the locale of the specified category. */ |
602 | | static int |
603 | | change_locale(int category, char **oldloc) |
604 | 0 | { |
605 | | /* Keep a copy of the LC_CTYPE locale */ |
606 | 0 | *oldloc = setlocale(LC_CTYPE, NULL); |
607 | 0 | if (!*oldloc) { |
608 | 0 | PyErr_SetString(PyExc_RuntimeError, "failed to get LC_CTYPE locale"); |
609 | 0 | return -1; |
610 | 0 | } |
611 | 0 | *oldloc = _PyMem_Strdup(*oldloc); |
612 | 0 | if (!*oldloc) { |
613 | 0 | PyErr_NoMemory(); |
614 | 0 | return -1; |
615 | 0 | } |
616 | | |
617 | | /* Set a new locale if it is different. */ |
618 | 0 | char *loc = setlocale(category, NULL); |
619 | 0 | if (loc == NULL || strcmp(loc, *oldloc) == 0) { |
620 | 0 | PyMem_Free(*oldloc); |
621 | 0 | *oldloc = NULL; |
622 | 0 | return 0; |
623 | 0 | } |
624 | | |
625 | 0 | setlocale(LC_CTYPE, loc); |
626 | 0 | return 1; |
627 | 0 | } |
628 | | |
/* Put back the LC_CTYPE locale named by `oldloc` and free that string.
 * Does nothing when `oldloc` is NULL. */
static void
restore_locale(char *oldloc)
{
    if (oldloc == NULL) {
        return;
    }
    setlocale(LC_CTYPE, oldloc);
    PyMem_Free(oldloc);
}
638 | | |
639 | | #ifdef __GLIBC__ |
640 | | #if defined(ALT_DIGITS) || defined(ERA) |
641 | | static PyObject * |
642 | | decode_strings(const char *result, size_t max_count) |
643 | 0 | { |
644 | | /* Convert a sequence of NUL-separated C strings to a Python string |
645 | | * containing semicolon separated items. */ |
646 | 0 | size_t i = 0; |
647 | 0 | size_t count = 0; |
648 | 0 | for (; count < max_count && result[i]; count++) { |
649 | 0 | i += strlen(result + i) + 1; |
650 | 0 | } |
651 | 0 | char *buf = PyMem_Malloc(i); |
652 | 0 | if (buf == NULL) { |
653 | 0 | PyErr_NoMemory(); |
654 | 0 | return NULL; |
655 | 0 | } |
656 | 0 | memcpy(buf, result, i); |
657 | | /* Replace all NULs with semicolons. */ |
658 | 0 | i = 0; |
659 | 0 | while (--count) { |
660 | 0 | i += strlen(buf + i); |
661 | 0 | buf[i++] = ';'; |
662 | 0 | } |
663 | 0 | PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL); |
664 | 0 | PyMem_Free(buf); |
665 | 0 | return pyresult; |
666 | 0 | } |
667 | | #endif |
668 | | #endif |
669 | | |
670 | | /*[clinic input] |
671 | | _locale.nl_langinfo |
672 | | |
673 | | key as item: int |
674 | | / |
675 | | |
676 | | Return the value for the locale information associated with key. |
677 | | [clinic start generated code]*/ |
678 | | |
static PyObject *
_locale_nl_langinfo_impl(PyObject *module, int item)
/*[clinic end generated code: output=6aea457b47e077a3 input=00798143eecfeddc]*/
{
    /* Return nl_langinfo(item) as a str.  Only values listed in
       langinfo_constants are accepted; anything else raises ValueError.
       Returns NULL with an exception set on failure. */
    int i;
    /* Check whether this is a supported constant. GNU libc sometimes
       returns numeric values in the char* return value, which would
       crash PyUnicode_FromString.  */
    for (i = 0; langinfo_constants[i].name; i++) {
        if (langinfo_constants[i].value == item) {
            /* Check NULL as a workaround for GNU libc's returning NULL
               instead of an empty string for nl_langinfo(ERA).  */
            const char *result = nl_langinfo(item);
            result = result != NULL ? result : "";
            char *oldloc = NULL;
            /* Switch LC_CTYPE to the item's category before decoding,
               but only when the category is not LC_CTYPE itself, the
               result is non-empty, and either the result contains a
               non-ASCII byte or (glibc only) the item is ALT_DIGITS or
               ERA.  change_locale() is reached only if all of the
               preceding conditions hold. */
            if (langinfo_constants[i].category != LC_CTYPE
                && *result && (
#ifdef __GLIBC__
                    // gh-133740: Always change the locale for ALT_DIGITS and ERA
#  ifdef ALT_DIGITS
                    item == ALT_DIGITS ||
#  endif
#  ifdef ERA
                    item == ERA ||
#  endif
#endif
                    !is_all_ascii(result))
                && change_locale(langinfo_constants[i].category, &oldloc) < 0)
            {
                return NULL;
            }
            PyObject *pyresult;
#ifdef __GLIBC__
            /* According to the POSIX specification the result must be
             * a sequence of semicolon-separated strings.
             * But in Glibc they are NUL-separated. */
#ifdef ALT_DIGITS
            if (item == ALT_DIGITS && *result) {
                /* At most 100 items are joined for ALT_DIGITS. */
                pyresult = decode_strings(result, 100);
            }
            else
#endif
#ifdef ERA
            if (item == ERA && *result) {
                /* No item limit for ERA: stop at the first empty item. */
                pyresult = decode_strings(result, SIZE_MAX);
            }
            else
#endif
#endif
            {
                pyresult = PyUnicode_DecodeLocale(result, NULL);
            }
            /* No-op when oldloc is still NULL (no switch happened). */
            restore_locale(oldloc);
            return pyresult;
        }
    }
    PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
    return NULL;
}
738 | | #endif /* HAVE_LANGINFO_H */ |
739 | | |
740 | | #ifdef HAVE_LIBINTL_H |
741 | | |
742 | | /*[clinic input] |
743 | | _locale.gettext |
744 | | |
745 | | msg as in: str |
746 | | / |
747 | | |
748 | | gettext(msg) -> string |
749 | | |
750 | | Return translation of msg. |
751 | | [clinic start generated code]*/ |
752 | | |
753 | | static PyObject * |
754 | | _locale_gettext_impl(PyObject *module, const char *in) |
755 | | /*[clinic end generated code: output=493bb4b38a4704fe input=949fc8efc2bb3bc3]*/ |
756 | 0 | { |
757 | 0 | return PyUnicode_DecodeLocale(gettext(in), NULL); |
758 | 0 | } |
759 | | |
760 | | /*[clinic input] |
761 | | _locale.dgettext |
762 | | |
763 | | domain: str(accept={str, NoneType}) |
764 | | msg as in: str |
765 | | / |
766 | | |
767 | | dgettext(domain, msg) -> string |
768 | | |
769 | | Return translation of msg in domain. |
770 | | [clinic start generated code]*/ |
771 | | |
772 | | static PyObject * |
773 | | _locale_dgettext_impl(PyObject *module, const char *domain, const char *in) |
774 | | /*[clinic end generated code: output=3c0cd5287b972c8f input=a277388a635109d8]*/ |
775 | 0 | { |
776 | 0 | return PyUnicode_DecodeLocale(dgettext(domain, in), NULL); |
777 | 0 | } |
778 | | |
779 | | /*[clinic input] |
780 | | _locale.dcgettext |
781 | | |
782 | | domain: str(accept={str, NoneType}) |
783 | | msg as msgid: str |
784 | | category: int |
785 | | / |
786 | | |
787 | | Return translation of msg in domain and category. |
788 | | [clinic start generated code]*/ |
789 | | |
790 | | static PyObject * |
791 | | _locale_dcgettext_impl(PyObject *module, const char *domain, |
792 | | const char *msgid, int category) |
793 | | /*[clinic end generated code: output=0f4cc4fce0aa283f input=ec5f8fed4336de67]*/ |
794 | 0 | { |
795 | 0 | return PyUnicode_DecodeLocale(dcgettext(domain,msgid,category), NULL); |
796 | 0 | } |
797 | | |
798 | | /*[clinic input] |
799 | | _locale.textdomain |
800 | | |
801 | | domain: str(accept={str, NoneType}) |
802 | | / |
803 | | |
804 | | Set the C library's textdomain to domain, returning the new domain. |
805 | | [clinic start generated code]*/ |
806 | | |
807 | | static PyObject * |
808 | | _locale_textdomain_impl(PyObject *module, const char *domain) |
809 | | /*[clinic end generated code: output=7992df06aadec313 input=66359716f5eb1d38]*/ |
810 | 0 | { |
811 | 0 | domain = textdomain(domain); |
812 | 0 | if (!domain) { |
813 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
814 | 0 | return NULL; |
815 | 0 | } |
816 | 0 | return PyUnicode_DecodeLocale(domain, NULL); |
817 | 0 | } |
818 | | |
819 | | /*[clinic input] |
820 | | _locale.bindtextdomain |
821 | | |
822 | | domain: str |
823 | | dir as dirname_obj: object |
824 | | / |
825 | | |
826 | | Bind the C library's domain to dir. |
827 | | [clinic start generated code]*/ |
828 | | |
829 | | static PyObject * |
830 | | _locale_bindtextdomain_impl(PyObject *module, const char *domain, |
831 | | PyObject *dirname_obj) |
832 | | /*[clinic end generated code: output=6d6f3c7b345d785c input=c0dff085acfe272b]*/ |
833 | 0 | { |
834 | 0 | const char *dirname, *current_dirname; |
835 | 0 | PyObject *dirname_bytes = NULL, *result; |
836 | |
|
837 | 0 | if (!strlen(domain)) { |
838 | 0 | PyErr_SetString(get_locale_state(module)->Error, |
839 | 0 | "domain must be a non-empty string"); |
840 | 0 | return 0; |
841 | 0 | } |
842 | 0 | if (dirname_obj != Py_None) { |
843 | 0 | if (!PyUnicode_FSConverter(dirname_obj, &dirname_bytes)) |
844 | 0 | return NULL; |
845 | 0 | dirname = PyBytes_AsString(dirname_bytes); |
846 | 0 | } else { |
847 | 0 | dirname_bytes = NULL; |
848 | 0 | dirname = NULL; |
849 | 0 | } |
850 | 0 | current_dirname = bindtextdomain(domain, dirname); |
851 | 0 | if (current_dirname == NULL) { |
852 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
853 | 0 | Py_XDECREF(dirname_bytes); |
854 | 0 | return NULL; |
855 | 0 | } |
856 | 0 | result = PyUnicode_DecodeLocale(current_dirname, NULL); |
857 | 0 | Py_XDECREF(dirname_bytes); |
858 | 0 | return result; |
859 | 0 | } |
860 | | |
861 | | #ifdef HAVE_BIND_TEXTDOMAIN_CODESET |
862 | | |
863 | | /*[clinic input] |
864 | | _locale.bind_textdomain_codeset |
865 | | |
866 | | domain: str |
867 | | codeset: str(accept={str, NoneType}) |
868 | | / |
869 | | |
870 | | Bind the C library's domain to codeset. |
871 | | [clinic start generated code]*/ |
872 | | |
873 | | static PyObject * |
874 | | _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain, |
875 | | const char *codeset) |
876 | | /*[clinic end generated code: output=fa452f9c8b1b9e89 input=23fbe3540400f259]*/ |
877 | 0 | { |
878 | 0 | codeset = bind_textdomain_codeset(domain, codeset); |
879 | 0 | if (codeset) { |
880 | 0 | return PyUnicode_DecodeLocale(codeset, NULL); |
881 | 0 | } |
882 | 0 | Py_RETURN_NONE; |
883 | 0 | } |
884 | | #endif // HAVE_BIND_TEXTDOMAIN_CODESET |
885 | | |
886 | | #endif // HAVE_LIBINTL_H |
887 | | |
888 | | |
889 | | /*[clinic input] |
890 | | _locale.getencoding |
891 | | |
892 | | Get the current locale encoding. |
893 | | [clinic start generated code]*/ |
894 | | |
895 | | static PyObject * |
896 | | _locale_getencoding_impl(PyObject *module) |
897 | | /*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/ |
898 | 0 | { |
899 | 0 | return _Py_GetLocaleEncodingObject(); |
900 | 0 | } |
901 | | |
902 | | |
/* Method table of the _locale module.  Every *_METHODDEF entry is
   included under the same preprocessor condition that guards the
   corresponding *_impl function in this file.  The table ends with a
   {NULL, NULL} sentinel. */
static struct PyMethodDef PyLocale_Methods[] = {
    _LOCALE_SETLOCALE_METHODDEF
    _LOCALE_LOCALECONV_METHODDEF
#ifdef HAVE_WCSCOLL
    _LOCALE_STRCOLL_METHODDEF
#endif
#ifdef HAVE_WCSXFRM
    _LOCALE_STRXFRM_METHODDEF
#endif
#if defined(MS_WINDOWS)
    _LOCALE__GETDEFAULTLOCALE_METHODDEF
#endif
#ifdef HAVE_LANGINFO_H
    _LOCALE_NL_LANGINFO_METHODDEF
#endif
#ifdef HAVE_LIBINTL_H
    _LOCALE_GETTEXT_METHODDEF
    _LOCALE_DGETTEXT_METHODDEF
    _LOCALE_DCGETTEXT_METHODDEF
    _LOCALE_TEXTDOMAIN_METHODDEF
    _LOCALE_BINDTEXTDOMAIN_METHODDEF
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
    _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
#endif
#endif
    _LOCALE_GETENCODING_METHODDEF
  {NULL, NULL}
};
931 | | |
932 | | static int |
933 | | _locale_exec(PyObject *module) |
934 | 0 | { |
935 | 0 | #ifdef HAVE_LANGINFO_H |
936 | 0 | int i; |
937 | 0 | #endif |
938 | 0 | #define ADD_INT(module, value) \ |
939 | 0 | do { \ |
940 | 0 | if (PyModule_AddIntConstant(module, #value, value) < 0) { \ |
941 | 0 | return -1; \ |
942 | 0 | } \ |
943 | 0 | } while (0) |
944 | |
|
945 | 0 | ADD_INT(module, LC_CTYPE); |
946 | 0 | ADD_INT(module, LC_TIME); |
947 | 0 | ADD_INT(module, LC_COLLATE); |
948 | 0 | ADD_INT(module, LC_MONETARY); |
949 | | |
950 | 0 | #ifdef LC_MESSAGES |
951 | 0 | ADD_INT(module, LC_MESSAGES); |
952 | 0 | #endif /* LC_MESSAGES */ |
953 | | |
954 | 0 | ADD_INT(module, LC_NUMERIC); |
955 | 0 | ADD_INT(module, LC_ALL); |
956 | 0 | ADD_INT(module, CHAR_MAX); |
957 | | |
958 | 0 | _locale_state *state = get_locale_state(module); |
959 | 0 | state->Error = PyErr_NewException("locale.Error", NULL, NULL); |
960 | 0 | if (PyModule_AddObjectRef(module, "Error", state->Error) < 0) { |
961 | 0 | return -1; |
962 | 0 | } |
963 | | |
964 | 0 | #ifdef HAVE_LANGINFO_H |
965 | 0 | for (i = 0; langinfo_constants[i].name; i++) { |
966 | 0 | if (PyModule_AddIntConstant(module, |
967 | 0 | langinfo_constants[i].name, |
968 | 0 | langinfo_constants[i].value) < 0) { |
969 | 0 | return -1; |
970 | 0 | } |
971 | 0 | } |
972 | 0 | #endif |
973 | | |
974 | 0 | if (PyErr_Occurred()) { |
975 | 0 | return -1; |
976 | 0 | } |
977 | 0 | return 0; |
978 | |
|
979 | 0 | #undef ADD_INT |
980 | 0 | } |
981 | | |
/* Slot table referenced by the module definition: _locale_exec is
   registered as the Py_mod_exec slot, followed by the
   multiple-interpreters and GIL slots and a {0, NULL} sentinel. */
static struct PyModuleDef_Slot _locale_slots[] = {
    {Py_mod_exec, _locale_exec},
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
    {0, NULL}
};
988 | | |
989 | | static int |
990 | | locale_traverse(PyObject *module, visitproc visit, void *arg) |
991 | 0 | { |
992 | 0 | _locale_state *state = get_locale_state(module); |
993 | 0 | Py_VISIT(state->Error); |
994 | 0 | return 0; |
995 | 0 | } |
996 | | |
997 | | static int |
998 | | locale_clear(PyObject *module) |
999 | 0 | { |
1000 | 0 | _locale_state *state = get_locale_state(module); |
1001 | 0 | Py_CLEAR(state->Error); |
1002 | 0 | return 0; |
1003 | 0 | } |
1004 | | |
1005 | | static void |
1006 | | locale_free(void *module) |
1007 | 0 | { |
1008 | 0 | locale_clear((PyObject*)module); |
1009 | 0 | } |
1010 | | |
1011 | | static struct PyModuleDef _localemodule = { |
1012 | | PyModuleDef_HEAD_INIT, |
1013 | | "_locale", |
1014 | | locale__doc__, |
1015 | | sizeof(_locale_state), |
1016 | | PyLocale_Methods, |
1017 | | _locale_slots, |
1018 | | locale_traverse, |
1019 | | locale_clear, |
1020 | | locale_free, |
1021 | | }; |
1022 | | |
1023 | | PyMODINIT_FUNC |
1024 | | PyInit__locale(void) |
1025 | 0 | { |
1026 | 0 | return PyModuleDef_Init(&_localemodule); |
1027 | 0 | } |
1028 | | |
1029 | | /* |
1030 | | Local variables: |
1031 | | c-basic-offset: 4 |
1032 | | indent-tabs-mode: nil |
1033 | | End: |
1034 | | */ |