Coverage Report

Created: 2025-10-10 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_localemodule.c
Line
Count
Source
1
/***********************************************************
2
Copyright (C) 1997, 2002, 2003, 2007, 2008 Martin von Loewis
3
4
Permission to use, copy, modify, and distribute this software and its
5
documentation for any purpose and without fee is hereby granted,
6
provided that the above copyright notice appear in all copies.
7
8
This software comes with no warranty. Use at your own risk.
9
10
******************************************************************/
11
12
#include "Python.h"
13
#include "pycore_fileutils.h"     // _Py_GetLocaleconvNumeric()
14
#include "pycore_pymem.h"         // _PyMem_Strdup()
15
16
#include <locale.h>               // setlocale()
17
#include <string.h>               // strlen()
18
#ifdef HAVE_ERRNO_H
19
#  include <errno.h>              // errno
20
#endif
21
#ifdef HAVE_LANGINFO_H
22
#  include <langinfo.h>           // nl_langinfo()
23
#endif
24
#ifdef HAVE_LIBINTL_H
25
#  include <libintl.h>
26
#endif
27
#ifdef MS_WINDOWS
28
#  ifndef WIN32_LEAN_AND_MEAN
29
#    define WIN32_LEAN_AND_MEAN
30
#  endif
31
#  include <windows.h>
32
#endif
33
34
PyDoc_STRVAR(locale__doc__, "Support for POSIX locales.");
35
36
typedef struct _locale_state {
37
    PyObject *Error;
38
} _locale_state;
39
40
static inline _locale_state*
41
get_locale_state(PyObject *m)
42
0
{
43
0
    void *state = PyModule_GetState(m);
44
0
    assert(state != NULL);
45
0
    return (_locale_state *)state;
46
0
}
47
48
#include "clinic/_localemodule.c.h"
49
50
/*[clinic input]
51
module _locale
52
[clinic start generated code]*/
53
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ed98569b726feada]*/
54
55
/* support functions for formatting floating-point numbers */
56
57
/* the grouping is terminated by either 0 or CHAR_MAX */
58
static PyObject*
59
copy_grouping(const char* s)
60
0
{
61
0
    int i;
62
0
    PyObject *result, *val = NULL;
63
64
0
    if (s[0] == '\0') {
65
        /* empty string: no grouping at all */
66
0
        return PyList_New(0);
67
0
    }
68
69
0
    for (i = 0; s[i] != '\0' && s[i] != CHAR_MAX; i++)
70
0
        ; /* nothing */
71
72
0
    result = PyList_New(i+1);
73
0
    if (!result)
74
0
        return NULL;
75
76
0
    i = -1;
77
0
    do {
78
0
        i++;
79
0
        val = PyLong_FromLong(s[i]);
80
0
        if (val == NULL) {
81
0
            Py_DECREF(result);
82
0
            return NULL;
83
0
        }
84
0
        PyList_SET_ITEM(result, i, val);
85
0
    } while (s[i] != '\0' && s[i] != CHAR_MAX);
86
87
0
    return result;
88
0
}
89
90
#if defined(MS_WINDOWS)
91
92
// 16 is the number of elements in the szCodePage field
// of the __crt_locale_strings structure.
#define MAX_CP_LEN 15

/* Validate one locale name.

   The name spans [locale, end), or the whole NUL-terminated string when
   `end` is NULL.  Returns -1 when the part following the first '.' is
   longer than MAX_CP_LEN characters, 0 otherwise. */
static int
check_locale_name(const char *locale, const char *end)
{
    size_t len = (end != NULL) ? (size_t)(end - locale) : strlen(locale);
    const char *dot = memchr(locale, '.', len);

    if (dot == NULL) {
        /* no code page part at all */
        return 0;
    }
    /* Number of characters after the dot, within the name. */
    if ((locale + len) - dot - 1 > MAX_CP_LEN) {
        return -1;
    }
    return 0;
}
106
107
/* Validate every ';'-separated item of `locale` with check_locale_name().
   Returns -1 as soon as one item is rejected, 0 if all items pass. */
static int
check_locale_name_all(const char *locale)
{
    for (const char *item = locale; ; ) {
        const char *sep = strchr(item, ';');
        if (check_locale_name(item, sep) < 0) {
            return -1;
        }
        if (sep == NULL) {
            /* that was the last item */
            return 0;
        }
        item = sep + 1;
    }
}
123
#endif
124
125
/*[clinic input]
126
_locale.setlocale
127
128
    category: int
129
    locale: str(accept={str, NoneType}) = NULL
130
    /
131
132
Activates/queries locale processing.
133
[clinic start generated code]*/
134
135
static PyObject *
_locale_setlocale_impl(PyObject *module, int category, const char *locale)
/*[clinic end generated code: output=a0e777ae5d2ff117 input=dbe18f1d66c57a6a]*/
{
#if defined(MS_WINDOWS)
    if (category < LC_MIN || category > LC_MAX) {
        PyErr_SetString(get_locale_state(module)->Error,
                        "invalid locale category");
        return NULL;
    }
    if (locale != NULL) {
        int rc;
        if (category == LC_ALL) {
            rc = check_locale_name_all(locale);
        }
        else {
            rc = check_locale_name(locale, NULL);
        }
        if (rc < 0) {
            /* Debug assertion failure on Windows.
             * _Py_BEGIN_SUPPRESS_IPH/_Py_END_SUPPRESS_IPH do not help. */
            PyErr_SetString(get_locale_state(module)->Error,
                "unsupported locale setting");
            return NULL;
        }
    }
#endif

    /* A non-NULL `locale` sets the locale, NULL only queries it; both go
       through the same setlocale() call and differ only in the message
       raised when it returns NULL. */
    char *name = setlocale(category, locale);
    if (name == NULL) {
        const char *msg = (locale != NULL)
            ? "unsupported locale setting"   /* no setting was changed */
            : "locale query failed";
        PyErr_SetString(get_locale_state(module)->Error, msg);
        return NULL;
    }
    return PyUnicode_DecodeLocale(name, NULL);
}
187
188
/* Return 1 if `str` is exactly one character long and that character
   is in the ASCII range (<= 127); return 0 otherwise. */
static int
locale_is_ascii(const char *str)
{
    if (str[0] == '\0' || str[1] != '\0') {
        /* length is not exactly 1 */
        return 0;
    }
    return (unsigned char)str[0] < 128;
}
193
194
/* Return 1 if every byte of the NUL-terminated string `str` is <= 127
   (an empty string qualifies), 0 as soon as a larger byte is found. */
static int
is_all_ascii(const char *str)
{
    const unsigned char *p = (const unsigned char *)str;
    while (*p != 0) {
        if (*p >= 128) {
            return 0;
        }
        p++;
    }
    return 1;
}
204
205
static int
206
locale_decode_monetary(PyObject *dict, struct lconv *lc)
207
0
{
208
0
#ifndef MS_WINDOWS
209
0
    int change_locale;
210
0
    change_locale = (!locale_is_ascii(lc->int_curr_symbol)
211
0
                     || !locale_is_ascii(lc->currency_symbol)
212
0
                     || !locale_is_ascii(lc->mon_decimal_point)
213
0
                     || !locale_is_ascii(lc->mon_thousands_sep));
214
215
    /* Keep a copy of the LC_CTYPE locale */
216
0
    char *oldloc = NULL, *loc = NULL;
217
0
    if (change_locale) {
218
0
        oldloc = setlocale(LC_CTYPE, NULL);
219
0
        if (!oldloc) {
220
0
            PyErr_SetString(PyExc_RuntimeWarning,
221
0
                            "failed to get LC_CTYPE locale");
222
0
            return -1;
223
0
        }
224
225
0
        oldloc = _PyMem_Strdup(oldloc);
226
0
        if (!oldloc) {
227
0
            PyErr_NoMemory();
228
0
            return -1;
229
0
        }
230
231
0
        loc = setlocale(LC_MONETARY, NULL);
232
0
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
233
0
            loc = NULL;
234
0
        }
235
236
0
        if (loc != NULL) {
237
            /* Only set the locale temporarily the LC_CTYPE locale
238
               to the LC_MONETARY locale if the two locales are different and
239
               at least one string is non-ASCII. */
240
0
            setlocale(LC_CTYPE, loc);
241
0
        }
242
0
    }
243
244
0
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
245
#else  /* MS_WINDOWS */
246
/* Use _W_* fields of Windows struct lconv */
247
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
248
#endif /* MS_WINDOWS */
249
250
0
    int res = -1;
251
252
0
#define RESULT_STRING(ATTR) \
253
0
    do { \
254
0
        PyObject *obj; \
255
0
        obj = GET_LOCALE_STRING(ATTR); \
256
0
        if (obj == NULL) { \
257
0
            goto done; \
258
0
        } \
259
0
        if (PyDict_SetItemString(dict, Py_STRINGIFY(ATTR), obj) < 0) { \
260
0
            Py_DECREF(obj); \
261
0
            goto done; \
262
0
        } \
263
0
        Py_DECREF(obj); \
264
0
    } while (0)
265
266
0
    RESULT_STRING(int_curr_symbol);
267
0
    RESULT_STRING(currency_symbol);
268
0
    RESULT_STRING(mon_decimal_point);
269
0
    RESULT_STRING(mon_thousands_sep);
270
0
#undef RESULT_STRING
271
0
#undef GET_LOCALE_STRING
272
273
0
    res = 0;
274
275
0
done:
276
0
#ifndef MS_WINDOWS
277
0
    if (loc != NULL) {
278
0
        setlocale(LC_CTYPE, oldloc);
279
0
    }
280
0
    PyMem_Free(oldloc);
281
0
#endif
282
0
    return res;
283
0
}
284
285
/*[clinic input]
286
_locale.localeconv
287
288
Returns numeric and monetary locale-specific parameters.
289
[clinic start generated code]*/
290
291
static PyObject *
_locale_localeconv_impl(PyObject *module)
/*[clinic end generated code: output=43a54515e0a2aef5 input=f1132d15accf4444]*/
{
    PyObject *value;
    PyObject *decimal_point = NULL, *thousands_sep = NULL;
    int rc;

    PyObject *result = PyDict_New();
    if (result == NULL) {
        return NULL;
    }

    /* The struct returned by localeconv() is used across the C library
       calls made below; it is assumed to stay valid throughout. */
    struct lconv *lc = localeconv();

/* Store OBJ (a new reference or NULL) under KEY, consuming the reference;
   jumps to `failed` if OBJ is NULL or the insertion fails. */
#define STORE(KEY, OBJ) \
    do { \
        if ((OBJ) == NULL) { \
            goto failed; \
        } \
        rc = PyDict_SetItemString(result, (KEY), (OBJ)); \
        Py_DECREF(OBJ); \
        if (rc < 0) { \
            goto failed; \
        } \
    } while (0)

#ifdef MS_WINDOWS
/* Use _W_* fields of Windows struct lconv */
#define DECODE_FIELD(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
#else
#define DECODE_FIELD(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
#endif

#define STORE_STRING(ATTR) \
    do { \
        value = DECODE_FIELD(ATTR); \
        STORE(#ATTR, value); \
    } while (0)

#define STORE_INT(ATTR) \
    do { \
        value = PyLong_FromLong(lc->ATTR); \
        STORE(#ATTR, value); \
    } while (0)

    /* Monetary information: LC_MONETARY encoding */
    if (locale_decode_monetary(result, lc) < 0) {
        goto failed;
    }
    value = copy_grouping(lc->mon_grouping);
    STORE("mon_grouping", value);

    STORE_STRING(positive_sign);
    STORE_STRING(negative_sign);
    STORE_INT(int_frac_digits);
    STORE_INT(frac_digits);
    STORE_INT(p_cs_precedes);
    STORE_INT(p_sep_by_space);
    STORE_INT(n_cs_precedes);
    STORE_INT(n_sep_by_space);
    STORE_INT(p_sign_posn);
    STORE_INT(n_sign_posn);

    /* Numeric information: LC_NUMERIC encoding */
    if (_Py_GetLocaleconvNumeric(lc, &decimal_point, &thousands_sep) < 0) {
        Py_XDECREF(decimal_point);
        Py_XDECREF(thousands_sep);
        goto failed;
    }

    rc = PyDict_SetItemString(result, "decimal_point", decimal_point);
    Py_DECREF(decimal_point);
    if (rc < 0) {
        Py_DECREF(thousands_sep);
        goto failed;
    }

    rc = PyDict_SetItemString(result, "thousands_sep", thousands_sep);
    Py_DECREF(thousands_sep);
    if (rc < 0) {
        goto failed;
    }

    value = copy_grouping(lc->grouping);
    STORE("grouping", value);

    return result;

  failed:
    Py_DECREF(result);
    return NULL;

#undef STORE
#undef STORE_STRING
#undef STORE_INT
#undef DECODE_FIELD
}
392
393
#if defined(HAVE_WCSCOLL)
394
395
/*[clinic input]
396
_locale.strcoll
397
398
    os1: unicode
399
    os2: unicode
400
    /
401
402
Compares two strings according to the locale.
403
[clinic start generated code]*/
404
405
static PyObject *
_locale_strcoll_impl(PyObject *module, PyObject *os1, PyObject *os2)
/*[clinic end generated code: output=82ddc6d62c76d618 input=693cd02bcbf38dd8]*/
{
    /* Convert both unicode strings to wchar_t[] buffers. */
    wchar_t *ws1 = PyUnicode_AsWideCharString(os1, NULL);
    if (ws1 == NULL) {
        return NULL;
    }
    wchar_t *ws2 = PyUnicode_AsWideCharString(os2, NULL);
    if (ws2 == NULL) {
        PyMem_Free(ws1);
        return NULL;
    }

    /* Collate, then release the temporary buffers. */
    PyObject *result = PyLong_FromLong(wcscoll(ws1, ws2));
    PyMem_Free(ws1);
    PyMem_Free(ws2);
    return result;
}
427
#endif
428
429
#ifdef HAVE_WCSXFRM
430
431
/*[clinic input]
432
_locale.strxfrm
433
434
    string as str: unicode
435
    /
436
437
Return a string that can be used as a key for locale-aware comparisons.
438
[clinic start generated code]*/
439
440
static PyObject *
_locale_strxfrm_impl(PyObject *module, PyObject *str)
/*[clinic end generated code: output=3081866ebffc01af input=1378bbe6a88b4780]*/
{
    PyObject *result = NULL;
    wchar_t *xbuf = NULL;       /* receives the transformed string */
    Py_ssize_t cap;             /* source length, then first-try capacity */
    size_t xlen;                /* length reported by wcsxfrm() */

    wchar_t *src = PyUnicode_AsWideCharString(str, &cap);
    if (src == NULL) {
        goto exit;
    }
    if ((size_t)cap != wcslen(src)) {
        PyErr_SetString(PyExc_ValueError,
                        "embedded null character");
        goto exit;
    }

    /* First attempt: assume the transformation does not grow the string. */
    cap += 1;
    /* Yet another +1 is needed to work around a platform bug in wcsxfrm()
     * on macOS. See gh-130567. */
    xbuf = PyMem_New(wchar_t, cap + 1);
    if (xbuf == NULL) {
        PyErr_NoMemory();
        goto exit;
    }
    errno = 0;
    xlen = wcsxfrm(xbuf, src, cap);
    if (errno != 0 && errno != ERANGE) {
        PyErr_SetFromErrno(PyExc_OSError);
        goto exit;
    }

    if (xlen >= (size_t)cap) {
        /* The result did not fit: grow the buffer to the reported size
           and transform again. */
        wchar_t *grown = PyMem_Realloc(xbuf, (xlen + 1) * sizeof(wchar_t));
        if (grown == NULL) {
            PyErr_NoMemory();
            goto exit;
        }
        xbuf = grown;
        errno = 0;
        xlen = wcsxfrm(xbuf, src, xlen + 1);
        if (errno != 0) {
            PyErr_SetFromErrno(PyExc_OSError);
            goto exit;
        }
    }

    /* The result is just a sequence of integers, not necessarily Unicode
       code points, so PyUnicode_FromWideChar() cannot be used here.
       For example, 0xD83D 0xDC0D should not be larger than 0xFF41. */
#if SIZEOF_WCHAR_T == 4
    {
        /* Some codes can exceed the range of Unicode code points
           (0 - 0x10FFFF), so they cannot be passed directly to
           PyUnicode_FromKindAndData().  They are re-encoded in a way that
           preserves the lexicographical order:

           * codes up to and including 0x10000 are kept as they are;
           * every larger code becomes a pair:
             0x10000 | (highest 16 bits), then the lowest 16 bits.
         */
        size_t extra = 0;       /* number of codes that need a pair */
        for (size_t k = 0; k < xlen; k++) {
            if ((Py_UCS4)xbuf[k] > 0x10000u) {
                extra++;
            }
        }
        if (extra != 0) {
            size_t total = xlen + extra; // no integer overflow
            Py_UCS4 *packed = PyMem_New(Py_UCS4, total);
            if (packed == NULL) {
                PyErr_NoMemory();
                goto exit;
            }
            Py_UCS4 *out = packed;
            for (size_t k = 0; k < xlen; k++) {
                Py_UCS4 code = (Py_UCS4)xbuf[k];
                if (code > 0x10000u) {
                    *out++ = (code >> 16) | 0x10000u;
                    *out++ = code & 0xFFFFu;
                }
                else {
                    *out++ = code;
                }
            }
            assert((size_t)(out - packed) == total);
            result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
                                               packed, total);
            PyMem_Free(packed);
            goto exit;
        }
    }
#endif
    result = PyUnicode_FromKindAndData(sizeof(wchar_t), xbuf, xlen);

exit:
    PyMem_Free(xbuf);
    PyMem_Free(src);
    return result;
}
541
#endif
542
543
#if defined(MS_WINDOWS)
544
545
/*[clinic input]
546
_locale._getdefaultlocale
547
548
[clinic start generated code]*/
549
550
static PyObject *
_locale__getdefaultlocale_impl(PyObject *module)
/*[clinic end generated code: output=e6254088579534c2 input=003ea41acd17f7c7]*/
{
    /* Returns a 2-tuple (locale, encoding):
       - encoding is "cp" followed by the ANSI code page from GetACP();
       - locale is "<ISO639 language>_<ISO3166 country>" when both names
         are available, else "0x" followed by the default language id,
         else None. */
    char encoding[20];
    char locale[100];

    PyOS_snprintf(encoding, sizeof(encoding), "cp%u", GetACP());

    if (GetLocaleInfoA(LOCALE_USER_DEFAULT,
                      LOCALE_SISO639LANGNAME,
                      locale, (int)sizeof(locale))) {
        /* Append '_' and the country name right after the language. */
        Py_ssize_t i = strlen(locale);
        locale[i++] = '_';
        if (GetLocaleInfoA(LOCALE_USER_DEFAULT,
                          LOCALE_SISO3166CTRYNAME,
                          locale+i, (int)(sizeof(locale)-i)))
            return Py_BuildValue("ss", locale, encoding);
    }

    /* If we end up here, this windows version didn't know about
       ISO639/ISO3166 names (it's probably Windows 95).  Return the
       Windows language identifier instead (a hexadecimal number) */

    locale[0] = '0';
    locale[1] = 'x';
    if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_IDEFAULTLANGUAGE,
                      locale+2, (int)(sizeof(locale)-2))) {
        return Py_BuildValue("ss", locale, encoding);
    }

    /* cannot determine the language code (very unlikely).
       The "O" format unit takes its own reference to the object, so no
       extra Py_INCREF is needed here (it would leak a reference). */
    return Py_BuildValue("Os", Py_None, encoding);
}
585
#endif
586
587
#ifdef HAVE_LANGINFO_H
588
/* Build one table entry: the constant's name as a string, its value,
   and the locale category it belongs to. */
#define LANGINFO(X, Y) {#X, X, Y}

/* Table of the nl_langinfo() items exposed by this module.  It is used
   both to export the constants and to validate the `key` argument of
   nl_langinfo().  The table ends with an entry whose name is NULL. */
static struct langinfo_constant{
    const char *name;   /* name of the constant */
    int value;          /* nl_langinfo() item */
    int category;       /* LC_* category the item belongs to */
} langinfo_constants[] =
{
    /* These constants should exist on any langinfo implementation */

    /* full weekday names */
    LANGINFO(DAY_1, LC_TIME),
    LANGINFO(DAY_2, LC_TIME),
    LANGINFO(DAY_3, LC_TIME),
    LANGINFO(DAY_4, LC_TIME),
    LANGINFO(DAY_5, LC_TIME),
    LANGINFO(DAY_6, LC_TIME),
    LANGINFO(DAY_7, LC_TIME),

    /* abbreviated weekday names */
    LANGINFO(ABDAY_1, LC_TIME),
    LANGINFO(ABDAY_2, LC_TIME),
    LANGINFO(ABDAY_3, LC_TIME),
    LANGINFO(ABDAY_4, LC_TIME),
    LANGINFO(ABDAY_5, LC_TIME),
    LANGINFO(ABDAY_6, LC_TIME),
    LANGINFO(ABDAY_7, LC_TIME),

    /* full month names */
    LANGINFO(MON_1, LC_TIME),
    LANGINFO(MON_2, LC_TIME),
    LANGINFO(MON_3, LC_TIME),
    LANGINFO(MON_4, LC_TIME),
    LANGINFO(MON_5, LC_TIME),
    LANGINFO(MON_6, LC_TIME),
    LANGINFO(MON_7, LC_TIME),
    LANGINFO(MON_8, LC_TIME),
    LANGINFO(MON_9, LC_TIME),
    LANGINFO(MON_10, LC_TIME),
    LANGINFO(MON_11, LC_TIME),
    LANGINFO(MON_12, LC_TIME),

    /* abbreviated month names */
    LANGINFO(ABMON_1, LC_TIME),
    LANGINFO(ABMON_2, LC_TIME),
    LANGINFO(ABMON_3, LC_TIME),
    LANGINFO(ABMON_4, LC_TIME),
    LANGINFO(ABMON_5, LC_TIME),
    LANGINFO(ABMON_6, LC_TIME),
    LANGINFO(ABMON_7, LC_TIME),
    LANGINFO(ABMON_8, LC_TIME),
    LANGINFO(ABMON_9, LC_TIME),
    LANGINFO(ABMON_10, LC_TIME),
    LANGINFO(ABMON_11, LC_TIME),
    LANGINFO(ABMON_12, LC_TIME),

#ifdef RADIXCHAR
    /* The following are not available with glibc 2.0 */
    LANGINFO(RADIXCHAR, LC_NUMERIC),
    LANGINFO(THOUSEP, LC_NUMERIC),
    /* YESSTR and NOSTR are deprecated in glibc, since they are
       a special case of message translation, which should be rather
       done using gettext. So we don't expose it to Python in the
       first place.
    LANGINFO(YESSTR, LC_MESSAGES),
    LANGINFO(NOSTR, LC_MESSAGES),
    */
    LANGINFO(CRNCYSTR, LC_MONETARY),
#endif

    LANGINFO(D_T_FMT, LC_TIME),
    LANGINFO(D_FMT, LC_TIME),
    LANGINFO(T_FMT, LC_TIME),
    LANGINFO(AM_STR, LC_TIME),
    LANGINFO(PM_STR, LC_TIME),

    /* The following constants are available only with XPG4, but...
       OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have
       a few of the others.
       Solution: ifdef-test them all. */
#ifdef CODESET
    LANGINFO(CODESET, LC_CTYPE),
#endif
#ifdef T_FMT_AMPM
    LANGINFO(T_FMT_AMPM, LC_TIME),
#endif
#ifdef ERA
    LANGINFO(ERA, LC_TIME),
#endif
#ifdef ERA_D_FMT
    LANGINFO(ERA_D_FMT, LC_TIME),
#endif
#ifdef ERA_D_T_FMT
    LANGINFO(ERA_D_T_FMT, LC_TIME),
#endif
#ifdef ERA_T_FMT
    LANGINFO(ERA_T_FMT, LC_TIME),
#endif
#ifdef ALT_DIGITS
    LANGINFO(ALT_DIGITS, LC_TIME),
#endif
#ifdef YESEXPR
    LANGINFO(YESEXPR, LC_MESSAGES),
#endif
#ifdef NOEXPR
    LANGINFO(NOEXPR, LC_MESSAGES),
#endif
#ifdef _DATE_FMT
    /* This is not available in all glibc versions that have CODESET. */
    LANGINFO(_DATE_FMT, LC_TIME),
#endif
    /* sentinel: name == NULL marks the end of the table */
    {NULL, 0, 0}
};
695
696
/* Temporary make the LC_CTYPE locale to be the same as
697
 * the locale of the specified category. */
698
static int
699
change_locale(int category, char **oldloc)
700
0
{
701
    /* Keep a copy of the LC_CTYPE locale */
702
0
    *oldloc = setlocale(LC_CTYPE, NULL);
703
0
    if (!*oldloc) {
704
0
        PyErr_SetString(PyExc_RuntimeError, "failed to get LC_CTYPE locale");
705
0
        return -1;
706
0
    }
707
0
    *oldloc = _PyMem_Strdup(*oldloc);
708
0
    if (!*oldloc) {
709
0
        PyErr_NoMemory();
710
0
        return -1;
711
0
    }
712
713
    /* Set a new locale if it is different. */
714
0
    char *loc = setlocale(category, NULL);
715
0
    if (loc == NULL || strcmp(loc, *oldloc) == 0) {
716
0
        PyMem_Free(*oldloc);
717
0
        *oldloc = NULL;
718
0
        return 0;
719
0
    }
720
721
0
    setlocale(LC_CTYPE, loc);
722
0
    return 1;
723
0
}
724
725
/* Restore the LC_CTYPE locale saved by change_locale() and free the saved
   name.  Does nothing when `oldloc` is NULL. */
static void
restore_locale(char *oldloc)
{
    if (oldloc == NULL) {
        return;
    }
    setlocale(LC_CTYPE, oldloc);
    PyMem_Free(oldloc);
}
734
735
#ifdef __GLIBC__
736
#if defined(ALT_DIGITS) || defined(ERA)
737
/* Convert a sequence of NUL-separated C strings to a Python string
 * containing semicolon-separated items.
 *
 * `result` points to consecutive NUL-terminated strings; the sequence
 * ends at the first empty string or after `max_count` items, whichever
 * comes first.  The joined bytes are decoded with PyUnicode_DecodeLocale().
 *
 * Returns a new reference, or NULL with an exception set.  If there is
 * no item at all (empty input or max_count == 0), an empty string is
 * returned.
 */
static PyObject *
decode_strings(const char *result, size_t max_count)
{
    /* Measure: `total` is the byte size of all items including each
       item's terminating NUL, `count` the number of items. */
    size_t total = 0;
    size_t count = 0;
    while (count < max_count && result[total]) {
        total += strlen(result + total) + 1;
        count++;
    }
    if (count == 0) {
        /* Nothing to join.  Without this guard the separator loop below
           would decrement an unsigned zero and run past the buffer. */
        return PyUnicode_DecodeLocale("", NULL);
    }

    char *buf = PyMem_Malloc(total);
    if (buf == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(buf, result, total);

    /* Replace the NUL after each item but the last with a semicolon;
       the last NUL terminates the joined string. */
    size_t pos = 0;
    for (size_t k = 1; k < count; k++) {
        pos += strlen(buf + pos);
        buf[pos++] = ';';
    }

    PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
    PyMem_Free(buf);
    return pyresult;
}
763
#endif
764
#endif
765
766
/*[clinic input]
767
_locale.nl_langinfo
768
769
    key as item: int
770
    /
771
772
Return the value for the locale information associated with key.
773
[clinic start generated code]*/
774
775
static PyObject *
_locale_nl_langinfo_impl(PyObject *module, int item)
/*[clinic end generated code: output=6aea457b47e077a3 input=00798143eecfeddc]*/
{
    /* Check whether this is a supported constant. GNU libc sometimes
       returns numeric values in the char* return value, which would
       crash PyUnicode_FromString.  */
    const struct langinfo_constant *entry = langinfo_constants;
    while (entry->name != NULL && entry->value != item) {
        entry++;
    }
    if (entry->name == NULL) {
        PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant");
        return NULL;
    }

    /* Check NULL as a workaround for GNU libc's returning NULL
       instead of an empty string for nl_langinfo(ERA).  */
    const char *text = nl_langinfo(item);
    if (text == NULL) {
        text = "";
    }

    /* Decide whether LC_CTYPE must temporarily follow the item's category
       so that the value is decoded with the right encoding.  This applies
       only to non-empty values of categories other than LC_CTYPE. */
    int switch_ctype = 0;
    if (entry->category != LC_CTYPE && *text) {
        switch_ctype = !is_all_ascii(text);
#ifdef __GLIBC__
        // gh-133740: Always change the locale for ALT_DIGITS and ERA
#  ifdef ALT_DIGITS
        if (item == ALT_DIGITS) {
            switch_ctype = 1;
        }
#  endif
#  ifdef ERA
        if (item == ERA) {
            switch_ctype = 1;
        }
#  endif
#endif
    }

    char *oldloc = NULL;
    if (switch_ctype && change_locale(entry->category, &oldloc) < 0) {
        return NULL;
    }

    PyObject *pyresult = NULL;
    int decoded = 0;
#ifdef __GLIBC__
    /* According to the POSIX specification the result must be
     * a sequence of semicolon-separated strings.
     * But in Glibc they are NUL-separated. */
#  ifdef ALT_DIGITS
    if (!decoded && item == ALT_DIGITS && *text) {
        pyresult = decode_strings(text, 100);
        decoded = 1;
    }
#  endif
#  ifdef ERA
    if (!decoded && item == ERA && *text) {
        pyresult = decode_strings(text, SIZE_MAX);
        decoded = 1;
    }
#  endif
#endif
    if (!decoded) {
        pyresult = PyUnicode_DecodeLocale(text, NULL);
    }

    restore_locale(oldloc);
    return pyresult;
}
834
#endif /* HAVE_LANGINFO_H */
835
836
#ifdef HAVE_LIBINTL_H
837
838
/*[clinic input]
839
_locale.gettext
840
841
    msg as in: str
842
    /
843
844
gettext(msg) -> string
845
846
Return translation of msg.
847
[clinic start generated code]*/
848
849
static PyObject *
_locale_gettext_impl(PyObject *module, const char *in)
/*[clinic end generated code: output=493bb4b38a4704fe input=949fc8efc2bb3bc3]*/
{
    /* Look up the message with gettext() and decode the result with the
       locale encoding. */
    const char *translated = gettext(in);
    return PyUnicode_DecodeLocale(translated, NULL);
}
855
856
/*[clinic input]
857
_locale.dgettext
858
859
    domain: str(accept={str, NoneType})
860
    msg as in: str
861
    /
862
863
dgettext(domain, msg) -> string
864
865
Return translation of msg in domain.
866
[clinic start generated code]*/
867
868
static PyObject *
_locale_dgettext_impl(PyObject *module, const char *domain, const char *in)
/*[clinic end generated code: output=3c0cd5287b972c8f input=a277388a635109d8]*/
{
    /* Look up the message in `domain` with dgettext() and decode the
       result with the locale encoding. */
    const char *translated = dgettext(domain, in);
    return PyUnicode_DecodeLocale(translated, NULL);
}
874
875
/*[clinic input]
876
_locale.dcgettext
877
878
    domain: str(accept={str, NoneType})
879
    msg as msgid: str
880
    category: int
881
    /
882
883
Return translation of msg in domain and category.
884
[clinic start generated code]*/
885
886
static PyObject *
_locale_dcgettext_impl(PyObject *module, const char *domain,
                       const char *msgid, int category)
/*[clinic end generated code: output=0f4cc4fce0aa283f input=ec5f8fed4336de67]*/
{
    /* Look up the message in `domain` for `category` with dcgettext() and
       decode the result with the locale encoding. */
    const char *translated = dcgettext(domain, msgid, category);
    return PyUnicode_DecodeLocale(translated, NULL);
}
893
894
/*[clinic input]
895
_locale.textdomain
896
897
    domain: str(accept={str, NoneType})
898
    /
899
900
Set the C library's textdmain to domain, returning the new domain.
901
[clinic start generated code]*/
902
903
static PyObject *
_locale_textdomain_impl(PyObject *module, const char *domain)
/*[clinic end generated code: output=7992df06aadec313 input=66359716f5eb1d38]*/
{
    /* textdomain() returns the resulting domain name, or NULL on
       failure, in which case OSError is raised from errno. */
    const char *current = textdomain(domain);
    if (current == NULL) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }
    return PyUnicode_DecodeLocale(current, NULL);
}
914
915
/*[clinic input]
916
_locale.bindtextdomain
917
918
    domain: str
919
    dir as dirname_obj: object
920
    /
921
922
Bind the C library's domain to dir.
923
[clinic start generated code]*/
924
925
static PyObject *
_locale_bindtextdomain_impl(PyObject *module, const char *domain,
                            PyObject *dirname_obj)
/*[clinic end generated code: output=6d6f3c7b345d785c input=c0dff085acfe272b]*/
{
    /* Binds `domain` to the directory given by `dirname_obj` (a path, or
       None to pass NULL to bindtextdomain()) and returns the directory
       reported by bindtextdomain(), decoded with the locale encoding.
       Raises locale.Error for an empty domain and OSError when
       bindtextdomain() returns NULL. */
    if (domain[0] == '\0') {
        PyErr_SetString(get_locale_state(module)->Error,
                        "domain must be a non-empty string");
        return NULL;
    }

    PyObject *dirname_bytes = NULL;     /* owns the encoded path, if any */
    const char *dirname = NULL;
    if (dirname_obj != Py_None) {
        if (!PyUnicode_FSConverter(dirname_obj, &dirname_bytes)) {
            return NULL;
        }
        dirname = PyBytes_AsString(dirname_bytes);
    }

    PyObject *result = NULL;
    const char *current_dirname = bindtextdomain(domain, dirname);
    if (current_dirname == NULL) {
        /* Raise from errno before anything else can modify it. */
        PyErr_SetFromErrno(PyExc_OSError);
    }
    else {
        result = PyUnicode_DecodeLocale(current_dirname, NULL);
    }

    Py_XDECREF(dirname_bytes);
    return result;
}
956
957
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
958
959
/*[clinic input]
960
_locale.bind_textdomain_codeset
961
962
    domain: str
963
    codeset: str(accept={str, NoneType})
964
    /
965
966
Bind the C library's domain to codeset.
967
[clinic start generated code]*/
968
969
static PyObject *
_locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
                                     const char *codeset)
/*[clinic end generated code: output=fa452f9c8b1b9e89 input=23fbe3540400f259]*/
{
    /* Returns the codeset reported by bind_textdomain_codeset(), or None
       when it reports NULL. */
    const char *current = bind_textdomain_codeset(domain, codeset);
    if (current == NULL) {
        Py_RETURN_NONE;
    }
    return PyUnicode_DecodeLocale(current, NULL);
}
980
#endif  // HAVE_BIND_TEXTDOMAIN_CODESET
981
982
#endif  // HAVE_LIBINTL_H
983
984
985
/*[clinic input]
986
_locale.getencoding
987
988
Get the current locale encoding.
989
[clinic start generated code]*/
990
991
static PyObject *
_locale_getencoding_impl(PyObject *module)
/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/
{
    /* Thin wrapper: the result of _Py_GetLocaleEncodingObject() is
       returned unchanged. */
    PyObject *encoding = _Py_GetLocaleEncodingObject();
    return encoding;
}
997
998
999
static struct PyMethodDef PyLocale_Methods[] = {
1000
    _LOCALE_SETLOCALE_METHODDEF
1001
    _LOCALE_LOCALECONV_METHODDEF
1002
#ifdef HAVE_WCSCOLL
1003
    _LOCALE_STRCOLL_METHODDEF
1004
#endif
1005
#ifdef HAVE_WCSXFRM
1006
    _LOCALE_STRXFRM_METHODDEF
1007
#endif
1008
#if defined(MS_WINDOWS)
1009
    _LOCALE__GETDEFAULTLOCALE_METHODDEF
1010
#endif
1011
#ifdef HAVE_LANGINFO_H
1012
    _LOCALE_NL_LANGINFO_METHODDEF
1013
#endif
1014
#ifdef HAVE_LIBINTL_H
1015
    _LOCALE_GETTEXT_METHODDEF
1016
    _LOCALE_DGETTEXT_METHODDEF
1017
    _LOCALE_DCGETTEXT_METHODDEF
1018
    _LOCALE_TEXTDOMAIN_METHODDEF
1019
    _LOCALE_BINDTEXTDOMAIN_METHODDEF
1020
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
1021
    _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
1022
#endif
1023
#endif
1024
    _LOCALE_GETENCODING_METHODDEF
1025
  {NULL, NULL}
1026
};
1027
1028
static int
1029
_locale_exec(PyObject *module)
1030
0
{
1031
0
#ifdef HAVE_LANGINFO_H
1032
0
    int i;
1033
0
#endif
1034
0
#define ADD_INT(module, value)                                    \
1035
0
    do {                                                          \
1036
0
        if (PyModule_AddIntConstant(module, #value, value) < 0) { \
1037
0
            return -1;                                            \
1038
0
        }                                                         \
1039
0
    } while (0)
1040
1041
0
    ADD_INT(module, LC_CTYPE);
1042
0
    ADD_INT(module, LC_TIME);
1043
0
    ADD_INT(module, LC_COLLATE);
1044
0
    ADD_INT(module, LC_MONETARY);
1045
1046
0
#ifdef LC_MESSAGES
1047
0
    ADD_INT(module, LC_MESSAGES);
1048
0
#endif /* LC_MESSAGES */
1049
1050
0
    ADD_INT(module, LC_NUMERIC);
1051
0
    ADD_INT(module, LC_ALL);
1052
0
    ADD_INT(module, CHAR_MAX);
1053
1054
0
    _locale_state *state = get_locale_state(module);
1055
0
    state->Error = PyErr_NewException("locale.Error", NULL, NULL);
1056
0
    if (PyModule_AddObjectRef(module, "Error", state->Error) < 0) {
1057
0
        return -1;
1058
0
    }
1059
1060
0
#ifdef HAVE_LANGINFO_H
1061
0
    for (i = 0; langinfo_constants[i].name; i++) {
1062
0
        if (PyModule_AddIntConstant(module,
1063
0
                                    langinfo_constants[i].name,
1064
0
                                    langinfo_constants[i].value) < 0) {
1065
0
            return -1;
1066
0
        }
1067
0
    }
1068
0
#endif
1069
1070
0
    if (PyErr_Occurred()) {
1071
0
        return -1;
1072
0
    }
1073
0
    return 0;
1074
1075
0
#undef ADD_INT
1076
0
}
1077
1078
static struct PyModuleDef_Slot _locale_slots[] = {
1079
    {Py_mod_exec, _locale_exec},
1080
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1081
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1082
    {0, NULL}
1083
};
1084
1085
static int
1086
locale_traverse(PyObject *module, visitproc visit, void *arg)
1087
0
{
1088
0
    _locale_state *state = get_locale_state(module);
1089
0
    Py_VISIT(state->Error);
1090
0
    return 0;
1091
0
}
1092
1093
static int
1094
locale_clear(PyObject *module)
1095
0
{
1096
0
    _locale_state *state = get_locale_state(module);
1097
0
    Py_CLEAR(state->Error);
1098
0
    return 0;
1099
0
}
1100
1101
static void
1102
locale_free(void *module)
1103
0
{
1104
0
    locale_clear((PyObject*)module);
1105
0
}
1106
1107
static struct PyModuleDef _localemodule = {
1108
    PyModuleDef_HEAD_INIT,
1109
    "_locale",
1110
    locale__doc__,
1111
    sizeof(_locale_state),
1112
    PyLocale_Methods,
1113
    _locale_slots,
1114
    locale_traverse,
1115
    locale_clear,
1116
    locale_free,
1117
};
1118
1119
PyMODINIT_FUNC
1120
PyInit__locale(void)
1121
0
{
1122
0
    return PyModuleDef_Init(&_localemodule);
1123
0
}
1124
1125
/*
1126
Local variables:
1127
c-basic-offset: 4
1128
indent-tabs-mode: nil
1129
End:
1130
*/