Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Modules/_codecsmodule.c
Line
Count
Source (jump to first uncovered line)
1
/* ------------------------------------------------------------------------
2
3
   _codecs -- Provides access to the codec registry and the builtin
4
              codecs.
5
6
   This module should never be imported directly. The standard library
7
   module "codecs" wraps this builtin module for use within Python.
8
9
   The codec registry is accessible via:
10
11
     register(search_function) -> None
12
13
     lookup(encoding) -> CodecInfo object
14
15
   The builtin Unicode codecs use the following interface:
16
17
     <encoding>_encode(Unicode_object[,errors='strict']) ->
18
        (bytes object, characters consumed)
19
20
     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21
        (Unicode object, bytes consumed)
22
23
   These <encoding>s are available: utf_8, unicode_escape,
24
   raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25
26
27
Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29
Copyright (c) Corporation for National Research Initiatives.
30
31
   ------------------------------------------------------------------------ */
32
33
#include "Python.h"
34
#include "pycore_codecs.h"        // _PyCodec_Lookup()
35
#include "pycore_unicodeobject.h" // _PyUnicode_EncodeCharmap
36
37
#ifdef MS_WINDOWS
38
#include <windows.h>
39
#endif
40
41
/*[clinic input]
42
module _codecs
43
[clinic start generated code]*/
44
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
45
46
#include "pycore_runtime.h"
47
#include "clinic/_codecsmodule.c.h"
48
49
/* --- Registry ----------------------------------------------------------- */
50
51
/*[clinic input]
52
_codecs.register
53
    search_function: object
54
    /
55
56
Register a codec search function.
57
58
Search functions are expected to take one argument, the encoding name in
59
all lower case letters, and either return None, or a tuple of functions
60
(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
61
[clinic start generated code]*/
62
63
static PyObject *
_codecs_register(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
{
    /* Forward the search function to the codec registry.  Any non-zero
       status from PyCodec_Register() is reported to the caller as NULL. */
    int status = PyCodec_Register(search_function);
    if (status != 0) {
        return NULL;
    }

    Py_RETURN_NONE;
}
72
73
/*[clinic input]
74
_codecs.unregister
75
    search_function: object
76
    /
77
78
Unregister a codec search function and clear the registry's cache.
79
80
If the search function is not registered, do nothing.
81
[clinic start generated code]*/
82
83
static PyObject *
_codecs_unregister(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
{
    /* Only a negative status from PyCodec_Unregister() counts as failure;
       everything else yields None. */
    int status = PyCodec_Unregister(search_function);
    if (status < 0) {
        return NULL;
    }

    Py_RETURN_NONE;
}
93
94
/*[clinic input]
95
@permit_long_summary
96
_codecs.lookup
97
    encoding: str
98
    /
99
100
Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
101
[clinic start generated code]*/
102
103
static PyObject *
_codecs_lookup_impl(PyObject *module, const char *encoding)
/*[clinic end generated code: output=9f0afa572080c36d input=02227d5429491ab3]*/
{
    /* Thin wrapper: the registry lookup result is returned unchanged. */
    PyObject *codec_info = _PyCodec_Lookup(encoding);
    return codec_info;
}
109
110
/*[clinic input]
111
_codecs.encode
112
    obj: object
113
    encoding: str(c_default="NULL") = "utf-8"
114
    errors: str(c_default="NULL") = "strict"
115
116
Encodes obj using the codec registered for encoding.
117
118
The default encoding is 'utf-8'.  errors may be given to set a
119
different error handling scheme.  Default is 'strict' meaning that encoding
120
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
121
and 'backslashreplace' as well as any other name registered with
122
codecs.register_error that can handle ValueErrors.
123
[clinic start generated code]*/
124
125
static PyObject *
_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
{
    /* A NULL encoding (the clinic c_default) selects the interpreter's
       default encoding. */
    const char *codec_name = encoding;
    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* The actual work is delegated to the codec registry. */
    return PyCodec_Encode(obj, codec_name, errors);
}
136
137
/*[clinic input]
138
_codecs.decode
139
    obj: object
140
    encoding: str(c_default="NULL") = "utf-8"
141
    errors: str(c_default="NULL") = "strict"
142
143
Decodes obj using the codec registered for encoding.
144
145
Default encoding is 'utf-8'.  errors may be given to set a
146
different error handling scheme.  Default is 'strict' meaning that encoding
147
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
148
and 'backslashreplace' as well as any other name registered with
149
codecs.register_error that can handle ValueErrors.
150
[clinic start generated code]*/
151
152
static PyObject *
_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
{
    /* A NULL encoding (the clinic c_default) selects the interpreter's
       default encoding. */
    const char *codec_name = encoding;
    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* The actual work is delegated to the codec registry. */
    return PyCodec_Decode(obj, codec_name, errors);
}
163
164
/* --- Helpers ------------------------------------------------------------ */
165
166
static
167
PyObject *codec_tuple(PyObject *decoded,
168
                      Py_ssize_t len)
169
1.06M
{
170
1.06M
    if (decoded == NULL)
171
53.4k
        return NULL;
172
1.01M
    return Py_BuildValue("Nn", decoded, len);
173
1.06M
}
174
175
/* --- String codecs ------------------------------------------------------ */
176
/*[clinic input]
177
_codecs.escape_decode
178
    data: Py_buffer(accept={str, buffer})
179
    errors: str(accept={str, NoneType}) = None
180
    /
181
[clinic start generated code]*/
182
183
static PyObject *
184
_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
185
                           const char *errors)
186
/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
187
0
{
188
0
    PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
189
0
                                             errors, 0, NULL);
190
0
    return codec_tuple(decoded, data->len);
191
0
}
192
193
/*[clinic input]
194
_codecs.escape_encode
195
    data: object(subclass_of='&PyBytes_Type')
196
    errors: str(accept={str, NoneType}) = None
197
    /
198
[clinic start generated code]*/
199
200
static PyObject *
_codecs_escape_encode_impl(PyObject *module, PyObject *data,
                           const char *errors)
/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
{
    Py_ssize_t size = PyBytes_GET_SIZE(data);

    /* The output buffer is sized at 4 bytes per input byte (the longest
       escape written below is "\xHH"); reject sizes where 4*size would
       overflow Py_ssize_t. */
    if (size > PY_SSIZE_T_MAX / 4) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too large to encode");
        return NULL;
    }

    Py_ssize_t newsize = 4 * size;
    PyObject *v = PyBytes_FromStringAndSize(NULL, newsize);
    if (v == NULL) {
        return NULL;
    }

    const char *src = PyBytes_AS_STRING(data);
    char *out = PyBytes_AS_STRING(v);

    for (Py_ssize_t i = 0; i < size; i++) {
        /* There's at least enough room for a hex escape */
        assert(newsize - (out - PyBytes_AS_STRING(v)) >= 4);

        char c = src[i];
        switch (c) {
        case '\'':
        case '\\':
            /* Quote and backslash are escaped with a leading backslash. */
            *out++ = '\\';
            *out++ = c;
            break;
        case '\t':
            *out++ = '\\';
            *out++ = 't';
            break;
        case '\n':
            *out++ = '\\';
            *out++ = 'n';
            break;
        case '\r':
            *out++ = '\\';
            *out++ = 'r';
            break;
        default:
            if (c < ' ' || c >= 0x7f) {
                /* Everything outside the printable range becomes \xHH. */
                *out++ = '\\';
                *out++ = 'x';
                *out++ = Py_hexdigits[(c & 0xf0) >> 4];
                *out++ = Py_hexdigits[c & 0xf];
            }
            else {
                *out++ = c;
            }
            break;
        }
    }
    *out = '\0';

    /* Shrink the over-allocated buffer to the bytes actually written. */
    if (_PyBytes_Resize(&v, (out - PyBytes_AS_STRING(v)))) {
        return NULL;
    }

    return codec_tuple(v, size);
}
255
256
/* --- Decoder ------------------------------------------------------------ */
257
/*[clinic input]
258
_codecs.utf_7_decode
259
    data: Py_buffer
260
    errors: str(accept={str, NoneType}) = None
261
    final: bool = False
262
    /
263
[clinic start generated code]*/
264
265
static PyObject *
266
_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
267
                          const char *errors, int final)
268
/*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
269
23.0k
{
270
23.0k
    Py_ssize_t consumed = data->len;
271
23.0k
    PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
272
23.0k
                                                     errors,
273
23.0k
                                                     final ? NULL : &consumed);
274
23.0k
    return codec_tuple(decoded, consumed);
275
23.0k
}
276
277
/*[clinic input]
278
_codecs.utf_8_decode
279
    data: Py_buffer
280
    errors: str(accept={str, NoneType}) = None
281
    final: bool = False
282
    /
283
[clinic start generated code]*/
284
285
static PyObject *
286
_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
287
                          const char *errors, int final)
288
/*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
289
80.0k
{
290
80.0k
    Py_ssize_t consumed = data->len;
291
80.0k
    PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
292
80.0k
                                                     errors,
293
80.0k
                                                     final ? NULL : &consumed);
294
80.0k
    return codec_tuple(decoded, consumed);
295
80.0k
}
296
297
/*[clinic input]
298
_codecs.utf_16_decode
299
    data: Py_buffer
300
    errors: str(accept={str, NoneType}) = None
301
    final: bool = False
302
    /
303
[clinic start generated code]*/
304
305
static PyObject *
306
_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
307
                           const char *errors, int final)
308
/*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
309
14.7k
{
310
14.7k
    int byteorder = 0;
311
    /* This is overwritten unless final is true. */
312
14.7k
    Py_ssize_t consumed = data->len;
313
14.7k
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
314
14.7k
                                                      errors, &byteorder,
315
14.7k
                                                      final ? NULL : &consumed);
316
14.7k
    return codec_tuple(decoded, consumed);
317
14.7k
}
318
319
/*[clinic input]
320
_codecs.utf_16_le_decode
321
    data: Py_buffer
322
    errors: str(accept={str, NoneType}) = None
323
    final: bool = False
324
    /
325
[clinic start generated code]*/
326
327
static PyObject *
328
_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
329
                              const char *errors, int final)
330
/*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
331
14
{
332
14
    int byteorder = -1;
333
    /* This is overwritten unless final is true. */
334
14
    Py_ssize_t consumed = data->len;
335
14
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
336
14
                                                      errors, &byteorder,
337
14
                                                      final ? NULL : &consumed);
338
14
    return codec_tuple(decoded, consumed);
339
14
}
340
341
/*[clinic input]
342
_codecs.utf_16_be_decode
343
    data: Py_buffer
344
    errors: str(accept={str, NoneType}) = None
345
    final: bool = False
346
    /
347
[clinic start generated code]*/
348
349
static PyObject *
350
_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
351
                              const char *errors, int final)
352
/*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
353
149
{
354
149
    int byteorder = 1;
355
    /* This is overwritten unless final is true. */
356
149
    Py_ssize_t consumed = data->len;
357
149
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
358
149
                                                      errors, &byteorder,
359
149
                                                      final ? NULL : &consumed);
360
149
    return codec_tuple(decoded, consumed);
361
149
}
362
363
/* This non-standard version also provides access to the byteorder
364
   parameter of the builtin UTF-16 codec.
365
366
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
367
   being the value in effect at the end of data.
368
369
*/
370
/*[clinic input]
371
_codecs.utf_16_ex_decode
372
    data: Py_buffer
373
    errors: str(accept={str, NoneType}) = None
374
    byteorder: int = 0
375
    final: bool = False
376
    /
377
[clinic start generated code]*/
378
379
static PyObject *
380
_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
381
                              const char *errors, int byteorder, int final)
382
/*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
383
0
{
384
    /* This is overwritten unless final is true. */
385
0
    Py_ssize_t consumed = data->len;
386
387
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
388
0
                                                      errors, &byteorder,
389
0
                                                      final ? NULL : &consumed);
390
0
    if (decoded == NULL)
391
0
        return NULL;
392
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
393
0
}
394
395
/*[clinic input]
396
_codecs.utf_32_decode
397
    data: Py_buffer
398
    errors: str(accept={str, NoneType}) = None
399
    final: bool = False
400
    /
401
[clinic start generated code]*/
402
403
static PyObject *
404
_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
405
                           const char *errors, int final)
406
/*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
407
31.2k
{
408
31.2k
    int byteorder = 0;
409
    /* This is overwritten unless final is true. */
410
31.2k
    Py_ssize_t consumed = data->len;
411
31.2k
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
412
31.2k
                                                      errors, &byteorder,
413
31.2k
                                                      final ? NULL : &consumed);
414
31.2k
    return codec_tuple(decoded, consumed);
415
31.2k
}
416
417
/*[clinic input]
418
_codecs.utf_32_le_decode
419
    data: Py_buffer
420
    errors: str(accept={str, NoneType}) = None
421
    final: bool = False
422
    /
423
[clinic start generated code]*/
424
425
static PyObject *
426
_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
427
                              const char *errors, int final)
428
/*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
429
13
{
430
13
    int byteorder = -1;
431
    /* This is overwritten unless final is true. */
432
13
    Py_ssize_t consumed = data->len;
433
13
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
434
13
                                                      errors, &byteorder,
435
13
                                                      final ? NULL : &consumed);
436
13
    return codec_tuple(decoded, consumed);
437
13
}
438
439
/*[clinic input]
440
_codecs.utf_32_be_decode
441
    data: Py_buffer
442
    errors: str(accept={str, NoneType}) = None
443
    final: bool = False
444
    /
445
[clinic start generated code]*/
446
447
static PyObject *
448
_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
449
                              const char *errors, int final)
450
/*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
451
52
{
452
52
    int byteorder = 1;
453
    /* This is overwritten unless final is true. */
454
52
    Py_ssize_t consumed = data->len;
455
52
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
456
52
                                                      errors, &byteorder,
457
52
                                                      final ? NULL : &consumed);
458
52
    return codec_tuple(decoded, consumed);
459
52
}
460
461
/* This non-standard version also provides access to the byteorder
462
   parameter of the builtin UTF-32 codec.
463
464
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
465
   being the value in effect at the end of data.
466
467
*/
468
/*[clinic input]
469
_codecs.utf_32_ex_decode
470
    data: Py_buffer
471
    errors: str(accept={str, NoneType}) = None
472
    byteorder: int = 0
473
    final: bool = False
474
    /
475
[clinic start generated code]*/
476
477
static PyObject *
478
_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
479
                              const char *errors, int byteorder, int final)
480
/*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
481
0
{
482
0
    Py_ssize_t consumed = data->len;
483
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
484
0
                                                      errors, &byteorder,
485
0
                                                      final ? NULL : &consumed);
486
0
    if (decoded == NULL)
487
0
        return NULL;
488
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
489
0
}
490
491
/*[clinic input]
492
_codecs.unicode_escape_decode
493
    data: Py_buffer(accept={str, buffer})
494
    errors: str(accept={str, NoneType}) = None
495
    final: bool = True
496
    /
497
[clinic start generated code]*/
498
499
static PyObject *
500
_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
501
                                   const char *errors, int final)
502
/*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
503
0
{
504
0
    Py_ssize_t consumed = data->len;
505
0
    PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
506
0
                                                               errors,
507
0
                                                               final ? NULL : &consumed);
508
0
    return codec_tuple(decoded, consumed);
509
0
}
510
511
/*[clinic input]
512
_codecs.raw_unicode_escape_decode
513
    data: Py_buffer(accept={str, buffer})
514
    errors: str(accept={str, NoneType}) = None
515
    final: bool = True
516
    /
517
[clinic start generated code]*/
518
519
static PyObject *
520
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
521
                                       const char *errors, int final)
522
/*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
523
0
{
524
0
    Py_ssize_t consumed = data->len;
525
0
    PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
526
0
                                                                  errors,
527
0
                                                                  final ? NULL : &consumed);
528
0
    return codec_tuple(decoded, consumed);
529
0
}
530
531
/*[clinic input]
532
_codecs.latin_1_decode
533
    data: Py_buffer
534
    errors: str(accept={str, NoneType}) = None
535
    /
536
[clinic start generated code]*/
537
538
static PyObject *
539
_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
540
                            const char *errors)
541
/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
542
5.97k
{
543
5.97k
    PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
544
5.97k
    return codec_tuple(decoded, data->len);
545
5.97k
}
546
547
/*[clinic input]
548
_codecs.ascii_decode
549
    data: Py_buffer
550
    errors: str(accept={str, NoneType}) = None
551
    /
552
[clinic start generated code]*/
553
554
static PyObject *
555
_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
556
                          const char *errors)
557
/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
558
17.0k
{
559
17.0k
    PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
560
17.0k
    return codec_tuple(decoded, data->len);
561
17.0k
}
562
563
/*[clinic input]
564
_codecs.charmap_decode
565
    data: Py_buffer
566
    errors: str(accept={str, NoneType}) = None
567
    mapping: object = None
568
    /
569
[clinic start generated code]*/
570
571
static PyObject *
572
_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
573
                            const char *errors, PyObject *mapping)
574
/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
575
19.0k
{
576
19.0k
    PyObject *decoded;
577
578
19.0k
    if (mapping == Py_None)
579
0
        mapping = NULL;
580
581
19.0k
    decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
582
19.0k
    return codec_tuple(decoded, data->len);
583
19.0k
}
584
585
#ifdef MS_WINDOWS
586
587
/*[clinic input]
588
_codecs.mbcs_decode
589
    data: Py_buffer
590
    errors: str(accept={str, NoneType}) = None
591
    final: bool = False
592
    /
593
[clinic start generated code]*/
594
595
static PyObject *
596
_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
597
                         const char *errors, int final)
598
/*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
599
{
600
    Py_ssize_t consumed = data->len;
601
    PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
602
            errors, final ? NULL : &consumed);
603
    return codec_tuple(decoded, consumed);
604
}
605
606
/*[clinic input]
607
_codecs.oem_decode
608
    data: Py_buffer
609
    errors: str(accept={str, NoneType}) = None
610
    final: bool = False
611
    /
612
[clinic start generated code]*/
613
614
static PyObject *
615
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
616
                        const char *errors, int final)
617
/*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
618
{
619
    Py_ssize_t consumed = data->len;
620
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
621
        data->buf, data->len, errors, final ? NULL : &consumed);
622
    return codec_tuple(decoded, consumed);
623
}
624
625
/*[clinic input]
626
_codecs.code_page_decode
627
    codepage: int
628
    data: Py_buffer
629
    errors: str(accept={str, NoneType}) = None
630
    final: bool = False
631
    /
632
[clinic start generated code]*/
633
634
static PyObject *
635
_codecs_code_page_decode_impl(PyObject *module, int codepage,
636
                              Py_buffer *data, const char *errors, int final)
637
/*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
638
{
639
    Py_ssize_t consumed = data->len;
640
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
641
                                                         data->buf, data->len,
642
                                                         errors,
643
                                                         final ? NULL : &consumed);
644
    return codec_tuple(decoded, consumed);
645
}
646
647
#endif /* MS_WINDOWS */
648
649
/* --- Encoder ------------------------------------------------------------ */
650
651
/*[clinic input]
652
_codecs.readbuffer_encode
653
    data: Py_buffer(accept={str, buffer})
654
    errors: str(accept={str, NoneType}) = None
655
    /
656
[clinic start generated code]*/
657
658
static PyObject *
659
_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
660
                               const char *errors)
661
/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
662
0
{
663
0
    PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
664
0
    return codec_tuple(result, data->len);
665
0
}
666
667
/*[clinic input]
668
_codecs.utf_7_encode
669
    str: unicode
670
    errors: str(accept={str, NoneType}) = None
671
    /
672
[clinic start generated code]*/
673
674
static PyObject *
_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
{
    /* The second tuple item is the length of the input string. */
    PyObject *encoded = _PyUnicode_EncodeUTF7(str, 0, 0, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
682
683
/*[clinic input]
684
_codecs.utf_8_encode
685
    str: unicode
686
    errors: str(accept={str, NoneType}) = None
687
    /
688
[clinic start generated code]*/
689
690
static PyObject *
_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
{
    /* The second tuple item is the length of the input string. */
    PyObject *encoded = _PyUnicode_AsUTF8String(str, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
698
699
/* This version provides access to the byteorder parameter of the
700
   builtin UTF-16 codecs as optional third argument. It defaults to 0
701
   which means: use the native byte order and prepend the data with a
702
   BOM mark.
703
704
*/
705
706
/*[clinic input]
707
_codecs.utf_16_encode
708
    str: unicode
709
    errors: str(accept={str, NoneType}) = None
710
    byteorder: int = 0
711
    /
712
[clinic start generated code]*/
713
714
static PyObject *
_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
{
    /* The caller-supplied byteorder is forwarded unchanged. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, byteorder);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
722
723
/*[clinic input]
724
_codecs.utf_16_le_encode
725
    str: unicode
726
    errors: str(accept={str, NoneType}) = None
727
    /
728
[clinic start generated code]*/
729
730
static PyObject *
_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
{
    /* The little-endian variant passes byte order -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, -1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
738
739
/*[clinic input]
740
_codecs.utf_16_be_encode
741
    str: unicode
742
    errors: str(accept={str, NoneType}) = None
743
    /
744
[clinic start generated code]*/
745
746
static PyObject *
_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
{
    /* The big-endian variant passes byte order +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, +1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
754
755
/* This version provides access to the byteorder parameter of the
756
   builtin UTF-32 codecs as optional third argument. It defaults to 0
757
   which means: use the native byte order and prepend the data with a
758
   BOM mark.
759
760
*/
761
762
/*[clinic input]
763
_codecs.utf_32_encode
764
    str: unicode
765
    errors: str(accept={str, NoneType}) = None
766
    byteorder: int = 0
767
    /
768
[clinic start generated code]*/
769
770
static PyObject *
_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
{
    /* The caller-supplied byteorder is forwarded unchanged. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, byteorder);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
778
779
/*[clinic input]
780
_codecs.utf_32_le_encode
781
    str: unicode
782
    errors: str(accept={str, NoneType}) = None
783
    /
784
[clinic start generated code]*/
785
786
static PyObject *
_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
{
    /* The little-endian variant passes byte order -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, -1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
794
795
/*[clinic input]
796
_codecs.utf_32_be_encode
797
    str: unicode
798
    errors: str(accept={str, NoneType}) = None
799
    /
800
[clinic start generated code]*/
801
802
static PyObject *
_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
{
    /* The big-endian variant passes byte order +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, +1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
810
811
/*[clinic input]
812
_codecs.unicode_escape_encode
813
    str: unicode
814
    errors: str(accept={str, NoneType}) = None
815
    /
816
[clinic start generated code]*/
817
818
static PyObject *
_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                   const char *errors)
/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
{
    /* `errors` is accepted for signature compatibility but is not
       forwarded: the encoder call takes only the string. */
    PyObject *encoded = PyUnicode_AsUnicodeEscapeString(str);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
826
827
/*[clinic input]
828
_codecs.raw_unicode_escape_encode
829
    str: unicode
830
    errors: str(accept={str, NoneType}) = None
831
    /
832
[clinic start generated code]*/
833
834
static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                       const char *errors)
/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
{
    /* `errors` is accepted for signature compatibility but is not
       forwarded: the encoder call takes only the string. */
    PyObject *encoded = PyUnicode_AsRawUnicodeEscapeString(str);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
842
843
/*[clinic input]
844
_codecs.latin_1_encode
845
    str: unicode
846
    errors: str(accept={str, NoneType}) = None
847
    /
848
[clinic start generated code]*/
849
850
static PyObject *
_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
                            const char *errors)
/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
{
    /* The second tuple item is the length of the input string. */
    PyObject *encoded = _PyUnicode_AsLatin1String(str, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
858
859
/*[clinic input]
860
_codecs.ascii_encode
861
    str: unicode
862
    errors: str(accept={str, NoneType}) = None
863
    /
864
[clinic start generated code]*/
865
866
static PyObject *
_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
{
    /* The second tuple item is the length of the input string. */
    PyObject *encoded = _PyUnicode_AsASCIIString(str, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
874
875
/*[clinic input]
876
_codecs.charmap_encode
877
    str: unicode
878
    errors: str(accept={str, NoneType}) = None
879
    mapping: object = None
880
    /
881
[clinic start generated code]*/
882
883
static PyObject *
_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
                            const char *errors, PyObject *mapping)
/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
{
    /* Encode `str` through a character mapping.
     *
     * The Python-level default for `mapping` is None; the C encoder is
     * given NULL in that case rather than the None object itself.
     */
    PyObject *table = (mapping == Py_None) ? NULL : mapping;
    PyObject *encoded = _PyUnicode_EncodeCharmap(str, table, errors);

    /* Pair the encoder result with the input length. */
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
894
895
/*[clinic input]
896
_codecs.charmap_build
897
    map: unicode
898
    /
899
[clinic start generated code]*/
900
901
static PyObject *
_codecs_charmap_build_impl(PyObject *module, PyObject *map)
/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
{
    /* Thin wrapper: delegate to PyUnicode_BuildEncodingMap() and return
     * its result (including NULL) unchanged.  `map` is a str, as declared
     * in the clinic input. */
    PyObject *encoding_map = PyUnicode_BuildEncodingMap(map);

    return encoding_map;
}
907
908
#ifdef MS_WINDOWS
909
910
/*[clinic input]
911
_codecs.mbcs_encode
912
    str: unicode
913
    errors: str(accept={str, NoneType}) = None
914
    /
915
[clinic start generated code]*/
916
917
static PyObject *
_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
{
    /* Windows only (compiled under MS_WINDOWS): encode `str` using the
     * CP_ACP code page, forwarding the error-handler name.
     *
     * The encoder result is paired with the input length via codec_tuple().
     */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_ACP, str, errors);

    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
924
925
/*[clinic input]
926
_codecs.oem_encode
927
    str: unicode
928
    errors: str(accept={str, NoneType}) = None
929
    /
930
[clinic start generated code]*/
931
932
static PyObject *
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
{
    /* Windows only (compiled under MS_WINDOWS): encode `str` using the
     * CP_OEMCP code page, forwarding the error-handler name.
     *
     * The encoder result is paired with the input length via codec_tuple().
     */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_OEMCP, str, errors);

    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
939
940
/*[clinic input]
941
_codecs.code_page_encode
942
    code_page: int
943
    str: unicode
944
    errors: str(accept={str, NoneType}) = None
945
    /
946
[clinic start generated code]*/
947
948
static PyObject *
_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
{
    /* Windows only (compiled under MS_WINDOWS): encode `str` using the
     * caller-supplied code page identifier.  `code_page` is passed through
     * to PyUnicode_EncodeCodePage() without validation here.
     *
     * The encoder result is paired with the input length via codec_tuple().
     */
    PyObject *encoded = PyUnicode_EncodeCodePage(code_page, str, errors);

    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
956
957
#endif /* MS_WINDOWS */
958
959
/* --- Error handler registry --------------------------------------------- */
960
961
/*[clinic input]
962
_codecs.register_error
963
    errors: str
964
    handler: object
965
    /
966
967
Register the specified error handler under the name errors.
968
969
handler must be a callable object, that will be called with an exception
970
instance containing information about the location of the encoding/decoding
971
error and must return a (replacement, new position) tuple.
972
[clinic start generated code]*/
973
974
static PyObject *
_codecs_register_error_impl(PyObject *module, const char *errors,
                            PyObject *handler)
/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
{
    /* Register `handler` under the name `errors`.
     *
     * Any non-zero status from PyCodec_RegisterError() is treated as
     * failure and reported to the caller as NULL; otherwise None is
     * returned.
     */
    if (PyCodec_RegisterError(errors, handler) != 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
983
984
/*[clinic input]
985
_codecs._unregister_error -> bool
986
    errors: str
987
    /
988
989
Un-register the specified error handler for the error handling `errors'.
990
991
Only custom error handlers can be un-registered. An exception is raised
992
if the error handling is a built-in one (e.g., 'strict'), or if an error
993
occurs.
994
995
Otherwise, this returns True if a custom handler has been successfully
996
un-registered, and False if no custom handler for the specified error
997
handling exists.
998
999
[clinic start generated code]*/
1000
1001
static int
1002
_codecs__unregister_error_impl(PyObject *module, const char *errors)
1003
/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
1004
0
{
1005
0
    return _PyCodec_UnregisterError(errors);
1006
0
}
1007
1008
/*[clinic input]
1009
_codecs.lookup_error
1010
    name: str
1011
    /
1012
1013
lookup_error(errors) -> handler
1014
1015
Return the error handler for the specified error handling name or raise a
1016
LookupError, if no handler exists under this name.
1017
[clinic start generated code]*/
1018
1019
static PyObject *
_codecs_lookup_error_impl(PyObject *module, const char *name)
/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
{
    /* Thin wrapper: look the handler up by `name` and return whatever
     * PyCodec_LookupError() returns (including NULL) unchanged. */
    PyObject *handler = PyCodec_LookupError(name);

    return handler;
}
1025
1026
/* --- Module API --------------------------------------------------------- */
1027
1028
static PyMethodDef _codecs_functions[] = {
1029
    _CODECS_REGISTER_METHODDEF
1030
    _CODECS_UNREGISTER_METHODDEF
1031
    _CODECS_LOOKUP_METHODDEF
1032
    _CODECS_ENCODE_METHODDEF
1033
    _CODECS_DECODE_METHODDEF
1034
    _CODECS_ESCAPE_ENCODE_METHODDEF
1035
    _CODECS_ESCAPE_DECODE_METHODDEF
1036
    _CODECS_UTF_8_ENCODE_METHODDEF
1037
    _CODECS_UTF_8_DECODE_METHODDEF
1038
    _CODECS_UTF_7_ENCODE_METHODDEF
1039
    _CODECS_UTF_7_DECODE_METHODDEF
1040
    _CODECS_UTF_16_ENCODE_METHODDEF
1041
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
1042
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
1043
    _CODECS_UTF_16_DECODE_METHODDEF
1044
    _CODECS_UTF_16_LE_DECODE_METHODDEF
1045
    _CODECS_UTF_16_BE_DECODE_METHODDEF
1046
    _CODECS_UTF_16_EX_DECODE_METHODDEF
1047
    _CODECS_UTF_32_ENCODE_METHODDEF
1048
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
1049
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
1050
    _CODECS_UTF_32_DECODE_METHODDEF
1051
    _CODECS_UTF_32_LE_DECODE_METHODDEF
1052
    _CODECS_UTF_32_BE_DECODE_METHODDEF
1053
    _CODECS_UTF_32_EX_DECODE_METHODDEF
1054
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1055
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1056
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1057
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1058
    _CODECS_LATIN_1_ENCODE_METHODDEF
1059
    _CODECS_LATIN_1_DECODE_METHODDEF
1060
    _CODECS_ASCII_ENCODE_METHODDEF
1061
    _CODECS_ASCII_DECODE_METHODDEF
1062
    _CODECS_CHARMAP_ENCODE_METHODDEF
1063
    _CODECS_CHARMAP_DECODE_METHODDEF
1064
    _CODECS_CHARMAP_BUILD_METHODDEF
1065
    _CODECS_READBUFFER_ENCODE_METHODDEF
1066
    _CODECS_MBCS_ENCODE_METHODDEF
1067
    _CODECS_MBCS_DECODE_METHODDEF
1068
    _CODECS_OEM_ENCODE_METHODDEF
1069
    _CODECS_OEM_DECODE_METHODDEF
1070
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
1071
    _CODECS_CODE_PAGE_DECODE_METHODDEF
1072
    _CODECS_REGISTER_ERROR_METHODDEF
1073
    _CODECS__UNREGISTER_ERROR_METHODDEF
1074
    _CODECS_LOOKUP_ERROR_METHODDEF
1075
    {NULL, NULL}                /* sentinel */
1076
};
1077
1078
static PyModuleDef_Slot _codecs_slots[] = {
1079
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1080
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1081
    {0, NULL}
1082
};
1083
1084
static struct PyModuleDef codecsmodule = {
1085
        PyModuleDef_HEAD_INIT,
1086
        "_codecs",
1087
        NULL,
1088
        0,
1089
        _codecs_functions,
1090
        _codecs_slots,
1091
        NULL,
1092
        NULL,
1093
        NULL
1094
};
1095
1096
PyMODINIT_FUNC
1097
PyInit__codecs(void)
1098
16
{
1099
16
    return PyModuleDef_Init(&codecsmodule);
1100
16
}