Coverage Report

Created: 2025-10-10 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_codecsmodule.c
Line
Count
Source
1
/* ------------------------------------------------------------------------
2
3
   _codecs -- Provides access to the codec registry and the builtin
4
              codecs.
5
6
   This module should never be imported directly. The standard library
7
   module "codecs" wraps this builtin module for use within Python.
8
9
   The codec registry is accessible via:
10
11
     register(search_function) -> None
12
13
     lookup(encoding) -> CodecInfo object
14
15
   The builtin Unicode codecs use the following interface:
16
17
     <encoding>_encode(Unicode_object[,errors='strict']) ->
18
        (string object, bytes consumed)
19
20
     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21
        (Unicode object, bytes consumed)
22
23
   These <encoding>s are available: utf_8, unicode_escape,
24
   raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25
26
27
Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29
Copyright (c) Corporation for National Research Initiatives.
30
31
   ------------------------------------------------------------------------ */
32
33
#include "Python.h"
34
#include "pycore_codecs.h"        // _PyCodec_Lookup()
35
#include "pycore_unicodeobject.h" // _PyUnicode_EncodeCharmap
36
37
#ifdef MS_WINDOWS
38
#include <windows.h>
39
#endif
40
41
/*[clinic input]
42
module _codecs
43
[clinic start generated code]*/
44
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
45
46
#include "pycore_runtime.h"
47
#include "clinic/_codecsmodule.c.h"
48
49
/* --- Registry ----------------------------------------------------------- */
50
51
/*[clinic input]
52
_codecs.register
53
    search_function: object
54
    /
55
56
Register a codec search function.
57
58
Search functions are expected to take one argument, the encoding name in
59
all lower case letters, and either return None, or a tuple of functions
60
(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
61
[clinic start generated code]*/
62
63
static PyObject *
_codecs_register(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
{
    /* Hand the search function to the codec registry.  A non-zero
       status from PyCodec_Register() is reported to the caller as NULL;
       otherwise None is returned. */
    if (PyCodec_Register(search_function) != 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
72
73
/*[clinic input]
74
_codecs.unregister
75
    search_function: object
76
    /
77
78
Unregister a codec search function and clear the registry's cache.
79
80
If the search function is not registered, do nothing.
81
[clinic start generated code]*/
82
83
static PyObject *
_codecs_unregister(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
{
    /* A negative status from PyCodec_Unregister() is reported as NULL;
       any other status yields None. */
    int status = PyCodec_Unregister(search_function);
    if (status >= 0) {
        Py_RETURN_NONE;
    }
    return NULL;
}
93
94
/*[clinic input]
95
@permit_long_summary
96
_codecs.lookup
97
    encoding: str
98
    /
99
100
Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
101
[clinic start generated code]*/
102
103
static PyObject *
_codecs_lookup_impl(PyObject *module, const char *encoding)
/*[clinic end generated code: output=9f0afa572080c36d input=02227d5429491ab3]*/
{
    /* Delegate directly to the registry lookup; its result (including
       NULL) is returned unchanged. */
    PyObject *codec_info = _PyCodec_Lookup(encoding);
    return codec_info;
}
109
110
/*[clinic input]
111
_codecs.encode
112
    obj: object
113
    encoding: str(c_default="NULL") = "utf-8"
114
    errors: str(c_default="NULL") = "strict"
115
116
Encodes obj using the codec registered for encoding.
117
118
The default encoding is 'utf-8'.  errors may be given to set a
119
different error handling scheme.  Default is 'strict' meaning that encoding
120
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
121
and 'backslashreplace' as well as any other name registered with
122
codecs.register_error that can handle ValueErrors.
123
[clinic start generated code]*/
124
125
static PyObject *
_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
{
    /* A NULL encoding means "not given": substitute the default
       encoding name before going through the codec registry. */
    const char *codec_name = (encoding != NULL)
                                 ? encoding
                                 : PyUnicode_GetDefaultEncoding();

    return PyCodec_Encode(obj, codec_name, errors);
}
136
137
/*[clinic input]
138
_codecs.decode
139
    obj: object
140
    encoding: str(c_default="NULL") = "utf-8"
141
    errors: str(c_default="NULL") = "strict"
142
143
Decodes obj using the codec registered for encoding.
144
145
Default encoding is 'utf-8'.  errors may be given to set a
146
different error handling scheme.  Default is 'strict' meaning that encoding
147
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
148
and 'backslashreplace' as well as any other name registered with
149
codecs.register_error that can handle ValueErrors.
150
[clinic start generated code]*/
151
152
static PyObject *
_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
{
    /* A NULL encoding means "not given": substitute the default
       encoding name before going through the codec registry. */
    const char *codec_name = (encoding != NULL)
                                 ? encoding
                                 : PyUnicode_GetDefaultEncoding();

    return PyCodec_Decode(obj, codec_name, errors);
}
163
164
/* --- Helpers ------------------------------------------------------------ */
165
166
static
167
PyObject *codec_tuple(PyObject *decoded,
168
                      Py_ssize_t len)
169
973k
{
170
973k
    if (decoded == NULL)
171
45.0k
        return NULL;
172
927k
    return Py_BuildValue("Nn", decoded, len);
173
973k
}
174
175
/* --- String codecs ------------------------------------------------------ */
176
/*[clinic input]
177
_codecs.escape_decode
178
    data: Py_buffer(accept={str, buffer})
179
    errors: str(accept={str, NoneType}) = None
180
    /
181
[clinic start generated code]*/
182
183
static PyObject *
184
_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
185
                           const char *errors)
186
/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
187
0
{
188
0
    PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
189
0
                                             errors, 0, NULL);
190
0
    return codec_tuple(decoded, data->len);
191
0
}
192
193
/*[clinic input]
194
_codecs.escape_encode
195
    data: object(subclass_of='&PyBytes_Type')
196
    errors: str(accept={str, NoneType}) = None
197
    /
198
[clinic start generated code]*/
199
200
static PyObject *
_codecs_escape_encode_impl(PyObject *module, PyObject *data,
                           const char *errors)
/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
{
    /* Every input byte produces at most four output bytes (a "\xNN"
       escape), so the output buffer is sized at 4*size.  Reject sizes
       for which that product would not fit in Py_ssize_t. */
    Py_ssize_t size = PyBytes_GET_SIZE(data);
    if (size > PY_SSIZE_T_MAX / 4) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too large to encode");
        return NULL;
    }
    Py_ssize_t capacity = 4*size;

    PyBytesWriter *writer = PyBytesWriter_Create(capacity);
    if (writer == NULL) {
        return NULL;
    }
    char *out = PyBytesWriter_GetData(writer);
    const char *src = PyBytes_AS_STRING(data);

    for (Py_ssize_t i = 0; i < size; i++) {
        /* There's at least enough room for a hex escape */
        assert(capacity - (out - (char*)PyBytesWriter_GetData(writer)) >= 4);

        char c = src[i];
        switch (c) {
        case '\'':
        case '\\':
            /* Quote and backslash are escaped with a backslash. */
            *out++ = '\\';
            *out++ = c;
            break;
        case '\t':
            *out++ = '\\';
            *out++ = 't';
            break;
        case '\n':
            *out++ = '\\';
            *out++ = 'n';
            break;
        case '\r':
            *out++ = '\\';
            *out++ = 'r';
            break;
        default:
            if (c < ' ' || c >= 0x7f) {
                /* Remaining bytes outside ' '..0x7e become \xNN. */
                *out++ = '\\';
                *out++ = 'x';
                *out++ = Py_hexdigits[(c & 0xf0) >> 4];
                *out++ = Py_hexdigits[c & 0xf];
            }
            else {
                /* Everything else is copied through unchanged. */
                *out++ = c;
            }
            break;
        }
    }

    /* Finish the writer at the current write position and pair the
       result with the input size. */
    PyObject *escaped = PyBytesWriter_FinishWithPointer(writer, out);
    return codec_tuple(escaped, size);
}
250
251
/* --- Decoder ------------------------------------------------------------ */
252
/*[clinic input]
253
_codecs.utf_7_decode
254
    data: Py_buffer
255
    errors: str(accept={str, NoneType}) = None
256
    final: bool = False
257
    /
258
[clinic start generated code]*/
259
260
static PyObject *
261
_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
262
                          const char *errors, int final)
263
/*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
264
32.5k
{
265
32.5k
    Py_ssize_t consumed = data->len;
266
32.5k
    PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
267
32.5k
                                                     errors,
268
32.5k
                                                     final ? NULL : &consumed);
269
32.5k
    return codec_tuple(decoded, consumed);
270
32.5k
}
271
272
/*[clinic input]
273
_codecs.utf_8_decode
274
    data: Py_buffer
275
    errors: str(accept={str, NoneType}) = None
276
    final: bool = False
277
    /
278
[clinic start generated code]*/
279
280
static PyObject *
281
_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
282
                          const char *errors, int final)
283
/*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
284
62.4k
{
285
62.4k
    Py_ssize_t consumed = data->len;
286
62.4k
    PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
287
62.4k
                                                     errors,
288
62.4k
                                                     final ? NULL : &consumed);
289
62.4k
    return codec_tuple(decoded, consumed);
290
62.4k
}
291
292
/*[clinic input]
293
_codecs.utf_16_decode
294
    data: Py_buffer
295
    errors: str(accept={str, NoneType}) = None
296
    final: bool = False
297
    /
298
[clinic start generated code]*/
299
300
static PyObject *
301
_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
302
                           const char *errors, int final)
303
/*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
304
14.4k
{
305
14.4k
    int byteorder = 0;
306
    /* This is overwritten unless final is true. */
307
14.4k
    Py_ssize_t consumed = data->len;
308
14.4k
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
309
14.4k
                                                      errors, &byteorder,
310
14.4k
                                                      final ? NULL : &consumed);
311
14.4k
    return codec_tuple(decoded, consumed);
312
14.4k
}
313
314
/*[clinic input]
315
_codecs.utf_16_le_decode
316
    data: Py_buffer
317
    errors: str(accept={str, NoneType}) = None
318
    final: bool = False
319
    /
320
[clinic start generated code]*/
321
322
static PyObject *
323
_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
324
                              const char *errors, int final)
325
/*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
326
14
{
327
14
    int byteorder = -1;
328
    /* This is overwritten unless final is true. */
329
14
    Py_ssize_t consumed = data->len;
330
14
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
331
14
                                                      errors, &byteorder,
332
14
                                                      final ? NULL : &consumed);
333
14
    return codec_tuple(decoded, consumed);
334
14
}
335
336
/*[clinic input]
337
_codecs.utf_16_be_decode
338
    data: Py_buffer
339
    errors: str(accept={str, NoneType}) = None
340
    final: bool = False
341
    /
342
[clinic start generated code]*/
343
344
static PyObject *
345
_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
346
                              const char *errors, int final)
347
/*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
348
127
{
349
127
    int byteorder = 1;
350
    /* This is overwritten unless final is true. */
351
127
    Py_ssize_t consumed = data->len;
352
127
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
353
127
                                                      errors, &byteorder,
354
127
                                                      final ? NULL : &consumed);
355
127
    return codec_tuple(decoded, consumed);
356
127
}
357
358
/* This non-standard version also provides access to the byteorder
359
   parameter of the builtin UTF-16 codec.
360
361
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
362
   being the value in effect at the end of data.
363
364
*/
365
/*[clinic input]
366
_codecs.utf_16_ex_decode
367
    data: Py_buffer
368
    errors: str(accept={str, NoneType}) = None
369
    byteorder: int = 0
370
    final: bool = False
371
    /
372
[clinic start generated code]*/
373
374
static PyObject *
375
_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
376
                              const char *errors, int byteorder, int final)
377
/*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
378
0
{
379
    /* This is overwritten unless final is true. */
380
0
    Py_ssize_t consumed = data->len;
381
382
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
383
0
                                                      errors, &byteorder,
384
0
                                                      final ? NULL : &consumed);
385
0
    if (decoded == NULL)
386
0
        return NULL;
387
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
388
0
}
389
390
/*[clinic input]
391
_codecs.utf_32_decode
392
    data: Py_buffer
393
    errors: str(accept={str, NoneType}) = None
394
    final: bool = False
395
    /
396
[clinic start generated code]*/
397
398
static PyObject *
399
_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
400
                           const char *errors, int final)
401
/*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
402
19.0k
{
403
19.0k
    int byteorder = 0;
404
    /* This is overwritten unless final is true. */
405
19.0k
    Py_ssize_t consumed = data->len;
406
19.0k
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
407
19.0k
                                                      errors, &byteorder,
408
19.0k
                                                      final ? NULL : &consumed);
409
19.0k
    return codec_tuple(decoded, consumed);
410
19.0k
}
411
412
/*[clinic input]
413
_codecs.utf_32_le_decode
414
    data: Py_buffer
415
    errors: str(accept={str, NoneType}) = None
416
    final: bool = False
417
    /
418
[clinic start generated code]*/
419
420
static PyObject *
421
_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
422
                              const char *errors, int final)
423
/*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
424
13
{
425
13
    int byteorder = -1;
426
    /* This is overwritten unless final is true. */
427
13
    Py_ssize_t consumed = data->len;
428
13
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
429
13
                                                      errors, &byteorder,
430
13
                                                      final ? NULL : &consumed);
431
13
    return codec_tuple(decoded, consumed);
432
13
}
433
434
/*[clinic input]
435
_codecs.utf_32_be_decode
436
    data: Py_buffer
437
    errors: str(accept={str, NoneType}) = None
438
    final: bool = False
439
    /
440
[clinic start generated code]*/
441
442
static PyObject *
443
_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
444
                              const char *errors, int final)
445
/*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
446
66
{
447
66
    int byteorder = 1;
448
    /* This is overwritten unless final is true. */
449
66
    Py_ssize_t consumed = data->len;
450
66
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
451
66
                                                      errors, &byteorder,
452
66
                                                      final ? NULL : &consumed);
453
66
    return codec_tuple(decoded, consumed);
454
66
}
455
456
/* This non-standard version also provides access to the byteorder
457
   parameter of the builtin UTF-32 codec.
458
459
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
460
   being the value in effect at the end of data.
461
462
*/
463
/*[clinic input]
464
_codecs.utf_32_ex_decode
465
    data: Py_buffer
466
    errors: str(accept={str, NoneType}) = None
467
    byteorder: int = 0
468
    final: bool = False
469
    /
470
[clinic start generated code]*/
471
472
static PyObject *
473
_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
474
                              const char *errors, int byteorder, int final)
475
/*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
476
0
{
477
0
    Py_ssize_t consumed = data->len;
478
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
479
0
                                                      errors, &byteorder,
480
0
                                                      final ? NULL : &consumed);
481
0
    if (decoded == NULL)
482
0
        return NULL;
483
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
484
0
}
485
486
/*[clinic input]
487
_codecs.unicode_escape_decode
488
    data: Py_buffer(accept={str, buffer})
489
    errors: str(accept={str, NoneType}) = None
490
    final: bool = True
491
    /
492
[clinic start generated code]*/
493
494
static PyObject *
495
_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
496
                                   const char *errors, int final)
497
/*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
498
0
{
499
0
    Py_ssize_t consumed = data->len;
500
0
    PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
501
0
                                                               errors,
502
0
                                                               final ? NULL : &consumed);
503
0
    return codec_tuple(decoded, consumed);
504
0
}
505
506
/*[clinic input]
507
_codecs.raw_unicode_escape_decode
508
    data: Py_buffer(accept={str, buffer})
509
    errors: str(accept={str, NoneType}) = None
510
    final: bool = True
511
    /
512
[clinic start generated code]*/
513
514
static PyObject *
515
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
516
                                       const char *errors, int final)
517
/*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
518
0
{
519
0
    Py_ssize_t consumed = data->len;
520
0
    PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
521
0
                                                                  errors,
522
0
                                                                  final ? NULL : &consumed);
523
0
    return codec_tuple(decoded, consumed);
524
0
}
525
526
/*[clinic input]
527
_codecs.latin_1_decode
528
    data: Py_buffer
529
    errors: str(accept={str, NoneType}) = None
530
    /
531
[clinic start generated code]*/
532
533
static PyObject *
534
_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
535
                            const char *errors)
536
/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
537
5.70k
{
538
5.70k
    PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
539
5.70k
    return codec_tuple(decoded, data->len);
540
5.70k
}
541
542
/*[clinic input]
543
_codecs.ascii_decode
544
    data: Py_buffer
545
    errors: str(accept={str, NoneType}) = None
546
    /
547
[clinic start generated code]*/
548
549
static PyObject *
550
_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
551
                          const char *errors)
552
/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
553
16.2k
{
554
16.2k
    PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
555
16.2k
    return codec_tuple(decoded, data->len);
556
16.2k
}
557
558
/*[clinic input]
559
_codecs.charmap_decode
560
    data: Py_buffer
561
    errors: str(accept={str, NoneType}) = None
562
    mapping: object = None
563
    /
564
[clinic start generated code]*/
565
566
static PyObject *
567
_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
568
                            const char *errors, PyObject *mapping)
569
/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
570
11.0k
{
571
11.0k
    PyObject *decoded;
572
573
11.0k
    if (mapping == Py_None)
574
0
        mapping = NULL;
575
576
11.0k
    decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
577
11.0k
    return codec_tuple(decoded, data->len);
578
11.0k
}
579
580
#ifdef MS_WINDOWS
581
582
/*[clinic input]
583
_codecs.mbcs_decode
584
    data: Py_buffer
585
    errors: str(accept={str, NoneType}) = None
586
    final: bool = False
587
    /
588
[clinic start generated code]*/
589
590
static PyObject *
591
_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
592
                         const char *errors, int final)
593
/*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
594
{
595
    Py_ssize_t consumed = data->len;
596
    PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
597
            errors, final ? NULL : &consumed);
598
    return codec_tuple(decoded, consumed);
599
}
600
601
/*[clinic input]
602
_codecs.oem_decode
603
    data: Py_buffer
604
    errors: str(accept={str, NoneType}) = None
605
    final: bool = False
606
    /
607
[clinic start generated code]*/
608
609
static PyObject *
610
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
611
                        const char *errors, int final)
612
/*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
613
{
614
    Py_ssize_t consumed = data->len;
615
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
616
        data->buf, data->len, errors, final ? NULL : &consumed);
617
    return codec_tuple(decoded, consumed);
618
}
619
620
/*[clinic input]
621
_codecs.code_page_decode
622
    codepage: int
623
    data: Py_buffer
624
    errors: str(accept={str, NoneType}) = None
625
    final: bool = False
626
    /
627
[clinic start generated code]*/
628
629
static PyObject *
630
_codecs_code_page_decode_impl(PyObject *module, int codepage,
631
                              Py_buffer *data, const char *errors, int final)
632
/*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
633
{
634
    Py_ssize_t consumed = data->len;
635
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
636
                                                         data->buf, data->len,
637
                                                         errors,
638
                                                         final ? NULL : &consumed);
639
    return codec_tuple(decoded, consumed);
640
}
641
642
#endif /* MS_WINDOWS */
643
644
/* --- Encoder ------------------------------------------------------------ */
645
646
/*[clinic input]
647
_codecs.readbuffer_encode
648
    data: Py_buffer(accept={str, buffer})
649
    errors: str(accept={str, NoneType}) = None
650
    /
651
[clinic start generated code]*/
652
653
static PyObject *
654
_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
655
                               const char *errors)
656
/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
657
0
{
658
0
    PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
659
0
    return codec_tuple(result, data->len);
660
0
}
661
662
/*[clinic input]
663
_codecs.utf_7_encode
664
    str: unicode
665
    errors: str(accept={str, NoneType}) = None
666
    /
667
[clinic start generated code]*/
668
669
static PyObject *
_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
{
    /* Pair the encoded object with the length of the input string. */
    PyObject *encoded = _PyUnicode_EncodeUTF7(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
677
678
/*[clinic input]
679
_codecs.utf_8_encode
680
    str: unicode
681
    errors: str(accept={str, NoneType}) = None
682
    /
683
[clinic start generated code]*/
684
685
static PyObject *
_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
{
    /* Pair the encoded object with the length of the input string. */
    PyObject *encoded = _PyUnicode_AsUTF8String(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
693
694
/* This version provides access to the byteorder parameter of the
695
   builtin UTF-16 codecs as optional third argument. It defaults to 0
696
   which means: use the native byte order and prepend the data with a
697
   BOM (byte order mark).
698
699
*/
700
701
/*[clinic input]
702
_codecs.utf_16_encode
703
    str: unicode
704
    errors: str(accept={str, NoneType}) = None
705
    byteorder: int = 0
706
    /
707
[clinic start generated code]*/
708
709
static PyObject *
_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
{
    /* The caller-supplied byteorder is forwarded unchanged; the result
       is paired with the length of the input string. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, byteorder);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
717
718
/*[clinic input]
719
_codecs.utf_16_le_encode
720
    str: unicode
721
    errors: str(accept={str, NoneType}) = None
722
    /
723
[clinic start generated code]*/
724
725
static PyObject *
_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
{
    /* This variant always encodes with byte order -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, -1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
733
734
/*[clinic input]
735
_codecs.utf_16_be_encode
736
    str: unicode
737
    errors: str(accept={str, NoneType}) = None
738
    /
739
[clinic start generated code]*/
740
741
static PyObject *
_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
{
    /* This variant always encodes with byte order +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, +1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
749
750
/* This version provides access to the byteorder parameter of the
751
   builtin UTF-32 codecs as optional third argument. It defaults to 0
752
   which means: use the native byte order and prepend the data with a
753
   BOM (byte order mark).
754
755
*/
756
757
/*[clinic input]
758
_codecs.utf_32_encode
759
    str: unicode
760
    errors: str(accept={str, NoneType}) = None
761
    byteorder: int = 0
762
    /
763
[clinic start generated code]*/
764
765
static PyObject *
_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
{
    /* The caller-supplied byteorder is forwarded unchanged; the result
       is paired with the length of the input string. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, byteorder);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
773
774
/*[clinic input]
775
_codecs.utf_32_le_encode
776
    str: unicode
777
    errors: str(accept={str, NoneType}) = None
778
    /
779
[clinic start generated code]*/
780
781
static PyObject *
_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
{
    /* This variant always encodes with byte order -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, -1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
789
790
/*[clinic input]
791
_codecs.utf_32_be_encode
792
    str: unicode
793
    errors: str(accept={str, NoneType}) = None
794
    /
795
[clinic start generated code]*/
796
797
static PyObject *
_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
{
    /* This variant always encodes with byte order +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, +1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
805
806
/*[clinic input]
807
_codecs.unicode_escape_encode
808
    str: unicode
809
    errors: str(accept={str, NoneType}) = None
810
    /
811
[clinic start generated code]*/
812
813
static PyObject *
_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                   const char *errors)
/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
{
    /* errors is accepted but not passed to the encoder.  The result is
       paired with the length of the input string. */
    PyObject *encoded = PyUnicode_AsUnicodeEscapeString(str);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
821
822
/*[clinic input]
823
_codecs.raw_unicode_escape_encode
824
    str: unicode
825
    errors: str(accept={str, NoneType}) = None
826
    /
827
[clinic start generated code]*/
828
829
static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                       const char *errors)
/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
{
    /* errors is accepted but not passed to the encoder.  The result is
       paired with the length of the input string. */
    PyObject *encoded = PyUnicode_AsRawUnicodeEscapeString(str);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
837
838
/*[clinic input]
839
_codecs.latin_1_encode
840
    str: unicode
841
    errors: str(accept={str, NoneType}) = None
842
    /
843
[clinic start generated code]*/
844
845
static PyObject *
_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
                            const char *errors)
/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
{
    /* Pair the encoded object with the length of the input string. */
    PyObject *encoded = _PyUnicode_AsLatin1String(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
853
854
/*[clinic input]
855
_codecs.ascii_encode
856
    str: unicode
857
    errors: str(accept={str, NoneType}) = None
858
    /
859
[clinic start generated code]*/
860
861
static PyObject *
_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
{
    /* Pair the encoded object with the length of the input string. */
    PyObject *encoded = _PyUnicode_AsASCIIString(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
869
870
/*[clinic input]
871
_codecs.charmap_encode
872
    str: unicode
873
    errors: str(accept={str, NoneType}) = None
874
    mapping: object = None
875
    /
876
[clinic start generated code]*/
877
878
static PyObject *
_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
                            const char *errors, PyObject *mapping)
/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
{
    /* A Python-level None for `mapping` is handed to the encoder as NULL;
       any other object is passed through unchanged. */
    PyObject *table = (mapping == Py_None) ? NULL : mapping;
    PyObject *encoded = _PyUnicode_EncodeCharmap(str, table, errors);

    /* codec_tuple() (defined earlier in this file) builds the return value
       from the encoded object and the length of `str`. */
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
889
890
/*[clinic input]
891
_codecs.charmap_build
892
    map: unicode
893
    /
894
[clinic start generated code]*/
895
896
static PyObject *
_codecs_charmap_build_impl(PyObject *module, PyObject *map)
/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
{
    /* Thin wrapper: the result of PyUnicode_BuildEncodingMap() is returned
       unchanged, including a NULL result. */
    PyObject *encoding_map = PyUnicode_BuildEncodingMap(map);

    return encoding_map;
}
902
903
#ifdef MS_WINDOWS
904
905
/*[clinic input]
906
_codecs.mbcs_encode
907
    str: unicode
908
    errors: str(accept={str, NoneType}) = None
909
    /
910
[clinic start generated code]*/
911
912
static PyObject *
_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
{
    /* Encode using the CP_ACP code page constant; this block is only
       compiled when MS_WINDOWS is defined. */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_ACP, str, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* codec_tuple() (defined earlier in this file) builds the return value
       from the encoded object and the length of `str`. */
    return codec_tuple(encoded, consumed);
}
919
920
/*[clinic input]
921
_codecs.oem_encode
922
    str: unicode
923
    errors: str(accept={str, NoneType}) = None
924
    /
925
[clinic start generated code]*/
926
927
static PyObject *
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
{
    /* Encode using the CP_OEMCP code page constant; this block is only
       compiled when MS_WINDOWS is defined. */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_OEMCP, str, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* codec_tuple() (defined earlier in this file) builds the return value
       from the encoded object and the length of `str`. */
    return codec_tuple(encoded, consumed);
}
934
935
/*[clinic input]
936
_codecs.code_page_encode
937
    code_page: int
938
    str: unicode
939
    errors: str(accept={str, NoneType}) = None
940
    /
941
[clinic start generated code]*/
942
943
static PyObject *
_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
{
    /* Same shape as the mbcs/oem encoders, but the code page comes from the
       caller instead of a fixed constant. */
    PyObject *encoded = PyUnicode_EncodeCodePage(code_page, str, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* codec_tuple() (defined earlier in this file) builds the return value
       from the encoded object and the length of `str`. */
    return codec_tuple(encoded, consumed);
}
951
952
#endif /* MS_WINDOWS */
953
954
/* --- Error handler registry --------------------------------------------- */
955
956
/*[clinic input]
957
_codecs.register_error
958
    errors: str
959
    handler: object
960
    /
961
962
Register the specified error handler under the name errors.
963
964
handler must be a callable object, that will be called with an exception
965
instance containing information about the location of the encoding/decoding
966
error and must return a (replacement, new position) tuple.
967
[clinic start generated code]*/
968
969
static PyObject *
_codecs_register_error_impl(PyObject *module, const char *errors,
                            PyObject *handler)
/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
{
    /* Any non-zero status from PyCodec_RegisterError() is reported to the
       caller as NULL; a zero status yields None. */
    if (PyCodec_RegisterError(errors, handler) != 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
978
979
/*[clinic input]
980
_codecs._unregister_error -> bool
981
    errors: str
982
    /
983
984
Un-register the specified error handler for the error handling `errors'.
985
986
Only custom error handlers can be un-registered. An exception is raised
987
if the error handling is a built-in one (e.g., 'strict'), or if an error
988
occurs.
989
990
Otherwise, this returns True if a custom handler has been successfully
991
un-registered, and False if no custom handler for the specified error
992
handling exists.
993
994
[clinic start generated code]*/
995
996
static int
997
_codecs__unregister_error_impl(PyObject *module, const char *errors)
998
/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
999
0
{
1000
0
    return _PyCodec_UnregisterError(errors);
1001
0
}
1002
1003
/*[clinic input]
1004
_codecs.lookup_error
1005
    name: str
1006
    /
1007
1008
lookup_error(errors) -> handler
1009
1010
Return the error handler for the specified error handling name or raise a
1011
LookupError, if no handler exists under this name.
1012
[clinic start generated code]*/
1013
1014
static PyObject *
_codecs_lookup_error_impl(PyObject *module, const char *name)
/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
{
    /* Thin wrapper: the result of PyCodec_LookupError() is returned
       unchanged, including a NULL result. */
    PyObject *handler = PyCodec_LookupError(name);

    return handler;
}
1020
1021
/* --- Module API --------------------------------------------------------- */
1022
1023
static PyMethodDef _codecs_functions[] = {
1024
    _CODECS_REGISTER_METHODDEF
1025
    _CODECS_UNREGISTER_METHODDEF
1026
    _CODECS_LOOKUP_METHODDEF
1027
    _CODECS_ENCODE_METHODDEF
1028
    _CODECS_DECODE_METHODDEF
1029
    _CODECS_ESCAPE_ENCODE_METHODDEF
1030
    _CODECS_ESCAPE_DECODE_METHODDEF
1031
    _CODECS_UTF_8_ENCODE_METHODDEF
1032
    _CODECS_UTF_8_DECODE_METHODDEF
1033
    _CODECS_UTF_7_ENCODE_METHODDEF
1034
    _CODECS_UTF_7_DECODE_METHODDEF
1035
    _CODECS_UTF_16_ENCODE_METHODDEF
1036
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
1037
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
1038
    _CODECS_UTF_16_DECODE_METHODDEF
1039
    _CODECS_UTF_16_LE_DECODE_METHODDEF
1040
    _CODECS_UTF_16_BE_DECODE_METHODDEF
1041
    _CODECS_UTF_16_EX_DECODE_METHODDEF
1042
    _CODECS_UTF_32_ENCODE_METHODDEF
1043
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
1044
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
1045
    _CODECS_UTF_32_DECODE_METHODDEF
1046
    _CODECS_UTF_32_LE_DECODE_METHODDEF
1047
    _CODECS_UTF_32_BE_DECODE_METHODDEF
1048
    _CODECS_UTF_32_EX_DECODE_METHODDEF
1049
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1050
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1051
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1052
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1053
    _CODECS_LATIN_1_ENCODE_METHODDEF
1054
    _CODECS_LATIN_1_DECODE_METHODDEF
1055
    _CODECS_ASCII_ENCODE_METHODDEF
1056
    _CODECS_ASCII_DECODE_METHODDEF
1057
    _CODECS_CHARMAP_ENCODE_METHODDEF
1058
    _CODECS_CHARMAP_DECODE_METHODDEF
1059
    _CODECS_CHARMAP_BUILD_METHODDEF
1060
    _CODECS_READBUFFER_ENCODE_METHODDEF
1061
    _CODECS_MBCS_ENCODE_METHODDEF
1062
    _CODECS_MBCS_DECODE_METHODDEF
1063
    _CODECS_OEM_ENCODE_METHODDEF
1064
    _CODECS_OEM_DECODE_METHODDEF
1065
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
1066
    _CODECS_CODE_PAGE_DECODE_METHODDEF
1067
    _CODECS_REGISTER_ERROR_METHODDEF
1068
    _CODECS__UNREGISTER_ERROR_METHODDEF
1069
    _CODECS_LOOKUP_ERROR_METHODDEF
1070
    {NULL, NULL}                /* sentinel */
1071
};
1072
1073
static PyModuleDef_Slot _codecs_slots[] = {
1074
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1075
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1076
    {0, NULL}
1077
};
1078
1079
static struct PyModuleDef codecsmodule = {
1080
        PyModuleDef_HEAD_INIT,
1081
        "_codecs",
1082
        NULL,
1083
        0,
1084
        _codecs_functions,
1085
        _codecs_slots,
1086
        NULL,
1087
        NULL,
1088
        NULL
1089
};
1090
1091
PyMODINIT_FUNC
1092
PyInit__codecs(void)
1093
16
{
1094
16
    return PyModuleDef_Init(&codecsmodule);
1095
16
}