Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Modules/_codecsmodule.c
Line
Count
Source (jump to first uncovered line)
1
/* ------------------------------------------------------------------------
2
3
   _codecs -- Provides access to the codec registry and the builtin
4
              codecs.
5
6
   This module should never be imported directly. The standard library
7
   module "codecs" wraps this builtin module for use within Python.
8
9
   The codec registry is accessible via:
10
11
     register(search_function) -> None
12
13
     lookup(encoding) -> CodecInfo object
14
15
   The builtin Unicode codecs use the following interface:
16
17
     <encoding>_encode(Unicode_object[,errors='strict']) ->
18
        (bytes object, characters consumed)
19
20
     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21
        (Unicode object, bytes consumed)
22
23
   These <encoding>s are available: utf_8, unicode_escape,
24
   raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25
26
27
Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29
Copyright (c) Corporation for National Research Initiatives.
30
31
   ------------------------------------------------------------------------ */
32
33
#include "Python.h"
34
#include "pycore_codecs.h"        // _PyCodec_Lookup()
35
#include "pycore_unicodeobject.h" // _PyUnicode_EncodeCharmap
36
37
#ifdef MS_WINDOWS
38
#include <windows.h>
39
#endif
40
41
/*[clinic input]
42
module _codecs
43
[clinic start generated code]*/
44
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
45
46
#include "pycore_runtime.h"
47
#include "clinic/_codecsmodule.c.h"
48
49
/* --- Registry ----------------------------------------------------------- */
50
51
/*[clinic input]
52
_codecs.register
53
    search_function: object
54
    /
55
56
Register a codec search function.
57
58
Search functions are expected to take one argument, the encoding name in
59
all lower case letters, and either return None, or a tuple of functions
60
(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
61
[clinic start generated code]*/
62
63
static PyObject *
_codecs_register(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
{
    /* Hand the search function to PyCodec_Register(); any non-zero status
       is surfaced to the caller as NULL, otherwise None is returned. */
    int status = PyCodec_Register(search_function);
    if (status) {
        return NULL;
    }
    Py_RETURN_NONE;
}
72
73
/*[clinic input]
74
_codecs.unregister
75
    search_function: object
76
    /
77
78
Unregister a codec search function and clear the registry's cache.
79
80
If the search function is not registered, do nothing.
81
[clinic start generated code]*/
82
83
static PyObject *
_codecs_unregister(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
{
    /* Only a negative status from PyCodec_Unregister() is treated as a
       failure (NULL result); every other outcome yields None. */
    int status = PyCodec_Unregister(search_function);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
93
94
/*[clinic input]
95
_codecs.lookup
96
    encoding: str
97
    /
98
99
Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
100
[clinic start generated code]*/
101
102
static PyObject *
_codecs_lookup_impl(PyObject *module, const char *encoding)
/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
{
    /* Thin wrapper: the result of _PyCodec_Lookup() is returned unchanged. */
    PyObject *codec_info = _PyCodec_Lookup(encoding);
    return codec_info;
}
108
109
/*[clinic input]
110
_codecs.encode
111
    obj: object
112
    encoding: str(c_default="NULL") = "utf-8"
113
    errors: str(c_default="NULL") = "strict"
114
115
Encodes obj using the codec registered for encoding.
116
117
The default encoding is 'utf-8'.  errors may be given to set a
118
different error handling scheme.  Default is 'strict' meaning that encoding
119
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
120
and 'backslashreplace' as well as any other name registered with
121
codecs.register_error that can handle ValueErrors.
122
[clinic start generated code]*/
123
124
static PyObject *
_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
{
    /* A NULL encoding means "not supplied": substitute the default
       encoding name before consulting the codec registry. */
    const char *codec_name = encoding;
    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* errors is forwarded untouched (it may be NULL). */
    return PyCodec_Encode(obj, codec_name, errors);
}
135
136
/*[clinic input]
137
_codecs.decode
138
    obj: object
139
    encoding: str(c_default="NULL") = "utf-8"
140
    errors: str(c_default="NULL") = "strict"
141
142
Decodes obj using the codec registered for encoding.
143
144
Default encoding is 'utf-8'.  errors may be given to set a
145
different error handling scheme.  Default is 'strict' meaning that encoding
146
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
147
and 'backslashreplace' as well as any other name registered with
148
codecs.register_error that can handle ValueErrors.
149
[clinic start generated code]*/
150
151
static PyObject *
_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
{
    /* A NULL encoding means "not supplied": substitute the default
       encoding name before consulting the codec registry. */
    const char *codec_name = encoding;
    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* errors is forwarded untouched (it may be NULL). */
    return PyCodec_Decode(obj, codec_name, errors);
}
162
163
/* --- Helpers ------------------------------------------------------------ */
164
165
static
166
PyObject *codec_tuple(PyObject *decoded,
167
                      Py_ssize_t len)
168
807k
{
169
807k
    if (decoded == NULL)
170
41.8k
        return NULL;
171
765k
    return Py_BuildValue("Nn", decoded, len);
172
807k
}
173
174
/* --- String codecs ------------------------------------------------------ */
175
/*[clinic input]
176
_codecs.escape_decode
177
    data: Py_buffer(accept={str, buffer})
178
    errors: str(accept={str, NoneType}) = None
179
    /
180
[clinic start generated code]*/
181
182
static PyObject *
183
_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
184
                           const char *errors)
185
/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
186
0
{
187
0
    PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
188
0
                                             errors, 0, NULL);
189
0
    return codec_tuple(decoded, data->len);
190
0
}
191
192
/*[clinic input]
193
_codecs.escape_encode
194
    data: object(subclass_of='&PyBytes_Type')
195
    errors: str(accept={str, NoneType}) = None
196
    /
197
[clinic start generated code]*/
198
199
static PyObject *
_codecs_escape_encode_impl(PyObject *module, PyObject *data,
                           const char *errors)
/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
{
    /* Produce a backslash-escaped copy of the bytes object `data` and
       return it with the input size via codec_tuple().  `errors` is not
       consulted by this function. */
    const Py_ssize_t size = PyBytes_GET_SIZE(data);

    /* The output buffer is sized at four bytes per input byte (the length
       of a "\xNN" escape), so refuse inputs whose quadruple would not fit
       in a Py_ssize_t. */
    if (size > PY_SSIZE_T_MAX / 4) {
        PyErr_SetString(PyExc_OverflowError,
            "string is too large to encode");
        return NULL;
    }

    const Py_ssize_t newsize = 4 * size;
    PyObject *v = PyBytes_FromStringAndSize(NULL, newsize);
    if (v == NULL) {
        return NULL;
    }

    const char *src = PyBytes_AS_STRING(data);
    char *out = PyBytes_AS_STRING(v);

    for (Py_ssize_t pos = 0; pos < size; pos++) {
        /* There's at least enough room for a hex escape */
        assert(newsize - (out - PyBytes_AS_STRING(v)) >= 4);

        char c = src[pos];
        switch (c) {
        case '\'':
        case '\\':
            /* Quote and backslash keep their own character after '\'. */
            *out++ = '\\';
            *out++ = c;
            break;
        case '\t':
            *out++ = '\\';
            *out++ = 't';
            break;
        case '\n':
            *out++ = '\\';
            *out++ = 'n';
            break;
        case '\r':
            *out++ = '\\';
            *out++ = 'r';
            break;
        default:
            if (c < ' ' || c >= 0x7f) {
                /* Everything else outside ' '..0x7e becomes \xNN. */
                *out++ = '\\';
                *out++ = 'x';
                *out++ = Py_hexdigits[(c & 0xf0) >> 4];
                *out++ = Py_hexdigits[c & 0xf];
            }
            else {
                *out++ = c;
            }
            break;
        }
    }

    *out = '\0';
    /* Shrink the buffer to the number of bytes actually written; a
       non-zero result from the resize is reported as failure. */
    if (_PyBytes_Resize(&v, (out - PyBytes_AS_STRING(v)))) {
        return NULL;
    }

    return codec_tuple(v, size);
}
254
255
/* --- Decoder ------------------------------------------------------------ */
256
/*[clinic input]
257
_codecs.utf_7_decode
258
    data: Py_buffer
259
    errors: str(accept={str, NoneType}) = None
260
    final: bool = False
261
    /
262
[clinic start generated code]*/
263
264
static PyObject *
265
_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
266
                          const char *errors, int final)
267
/*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
268
20.6k
{
269
20.6k
    Py_ssize_t consumed = data->len;
270
20.6k
    PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
271
20.6k
                                                     errors,
272
20.6k
                                                     final ? NULL : &consumed);
273
20.6k
    return codec_tuple(decoded, consumed);
274
20.6k
}
275
276
/*[clinic input]
277
_codecs.utf_8_decode
278
    data: Py_buffer
279
    errors: str(accept={str, NoneType}) = None
280
    final: bool = False
281
    /
282
[clinic start generated code]*/
283
284
static PyObject *
285
_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
286
                          const char *errors, int final)
287
/*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
288
69.6k
{
289
69.6k
    Py_ssize_t consumed = data->len;
290
69.6k
    PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
291
69.6k
                                                     errors,
292
69.6k
                                                     final ? NULL : &consumed);
293
69.6k
    return codec_tuple(decoded, consumed);
294
69.6k
}
295
296
/*[clinic input]
297
_codecs.utf_16_decode
298
    data: Py_buffer
299
    errors: str(accept={str, NoneType}) = None
300
    final: bool = False
301
    /
302
[clinic start generated code]*/
303
304
static PyObject *
305
_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
306
                           const char *errors, int final)
307
/*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
308
12.3k
{
309
12.3k
    int byteorder = 0;
310
    /* This is overwritten unless final is true. */
311
12.3k
    Py_ssize_t consumed = data->len;
312
12.3k
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
313
12.3k
                                                      errors, &byteorder,
314
12.3k
                                                      final ? NULL : &consumed);
315
12.3k
    return codec_tuple(decoded, consumed);
316
12.3k
}
317
318
/*[clinic input]
319
_codecs.utf_16_le_decode
320
    data: Py_buffer
321
    errors: str(accept={str, NoneType}) = None
322
    final: bool = False
323
    /
324
[clinic start generated code]*/
325
326
static PyObject *
327
_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
328
                              const char *errors, int final)
329
/*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
330
20
{
331
20
    int byteorder = -1;
332
    /* This is overwritten unless final is true. */
333
20
    Py_ssize_t consumed = data->len;
334
20
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
335
20
                                                      errors, &byteorder,
336
20
                                                      final ? NULL : &consumed);
337
20
    return codec_tuple(decoded, consumed);
338
20
}
339
340
/*[clinic input]
341
_codecs.utf_16_be_decode
342
    data: Py_buffer
343
    errors: str(accept={str, NoneType}) = None
344
    final: bool = False
345
    /
346
[clinic start generated code]*/
347
348
static PyObject *
349
_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
350
                              const char *errors, int final)
351
/*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
352
136
{
353
136
    int byteorder = 1;
354
    /* This is overwritten unless final is true. */
355
136
    Py_ssize_t consumed = data->len;
356
136
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
357
136
                                                      errors, &byteorder,
358
136
                                                      final ? NULL : &consumed);
359
136
    return codec_tuple(decoded, consumed);
360
136
}
361
362
/* This non-standard version also provides access to the byteorder
363
   parameter of the builtin UTF-16 codec.
364
365
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
366
   being the value in effect at the end of data.
367
368
*/
369
/*[clinic input]
370
_codecs.utf_16_ex_decode
371
    data: Py_buffer
372
    errors: str(accept={str, NoneType}) = None
373
    byteorder: int = 0
374
    final: bool = False
375
    /
376
[clinic start generated code]*/
377
378
static PyObject *
379
_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
380
                              const char *errors, int byteorder, int final)
381
/*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
382
0
{
383
    /* This is overwritten unless final is true. */
384
0
    Py_ssize_t consumed = data->len;
385
386
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
387
0
                                                      errors, &byteorder,
388
0
                                                      final ? NULL : &consumed);
389
0
    if (decoded == NULL)
390
0
        return NULL;
391
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
392
0
}
393
394
/*[clinic input]
395
_codecs.utf_32_decode
396
    data: Py_buffer
397
    errors: str(accept={str, NoneType}) = None
398
    final: bool = False
399
    /
400
[clinic start generated code]*/
401
402
static PyObject *
403
_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
404
                           const char *errors, int final)
405
/*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
406
25.4k
{
407
25.4k
    int byteorder = 0;
408
    /* This is overwritten unless final is true. */
409
25.4k
    Py_ssize_t consumed = data->len;
410
25.4k
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
411
25.4k
                                                      errors, &byteorder,
412
25.4k
                                                      final ? NULL : &consumed);
413
25.4k
    return codec_tuple(decoded, consumed);
414
25.4k
}
415
416
/*[clinic input]
417
_codecs.utf_32_le_decode
418
    data: Py_buffer
419
    errors: str(accept={str, NoneType}) = None
420
    final: bool = False
421
    /
422
[clinic start generated code]*/
423
424
static PyObject *
425
_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
426
                              const char *errors, int final)
427
/*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
428
10
{
429
10
    int byteorder = -1;
430
    /* This is overwritten unless final is true. */
431
10
    Py_ssize_t consumed = data->len;
432
10
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
433
10
                                                      errors, &byteorder,
434
10
                                                      final ? NULL : &consumed);
435
10
    return codec_tuple(decoded, consumed);
436
10
}
437
438
/*[clinic input]
439
_codecs.utf_32_be_decode
440
    data: Py_buffer
441
    errors: str(accept={str, NoneType}) = None
442
    final: bool = False
443
    /
444
[clinic start generated code]*/
445
446
static PyObject *
447
_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
448
                              const char *errors, int final)
449
/*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
450
52
{
451
52
    int byteorder = 1;
452
    /* This is overwritten unless final is true. */
453
52
    Py_ssize_t consumed = data->len;
454
52
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
455
52
                                                      errors, &byteorder,
456
52
                                                      final ? NULL : &consumed);
457
52
    return codec_tuple(decoded, consumed);
458
52
}
459
460
/* This non-standard version also provides access to the byteorder
461
   parameter of the builtin UTF-32 codec.
462
463
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
464
   being the value in effect at the end of data.
465
466
*/
467
/*[clinic input]
468
_codecs.utf_32_ex_decode
469
    data: Py_buffer
470
    errors: str(accept={str, NoneType}) = None
471
    byteorder: int = 0
472
    final: bool = False
473
    /
474
[clinic start generated code]*/
475
476
static PyObject *
477
_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
478
                              const char *errors, int byteorder, int final)
479
/*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
480
0
{
481
0
    Py_ssize_t consumed = data->len;
482
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
483
0
                                                      errors, &byteorder,
484
0
                                                      final ? NULL : &consumed);
485
0
    if (decoded == NULL)
486
0
        return NULL;
487
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
488
0
}
489
490
/*[clinic input]
491
_codecs.unicode_escape_decode
492
    data: Py_buffer(accept={str, buffer})
493
    errors: str(accept={str, NoneType}) = None
494
    final: bool = True
495
    /
496
[clinic start generated code]*/
497
498
static PyObject *
499
_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
500
                                   const char *errors, int final)
501
/*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
502
0
{
503
0
    Py_ssize_t consumed = data->len;
504
0
    PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
505
0
                                                               errors,
506
0
                                                               final ? NULL : &consumed);
507
0
    return codec_tuple(decoded, consumed);
508
0
}
509
510
/*[clinic input]
511
_codecs.raw_unicode_escape_decode
512
    data: Py_buffer(accept={str, buffer})
513
    errors: str(accept={str, NoneType}) = None
514
    final: bool = True
515
    /
516
[clinic start generated code]*/
517
518
static PyObject *
519
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
520
                                       const char *errors, int final)
521
/*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
522
0
{
523
0
    Py_ssize_t consumed = data->len;
524
0
    PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
525
0
                                                                  errors,
526
0
                                                                  final ? NULL : &consumed);
527
0
    return codec_tuple(decoded, consumed);
528
0
}
529
530
/*[clinic input]
531
_codecs.latin_1_decode
532
    data: Py_buffer
533
    errors: str(accept={str, NoneType}) = None
534
    /
535
[clinic start generated code]*/
536
537
static PyObject *
538
_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
539
                            const char *errors)
540
/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
541
5.23k
{
542
5.23k
    PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
543
5.23k
    return codec_tuple(decoded, data->len);
544
5.23k
}
545
546
/*[clinic input]
547
_codecs.ascii_decode
548
    data: Py_buffer
549
    errors: str(accept={str, NoneType}) = None
550
    /
551
[clinic start generated code]*/
552
553
static PyObject *
554
_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
555
                          const char *errors)
556
/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
557
10.8k
{
558
10.8k
    PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
559
10.8k
    return codec_tuple(decoded, data->len);
560
10.8k
}
561
562
/*[clinic input]
563
_codecs.charmap_decode
564
    data: Py_buffer
565
    errors: str(accept={str, NoneType}) = None
566
    mapping: object = None
567
    /
568
[clinic start generated code]*/
569
570
static PyObject *
571
_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
572
                            const char *errors, PyObject *mapping)
573
/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
574
10.2k
{
575
10.2k
    PyObject *decoded;
576
577
10.2k
    if (mapping == Py_None)
578
0
        mapping = NULL;
579
580
10.2k
    decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
581
10.2k
    return codec_tuple(decoded, data->len);
582
10.2k
}
583
584
#ifdef MS_WINDOWS
585
586
/*[clinic input]
587
_codecs.mbcs_decode
588
    data: Py_buffer
589
    errors: str(accept={str, NoneType}) = None
590
    final: bool = False
591
    /
592
[clinic start generated code]*/
593
594
static PyObject *
595
_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
596
                         const char *errors, int final)
597
/*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
598
{
599
    Py_ssize_t consumed = data->len;
600
    PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
601
            errors, final ? NULL : &consumed);
602
    return codec_tuple(decoded, consumed);
603
}
604
605
/*[clinic input]
606
_codecs.oem_decode
607
    data: Py_buffer
608
    errors: str(accept={str, NoneType}) = None
609
    final: bool = False
610
    /
611
[clinic start generated code]*/
612
613
static PyObject *
614
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
615
                        const char *errors, int final)
616
/*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
617
{
618
    Py_ssize_t consumed = data->len;
619
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
620
        data->buf, data->len, errors, final ? NULL : &consumed);
621
    return codec_tuple(decoded, consumed);
622
}
623
624
/*[clinic input]
625
_codecs.code_page_decode
626
    codepage: int
627
    data: Py_buffer
628
    errors: str(accept={str, NoneType}) = None
629
    final: bool = False
630
    /
631
[clinic start generated code]*/
632
633
static PyObject *
634
_codecs_code_page_decode_impl(PyObject *module, int codepage,
635
                              Py_buffer *data, const char *errors, int final)
636
/*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
637
{
638
    Py_ssize_t consumed = data->len;
639
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
640
                                                         data->buf, data->len,
641
                                                         errors,
642
                                                         final ? NULL : &consumed);
643
    return codec_tuple(decoded, consumed);
644
}
645
646
#endif /* MS_WINDOWS */
647
648
/* --- Encoder ------------------------------------------------------------ */
649
650
/*[clinic input]
651
_codecs.readbuffer_encode
652
    data: Py_buffer(accept={str, buffer})
653
    errors: str(accept={str, NoneType}) = None
654
    /
655
[clinic start generated code]*/
656
657
static PyObject *
658
_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
659
                               const char *errors)
660
/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
661
0
{
662
0
    PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
663
0
    return codec_tuple(result, data->len);
664
0
}
665
666
/*[clinic input]
667
_codecs.utf_7_encode
668
    str: unicode
669
    errors: str(accept={str, NoneType}) = None
670
    /
671
[clinic start generated code]*/
672
673
static PyObject *
_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
{
    /* Both integer arguments of _PyUnicode_EncodeUTF7() are passed as 0;
       the second tuple item is the length of str. */
    PyObject *encoded = _PyUnicode_EncodeUTF7(str, 0, 0, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
681
682
/*[clinic input]
683
_codecs.utf_8_encode
684
    str: unicode
685
    errors: str(accept={str, NoneType}) = None
686
    /
687
[clinic start generated code]*/
688
689
static PyObject *
_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
{
    /* Result pair: the encoded object and the length of str. */
    PyObject *encoded = _PyUnicode_AsUTF8String(str, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
697
698
/* This version provides access to the byteorder parameter of the
699
   builtin UTF-16 codecs as optional third argument. It defaults to 0
700
   which means: use the native byte order and prepend the data with a
701
   BOM mark.
702
703
*/
704
705
/*[clinic input]
706
_codecs.utf_16_encode
707
    str: unicode
708
    errors: str(accept={str, NoneType}) = None
709
    byteorder: int = 0
710
    /
711
[clinic start generated code]*/
712
713
static PyObject *
_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
{
    /* The caller's byteorder is forwarded unchanged; the second tuple
       item is the length of str. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, byteorder);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
721
722
/*[clinic input]
723
_codecs.utf_16_le_encode
724
    str: unicode
725
    errors: str(accept={str, NoneType}) = None
726
    /
727
[clinic start generated code]*/
728
729
static PyObject *
_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
{
    /* The "le" variant always encodes with byteorder -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, -1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
737
738
/*[clinic input]
739
_codecs.utf_16_be_encode
740
    str: unicode
741
    errors: str(accept={str, NoneType}) = None
742
    /
743
[clinic start generated code]*/
744
745
static PyObject *
_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
{
    /* The "be" variant always encodes with byteorder +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, +1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
753
754
/* This version provides access to the byteorder parameter of the
755
   builtin UTF-32 codecs as optional third argument. It defaults to 0
756
   which means: use the native byte order and prepend the data with a
757
   BOM mark.
758
759
*/
760
761
/*[clinic input]
762
_codecs.utf_32_encode
763
    str: unicode
764
    errors: str(accept={str, NoneType}) = None
765
    byteorder: int = 0
766
    /
767
[clinic start generated code]*/
768
769
static PyObject *
_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
{
    /* The caller's byteorder is forwarded unchanged; the second tuple
       item is the length of str. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, byteorder);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
777
778
/*[clinic input]
779
_codecs.utf_32_le_encode
780
    str: unicode
781
    errors: str(accept={str, NoneType}) = None
782
    /
783
[clinic start generated code]*/
784
785
static PyObject *
_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
{
    /* The "le" variant always encodes with byteorder -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, -1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
793
794
/*[clinic input]
795
_codecs.utf_32_be_encode
796
    str: unicode
797
    errors: str(accept={str, NoneType}) = None
798
    /
799
[clinic start generated code]*/
800
801
static PyObject *
_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
{
    /* The "be" variant always encodes with byteorder +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, +1);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
809
810
/*[clinic input]
811
_codecs.unicode_escape_encode
812
    str: unicode
813
    errors: str(accept={str, NoneType}) = None
814
    /
815
[clinic start generated code]*/
816
817
static PyObject *
_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                   const char *errors)
/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
{
    /* errors is accepted but not forwarded: the encoder called here takes
       only the string. */
    PyObject *encoded = PyUnicode_AsUnicodeEscapeString(str);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
825
826
/*[clinic input]
827
_codecs.raw_unicode_escape_encode
828
    str: unicode
829
    errors: str(accept={str, NoneType}) = None
830
    /
831
[clinic start generated code]*/
832
833
static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                       const char *errors)
/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
{
    /* errors is accepted but not forwarded: the encoder called here takes
       only the string. */
    PyObject *encoded = PyUnicode_AsRawUnicodeEscapeString(str);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
841
842
/*[clinic input]
843
_codecs.latin_1_encode
844
    str: unicode
845
    errors: str(accept={str, NoneType}) = None
846
    /
847
[clinic start generated code]*/
848
849
static PyObject *
_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
                            const char *errors)
/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
{
    /* Result pair: the encoded object and the length of str. */
    PyObject *encoded = _PyUnicode_AsLatin1String(str, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
857
858
/*[clinic input]
859
_codecs.ascii_encode
860
    str: unicode
861
    errors: str(accept={str, NoneType}) = None
862
    /
863
[clinic start generated code]*/
864
865
static PyObject *
_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
{
    /* Result pair: the encoded object and the length of str. */
    PyObject *encoded = _PyUnicode_AsASCIIString(str, errors);
    return codec_tuple(encoded, PyUnicode_GET_LENGTH(str));
}
873
874
/*[clinic input]
875
_codecs.charmap_encode
876
    str: unicode
877
    errors: str(accept={str, NoneType}) = None
878
    mapping: object = None
879
    /
880
[clinic start generated code]*/
881
882
static PyObject *
_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
                            const char *errors, PyObject *mapping)
/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
{
    /* A Python-level None mapping is handed to the encoder as a NULL
       pointer; any other object is passed through untouched. */
    PyObject *table = (mapping == Py_None) ? NULL : mapping;

    PyObject *encoded = _PyUnicode_EncodeCharmap(str, table, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* Hand both values to the file-local codec_tuple() helper. */
    return codec_tuple(encoded, consumed);
}
893
894
/*[clinic input]
895
_codecs.charmap_build
896
    map: unicode
897
    /
898
[clinic start generated code]*/
899
900
static PyObject *
_codecs_charmap_build_impl(PyObject *module, PyObject *map)
/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
{
    /* Thin wrapper: the result of PyUnicode_BuildEncodingMap() for the
       given str object `map` is returned to the caller as-is. */
    PyObject *encoding_map = PyUnicode_BuildEncodingMap(map);
    return encoding_map;
}
906
907
#ifdef MS_WINDOWS
908
909
/*[clinic input]
910
_codecs.mbcs_encode
911
    str: unicode
912
    errors: str(accept={str, NoneType}) = None
913
    /
914
[clinic start generated code]*/
915
916
static PyObject *
_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
{
    /* Encode `str` through the code-page encoder using the CP_ACP
       code page identifier (only compiled when MS_WINDOWS is defined). */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_ACP, str, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* Hand both values to the file-local codec_tuple() helper. */
    return codec_tuple(encoded, consumed);
}
923
924
/*[clinic input]
925
_codecs.oem_encode
926
    str: unicode
927
    errors: str(accept={str, NoneType}) = None
928
    /
929
[clinic start generated code]*/
930
931
static PyObject *
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
{
    /* Encode `str` through the code-page encoder using the CP_OEMCP
       code page identifier (only compiled when MS_WINDOWS is defined). */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_OEMCP, str, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* Hand both values to the file-local codec_tuple() helper. */
    return codec_tuple(encoded, consumed);
}
938
939
/*[clinic input]
940
_codecs.code_page_encode
941
    code_page: int
942
    str: unicode
943
    errors: str(accept={str, NoneType}) = None
944
    /
945
[clinic start generated code]*/
946
947
static PyObject *
_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
{
    /* Encode `str` through the code-page encoder using the caller-supplied
       `code_page` number; the value is forwarded without checks here. */
    PyObject *encoded = PyUnicode_EncodeCodePage(code_page, str, errors);
    Py_ssize_t consumed = PyUnicode_GET_LENGTH(str);

    /* Hand both values to the file-local codec_tuple() helper. */
    return codec_tuple(encoded, consumed);
}
955
956
#endif /* MS_WINDOWS */
957
958
/* --- Error handler registry --------------------------------------------- */
959
960
/*[clinic input]
961
_codecs.register_error
962
    errors: str
963
    handler: object
964
    /
965
966
Register the specified error handler under the name errors.
967
968
handler must be a callable object, that will be called with an exception
969
instance containing information about the location of the encoding/decoding
970
error and must return a (replacement, new position) tuple.
971
[clinic start generated code]*/
972
973
static PyObject *
_codecs_register_error_impl(PyObject *module, const char *errors,
                            PyObject *handler)
/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
{
    /* Register `handler` under the name `errors`.  Any non-zero status from
       PyCodec_RegisterError() is reported to the caller as NULL
       (presumably with an exception already set -- confirm against the
       C API documentation). */
    int status = PyCodec_RegisterError(errors, handler);
    if (status != 0) {
        return NULL;
    }

    Py_RETURN_NONE;
}
982
983
/*[clinic input]
984
_codecs._unregister_error -> bool
985
    errors: str
986
    /
987
988
Un-register the specified error handler for the error handling `errors'.
989
990
Only custom error handlers can be un-registered. An exception is raised
991
if the error handling is a built-in one (e.g., 'strict'), or if an error
992
occurs.
993
994
Otherwise, this returns True if a custom handler has been successfully
995
un-registered, and False if no custom handler for the specified error
996
handling exists.
997
998
[clinic start generated code]*/
999
1000
static int
1001
_codecs__unregister_error_impl(PyObject *module, const char *errors)
1002
/*[clinic end generated code: output=28c22be667465503 input=a63ab9e9ce1686d4]*/
1003
0
{
1004
0
    return _PyCodec_UnregisterError(errors);
1005
0
}
1006
1007
/*[clinic input]
1008
_codecs.lookup_error
1009
    name: str
1010
    /
1011
1012
lookup_error(errors) -> handler
1013
1014
Return the error handler for the specified error handling name or raise a
1015
LookupError, if no handler exists under this name.
1016
[clinic start generated code]*/
1017
1018
static PyObject *
_codecs_lookup_error_impl(PyObject *module, const char *name)
/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
{
    /* Thin wrapper: whatever PyCodec_LookupError() yields for `name`
       is returned to the caller as-is. */
    PyObject *handler = PyCodec_LookupError(name);
    return handler;
}
1024
1025
/* --- Module API --------------------------------------------------------- */
1026
1027
static PyMethodDef _codecs_functions[] = {
1028
    _CODECS_REGISTER_METHODDEF
1029
    _CODECS_UNREGISTER_METHODDEF
1030
    _CODECS_LOOKUP_METHODDEF
1031
    _CODECS_ENCODE_METHODDEF
1032
    _CODECS_DECODE_METHODDEF
1033
    _CODECS_ESCAPE_ENCODE_METHODDEF
1034
    _CODECS_ESCAPE_DECODE_METHODDEF
1035
    _CODECS_UTF_8_ENCODE_METHODDEF
1036
    _CODECS_UTF_8_DECODE_METHODDEF
1037
    _CODECS_UTF_7_ENCODE_METHODDEF
1038
    _CODECS_UTF_7_DECODE_METHODDEF
1039
    _CODECS_UTF_16_ENCODE_METHODDEF
1040
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
1041
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
1042
    _CODECS_UTF_16_DECODE_METHODDEF
1043
    _CODECS_UTF_16_LE_DECODE_METHODDEF
1044
    _CODECS_UTF_16_BE_DECODE_METHODDEF
1045
    _CODECS_UTF_16_EX_DECODE_METHODDEF
1046
    _CODECS_UTF_32_ENCODE_METHODDEF
1047
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
1048
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
1049
    _CODECS_UTF_32_DECODE_METHODDEF
1050
    _CODECS_UTF_32_LE_DECODE_METHODDEF
1051
    _CODECS_UTF_32_BE_DECODE_METHODDEF
1052
    _CODECS_UTF_32_EX_DECODE_METHODDEF
1053
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1054
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1055
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1056
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1057
    _CODECS_LATIN_1_ENCODE_METHODDEF
1058
    _CODECS_LATIN_1_DECODE_METHODDEF
1059
    _CODECS_ASCII_ENCODE_METHODDEF
1060
    _CODECS_ASCII_DECODE_METHODDEF
1061
    _CODECS_CHARMAP_ENCODE_METHODDEF
1062
    _CODECS_CHARMAP_DECODE_METHODDEF
1063
    _CODECS_CHARMAP_BUILD_METHODDEF
1064
    _CODECS_READBUFFER_ENCODE_METHODDEF
1065
    _CODECS_MBCS_ENCODE_METHODDEF
1066
    _CODECS_MBCS_DECODE_METHODDEF
1067
    _CODECS_OEM_ENCODE_METHODDEF
1068
    _CODECS_OEM_DECODE_METHODDEF
1069
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
1070
    _CODECS_CODE_PAGE_DECODE_METHODDEF
1071
    _CODECS_REGISTER_ERROR_METHODDEF
1072
    _CODECS__UNREGISTER_ERROR_METHODDEF
1073
    _CODECS_LOOKUP_ERROR_METHODDEF
1074
    {NULL, NULL}                /* sentinel */
1075
};
1076
1077
static PyModuleDef_Slot _codecs_slots[] = {
1078
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1079
    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
1080
    {0, NULL}
1081
};
1082
1083
static struct PyModuleDef codecsmodule = {
1084
        PyModuleDef_HEAD_INIT,
1085
        "_codecs",
1086
        NULL,
1087
        0,
1088
        _codecs_functions,
1089
        _codecs_slots,
1090
        NULL,
1091
        NULL,
1092
        NULL
1093
};
1094
1095
PyMODINIT_FUNC
1096
PyInit__codecs(void)
1097
16
{
1098
16
    return PyModuleDef_Init(&codecsmodule);
1099
16
}