Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Modules/_codecsmodule.c
Line
Count
Source (jump to first uncovered line)
1
/* ------------------------------------------------------------------------
2
3
   _codecs -- Provides access to the codec registry and the builtin
4
              codecs.
5
6
   This module should never be imported directly. The standard library
7
   module "codecs" wraps this builtin module for use within Python.
8
9
   The codec registry is accessible via:
10
11
     register(search_function) -> None
12
13
     lookup(encoding) -> CodecInfo object
14
15
   The builtin Unicode codecs use the following interface:
16
17
     <encoding>_encode(Unicode_object[,errors='strict']) ->
18
        (string object, bytes consumed)
19
20
     <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21
        (Unicode object, bytes consumed)
22
23
   These <encoding>s are available: utf_8, unicode_escape,
24
   raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25
26
27
Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29
Copyright (c) Corporation for National Research Initiatives.
30
31
   ------------------------------------------------------------------------ */
32
33
#define PY_SSIZE_T_CLEAN
34
#include "Python.h"
35
36
#ifdef MS_WINDOWS
37
#include <windows.h>
38
#endif
39
40
/*[clinic input]
41
module _codecs
42
[clinic start generated code]*/
43
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44
45
#include "clinic/_codecsmodule.c.h"
46
47
/* --- Registry ----------------------------------------------------------- */
48
49
/*[clinic input]
50
_codecs.register
51
    search_function: object
52
    /
53
54
Register a codec search function.
55
56
Search functions are expected to take one argument, the encoding name in
57
all lower case letters, and either return None, or a tuple of functions
58
(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
59
[clinic start generated code]*/
60
61
static PyObject *
62
_codecs_register(PyObject *module, PyObject *search_function)
63
/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
64
14
{
65
14
    if (PyCodec_Register(search_function))
66
0
        return NULL;
67
68
14
    Py_RETURN_NONE;
69
14
}
70
71
/*[clinic input]
72
_codecs.lookup
73
    encoding: str
74
    /
75
76
Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
77
[clinic start generated code]*/
78
79
static PyObject *
80
_codecs_lookup_impl(PyObject *module, const char *encoding)
81
/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
82
0
{
83
0
    return _PyCodec_Lookup(encoding);
84
0
}
85
86
/*[clinic input]
87
_codecs.encode
88
    obj: object
89
    encoding: str(c_default="NULL") = "utf-8"
90
    errors: str(c_default="NULL") = "strict"
91
92
Encodes obj using the codec registered for encoding.
93
94
The default encoding is 'utf-8'.  errors may be given to set a
95
different error handling scheme.  Default is 'strict' meaning that encoding
96
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
97
and 'backslashreplace' as well as any other name registered with
98
codecs.register_error that can handle ValueErrors.
99
[clinic start generated code]*/
100
101
static PyObject *
102
_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
103
                    const char *errors)
104
/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
105
0
{
106
0
    if (encoding == NULL)
107
0
        encoding = PyUnicode_GetDefaultEncoding();
108
109
    /* Encode via the codec registry */
110
0
    return PyCodec_Encode(obj, encoding, errors);
111
0
}
112
113
/*[clinic input]
114
_codecs.decode
115
    obj: object
116
    encoding: str(c_default="NULL") = "utf-8"
117
    errors: str(c_default="NULL") = "strict"
118
119
Decodes obj using the codec registered for encoding.
120
121
Default encoding is 'utf-8'.  errors may be given to set a
122
different error handling scheme.  Default is 'strict' meaning that encoding
123
errors raise a ValueError.  Other possible values are 'ignore', 'replace'
124
and 'backslashreplace' as well as any other name registered with
125
codecs.register_error that can handle ValueErrors.
126
[clinic start generated code]*/
127
128
static PyObject *
129
_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
130
                    const char *errors)
131
/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
132
0
{
133
0
    if (encoding == NULL)
134
0
        encoding = PyUnicode_GetDefaultEncoding();
135
136
    /* Decode via the codec registry */
137
0
    return PyCodec_Decode(obj, encoding, errors);
138
0
}
139
140
/* --- Helpers ------------------------------------------------------------ */
141
142
/*[clinic input]
143
_codecs._forget_codec
144
145
    encoding: str
146
    /
147
148
Purge the named codec from the internal codec lookup cache
149
[clinic start generated code]*/
150
151
static PyObject *
152
_codecs__forget_codec_impl(PyObject *module, const char *encoding)
153
/*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/
154
0
{
155
0
    if (_PyCodec_Forget(encoding) < 0) {
156
0
        return NULL;
157
0
    };
158
0
    Py_RETURN_NONE;
159
0
}
160
161
static
162
PyObject *codec_tuple(PyObject *decoded,
163
                      Py_ssize_t len)
164
3
{
165
3
    if (decoded == NULL)
166
0
        return NULL;
167
3
    return Py_BuildValue("Nn", decoded, len);
168
3
}
169
170
/* --- String codecs ------------------------------------------------------ */
171
/*[clinic input]
172
_codecs.escape_decode
173
    data: Py_buffer(accept={str, buffer})
174
    errors: str(accept={str, NoneType}) = None
175
    /
176
[clinic start generated code]*/
177
178
static PyObject *
179
_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
180
                           const char *errors)
181
/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
182
0
{
183
0
    PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
184
0
                                             errors, 0, NULL);
185
0
    return codec_tuple(decoded, data->len);
186
0
}
187
188
/*[clinic input]
189
_codecs.escape_encode
190
    data: object(subclass_of='&PyBytes_Type')
191
    errors: str(accept={str, NoneType}) = None
192
    /
193
[clinic start generated code]*/
194
195
static PyObject *
196
_codecs_escape_encode_impl(PyObject *module, PyObject *data,
197
                           const char *errors)
198
/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
199
0
{
200
0
    Py_ssize_t size;
201
0
    Py_ssize_t newsize;
202
0
    PyObject *v;
203
204
0
    size = PyBytes_GET_SIZE(data);
205
0
    if (size > PY_SSIZE_T_MAX / 4) {
206
0
        PyErr_SetString(PyExc_OverflowError,
207
0
            "string is too large to encode");
208
0
            return NULL;
209
0
    }
210
0
    newsize = 4*size;
211
0
    v = PyBytes_FromStringAndSize(NULL, newsize);
212
213
0
    if (v == NULL) {
214
0
        return NULL;
215
0
    }
216
0
    else {
217
0
        Py_ssize_t i;
218
0
        char c;
219
0
        char *p = PyBytes_AS_STRING(v);
220
221
0
        for (i = 0; i < size; i++) {
222
            /* There's at least enough room for a hex escape */
223
0
            assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);
224
0
            c = PyBytes_AS_STRING(data)[i];
225
0
            if (c == '\'' || c == '\\')
226
0
                *p++ = '\\', *p++ = c;
227
0
            else if (c == '\t')
228
0
                *p++ = '\\', *p++ = 't';
229
0
            else if (c == '\n')
230
0
                *p++ = '\\', *p++ = 'n';
231
0
            else if (c == '\r')
232
0
                *p++ = '\\', *p++ = 'r';
233
0
            else if (c < ' ' || c >= 0x7f) {
234
0
                *p++ = '\\';
235
0
                *p++ = 'x';
236
0
                *p++ = Py_hexdigits[(c & 0xf0) >> 4];
237
0
                *p++ = Py_hexdigits[c & 0xf];
238
0
            }
239
0
            else
240
0
                *p++ = c;
241
0
        }
242
0
        *p = '\0';
243
0
        if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {
244
0
            return NULL;
245
0
        }
246
0
    }
247
248
0
    return codec_tuple(v, size);
249
0
}
250
251
/* --- Decoder ------------------------------------------------------------ */
252
/*[clinic input]
253
_codecs.utf_7_decode
254
    data: Py_buffer
255
    errors: str(accept={str, NoneType}) = None
256
    final: bool(accept={int}) = False
257
    /
258
[clinic start generated code]*/
259
260
static PyObject *
261
_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
262
                          const char *errors, int final)
263
/*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/
264
0
{
265
0
    Py_ssize_t consumed = data->len;
266
0
    PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
267
0
                                                     errors,
268
0
                                                     final ? NULL : &consumed);
269
0
    return codec_tuple(decoded, consumed);
270
0
}
271
272
/*[clinic input]
273
_codecs.utf_8_decode
274
    data: Py_buffer
275
    errors: str(accept={str, NoneType}) = None
276
    final: bool(accept={int}) = False
277
    /
278
[clinic start generated code]*/
279
280
static PyObject *
281
_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
282
                          const char *errors, int final)
283
/*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/
284
3
{
285
3
    Py_ssize_t consumed = data->len;
286
3
    PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
287
3
                                                     errors,
288
3
                                                     final ? NULL : &consumed);
289
3
    return codec_tuple(decoded, consumed);
290
3
}
291
292
/*[clinic input]
293
_codecs.utf_16_decode
294
    data: Py_buffer
295
    errors: str(accept={str, NoneType}) = None
296
    final: bool(accept={int}) = False
297
    /
298
[clinic start generated code]*/
299
300
static PyObject *
301
_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
302
                           const char *errors, int final)
303
/*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/
304
0
{
305
0
    int byteorder = 0;
306
    /* This is overwritten unless final is true. */
307
0
    Py_ssize_t consumed = data->len;
308
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
309
0
                                                      errors, &byteorder,
310
0
                                                      final ? NULL : &consumed);
311
0
    return codec_tuple(decoded, consumed);
312
0
}
313
314
/*[clinic input]
315
_codecs.utf_16_le_decode
316
    data: Py_buffer
317
    errors: str(accept={str, NoneType}) = None
318
    final: bool(accept={int}) = False
319
    /
320
[clinic start generated code]*/
321
322
static PyObject *
323
_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
324
                              const char *errors, int final)
325
/*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/
326
0
{
327
0
    int byteorder = -1;
328
    /* This is overwritten unless final is true. */
329
0
    Py_ssize_t consumed = data->len;
330
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
331
0
                                                      errors, &byteorder,
332
0
                                                      final ? NULL : &consumed);
333
0
    return codec_tuple(decoded, consumed);
334
0
}
335
336
/*[clinic input]
337
_codecs.utf_16_be_decode
338
    data: Py_buffer
339
    errors: str(accept={str, NoneType}) = None
340
    final: bool(accept={int}) = False
341
    /
342
[clinic start generated code]*/
343
344
static PyObject *
345
_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
346
                              const char *errors, int final)
347
/*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/
348
0
{
349
0
    int byteorder = 1;
350
    /* This is overwritten unless final is true. */
351
0
    Py_ssize_t consumed = data->len;
352
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
353
0
                                                      errors, &byteorder,
354
0
                                                      final ? NULL : &consumed);
355
0
    return codec_tuple(decoded, consumed);
356
0
}
357
358
/* This non-standard version also provides access to the byteorder
359
   parameter of the builtin UTF-16 codec.
360
361
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
362
   being the value in effect at the end of data.
363
364
*/
365
/*[clinic input]
366
_codecs.utf_16_ex_decode
367
    data: Py_buffer
368
    errors: str(accept={str, NoneType}) = None
369
    byteorder: int = 0
370
    final: bool(accept={int}) = False
371
    /
372
[clinic start generated code]*/
373
374
static PyObject *
375
_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
376
                              const char *errors, int byteorder, int final)
377
/*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/
378
0
{
379
    /* This is overwritten unless final is true. */
380
0
    Py_ssize_t consumed = data->len;
381
382
0
    PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
383
0
                                                      errors, &byteorder,
384
0
                                                      final ? NULL : &consumed);
385
0
    if (decoded == NULL)
386
0
        return NULL;
387
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
388
0
}
389
390
/*[clinic input]
391
_codecs.utf_32_decode
392
    data: Py_buffer
393
    errors: str(accept={str, NoneType}) = None
394
    final: bool(accept={int}) = False
395
    /
396
[clinic start generated code]*/
397
398
static PyObject *
399
_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
400
                           const char *errors, int final)
401
/*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/
402
0
{
403
0
    int byteorder = 0;
404
    /* This is overwritten unless final is true. */
405
0
    Py_ssize_t consumed = data->len;
406
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
407
0
                                                      errors, &byteorder,
408
0
                                                      final ? NULL : &consumed);
409
0
    return codec_tuple(decoded, consumed);
410
0
}
411
412
/*[clinic input]
413
_codecs.utf_32_le_decode
414
    data: Py_buffer
415
    errors: str(accept={str, NoneType}) = None
416
    final: bool(accept={int}) = False
417
    /
418
[clinic start generated code]*/
419
420
static PyObject *
421
_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
422
                              const char *errors, int final)
423
/*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/
424
0
{
425
0
    int byteorder = -1;
426
    /* This is overwritten unless final is true. */
427
0
    Py_ssize_t consumed = data->len;
428
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
429
0
                                                      errors, &byteorder,
430
0
                                                      final ? NULL : &consumed);
431
0
    return codec_tuple(decoded, consumed);
432
0
}
433
434
/*[clinic input]
435
_codecs.utf_32_be_decode
436
    data: Py_buffer
437
    errors: str(accept={str, NoneType}) = None
438
    final: bool(accept={int}) = False
439
    /
440
[clinic start generated code]*/
441
442
static PyObject *
443
_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
444
                              const char *errors, int final)
445
/*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/
446
0
{
447
0
    int byteorder = 1;
448
    /* This is overwritten unless final is true. */
449
0
    Py_ssize_t consumed = data->len;
450
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
451
0
                                                      errors, &byteorder,
452
0
                                                      final ? NULL : &consumed);
453
0
    return codec_tuple(decoded, consumed);
454
0
}
455
456
/* This non-standard version also provides access to the byteorder
457
   parameter of the builtin UTF-32 codec.
458
459
   It returns a tuple (unicode, bytesread, byteorder) with byteorder
460
   being the value in effect at the end of data.
461
462
*/
463
/*[clinic input]
464
_codecs.utf_32_ex_decode
465
    data: Py_buffer
466
    errors: str(accept={str, NoneType}) = None
467
    byteorder: int = 0
468
    final: bool(accept={int}) = False
469
    /
470
[clinic start generated code]*/
471
472
static PyObject *
473
_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
474
                              const char *errors, int byteorder, int final)
475
/*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/
476
0
{
477
0
    Py_ssize_t consumed = data->len;
478
0
    PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
479
0
                                                      errors, &byteorder,
480
0
                                                      final ? NULL : &consumed);
481
0
    if (decoded == NULL)
482
0
        return NULL;
483
0
    return Py_BuildValue("Nni", decoded, consumed, byteorder);
484
0
}
485
486
/*[clinic input]
487
_codecs.unicode_escape_decode
488
    data: Py_buffer(accept={str, buffer})
489
    errors: str(accept={str, NoneType}) = None
490
    /
491
[clinic start generated code]*/
492
493
static PyObject *
494
_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
495
                                   const char *errors)
496
/*[clinic end generated code: output=3ca3c917176b82ab input=8328081a3a569bd6]*/
497
0
{
498
0
    PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
499
0
                                                      errors);
500
0
    return codec_tuple(decoded, data->len);
501
0
}
502
503
/*[clinic input]
504
_codecs.raw_unicode_escape_decode
505
    data: Py_buffer(accept={str, buffer})
506
    errors: str(accept={str, NoneType}) = None
507
    /
508
[clinic start generated code]*/
509
510
static PyObject *
511
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
512
                                       const char *errors)
513
/*[clinic end generated code: output=c98eeb56028070a6 input=d2f5159ce3b3392f]*/
514
0
{
515
0
    PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len,
516
0
                                                         errors);
517
0
    return codec_tuple(decoded, data->len);
518
0
}
519
520
/*[clinic input]
521
_codecs.latin_1_decode
522
    data: Py_buffer
523
    errors: str(accept={str, NoneType}) = None
524
    /
525
[clinic start generated code]*/
526
527
static PyObject *
528
_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
529
                            const char *errors)
530
/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
531
0
{
532
0
    PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
533
0
    return codec_tuple(decoded, data->len);
534
0
}
535
536
/*[clinic input]
537
_codecs.ascii_decode
538
    data: Py_buffer
539
    errors: str(accept={str, NoneType}) = None
540
    /
541
[clinic start generated code]*/
542
543
static PyObject *
544
_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
545
                          const char *errors)
546
/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
547
0
{
548
0
    PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
549
0
    return codec_tuple(decoded, data->len);
550
0
}
551
552
/*[clinic input]
553
_codecs.charmap_decode
554
    data: Py_buffer
555
    errors: str(accept={str, NoneType}) = None
556
    mapping: object = None
557
    /
558
[clinic start generated code]*/
559
560
static PyObject *
561
_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
562
                            const char *errors, PyObject *mapping)
563
/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
564
0
{
565
0
    PyObject *decoded;
566
567
0
    if (mapping == Py_None)
568
0
        mapping = NULL;
569
570
0
    decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
571
0
    return codec_tuple(decoded, data->len);
572
0
}
573
574
#ifdef MS_WINDOWS
575
576
/*[clinic input]
577
_codecs.mbcs_decode
578
    data: Py_buffer
579
    errors: str(accept={str, NoneType}) = None
580
    final: bool(accept={int}) = False
581
    /
582
[clinic start generated code]*/
583
584
static PyObject *
585
_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
586
                         const char *errors, int final)
587
/*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/
588
{
589
    Py_ssize_t consumed = data->len;
590
    PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
591
            errors, final ? NULL : &consumed);
592
    return codec_tuple(decoded, consumed);
593
}
594
595
/*[clinic input]
596
_codecs.oem_decode
597
    data: Py_buffer
598
    errors: str(accept={str, NoneType}) = None
599
    final: bool(accept={int}) = False
600
    /
601
[clinic start generated code]*/
602
603
static PyObject *
604
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
605
                        const char *errors, int final)
606
/*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/
607
{
608
    Py_ssize_t consumed = data->len;
609
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
610
        data->buf, data->len, errors, final ? NULL : &consumed);
611
    return codec_tuple(decoded, consumed);
612
}
613
614
/*[clinic input]
615
_codecs.code_page_decode
616
    codepage: int
617
    data: Py_buffer
618
    errors: str(accept={str, NoneType}) = None
619
    final: bool(accept={int}) = False
620
    /
621
[clinic start generated code]*/
622
623
static PyObject *
624
_codecs_code_page_decode_impl(PyObject *module, int codepage,
625
                              Py_buffer *data, const char *errors, int final)
626
/*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/
627
{
628
    Py_ssize_t consumed = data->len;
629
    PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
630
                                                         data->buf, data->len,
631
                                                         errors,
632
                                                         final ? NULL : &consumed);
633
    return codec_tuple(decoded, consumed);
634
}
635
636
#endif /* MS_WINDOWS */
637
638
/* --- Encoder ------------------------------------------------------------ */
639
640
/*[clinic input]
641
_codecs.readbuffer_encode
642
    data: Py_buffer(accept={str, buffer})
643
    errors: str(accept={str, NoneType}) = None
644
    /
645
[clinic start generated code]*/
646
647
static PyObject *
648
_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
649
                               const char *errors)
650
/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
651
0
{
652
0
    PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
653
0
    return codec_tuple(result, data->len);
654
0
}
655
656
/*[clinic input]
657
_codecs.utf_7_encode
658
    str: unicode
659
    errors: str(accept={str, NoneType}) = None
660
    /
661
[clinic start generated code]*/
662
663
static PyObject *
664
_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
665
                          const char *errors)
666
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
667
0
{
668
0
    return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),
669
0
                       PyUnicode_GET_LENGTH(str));
670
0
}
671
672
/*[clinic input]
673
_codecs.utf_8_encode
674
    str: unicode
675
    errors: str(accept={str, NoneType}) = None
676
    /
677
[clinic start generated code]*/
678
679
static PyObject *
680
_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
681
                          const char *errors)
682
/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
683
0
{
684
0
    return codec_tuple(_PyUnicode_AsUTF8String(str, errors),
685
0
                       PyUnicode_GET_LENGTH(str));
686
0
}
687
688
/* This version provides access to the byteorder parameter of the
689
   builtin UTF-16 codecs as optional third argument. It defaults to 0
690
   which means: use the native byte order and prepend the data with a
691
   BOM mark.
692
693
*/
694
695
/*[clinic input]
696
_codecs.utf_16_encode
697
    str: unicode
698
    errors: str(accept={str, NoneType}) = None
699
    byteorder: int = 0
700
    /
701
[clinic start generated code]*/
702
703
static PyObject *
704
_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
705
                           const char *errors, int byteorder)
706
/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
707
0
{
708
0
    return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),
709
0
                       PyUnicode_GET_LENGTH(str));
710
0
}
711
712
/*[clinic input]
713
_codecs.utf_16_le_encode
714
    str: unicode
715
    errors: str(accept={str, NoneType}) = None
716
    /
717
[clinic start generated code]*/
718
719
static PyObject *
720
_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
721
                              const char *errors)
722
/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
723
0
{
724
0
    return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),
725
0
                       PyUnicode_GET_LENGTH(str));
726
0
}
727
728
/*[clinic input]
729
_codecs.utf_16_be_encode
730
    str: unicode
731
    errors: str(accept={str, NoneType}) = None
732
    /
733
[clinic start generated code]*/
734
735
static PyObject *
736
_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
737
                              const char *errors)
738
/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
739
0
{
740
0
    return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),
741
0
                       PyUnicode_GET_LENGTH(str));
742
0
}
743
744
/* This version provides access to the byteorder parameter of the
745
   builtin UTF-32 codecs as optional third argument. It defaults to 0
746
   which means: use the native byte order and prepend the data with a
747
   BOM mark.
748
749
*/
750
751
/*[clinic input]
752
_codecs.utf_32_encode
753
    str: unicode
754
    errors: str(accept={str, NoneType}) = None
755
    byteorder: int = 0
756
    /
757
[clinic start generated code]*/
758
759
static PyObject *
760
_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
761
                           const char *errors, int byteorder)
762
/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
763
0
{
764
0
    return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),
765
0
                       PyUnicode_GET_LENGTH(str));
766
0
}
767
768
/*[clinic input]
769
_codecs.utf_32_le_encode
770
    str: unicode
771
    errors: str(accept={str, NoneType}) = None
772
    /
773
[clinic start generated code]*/
774
775
static PyObject *
776
_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
777
                              const char *errors)
778
/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
779
0
{
780
0
    return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),
781
0
                       PyUnicode_GET_LENGTH(str));
782
0
}
783
784
/*[clinic input]
785
_codecs.utf_32_be_encode
786
    str: unicode
787
    errors: str(accept={str, NoneType}) = None
788
    /
789
[clinic start generated code]*/
790
791
static PyObject *
792
_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
793
                              const char *errors)
794
/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
795
0
{
796
0
    return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),
797
0
                       PyUnicode_GET_LENGTH(str));
798
0
}
799
800
/*[clinic input]
801
_codecs.unicode_escape_encode
802
    str: unicode
803
    errors: str(accept={str, NoneType}) = None
804
    /
805
[clinic start generated code]*/
806
807
static PyObject *
808
_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
809
                                   const char *errors)
810
/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
811
0
{
812
0
    return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),
813
0
                       PyUnicode_GET_LENGTH(str));
814
0
}
815
816
/*[clinic input]
817
_codecs.raw_unicode_escape_encode
818
    str: unicode
819
    errors: str(accept={str, NoneType}) = None
820
    /
821
[clinic start generated code]*/
822
823
static PyObject *
824
_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
825
                                       const char *errors)
826
/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
827
0
{
828
0
    return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),
829
0
                       PyUnicode_GET_LENGTH(str));
830
0
}
831
832
/*[clinic input]
833
_codecs.latin_1_encode
834
    str: unicode
835
    errors: str(accept={str, NoneType}) = None
836
    /
837
[clinic start generated code]*/
838
839
static PyObject *
840
_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
841
                            const char *errors)
842
/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
843
0
{
844
0
    return codec_tuple(_PyUnicode_AsLatin1String(str, errors),
845
0
                       PyUnicode_GET_LENGTH(str));
846
0
}
847
848
/*[clinic input]
849
_codecs.ascii_encode
850
    str: unicode
851
    errors: str(accept={str, NoneType}) = None
852
    /
853
[clinic start generated code]*/
854
855
static PyObject *
856
_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
857
                          const char *errors)
858
/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
859
0
{
860
0
    return codec_tuple(_PyUnicode_AsASCIIString(str, errors),
861
0
                       PyUnicode_GET_LENGTH(str));
862
0
}
863
864
/*[clinic input]
865
_codecs.charmap_encode
866
    str: unicode
867
    errors: str(accept={str, NoneType}) = None
868
    mapping: object = None
869
    /
870
[clinic start generated code]*/
871
872
static PyObject *
873
_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
874
                            const char *errors, PyObject *mapping)
875
/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
876
0
{
877
0
    if (mapping == Py_None)
878
0
        mapping = NULL;
879
880
0
    return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),
881
0
                       PyUnicode_GET_LENGTH(str));
882
0
}
883
884
/*[clinic input]
885
_codecs.charmap_build
886
    map: unicode
887
    /
888
[clinic start generated code]*/
889
890
static PyObject *
891
_codecs_charmap_build_impl(PyObject *module, PyObject *map)
892
/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
893
0
{
894
0
    return PyUnicode_BuildEncodingMap(map);
895
0
}
896
897
#ifdef MS_WINDOWS
898
899
/*[clinic input]
900
_codecs.mbcs_encode
901
    str: unicode
902
    errors: str(accept={str, NoneType}) = None
903
    /
904
[clinic start generated code]*/
905
906
static PyObject *
907
_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
908
/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
909
{
910
    return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),
911
                       PyUnicode_GET_LENGTH(str));
912
}
913
914
/*[clinic input]
915
_codecs.oem_encode
916
    str: unicode
917
    errors: str(accept={str, NoneType}) = None
918
    /
919
[clinic start generated code]*/
920
921
static PyObject *
922
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
923
/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
924
{
925
    return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),
926
        PyUnicode_GET_LENGTH(str));
927
}
928
929
/*[clinic input]
930
_codecs.code_page_encode
931
    code_page: int
932
    str: unicode
933
    errors: str(accept={str, NoneType}) = None
934
    /
935
[clinic start generated code]*/
936
937
static PyObject *
938
_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
939
                              const char *errors)
940
/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
941
{
942
    return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),
943
                       PyUnicode_GET_LENGTH(str));
944
}
945
946
#endif /* MS_WINDOWS */
947
948
/* --- Error handler registry --------------------------------------------- */
949
950
/*[clinic input]
951
_codecs.register_error
952
    errors: str
953
    handler: object
954
    /
955
956
Register the specified error handler under the name errors.
957
958
handler must be a callable object, that will be called with an exception
959
instance containing information about the location of the encoding/decoding
960
error and must return a (replacement, new position) tuple.
961
[clinic start generated code]*/
962
963
static PyObject *
964
_codecs_register_error_impl(PyObject *module, const char *errors,
965
                            PyObject *handler)
966
/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
967
0
{
968
0
    if (PyCodec_RegisterError(errors, handler))
969
0
        return NULL;
970
0
    Py_RETURN_NONE;
971
0
}
972
973
/*[clinic input]
974
_codecs.lookup_error
975
    name: str
976
    /
977
978
lookup_error(errors) -> handler
979
980
Return the error handler for the specified error handling name or raise a
981
LookupError, if no handler exists under this name.
982
[clinic start generated code]*/
983
984
static PyObject *
985
_codecs_lookup_error_impl(PyObject *module, const char *name)
986
/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
987
84
{
988
84
    return PyCodec_LookupError(name);
989
84
}
990
991
/* --- Module API --------------------------------------------------------- */
992
993
static PyMethodDef _codecs_functions[] = {
994
    _CODECS_REGISTER_METHODDEF
995
    _CODECS_LOOKUP_METHODDEF
996
    _CODECS_ENCODE_METHODDEF
997
    _CODECS_DECODE_METHODDEF
998
    _CODECS_ESCAPE_ENCODE_METHODDEF
999
    _CODECS_ESCAPE_DECODE_METHODDEF
1000
    _CODECS_UTF_8_ENCODE_METHODDEF
1001
    _CODECS_UTF_8_DECODE_METHODDEF
1002
    _CODECS_UTF_7_ENCODE_METHODDEF
1003
    _CODECS_UTF_7_DECODE_METHODDEF
1004
    _CODECS_UTF_16_ENCODE_METHODDEF
1005
    _CODECS_UTF_16_LE_ENCODE_METHODDEF
1006
    _CODECS_UTF_16_BE_ENCODE_METHODDEF
1007
    _CODECS_UTF_16_DECODE_METHODDEF
1008
    _CODECS_UTF_16_LE_DECODE_METHODDEF
1009
    _CODECS_UTF_16_BE_DECODE_METHODDEF
1010
    _CODECS_UTF_16_EX_DECODE_METHODDEF
1011
    _CODECS_UTF_32_ENCODE_METHODDEF
1012
    _CODECS_UTF_32_LE_ENCODE_METHODDEF
1013
    _CODECS_UTF_32_BE_ENCODE_METHODDEF
1014
    _CODECS_UTF_32_DECODE_METHODDEF
1015
    _CODECS_UTF_32_LE_DECODE_METHODDEF
1016
    _CODECS_UTF_32_BE_DECODE_METHODDEF
1017
    _CODECS_UTF_32_EX_DECODE_METHODDEF
1018
    _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1019
    _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1020
    _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1021
    _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1022
    _CODECS_LATIN_1_ENCODE_METHODDEF
1023
    _CODECS_LATIN_1_DECODE_METHODDEF
1024
    _CODECS_ASCII_ENCODE_METHODDEF
1025
    _CODECS_ASCII_DECODE_METHODDEF
1026
    _CODECS_CHARMAP_ENCODE_METHODDEF
1027
    _CODECS_CHARMAP_DECODE_METHODDEF
1028
    _CODECS_CHARMAP_BUILD_METHODDEF
1029
    _CODECS_READBUFFER_ENCODE_METHODDEF
1030
    _CODECS_MBCS_ENCODE_METHODDEF
1031
    _CODECS_MBCS_DECODE_METHODDEF
1032
    _CODECS_OEM_ENCODE_METHODDEF
1033
    _CODECS_OEM_DECODE_METHODDEF
1034
    _CODECS_CODE_PAGE_ENCODE_METHODDEF
1035
    _CODECS_CODE_PAGE_DECODE_METHODDEF
1036
    _CODECS_REGISTER_ERROR_METHODDEF
1037
    _CODECS_LOOKUP_ERROR_METHODDEF
1038
    _CODECS__FORGET_CODEC_METHODDEF
1039
    {NULL, NULL}                /* sentinel */
1040
};
1041
1042
static struct PyModuleDef codecsmodule = {
1043
        PyModuleDef_HEAD_INIT,
1044
        "_codecs",
1045
        NULL,
1046
        -1,
1047
        _codecs_functions,
1048
        NULL,
1049
        NULL,
1050
        NULL,
1051
        NULL
1052
};
1053
1054
PyMODINIT_FUNC
1055
PyInit__codecs(void)
1056
14
{
1057
14
        return PyModule_Create(&codecsmodule);
1058
14
}