/src/Python-3.8.3/Modules/_codecsmodule.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* ------------------------------------------------------------------------ |
2 | | |
3 | | _codecs -- Provides access to the codec registry and the builtin |
4 | | codecs. |
5 | | |
6 | | This module should never be imported directly. The standard library |
7 | | module "codecs" wraps this builtin module for use within Python. |
8 | | |
9 | | The codec registry is accessible via: |
10 | | |
11 | | register(search_function) -> None |
12 | | |
13 | | lookup(encoding) -> CodecInfo object |
14 | | |
15 | | The builtin Unicode codecs use the following interface: |
16 | | |
17 | | <encoding>_encode(Unicode_object[,errors='strict']) -> |
18 | | (bytes object, characters consumed) |
19 | | |
20 | | <encoding>_decode(char_buffer_obj[,errors='strict']) -> |
21 | | (Unicode object, bytes consumed) |
22 | | |
23 | | These <encoding>s are available: utf_8, unicode_escape, |
24 | | raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32). |
25 | | |
26 | | |
27 | | Written by Marc-Andre Lemburg (mal@lemburg.com). |
28 | | |
29 | | Copyright (c) Corporation for National Research Initiatives. |
30 | | |
31 | | ------------------------------------------------------------------------ */ |
32 | | |
33 | | #define PY_SSIZE_T_CLEAN |
34 | | #include "Python.h" |
35 | | |
36 | | #ifdef MS_WINDOWS |
37 | | #include <windows.h> |
38 | | #endif |
39 | | |
40 | | /*[clinic input] |
41 | | module _codecs |
42 | | [clinic start generated code]*/ |
43 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/ |
44 | | |
45 | | #include "clinic/_codecsmodule.c.h" |
46 | | |
47 | | /* --- Registry ----------------------------------------------------------- */ |
48 | | |
49 | | /*[clinic input] |
50 | | _codecs.register |
51 | | search_function: object |
52 | | / |
53 | | |
54 | | Register a codec search function. |
55 | | |
56 | | Search functions are expected to take one argument, the encoding name in |
57 | | all lower case letters, and either return None, or a tuple of functions |
58 | | (encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object). |
59 | | [clinic start generated code]*/ |
60 | | |
61 | | static PyObject * |
62 | | _codecs_register(PyObject *module, PyObject *search_function) |
63 | | /*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/ |
64 | 14 | { |
65 | 14 | if (PyCodec_Register(search_function)) |
66 | 0 | return NULL; |
67 | | |
68 | 14 | Py_RETURN_NONE; |
69 | 14 | } |
70 | | |
71 | | /*[clinic input] |
72 | | _codecs.lookup |
73 | | encoding: str |
74 | | / |
75 | | |
76 | | Looks up a codec tuple in the Python codec registry and returns a CodecInfo object. |
77 | | [clinic start generated code]*/ |
78 | | |
79 | | static PyObject * |
80 | | _codecs_lookup_impl(PyObject *module, const char *encoding) |
81 | | /*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/ |
82 | 0 | { |
83 | 0 | return _PyCodec_Lookup(encoding); |
84 | 0 | } |
85 | | |
86 | | /*[clinic input] |
87 | | _codecs.encode |
88 | | obj: object |
89 | | encoding: str(c_default="NULL") = "utf-8" |
90 | | errors: str(c_default="NULL") = "strict" |
91 | | |
92 | | Encodes obj using the codec registered for encoding. |
93 | | |
94 | | The default encoding is 'utf-8'. errors may be given to set a |
95 | | different error handling scheme. Default is 'strict' meaning that encoding |
96 | | errors raise a ValueError. Other possible values are 'ignore', 'replace' |
97 | | and 'backslashreplace' as well as any other name registered with |
98 | | codecs.register_error that can handle ValueErrors. |
99 | | [clinic start generated code]*/ |
100 | | |
101 | | static PyObject * |
102 | | _codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding, |
103 | | const char *errors) |
104 | | /*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/ |
105 | 0 | { |
106 | 0 | if (encoding == NULL) |
107 | 0 | encoding = PyUnicode_GetDefaultEncoding(); |
108 | | |
109 | | /* Encode via the codec registry */ |
110 | 0 | return PyCodec_Encode(obj, encoding, errors); |
111 | 0 | } |
112 | | |
113 | | /*[clinic input] |
114 | | _codecs.decode |
115 | | obj: object |
116 | | encoding: str(c_default="NULL") = "utf-8" |
117 | | errors: str(c_default="NULL") = "strict" |
118 | | |
119 | | Decodes obj using the codec registered for encoding. |
120 | | |
121 | | Default encoding is 'utf-8'. errors may be given to set a |
122 | | different error handling scheme. Default is 'strict' meaning that encoding |
123 | | errors raise a ValueError. Other possible values are 'ignore', 'replace' |
124 | | and 'backslashreplace' as well as any other name registered with |
125 | | codecs.register_error that can handle ValueErrors. |
126 | | [clinic start generated code]*/ |
127 | | |
128 | | static PyObject * |
129 | | _codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding, |
130 | | const char *errors) |
131 | | /*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/ |
132 | 0 | { |
133 | 0 | if (encoding == NULL) |
134 | 0 | encoding = PyUnicode_GetDefaultEncoding(); |
135 | | |
136 | | /* Decode via the codec registry */ |
137 | 0 | return PyCodec_Decode(obj, encoding, errors); |
138 | 0 | } |
139 | | |
140 | | /* --- Helpers ------------------------------------------------------------ */ |
141 | | |
142 | | /*[clinic input] |
143 | | _codecs._forget_codec |
144 | | |
145 | | encoding: str |
146 | | / |
147 | | |
148 | | Purge the named codec from the internal codec lookup cache |
149 | | [clinic start generated code]*/ |
150 | | |
151 | | static PyObject * |
152 | | _codecs__forget_codec_impl(PyObject *module, const char *encoding) |
153 | | /*[clinic end generated code: output=0bde9f0a5b084aa2 input=18d5d92d0e386c38]*/ |
154 | 0 | { |
155 | 0 | if (_PyCodec_Forget(encoding) < 0) { |
156 | 0 | return NULL; |
157 | 0 | }; |
158 | 0 | Py_RETURN_NONE; |
159 | 0 | } |
160 | | |
161 | | static |
162 | | PyObject *codec_tuple(PyObject *decoded, |
163 | | Py_ssize_t len) |
164 | 3 | { |
165 | 3 | if (decoded == NULL) |
166 | 0 | return NULL; |
167 | 3 | return Py_BuildValue("Nn", decoded, len); |
168 | 3 | } |
169 | | |
170 | | /* --- String codecs ------------------------------------------------------ */ |
171 | | /*[clinic input] |
172 | | _codecs.escape_decode |
173 | | data: Py_buffer(accept={str, buffer}) |
174 | | errors: str(accept={str, NoneType}) = None |
175 | | / |
176 | | [clinic start generated code]*/ |
177 | | |
178 | | static PyObject * |
179 | | _codecs_escape_decode_impl(PyObject *module, Py_buffer *data, |
180 | | const char *errors) |
181 | | /*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/ |
182 | 0 | { |
183 | 0 | PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len, |
184 | 0 | errors, 0, NULL); |
185 | 0 | return codec_tuple(decoded, data->len); |
186 | 0 | } |
187 | | |
188 | | /*[clinic input] |
189 | | _codecs.escape_encode |
190 | | data: object(subclass_of='&PyBytes_Type') |
191 | | errors: str(accept={str, NoneType}) = None |
192 | | / |
193 | | [clinic start generated code]*/ |
194 | | |
195 | | static PyObject * |
196 | | _codecs_escape_encode_impl(PyObject *module, PyObject *data, |
197 | | const char *errors) |
198 | | /*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/ |
199 | 0 | { |
200 | 0 | Py_ssize_t size; |
201 | 0 | Py_ssize_t newsize; |
202 | 0 | PyObject *v; |
203 | |
|
204 | 0 | size = PyBytes_GET_SIZE(data); |
205 | 0 | if (size > PY_SSIZE_T_MAX / 4) { |
206 | 0 | PyErr_SetString(PyExc_OverflowError, |
207 | 0 | "string is too large to encode"); |
208 | 0 | return NULL; |
209 | 0 | } |
210 | 0 | newsize = 4*size; |
211 | 0 | v = PyBytes_FromStringAndSize(NULL, newsize); |
212 | |
|
213 | 0 | if (v == NULL) { |
214 | 0 | return NULL; |
215 | 0 | } |
216 | 0 | else { |
217 | 0 | Py_ssize_t i; |
218 | 0 | char c; |
219 | 0 | char *p = PyBytes_AS_STRING(v); |
220 | |
|
221 | 0 | for (i = 0; i < size; i++) { |
222 | | /* There's at least enough room for a hex escape */ |
223 | 0 | assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4); |
224 | 0 | c = PyBytes_AS_STRING(data)[i]; |
225 | 0 | if (c == '\'' || c == '\\') |
226 | 0 | *p++ = '\\', *p++ = c; |
227 | 0 | else if (c == '\t') |
228 | 0 | *p++ = '\\', *p++ = 't'; |
229 | 0 | else if (c == '\n') |
230 | 0 | *p++ = '\\', *p++ = 'n'; |
231 | 0 | else if (c == '\r') |
232 | 0 | *p++ = '\\', *p++ = 'r'; |
233 | 0 | else if (c < ' ' || c >= 0x7f) { |
234 | 0 | *p++ = '\\'; |
235 | 0 | *p++ = 'x'; |
236 | 0 | *p++ = Py_hexdigits[(c & 0xf0) >> 4]; |
237 | 0 | *p++ = Py_hexdigits[c & 0xf]; |
238 | 0 | } |
239 | 0 | else |
240 | 0 | *p++ = c; |
241 | 0 | } |
242 | 0 | *p = '\0'; |
243 | 0 | if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) { |
244 | 0 | return NULL; |
245 | 0 | } |
246 | 0 | } |
247 | | |
248 | 0 | return codec_tuple(v, size); |
249 | 0 | } |
250 | | |
251 | | /* --- Decoder ------------------------------------------------------------ */ |
252 | | /*[clinic input] |
253 | | _codecs.utf_7_decode |
254 | | data: Py_buffer |
255 | | errors: str(accept={str, NoneType}) = None |
256 | | final: bool(accept={int}) = False |
257 | | / |
258 | | [clinic start generated code]*/ |
259 | | |
260 | | static PyObject * |
261 | | _codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data, |
262 | | const char *errors, int final) |
263 | | /*[clinic end generated code: output=0cd3a944a32a4089 input=22c395d357815d26]*/ |
264 | 0 | { |
265 | 0 | Py_ssize_t consumed = data->len; |
266 | 0 | PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len, |
267 | 0 | errors, |
268 | 0 | final ? NULL : &consumed); |
269 | 0 | return codec_tuple(decoded, consumed); |
270 | 0 | } |
271 | | |
272 | | /*[clinic input] |
273 | | _codecs.utf_8_decode |
274 | | data: Py_buffer |
275 | | errors: str(accept={str, NoneType}) = None |
276 | | final: bool(accept={int}) = False |
277 | | / |
278 | | [clinic start generated code]*/ |
279 | | |
280 | | static PyObject * |
281 | | _codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data, |
282 | | const char *errors, int final) |
283 | | /*[clinic end generated code: output=10f74dec8d9bb8bf input=f611b3867352ba59]*/ |
284 | 3 | { |
285 | 3 | Py_ssize_t consumed = data->len; |
286 | 3 | PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len, |
287 | 3 | errors, |
288 | 3 | final ? NULL : &consumed); |
289 | 3 | return codec_tuple(decoded, consumed); |
290 | 3 | } |
291 | | |
292 | | /*[clinic input] |
293 | | _codecs.utf_16_decode |
294 | | data: Py_buffer |
295 | | errors: str(accept={str, NoneType}) = None |
296 | | final: bool(accept={int}) = False |
297 | | / |
298 | | [clinic start generated code]*/ |
299 | | |
300 | | static PyObject * |
301 | | _codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data, |
302 | | const char *errors, int final) |
303 | | /*[clinic end generated code: output=783b442abcbcc2d0 input=191d360bd7309180]*/ |
304 | 0 | { |
305 | 0 | int byteorder = 0; |
306 | | /* This is overwritten unless final is true. */ |
307 | 0 | Py_ssize_t consumed = data->len; |
308 | 0 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
309 | 0 | errors, &byteorder, |
310 | 0 | final ? NULL : &consumed); |
311 | 0 | return codec_tuple(decoded, consumed); |
312 | 0 | } |
313 | | |
314 | | /*[clinic input] |
315 | | _codecs.utf_16_le_decode |
316 | | data: Py_buffer |
317 | | errors: str(accept={str, NoneType}) = None |
318 | | final: bool(accept={int}) = False |
319 | | / |
320 | | [clinic start generated code]*/ |
321 | | |
322 | | static PyObject * |
323 | | _codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data, |
324 | | const char *errors, int final) |
325 | | /*[clinic end generated code: output=899b9e6364379dcd input=c6904fdc27fb4724]*/ |
326 | 0 | { |
327 | 0 | int byteorder = -1; |
328 | | /* This is overwritten unless final is true. */ |
329 | 0 | Py_ssize_t consumed = data->len; |
330 | 0 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
331 | 0 | errors, &byteorder, |
332 | 0 | final ? NULL : &consumed); |
333 | 0 | return codec_tuple(decoded, consumed); |
334 | 0 | } |
335 | | |
336 | | /*[clinic input] |
337 | | _codecs.utf_16_be_decode |
338 | | data: Py_buffer |
339 | | errors: str(accept={str, NoneType}) = None |
340 | | final: bool(accept={int}) = False |
341 | | / |
342 | | [clinic start generated code]*/ |
343 | | |
344 | | static PyObject * |
345 | | _codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data, |
346 | | const char *errors, int final) |
347 | | /*[clinic end generated code: output=49f6465ea07669c8 input=e49012400974649b]*/ |
348 | 0 | { |
349 | 0 | int byteorder = 1; |
350 | | /* This is overwritten unless final is true. */ |
351 | 0 | Py_ssize_t consumed = data->len; |
352 | 0 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
353 | 0 | errors, &byteorder, |
354 | 0 | final ? NULL : &consumed); |
355 | 0 | return codec_tuple(decoded, consumed); |
356 | 0 | } |
357 | | |
358 | | /* This non-standard version also provides access to the byteorder |
359 | | parameter of the builtin UTF-16 codec. |
360 | | |
361 | | It returns a tuple (unicode, bytesread, byteorder) with byteorder |
362 | | being the value in effect at the end of data. |
363 | | |
364 | | */ |
365 | | /*[clinic input] |
366 | | _codecs.utf_16_ex_decode |
367 | | data: Py_buffer |
368 | | errors: str(accept={str, NoneType}) = None |
369 | | byteorder: int = 0 |
370 | | final: bool(accept={int}) = False |
371 | | / |
372 | | [clinic start generated code]*/ |
373 | | |
374 | | static PyObject * |
375 | | _codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data, |
376 | | const char *errors, int byteorder, int final) |
377 | | /*[clinic end generated code: output=0f385f251ecc1988 input=5a9c19f2e6b6cf0e]*/ |
378 | 0 | { |
379 | | /* This is overwritten unless final is true. */ |
380 | 0 | Py_ssize_t consumed = data->len; |
381 | |
|
382 | 0 | PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len, |
383 | 0 | errors, &byteorder, |
384 | 0 | final ? NULL : &consumed); |
385 | 0 | if (decoded == NULL) |
386 | 0 | return NULL; |
387 | 0 | return Py_BuildValue("Nni", decoded, consumed, byteorder); |
388 | 0 | } |
389 | | |
390 | | /*[clinic input] |
391 | | _codecs.utf_32_decode |
392 | | data: Py_buffer |
393 | | errors: str(accept={str, NoneType}) = None |
394 | | final: bool(accept={int}) = False |
395 | | / |
396 | | [clinic start generated code]*/ |
397 | | |
398 | | static PyObject * |
399 | | _codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data, |
400 | | const char *errors, int final) |
401 | | /*[clinic end generated code: output=2fc961807f7b145f input=fd7193965627eb58]*/ |
402 | 0 | { |
403 | 0 | int byteorder = 0; |
404 | | /* This is overwritten unless final is true. */ |
405 | 0 | Py_ssize_t consumed = data->len; |
406 | 0 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
407 | 0 | errors, &byteorder, |
408 | 0 | final ? NULL : &consumed); |
409 | 0 | return codec_tuple(decoded, consumed); |
410 | 0 | } |
411 | | |
412 | | /*[clinic input] |
413 | | _codecs.utf_32_le_decode |
414 | | data: Py_buffer |
415 | | errors: str(accept={str, NoneType}) = None |
416 | | final: bool(accept={int}) = False |
417 | | / |
418 | | [clinic start generated code]*/ |
419 | | |
420 | | static PyObject * |
421 | | _codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data, |
422 | | const char *errors, int final) |
423 | | /*[clinic end generated code: output=ec8f46b67a94f3e6 input=9078ec70acfe7613]*/ |
424 | 0 | { |
425 | 0 | int byteorder = -1; |
426 | | /* This is overwritten unless final is true. */ |
427 | 0 | Py_ssize_t consumed = data->len; |
428 | 0 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
429 | 0 | errors, &byteorder, |
430 | 0 | final ? NULL : &consumed); |
431 | 0 | return codec_tuple(decoded, consumed); |
432 | 0 | } |
433 | | |
434 | | /*[clinic input] |
435 | | _codecs.utf_32_be_decode |
436 | | data: Py_buffer |
437 | | errors: str(accept={str, NoneType}) = None |
438 | | final: bool(accept={int}) = False |
439 | | / |
440 | | [clinic start generated code]*/ |
441 | | |
442 | | static PyObject * |
443 | | _codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data, |
444 | | const char *errors, int final) |
445 | | /*[clinic end generated code: output=ff82bae862c92c4e input=f1ae1bbbb86648ff]*/ |
446 | 0 | { |
447 | 0 | int byteorder = 1; |
448 | | /* This is overwritten unless final is true. */ |
449 | 0 | Py_ssize_t consumed = data->len; |
450 | 0 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
451 | 0 | errors, &byteorder, |
452 | 0 | final ? NULL : &consumed); |
453 | 0 | return codec_tuple(decoded, consumed); |
454 | 0 | } |
455 | | |
456 | | /* This non-standard version also provides access to the byteorder |
457 | | parameter of the builtin UTF-32 codec. |
458 | | |
459 | | It returns a tuple (unicode, bytesread, byteorder) with byteorder |
460 | | being the value in effect at the end of data. |
461 | | |
462 | | */ |
463 | | /*[clinic input] |
464 | | _codecs.utf_32_ex_decode |
465 | | data: Py_buffer |
466 | | errors: str(accept={str, NoneType}) = None |
467 | | byteorder: int = 0 |
468 | | final: bool(accept={int}) = False |
469 | | / |
470 | | [clinic start generated code]*/ |
471 | | |
472 | | static PyObject * |
473 | | _codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data, |
474 | | const char *errors, int byteorder, int final) |
475 | | /*[clinic end generated code: output=6bfb177dceaf4848 input=e46a73bc859d0bd0]*/ |
476 | 0 | { |
477 | 0 | Py_ssize_t consumed = data->len; |
478 | 0 | PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len, |
479 | 0 | errors, &byteorder, |
480 | 0 | final ? NULL : &consumed); |
481 | 0 | if (decoded == NULL) |
482 | 0 | return NULL; |
483 | 0 | return Py_BuildValue("Nni", decoded, consumed, byteorder); |
484 | 0 | } |
485 | | |
486 | | /*[clinic input] |
487 | | _codecs.unicode_escape_decode |
488 | | data: Py_buffer(accept={str, buffer}) |
489 | | errors: str(accept={str, NoneType}) = None |
490 | | / |
491 | | [clinic start generated code]*/ |
492 | | |
493 | | static PyObject * |
494 | | _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, |
495 | | const char *errors) |
496 | | /*[clinic end generated code: output=3ca3c917176b82ab input=8328081a3a569bd6]*/ |
497 | 0 | { |
498 | 0 | PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len, |
499 | 0 | errors); |
500 | 0 | return codec_tuple(decoded, data->len); |
501 | 0 | } |
502 | | |
503 | | /*[clinic input] |
504 | | _codecs.raw_unicode_escape_decode |
505 | | data: Py_buffer(accept={str, buffer}) |
506 | | errors: str(accept={str, NoneType}) = None |
507 | | / |
508 | | [clinic start generated code]*/ |
509 | | |
510 | | static PyObject * |
511 | | _codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data, |
512 | | const char *errors) |
513 | | /*[clinic end generated code: output=c98eeb56028070a6 input=d2f5159ce3b3392f]*/ |
514 | 0 | { |
515 | 0 | PyObject *decoded = PyUnicode_DecodeRawUnicodeEscape(data->buf, data->len, |
516 | 0 | errors); |
517 | 0 | return codec_tuple(decoded, data->len); |
518 | 0 | } |
519 | | |
520 | | /*[clinic input] |
521 | | _codecs.latin_1_decode |
522 | | data: Py_buffer |
523 | | errors: str(accept={str, NoneType}) = None |
524 | | / |
525 | | [clinic start generated code]*/ |
526 | | |
527 | | static PyObject * |
528 | | _codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data, |
529 | | const char *errors) |
530 | | /*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/ |
531 | 0 | { |
532 | 0 | PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors); |
533 | 0 | return codec_tuple(decoded, data->len); |
534 | 0 | } |
535 | | |
536 | | /*[clinic input] |
537 | | _codecs.ascii_decode |
538 | | data: Py_buffer |
539 | | errors: str(accept={str, NoneType}) = None |
540 | | / |
541 | | [clinic start generated code]*/ |
542 | | |
543 | | static PyObject * |
544 | | _codecs_ascii_decode_impl(PyObject *module, Py_buffer *data, |
545 | | const char *errors) |
546 | | /*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/ |
547 | 0 | { |
548 | 0 | PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors); |
549 | 0 | return codec_tuple(decoded, data->len); |
550 | 0 | } |
551 | | |
552 | | /*[clinic input] |
553 | | _codecs.charmap_decode |
554 | | data: Py_buffer |
555 | | errors: str(accept={str, NoneType}) = None |
556 | | mapping: object = None |
557 | | / |
558 | | [clinic start generated code]*/ |
559 | | |
560 | | static PyObject * |
561 | | _codecs_charmap_decode_impl(PyObject *module, Py_buffer *data, |
562 | | const char *errors, PyObject *mapping) |
563 | | /*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/ |
564 | 0 | { |
565 | 0 | PyObject *decoded; |
566 | |
|
567 | 0 | if (mapping == Py_None) |
568 | 0 | mapping = NULL; |
569 | |
|
570 | 0 | decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors); |
571 | 0 | return codec_tuple(decoded, data->len); |
572 | 0 | } |
573 | | |
574 | | #ifdef MS_WINDOWS |
575 | | |
576 | | /*[clinic input] |
577 | | _codecs.mbcs_decode |
578 | | data: Py_buffer |
579 | | errors: str(accept={str, NoneType}) = None |
580 | | final: bool(accept={int}) = False |
581 | | / |
582 | | [clinic start generated code]*/ |
583 | | |
584 | | static PyObject * |
585 | | _codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data, |
586 | | const char *errors, int final) |
587 | | /*[clinic end generated code: output=39b65b8598938c4b input=1c1d50f08fa53789]*/ |
588 | | { |
589 | | Py_ssize_t consumed = data->len; |
590 | | PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len, |
591 | | errors, final ? NULL : &consumed); |
592 | | return codec_tuple(decoded, consumed); |
593 | | } |
594 | | |
595 | | /*[clinic input] |
596 | | _codecs.oem_decode |
597 | | data: Py_buffer |
598 | | errors: str(accept={str, NoneType}) = None |
599 | | final: bool(accept={int}) = False |
600 | | / |
601 | | [clinic start generated code]*/ |
602 | | |
603 | | static PyObject * |
604 | | _codecs_oem_decode_impl(PyObject *module, Py_buffer *data, |
605 | | const char *errors, int final) |
606 | | /*[clinic end generated code: output=da1617612f3fcad8 input=81b67cba811022e5]*/ |
607 | | { |
608 | | Py_ssize_t consumed = data->len; |
609 | | PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP, |
610 | | data->buf, data->len, errors, final ? NULL : &consumed); |
611 | | return codec_tuple(decoded, consumed); |
612 | | } |
613 | | |
614 | | /*[clinic input] |
615 | | _codecs.code_page_decode |
616 | | codepage: int |
617 | | data: Py_buffer |
618 | | errors: str(accept={str, NoneType}) = None |
619 | | final: bool(accept={int}) = False |
620 | | / |
621 | | [clinic start generated code]*/ |
622 | | |
623 | | static PyObject * |
624 | | _codecs_code_page_decode_impl(PyObject *module, int codepage, |
625 | | Py_buffer *data, const char *errors, int final) |
626 | | /*[clinic end generated code: output=53008ea967da3fff input=c5f58d036cb63575]*/ |
627 | | { |
628 | | Py_ssize_t consumed = data->len; |
629 | | PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage, |
630 | | data->buf, data->len, |
631 | | errors, |
632 | | final ? NULL : &consumed); |
633 | | return codec_tuple(decoded, consumed); |
634 | | } |
635 | | |
636 | | #endif /* MS_WINDOWS */ |
637 | | |
638 | | /* --- Encoder ------------------------------------------------------------ */ |
639 | | |
640 | | /*[clinic input] |
641 | | _codecs.readbuffer_encode |
642 | | data: Py_buffer(accept={str, buffer}) |
643 | | errors: str(accept={str, NoneType}) = None |
644 | | / |
645 | | [clinic start generated code]*/ |
646 | | |
647 | | static PyObject * |
648 | | _codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data, |
649 | | const char *errors) |
650 | | /*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/ |
651 | 0 | { |
652 | 0 | PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len); |
653 | 0 | return codec_tuple(result, data->len); |
654 | 0 | } |
655 | | |
656 | | /*[clinic input] |
657 | | _codecs.utf_7_encode |
658 | | str: unicode |
659 | | errors: str(accept={str, NoneType}) = None |
660 | | / |
661 | | [clinic start generated code]*/ |
662 | | |
663 | | static PyObject * |
664 | | _codecs_utf_7_encode_impl(PyObject *module, PyObject *str, |
665 | | const char *errors) |
666 | | /*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/ |
667 | 0 | { |
668 | 0 | return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors), |
669 | 0 | PyUnicode_GET_LENGTH(str)); |
670 | 0 | } |
671 | | |
672 | | /*[clinic input] |
673 | | _codecs.utf_8_encode |
674 | | str: unicode |
675 | | errors: str(accept={str, NoneType}) = None |
676 | | / |
677 | | [clinic start generated code]*/ |
678 | | |
679 | | static PyObject * |
680 | | _codecs_utf_8_encode_impl(PyObject *module, PyObject *str, |
681 | | const char *errors) |
682 | | /*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/ |
683 | 0 | { |
684 | 0 | return codec_tuple(_PyUnicode_AsUTF8String(str, errors), |
685 | 0 | PyUnicode_GET_LENGTH(str)); |
686 | 0 | } |
687 | | |
688 | | /* This version provides access to the byteorder parameter of the |
689 | | builtin UTF-16 codecs as optional third argument. It defaults to 0 |
690 | | which means: use the native byte order and prepend the data with a |
691 | | BOM mark. |
692 | | |
693 | | */ |
694 | | |
695 | | /*[clinic input] |
696 | | _codecs.utf_16_encode |
697 | | str: unicode |
698 | | errors: str(accept={str, NoneType}) = None |
699 | | byteorder: int = 0 |
700 | | / |
701 | | [clinic start generated code]*/ |
702 | | |
703 | | static PyObject * |
704 | | _codecs_utf_16_encode_impl(PyObject *module, PyObject *str, |
705 | | const char *errors, int byteorder) |
706 | | /*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/ |
707 | 0 | { |
708 | 0 | return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder), |
709 | 0 | PyUnicode_GET_LENGTH(str)); |
710 | 0 | } |
711 | | |
712 | | /*[clinic input] |
713 | | _codecs.utf_16_le_encode |
714 | | str: unicode |
715 | | errors: str(accept={str, NoneType}) = None |
716 | | / |
717 | | [clinic start generated code]*/ |
718 | | |
719 | | static PyObject * |
720 | | _codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str, |
721 | | const char *errors) |
722 | | /*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/ |
723 | 0 | { |
724 | 0 | return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1), |
725 | 0 | PyUnicode_GET_LENGTH(str)); |
726 | 0 | } |
727 | | |
728 | | /*[clinic input] |
729 | | _codecs.utf_16_be_encode |
730 | | str: unicode |
731 | | errors: str(accept={str, NoneType}) = None |
732 | | / |
733 | | [clinic start generated code]*/ |
734 | | |
735 | | static PyObject * |
736 | | _codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str, |
737 | | const char *errors) |
738 | | /*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/ |
739 | 0 | { |
740 | 0 | return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1), |
741 | 0 | PyUnicode_GET_LENGTH(str)); |
742 | 0 | } |
743 | | |
744 | | /* This version provides access to the byteorder parameter of the |
745 | | builtin UTF-32 codecs as optional third argument. It defaults to 0 |
746 | | which means: use the native byte order and prepend the data with a |
747 | | BOM mark. |
748 | | |
749 | | */ |
750 | | |
751 | | /*[clinic input] |
752 | | _codecs.utf_32_encode |
753 | | str: unicode |
754 | | errors: str(accept={str, NoneType}) = None |
755 | | byteorder: int = 0 |
756 | | / |
757 | | [clinic start generated code]*/ |
758 | | |
759 | | static PyObject * |
760 | | _codecs_utf_32_encode_impl(PyObject *module, PyObject *str, |
761 | | const char *errors, int byteorder) |
762 | | /*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/ |
763 | 0 | { |
764 | 0 | return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder), |
765 | 0 | PyUnicode_GET_LENGTH(str)); |
766 | 0 | } |
767 | | |
768 | | /*[clinic input] |
769 | | _codecs.utf_32_le_encode |
770 | | str: unicode |
771 | | errors: str(accept={str, NoneType}) = None |
772 | | / |
773 | | [clinic start generated code]*/ |
774 | | |
775 | | static PyObject * |
776 | | _codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str, |
777 | | const char *errors) |
778 | | /*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/ |
779 | 0 | { |
780 | 0 | return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1), |
781 | 0 | PyUnicode_GET_LENGTH(str)); |
782 | 0 | } |
783 | | |
784 | | /*[clinic input] |
785 | | _codecs.utf_32_be_encode |
786 | | str: unicode |
787 | | errors: str(accept={str, NoneType}) = None |
788 | | / |
789 | | [clinic start generated code]*/ |
790 | | |
791 | | static PyObject * |
792 | | _codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str, |
793 | | const char *errors) |
794 | | /*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/ |
795 | 0 | { |
796 | 0 | return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1), |
797 | 0 | PyUnicode_GET_LENGTH(str)); |
798 | 0 | } |
799 | | |
800 | | /*[clinic input] |
801 | | _codecs.unicode_escape_encode |
802 | | str: unicode |
803 | | errors: str(accept={str, NoneType}) = None |
804 | | / |
805 | | [clinic start generated code]*/ |
806 | | |
807 | | static PyObject * |
808 | | _codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str, |
809 | | const char *errors) |
810 | | /*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/ |
811 | 0 | { |
812 | 0 | return codec_tuple(PyUnicode_AsUnicodeEscapeString(str), |
813 | 0 | PyUnicode_GET_LENGTH(str)); |
814 | 0 | } |
815 | | |
816 | | /*[clinic input] |
817 | | _codecs.raw_unicode_escape_encode |
818 | | str: unicode |
819 | | errors: str(accept={str, NoneType}) = None |
820 | | / |
821 | | [clinic start generated code]*/ |
822 | | |
823 | | static PyObject * |
824 | | _codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str, |
825 | | const char *errors) |
826 | | /*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/ |
827 | 0 | { |
828 | 0 | return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str), |
829 | 0 | PyUnicode_GET_LENGTH(str)); |
830 | 0 | } |
831 | | |
832 | | /*[clinic input] |
833 | | _codecs.latin_1_encode |
834 | | str: unicode |
835 | | errors: str(accept={str, NoneType}) = None |
836 | | / |
837 | | [clinic start generated code]*/ |
838 | | |
839 | | static PyObject * |
840 | | _codecs_latin_1_encode_impl(PyObject *module, PyObject *str, |
841 | | const char *errors) |
842 | | /*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/ |
843 | 0 | { |
844 | 0 | return codec_tuple(_PyUnicode_AsLatin1String(str, errors), |
845 | 0 | PyUnicode_GET_LENGTH(str)); |
846 | 0 | } |
847 | | |
848 | | /*[clinic input] |
849 | | _codecs.ascii_encode |
850 | | str: unicode |
851 | | errors: str(accept={str, NoneType}) = None |
852 | | / |
853 | | [clinic start generated code]*/ |
854 | | |
855 | | static PyObject * |
856 | | _codecs_ascii_encode_impl(PyObject *module, PyObject *str, |
857 | | const char *errors) |
858 | | /*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/ |
859 | 0 | { /* Implementation of _codecs.ascii_encode: encode `str` using the given error handler name. */ |
860 | 0 | return codec_tuple(_PyUnicode_AsASCIIString(str, errors), /* `errors` is forwarded unchanged to the encoder */ |
861 | 0 | PyUnicode_GET_LENGTH(str)); /* second argument: length of the input string */ |
862 | 0 | } |
863 | | |
864 | | /*[clinic input] |
865 | | _codecs.charmap_encode |
866 | | str: unicode |
867 | | errors: str(accept={str, NoneType}) = None |
868 | | mapping: object = None |
869 | | / |
870 | | [clinic start generated code]*/ |
871 | | |
872 | | static PyObject * |
873 | | _codecs_charmap_encode_impl(PyObject *module, PyObject *str, |
874 | | const char *errors, PyObject *mapping) |
875 | | /*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/ |
876 | 0 | { /* Implementation of _codecs.charmap_encode: encode `str` through `mapping` and wrap the result with its length. */ |
877 | 0 | if (mapping == Py_None) /* a Python-level None mapping is translated ... */ |
878 | 0 | mapping = NULL; /* ... into a C NULL before calling the encoder */ |
879 |

880 | 0 | return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors), /* mapping may be NULL at this point */ |
881 | 0 | PyUnicode_GET_LENGTH(str)); /* second argument: length of the input string */ |
882 | 0 | } |
883 | | |
884 | | /*[clinic input] |
885 | | _codecs.charmap_build |
886 | | map: unicode |
887 | | / |
888 | | [clinic start generated code]*/ |
889 | | |
890 | | static PyObject * |
891 | | _codecs_charmap_build_impl(PyObject *module, PyObject *map) |
892 | | /*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/ |
893 | 0 | { /* Implementation of _codecs.charmap_build: thin wrapper with no extra processing. */ |
894 | 0 | return PyUnicode_BuildEncodingMap(map); /* result (or failure) of the builder is returned as-is */ |
895 | 0 | } |
896 | | |
897 | | #ifdef MS_WINDOWS |
898 | | |
899 | | /*[clinic input] |
900 | | _codecs.mbcs_encode |
901 | | str: unicode |
902 | | errors: str(accept={str, NoneType}) = None |
903 | | / |
904 | | [clinic start generated code]*/ |
905 | | |
906 | | static PyObject * |
907 | | _codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors) |
908 | | /*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/ |
909 | | { /* Implementation of _codecs.mbcs_encode; only compiled when MS_WINDOWS is defined. */ |
910 | | return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors), /* CP_ACP: Windows constant for the ANSI code page */ |
911 | | PyUnicode_GET_LENGTH(str)); /* second argument: length of the input string */ |
912 | | } |
913 | | |
914 | | /*[clinic input] |
915 | | _codecs.oem_encode |
916 | | str: unicode |
917 | | errors: str(accept={str, NoneType}) = None |
918 | | / |
919 | | [clinic start generated code]*/ |
920 | | |
921 | | static PyObject * |
922 | | _codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors) |
923 | | /*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/ |
924 | | { /* Implementation of _codecs.oem_encode; only compiled when MS_WINDOWS is defined. */ |
925 | | return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors), /* CP_OEMCP: Windows constant for the OEM code page */ |
926 | | PyUnicode_GET_LENGTH(str)); /* second argument: length of the input string */ |
927 | | } |
928 | | |
929 | | /*[clinic input] |
930 | | _codecs.code_page_encode |
931 | | code_page: int |
932 | | str: unicode |
933 | | errors: str(accept={str, NoneType}) = None |
934 | | / |
935 | | [clinic start generated code]*/ |
936 | | |
937 | | static PyObject * |
938 | | _codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str, |
939 | | const char *errors) |
940 | | /*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/ |
941 | | { /* Implementation of _codecs.code_page_encode; only compiled when MS_WINDOWS is defined. */ |
942 | | return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors), /* caller-supplied code page is forwarded without validation here */ |
943 | | PyUnicode_GET_LENGTH(str)); /* second argument: length of the input string */ |
944 | | } |
945 | | |
946 | | #endif /* MS_WINDOWS */ |
947 | | |
948 | | /* --- Error handler registry --------------------------------------------- */ |
949 | | |
950 | | /*[clinic input] |
951 | | _codecs.register_error |
952 | | errors: str |
953 | | handler: object |
954 | | / |
955 | | |
956 | | Register the specified error handler under the name errors. |
957 | | |
958 | | handler must be a callable object, that will be called with an exception |
959 | | instance containing information about the location of the encoding/decoding |
960 | | error and must return a (replacement, new position) tuple. |
961 | | [clinic start generated code]*/ |
962 | | |
963 | | static PyObject * |
964 | | _codecs_register_error_impl(PyObject *module, const char *errors, |
965 | | PyObject *handler) |
966 | | /*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/ |
967 | 0 | { /* Implementation of _codecs.register_error: register `handler` under the name `errors`. */ |
968 | 0 | if (PyCodec_RegisterError(errors, handler)) /* a non-zero result is treated as failure */ |
969 | 0 | return NULL; /* presumably the exception was already set by PyCodec_RegisterError - confirm */ |
970 | 0 | Py_RETURN_NONE; /* success: return None */ |
971 | 0 | } |
972 | | |
973 | | /*[clinic input] |
974 | | _codecs.lookup_error |
975 | | name: str |
976 | | / |
977 | | |
978 | | lookup_error(errors) -> handler |
979 | | |
980 | | Return the error handler for the specified error handling name or raise a |
981 | | LookupError, if no handler exists under this name. |
982 | | [clinic start generated code]*/ |
983 | | |
984 | | static PyObject * |
985 | | _codecs_lookup_error_impl(PyObject *module, const char *name) |
986 | | /*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/ |
987 | 84 | { /* Implementation of _codecs.lookup_error: thin wrapper with no extra processing. */ |
988 | 84 | return PyCodec_LookupError(name); /* handler (or failure) from the registry lookup is returned as-is */ |
989 | 84 | } |
990 | | |
991 | | /* --- Module API --------------------------------------------------------- */ |
992 | | |
993 | | static PyMethodDef _codecs_functions[] = { /* Method table of the _codecs module; passed to the module definition below. */ |
994 | | _CODECS_REGISTER_METHODDEF /* each *_METHODDEF entry is a macro, presumably supplied by clinic/_codecsmodule.c.h - confirm */ |
995 | | _CODECS_LOOKUP_METHODDEF |
996 | | _CODECS_ENCODE_METHODDEF |
997 | | _CODECS_DECODE_METHODDEF |
998 | | _CODECS_ESCAPE_ENCODE_METHODDEF |
999 | | _CODECS_ESCAPE_DECODE_METHODDEF |
1000 | | _CODECS_UTF_8_ENCODE_METHODDEF |
1001 | | _CODECS_UTF_8_DECODE_METHODDEF |
1002 | | _CODECS_UTF_7_ENCODE_METHODDEF |
1003 | | _CODECS_UTF_7_DECODE_METHODDEF |
1004 | | _CODECS_UTF_16_ENCODE_METHODDEF |
1005 | | _CODECS_UTF_16_LE_ENCODE_METHODDEF |
1006 | | _CODECS_UTF_16_BE_ENCODE_METHODDEF |
1007 | | _CODECS_UTF_16_DECODE_METHODDEF |
1008 | | _CODECS_UTF_16_LE_DECODE_METHODDEF |
1009 | | _CODECS_UTF_16_BE_DECODE_METHODDEF |
1010 | | _CODECS_UTF_16_EX_DECODE_METHODDEF |
1011 | | _CODECS_UTF_32_ENCODE_METHODDEF |
1012 | | _CODECS_UTF_32_LE_ENCODE_METHODDEF |
1013 | | _CODECS_UTF_32_BE_ENCODE_METHODDEF |
1014 | | _CODECS_UTF_32_DECODE_METHODDEF |
1015 | | _CODECS_UTF_32_LE_DECODE_METHODDEF |
1016 | | _CODECS_UTF_32_BE_DECODE_METHODDEF |
1017 | | _CODECS_UTF_32_EX_DECODE_METHODDEF |
1018 | | _CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF |
1019 | | _CODECS_UNICODE_ESCAPE_DECODE_METHODDEF |
1020 | | _CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF |
1021 | | _CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF |
1022 | | _CODECS_LATIN_1_ENCODE_METHODDEF |
1023 | | _CODECS_LATIN_1_DECODE_METHODDEF |
1024 | | _CODECS_ASCII_ENCODE_METHODDEF |
1025 | | _CODECS_ASCII_DECODE_METHODDEF |
1026 | | _CODECS_CHARMAP_ENCODE_METHODDEF |
1027 | | _CODECS_CHARMAP_DECODE_METHODDEF |
1028 | | _CODECS_CHARMAP_BUILD_METHODDEF |
1029 | | _CODECS_READBUFFER_ENCODE_METHODDEF |
1030 | | _CODECS_MBCS_ENCODE_METHODDEF /* the mbcs/oem/code_page encode impls in this file sit under #ifdef MS_WINDOWS; these entries presumably expand to nothing elsewhere - confirm */ |
1031 | | _CODECS_MBCS_DECODE_METHODDEF |
1032 | | _CODECS_OEM_ENCODE_METHODDEF |
1033 | | _CODECS_OEM_DECODE_METHODDEF |
1034 | | _CODECS_CODE_PAGE_ENCODE_METHODDEF |
1035 | | _CODECS_CODE_PAGE_DECODE_METHODDEF |
1036 | | _CODECS_REGISTER_ERROR_METHODDEF |
1037 | | _CODECS_LOOKUP_ERROR_METHODDEF |
1038 | | _CODECS__FORGET_CODEC_METHODDEF |
1039 | | {NULL, NULL} /* sentinel */ |
1040 | | }; |
1041 | | |
1042 | | static struct PyModuleDef codecsmodule = { /* Module definition handed to PyModule_Create() in PyInit__codecs. */ |
1043 | | PyModuleDef_HEAD_INIT, /* m_base */ |
1044 | | "_codecs", /* m_name */ |
1045 | | NULL, /* m_doc: no module docstring */ |
1046 | | -1, /* m_size: -1, i.e. no per-module state block (see PyModuleDef docs) */ |
1047 | | _codecs_functions, /* m_methods: the method table defined in this file */ |
1048 | | NULL, /* m_slots */ |
1049 | | NULL, /* m_traverse */ |
1050 | | NULL, /* m_clear */ |
1051 | | NULL /* m_free */ |
1052 | | }; |
1053 | | |
1054 | | PyMODINIT_FUNC |
1055 | | PyInit__codecs(void) /* Module initialization entry point for _codecs. */ |
1056 | 14 | { |
1057 | 14 | return PyModule_Create(&codecsmodule); /* result of PyModule_Create is returned directly, including NULL on failure */ |
1058 | 14 | } |