/src/cpython/Modules/_io/textio.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | An implementation of Text I/O as defined by PEP 3116 - "New I/O" |
3 | | |
4 | | Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. |
5 | | |
6 | | Written by Amaury Forgeot d'Arc and Antoine Pitrou |
7 | | */ |
8 | | |
9 | | #include "Python.h" |
10 | | #include "pycore_call.h" // _PyObject_CallMethod() |
11 | | #include "pycore_codecs.h" // _PyCodecInfo_GetIncrementalDecoder() |
12 | | #include "pycore_fileutils.h" // _Py_GetLocaleEncoding() |
13 | | #include "pycore_interp.h" // PyInterpreterState.fs_codec |
14 | | #include "pycore_long.h" // _PyLong_GetZero() |
15 | | #include "pycore_object.h" // _PyObject_GC_UNTRACK() |
16 | | #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() |
17 | | #include "pycore_pystate.h" // _PyInterpreterState_GET() |
18 | | #include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString() |
19 | | #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() |
20 | | |
21 | | #include "_iomodule.h" |
22 | | |
23 | | /*[clinic input] |
24 | | module _io |
25 | | class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type" |
26 | | class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type" |
27 | | class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type" |
28 | | [clinic start generated code]*/ |
29 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/ |
30 | | |
31 | | typedef struct nldecoder_object nldecoder_object; |
32 | | typedef struct textio textio; |
33 | | |
34 | | #define clinic_state() (find_io_state_by_def(Py_TYPE(self))) |
35 | | #include "clinic/textio.c.h" |
36 | | #undef clinic_state |
37 | | |
38 | | /* TextIOBase */ |
39 | | |
/* Docstring for _io._TextIOBase; installed through the Py_tp_doc slot in
   textiobase_slots. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );
47 | | |
48 | | static PyObject * |
49 | | _unsupported(_PyIO_State *state, const char *message) |
50 | 0 | { |
51 | 0 | PyErr_SetString(state->unsupported_operation, message); |
52 | 0 | return NULL; |
53 | 0 | } |
54 | | |
55 | | /*[clinic input] |
56 | | _io._TextIOBase.detach |
57 | | cls: defining_class |
58 | | / |
59 | | |
60 | | Separate the underlying buffer from the TextIOBase and return it. |
61 | | |
62 | | After the underlying buffer has been detached, the TextIO is in an unusable state. |
63 | | [clinic start generated code]*/ |
64 | | |
65 | | static PyObject * |
66 | | _io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls) |
67 | | /*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/ |
68 | 0 | { |
69 | 0 | _PyIO_State *state = get_io_state_by_cls(cls); |
70 | 0 | return _unsupported(state, "detach"); |
71 | 0 | } |
72 | | |
73 | | /*[clinic input] |
74 | | _io._TextIOBase.read |
75 | | cls: defining_class |
76 | | size: int(unused=True) = -1 |
77 | | / |
78 | | |
79 | | Read at most size characters from stream. |
80 | | |
81 | | Read from underlying buffer until we have size characters or we hit EOF. |
82 | | If size is negative or omitted, read until EOF. |
83 | | [clinic start generated code]*/ |
84 | | |
85 | | static PyObject * |
86 | | _io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls, |
87 | | int Py_UNUSED(size)) |
88 | | /*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/ |
89 | 0 | { |
90 | 0 | _PyIO_State *state = get_io_state_by_cls(cls); |
91 | 0 | return _unsupported(state, "read"); |
92 | 0 | } |
93 | | |
94 | | /*[clinic input] |
95 | | _io._TextIOBase.readline |
96 | | cls: defining_class |
97 | | size: int(unused=True) = -1 |
98 | | / |
99 | | |
100 | | Read until newline or EOF. |
101 | | |
102 | | Return an empty string if EOF is hit immediately. |
103 | | If size is specified, at most size characters will be read. |
104 | | [clinic start generated code]*/ |
105 | | |
106 | | static PyObject * |
107 | | _io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls, |
108 | | int Py_UNUSED(size)) |
109 | | /*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/ |
110 | 0 | { |
111 | 0 | _PyIO_State *state = get_io_state_by_cls(cls); |
112 | 0 | return _unsupported(state, "readline"); |
113 | 0 | } |
114 | | |
115 | | /*[clinic input] |
116 | | _io._TextIOBase.write |
117 | | cls: defining_class |
118 | | s: str(unused=True) |
119 | | / |
120 | | |
121 | | Write string s to stream. |
122 | | |
123 | | Return the number of characters written |
124 | | (which is always equal to the length of the string). |
125 | | [clinic start generated code]*/ |
126 | | |
127 | | static PyObject * |
128 | | _io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls, |
129 | | const char *Py_UNUSED(s)) |
130 | | /*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/ |
131 | 0 | { |
132 | 0 | _PyIO_State *state = get_io_state_by_cls(cls); |
133 | 0 | return _unsupported(state, "write"); |
134 | 0 | } |
135 | | |
136 | | /*[clinic input] |
137 | | @getter |
138 | | _io._TextIOBase.encoding |
139 | | |
140 | | Encoding of the text stream. |
141 | | |
142 | | Subclasses should override. |
143 | | [clinic start generated code]*/ |
144 | | |
145 | | static PyObject * |
146 | | _io__TextIOBase_encoding_get_impl(PyObject *self) |
147 | | /*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/ |
148 | 0 | { |
149 | 0 | Py_RETURN_NONE; |
150 | 0 | } |
151 | | |
152 | | /*[clinic input] |
153 | | @getter |
154 | | _io._TextIOBase.newlines |
155 | | |
156 | | Line endings translated so far. |
157 | | |
158 | | Only line endings translated during reading are considered. |
159 | | |
160 | | Subclasses should override. |
161 | | [clinic start generated code]*/ |
162 | | |
163 | | static PyObject * |
164 | | _io__TextIOBase_newlines_get_impl(PyObject *self) |
165 | | /*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/ |
166 | 0 | { |
167 | 0 | Py_RETURN_NONE; |
168 | 0 | } |
169 | | |
170 | | /*[clinic input] |
171 | | @getter |
172 | | _io._TextIOBase.errors |
173 | | |
174 | | The error setting of the decoder or encoder. |
175 | | |
176 | | Subclasses should override. |
177 | | [clinic start generated code]*/ |
178 | | |
179 | | static PyObject * |
180 | | _io__TextIOBase_errors_get_impl(PyObject *self) |
181 | | /*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/ |
182 | 0 | { |
183 | 0 | Py_RETURN_NONE; |
184 | 0 | } |
185 | | |
186 | | |
/* Method table for _io._TextIOBase.  Each entry is a *_METHODDEF macro
   (presumably supplied by the included "clinic/textio.c.h"); the table is
   terminated by a NULL sentinel. */
static PyMethodDef textiobase_methods[] = {
    _IO__TEXTIOBASE_DETACH_METHODDEF
    _IO__TEXTIOBASE_READ_METHODDEF
    _IO__TEXTIOBASE_READLINE_METHODDEF
    _IO__TEXTIOBASE_WRITE_METHODDEF
    {NULL, NULL}
};
194 | | |
/* Attribute (getter) table for _io._TextIOBase: encoding, newlines and
   errors.  Terminated by a NULL sentinel. */
static PyGetSetDef textiobase_getset[] = {
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
    {NULL}
};
201 | | |
/* Type slots for _io._TextIOBase: docstring, methods and getters only.
   Terminated by the {0, NULL} sentinel. */
static PyType_Slot textiobase_slots[] = {
    {Py_tp_doc, (void *)textiobase_doc},
    {Py_tp_methods, textiobase_methods},
    {Py_tp_getset, textiobase_getset},
    {0, NULL},
};
208 | | |
/* Type specification for _io._TextIOBase (subclassable, immutable type).
   Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
PyType_Spec textiobase_spec = {
    .name = "_io._TextIOBase",
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = textiobase_slots,
};
216 | | |
217 | | /* IncrementalNewlineDecoder */ |
218 | | |
/* Instance layout of _io.IncrementalNewlineDecoder. */
struct nldecoder_object {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; Py_None means decode() input is used as-is */
    PyObject *errors;           /* stays NULL until __init__ runs (see CHECK_INITIALIZED_DECODER) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was withheld by the previous decode() */
    unsigned int translate: 1;  /* non-zero: decode() rewrites "\r\n" and "\r" to "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
};

/* Unchecked cast from a generic object pointer to nldecoder_object *. */
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
229 | | |
230 | | /*[clinic input] |
231 | | _io.IncrementalNewlineDecoder.__init__ |
232 | | decoder: object |
233 | | translate: bool |
234 | | errors: object(c_default="NULL") = "strict" |
235 | | |
236 | | Codec used when reading a file in universal newlines mode. |
237 | | |
238 | | It wraps another incremental decoder, translating \r\n and \r into \n. |
239 | | It also records the types of newlines encountered. When used with |
240 | | translate=False, it ensures that the newline sequence is returned in |
241 | | one piece. When used with decoder=None, it expects unicode strings as |
242 | | decode input and translates newlines without first invoking an external |
243 | | decoder. |
244 | | [clinic start generated code]*/ |
245 | | |
246 | | static int |
247 | | _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, |
248 | | PyObject *decoder, int translate, |
249 | | PyObject *errors) |
250 | | /*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/ |
251 | 17.4k | { |
252 | | |
253 | 17.4k | if (errors == NULL) { |
254 | 17.4k | errors = &_Py_ID(strict); |
255 | 17.4k | } |
256 | 0 | else { |
257 | 0 | errors = Py_NewRef(errors); |
258 | 0 | } |
259 | | |
260 | 17.4k | Py_XSETREF(self->errors, errors); |
261 | 17.4k | Py_XSETREF(self->decoder, Py_NewRef(decoder)); |
262 | 17.4k | self->translate = translate ? 1 : 0; |
263 | 17.4k | self->seennl = 0; |
264 | 17.4k | self->pendingcr = 0; |
265 | | |
266 | 17.4k | return 0; |
267 | 17.4k | } |
268 | | |
269 | | static int |
270 | | incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg) |
271 | 3.09k | { |
272 | 3.09k | nldecoder_object *self = nldecoder_object_CAST(op); |
273 | 3.09k | Py_VISIT(Py_TYPE(self)); |
274 | 3.09k | Py_VISIT(self->decoder); |
275 | 3.09k | Py_VISIT(self->errors); |
276 | 3.09k | return 0; |
277 | 3.09k | } |
278 | | |
279 | | static int |
280 | | incrementalnewlinedecoder_clear(PyObject *op) |
281 | 17.4k | { |
282 | 17.4k | nldecoder_object *self = nldecoder_object_CAST(op); |
283 | 17.4k | Py_CLEAR(self->decoder); |
284 | 17.4k | Py_CLEAR(self->errors); |
285 | 17.4k | return 0; |
286 | 17.4k | } |
287 | | |
288 | | static void |
289 | | incrementalnewlinedecoder_dealloc(PyObject *op) |
290 | 17.4k | { |
291 | 17.4k | nldecoder_object *self = nldecoder_object_CAST(op); |
292 | 17.4k | PyTypeObject *tp = Py_TYPE(self); |
293 | 17.4k | _PyObject_GC_UNTRACK(self); |
294 | 17.4k | (void)incrementalnewlinedecoder_clear(op); |
295 | 17.4k | tp->tp_free(self); |
296 | 17.4k | Py_DECREF(tp); |
297 | 17.4k | } |
298 | | |
299 | | static int |
300 | | check_decoded(PyObject *decoded) |
301 | 66.0k | { |
302 | 66.0k | if (decoded == NULL) |
303 | 0 | return -1; |
304 | 66.0k | if (!PyUnicode_Check(decoded)) { |
305 | 0 | PyErr_Format(PyExc_TypeError, |
306 | 0 | "decoder should return a string result, not '%.200s'", |
307 | 0 | Py_TYPE(decoded)->tp_name); |
308 | 0 | Py_DECREF(decoded); |
309 | 0 | return -1; |
310 | 0 | } |
311 | 66.0k | return 0; |
312 | 66.0k | } |
313 | | |
/* Guard used at the top of IncrementalNewlineDecoder methods: when
   self->errors is still NULL, set ValueError and `return NULL` from the
   ENCLOSING function.  Expands to a bare `if` statement, so it is only
   usable as a statement inside functions that return a pointer. */
#define CHECK_INITIALIZED_DECODER(self) \
    if (self->errors == NULL) { \
        PyErr_SetString(PyExc_ValueError, \
                        "IncrementalNewlineDecoder.__init__() not called"); \
        return NULL; \
    }
320 | | |
/* Bit flags accumulated in nldecoder_object.seennl: which line-ending
   forms have been encountered so far. */
#define SEEN_CR   1     /* lone "\r" */
#define SEEN_LF   2     /* "\n" */
#define SEEN_CRLF 4     /* "\r\n" pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)    /* every form seen: scanning can stop */
325 | | |
/* Decode `input` through the wrapped decoder and post-process newlines.
 *
 * myself: an IncrementalNewlineDecoder instance (cast without a type check).
 * input:  handed to self->decoder.decode(input, final); when self->decoder
 *         is None it is used directly as the decoded text.
 * final:  non-zero when no more input follows.  Only when it is zero is a
 *         trailing '\r' withheld (and remembered in self->pendingcr).
 *
 * Returns a new reference to a str, or NULL with an exception set.
 * Side effects: updates self->pendingcr and ORs newly seen line-ending
 * kinds into self->seennl.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = nldecoder_object_CAST(myself);

    CHECK_INITIALIZED_DECODER(self);

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = Py_NewRef(input);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    /* A withheld '\r' is re-emitted only once there is text to attach it to,
       or when this is the final call; otherwise it stays pending. */
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is also the size in bytes of one code unit: skip the
           '\r' just written and copy the old payload behind it. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_SETREF(output, modified);
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_SETREF(output, modified);
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left unchanged. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            /* Byte-level scan over kind * len bytes: for wider kinds a hit
               may be a byte inside a larger code unit, which only costs a
               trip through one of the slower branches below. */
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* Wider kinds: the byte hit above is not conclusive,
                       so confirm by reading whole code units. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no bound on i here; presumably
                           relies on the string's terminating NUL to stop
                           the loop - confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record-only mode: classify line endings, leave text as is. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* May read index len when '\r' is the last character;
                       presumably that is the terminating NUL - confirm. */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translation mode: copy into a scratch buffer, turning "\r\n"
               and "\r" into "\n".  These locals shadow the outer `kind`
               and `in_str` with the same values. */
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* The output never grows (each "\r\n" shrinks by one unit),
               so the input size is a sufficient capacity. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past `c`; in > len means `c`
                   was read at index len, i.e. past the last character. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` was released above, so return directly
               instead of jumping to `error`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
519 | | |
520 | | /*[clinic input] |
521 | | _io.IncrementalNewlineDecoder.decode |
522 | | input: object |
523 | | final: bool = False |
524 | | [clinic start generated code]*/ |
525 | | |
526 | | static PyObject * |
527 | | _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self, |
528 | | PyObject *input, int final) |
529 | | /*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/ |
530 | 0 | { |
531 | 0 | return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final); |
532 | 0 | } |
533 | | |
534 | | /*[clinic input] |
535 | | _io.IncrementalNewlineDecoder.getstate |
536 | | [clinic start generated code]*/ |
537 | | |
538 | | static PyObject * |
539 | | _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) |
540 | | /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/ |
541 | 0 | { |
542 | 0 | PyObject *buffer; |
543 | 0 | unsigned long long flag; |
544 | |
|
545 | 0 | CHECK_INITIALIZED_DECODER(self); |
546 | |
|
547 | 0 | if (self->decoder != Py_None) { |
548 | 0 | PyObject *state = PyObject_CallMethodNoArgs(self->decoder, |
549 | 0 | &_Py_ID(getstate)); |
550 | 0 | if (state == NULL) |
551 | 0 | return NULL; |
552 | 0 | if (!PyTuple_Check(state)) { |
553 | 0 | PyErr_SetString(PyExc_TypeError, |
554 | 0 | "illegal decoder state"); |
555 | 0 | Py_DECREF(state); |
556 | 0 | return NULL; |
557 | 0 | } |
558 | 0 | if (!PyArg_ParseTuple(state, "OK;illegal decoder state", |
559 | 0 | &buffer, &flag)) |
560 | 0 | { |
561 | 0 | Py_DECREF(state); |
562 | 0 | return NULL; |
563 | 0 | } |
564 | 0 | Py_INCREF(buffer); |
565 | 0 | Py_DECREF(state); |
566 | 0 | } |
567 | 0 | else { |
568 | 0 | buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); |
569 | 0 | flag = 0; |
570 | 0 | } |
571 | 0 | flag <<= 1; |
572 | 0 | if (self->pendingcr) |
573 | 0 | flag |= 1; |
574 | 0 | return Py_BuildValue("NK", buffer, flag); |
575 | 0 | } |
576 | | |
577 | | /*[clinic input] |
578 | | _io.IncrementalNewlineDecoder.setstate |
579 | | state: object |
580 | | / |
581 | | [clinic start generated code]*/ |
582 | | |
583 | | static PyObject * |
584 | | _io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self, |
585 | | PyObject *state) |
586 | | /*[clinic end generated code: output=09135cb6e78a1dc8 input=c53fb505a76dbbe2]*/ |
587 | 0 | { |
588 | 0 | PyObject *buffer; |
589 | 0 | unsigned long long flag; |
590 | |
|
591 | 0 | CHECK_INITIALIZED_DECODER(self); |
592 | |
|
593 | 0 | if (!PyTuple_Check(state)) { |
594 | 0 | PyErr_SetString(PyExc_TypeError, "state argument must be a tuple"); |
595 | 0 | return NULL; |
596 | 0 | } |
597 | 0 | if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument", |
598 | 0 | &buffer, &flag)) |
599 | 0 | { |
600 | 0 | return NULL; |
601 | 0 | } |
602 | | |
603 | 0 | self->pendingcr = (int) (flag & 1); |
604 | 0 | flag >>= 1; |
605 | |
|
606 | 0 | if (self->decoder != Py_None) { |
607 | 0 | return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate), |
608 | 0 | "((OK))", buffer, flag); |
609 | 0 | } |
610 | 0 | else { |
611 | 0 | Py_RETURN_NONE; |
612 | 0 | } |
613 | 0 | } |
614 | | |
615 | | /*[clinic input] |
616 | | _io.IncrementalNewlineDecoder.reset |
617 | | [clinic start generated code]*/ |
618 | | |
619 | | static PyObject * |
620 | | _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) |
621 | | /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/ |
622 | 0 | { |
623 | 0 | CHECK_INITIALIZED_DECODER(self); |
624 | |
|
625 | 0 | self->seennl = 0; |
626 | 0 | self->pendingcr = 0; |
627 | 0 | if (self->decoder != Py_None) |
628 | 0 | return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset)); |
629 | 0 | else |
630 | 0 | Py_RETURN_NONE; |
631 | 0 | } |
632 | | |
633 | | static PyObject * |
634 | | incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context)) |
635 | 0 | { |
636 | 0 | nldecoder_object *self = nldecoder_object_CAST(op); |
637 | 0 | CHECK_INITIALIZED_DECODER(self); |
638 | |
|
639 | 0 | switch (self->seennl) { |
640 | 0 | case SEEN_CR: |
641 | 0 | return PyUnicode_FromString("\r"); |
642 | 0 | case SEEN_LF: |
643 | 0 | return PyUnicode_FromString("\n"); |
644 | 0 | case SEEN_CRLF: |
645 | 0 | return PyUnicode_FromString("\r\n"); |
646 | 0 | case SEEN_CR | SEEN_LF: |
647 | 0 | return Py_BuildValue("ss", "\r", "\n"); |
648 | 0 | case SEEN_CR | SEEN_CRLF: |
649 | 0 | return Py_BuildValue("ss", "\r", "\r\n"); |
650 | 0 | case SEEN_LF | SEEN_CRLF: |
651 | 0 | return Py_BuildValue("ss", "\n", "\r\n"); |
652 | 0 | case SEEN_CR | SEEN_LF | SEEN_CRLF: |
653 | 0 | return Py_BuildValue("sss", "\r", "\n", "\r\n"); |
654 | 0 | default: |
655 | 0 | Py_RETURN_NONE; |
656 | 0 | } |
657 | |
|
658 | 0 | } |
659 | | |
660 | | /* TextIOWrapper */ |
661 | | |
/* Signature of the specialized encoders below: (wrapper object, text)
   -> encoded result, or NULL on error. */
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
663 | | |
/* Instance layout of _io.TextIOWrapper. */
struct textio
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;           /* str built from the `newline` argument, or NULL (see set_newline) */
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;

    _PyIO_State *state;
};

/* Unchecked cast from a generic object pointer to textio *. */
#define textio_CAST(op) ((textio *)(op))
726 | | |
727 | | static void |
728 | | textiowrapper_set_decoded_chars(textio *self, PyObject *chars); |
729 | | |
730 | | /* A couple of specialized cases in order to bypass the slow incremental |
731 | | encoding methods for the most popular encodings. */ |
732 | | |
733 | | static PyObject * |
734 | | ascii_encode(PyObject *op, PyObject *text) |
735 | 0 | { |
736 | 0 | textio *self = textio_CAST(op); |
737 | 0 | return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors)); |
738 | 0 | } |
739 | | |
740 | | static PyObject * |
741 | | utf16be_encode(PyObject *op, PyObject *text) |
742 | 0 | { |
743 | 0 | textio *self = textio_CAST(op); |
744 | 0 | return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), 1); |
745 | 0 | } |
746 | | |
747 | | static PyObject * |
748 | | utf16le_encode(PyObject *op, PyObject *text) |
749 | 0 | { |
750 | 0 | textio *self = textio_CAST(op); |
751 | 0 | return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), -1); |
752 | 0 | } |
753 | | |
754 | | static PyObject * |
755 | | utf16_encode(PyObject *op, PyObject *text) |
756 | 0 | { |
757 | 0 | textio *self = textio_CAST(op); |
758 | 0 | if (!self->encoding_start_of_stream) { |
759 | | /* Skip the BOM and use native byte ordering */ |
760 | | #if PY_BIG_ENDIAN |
761 | | return utf16be_encode(op, text); |
762 | | #else |
763 | 0 | return utf16le_encode(op, text); |
764 | 0 | #endif |
765 | 0 | } |
766 | 0 | return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), 0); |
767 | 0 | } |
768 | | |
769 | | static PyObject * |
770 | | utf32be_encode(PyObject *op, PyObject *text) |
771 | 0 | { |
772 | 0 | textio *self = textio_CAST(op); |
773 | 0 | return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), 1); |
774 | 0 | } |
775 | | |
776 | | static PyObject * |
777 | | utf32le_encode(PyObject *op, PyObject *text) |
778 | 0 | { |
779 | 0 | textio *self = textio_CAST(op); |
780 | 0 | return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), -1); |
781 | 0 | } |
782 | | |
783 | | static PyObject * |
784 | | utf32_encode(PyObject *op, PyObject *text) |
785 | 0 | { |
786 | 0 | textio *self = textio_CAST(op); |
787 | 0 | if (!self->encoding_start_of_stream) { |
788 | | /* Skip the BOM and use native byte ordering */ |
789 | | #if PY_BIG_ENDIAN |
790 | | return utf32be_encode(op, text); |
791 | | #else |
792 | 0 | return utf32le_encode(op, text); |
793 | 0 | #endif |
794 | 0 | } |
795 | 0 | return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), 0); |
796 | 0 | } |
797 | | |
798 | | static PyObject * |
799 | | utf8_encode(PyObject *op, PyObject *text) |
800 | 0 | { |
801 | 0 | textio *self = textio_CAST(op); |
802 | 0 | return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors)); |
803 | 0 | } |
804 | | |
805 | | static PyObject * |
806 | | latin1_encode(PyObject *op, PyObject *text) |
807 | 0 | { |
808 | 0 | textio *self = textio_CAST(op); |
809 | 0 | return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors)); |
810 | 0 | } |
811 | | |
812 | | // Return true when encoding can be skipped when text is ascii. |
813 | | static inline int |
814 | | is_asciicompat_encoding(encodefunc_t f) |
815 | 0 | { |
816 | 0 | return f == ascii_encode || f == latin1_encode || f == utf8_encode; |
817 | 0 | } |
818 | | |
/* Map normalized encoding names onto the specialized encoding funcs */

/* One table row: encoding name and the encoder to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Lookup table, terminated by a {NULL, NULL} sentinel row. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       ascii_encode},
    {"iso8859-1",   latin1_encode},
    {"utf-8",       utf8_encode},
    {"utf-16-be",   utf16be_encode},
    {"utf-16-le",   utf16le_encode},
    {"utf-16",      utf16_encode},
    {"utf-32-be",   utf32be_encode},
    {"utf-32-le",   utf32le_encode},
    {"utf-32",      utf32_encode},
    {NULL, NULL}
};
838 | | |
839 | | static int |
840 | | validate_newline(const char *newline) |
841 | 48 | { |
842 | 48 | if (newline && newline[0] != '\0' |
843 | 48 | && !(newline[0] == '\n' && newline[1] == '\0') |
844 | 48 | && !(newline[0] == '\r' && newline[1] == '\0') |
845 | 48 | && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { |
846 | 0 | PyErr_Format(PyExc_ValueError, |
847 | 0 | "illegal newline value: %s", newline); |
848 | 0 | return -1; |
849 | 0 | } |
850 | 48 | return 0; |
851 | 48 | } |
852 | | |
853 | | static int |
854 | | set_newline(textio *self, const char *newline) |
855 | 48 | { |
856 | 48 | PyObject *old = self->readnl; |
857 | 48 | if (newline == NULL) { |
858 | 0 | self->readnl = NULL; |
859 | 0 | } |
860 | 48 | else { |
861 | 48 | self->readnl = PyUnicode_FromString(newline); |
862 | 48 | if (self->readnl == NULL) { |
863 | 0 | self->readnl = old; |
864 | 0 | return -1; |
865 | 0 | } |
866 | 48 | } |
867 | 48 | self->readuniversal = (newline == NULL || newline[0] == '\0'); |
868 | 48 | self->readtranslate = (newline == NULL); |
869 | 48 | self->writetranslate = (newline == NULL || newline[0] != '\0'); |
870 | 48 | if (!self->readuniversal && self->readnl != NULL) { |
871 | | // validate_newline() accepts only ASCII newlines. |
872 | 48 | assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND); |
873 | 48 | self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl); |
874 | 48 | if (strcmp(self->writenl, "\n") == 0) { |
875 | 48 | self->writenl = NULL; |
876 | 48 | } |
877 | 48 | } |
878 | 0 | else { |
879 | | #ifdef MS_WINDOWS |
880 | | self->writenl = "\r\n"; |
881 | | #else |
882 | 0 | self->writenl = NULL; |
883 | 0 | #endif |
884 | 0 | } |
885 | 48 | Py_XDECREF(old); |
886 | 48 | return 0; |
887 | 48 | } |
888 | | |
889 | | static int |
890 | | _textiowrapper_set_decoder(textio *self, PyObject *codec_info, |
891 | | const char *errors) |
892 | 48 | { |
893 | 48 | PyObject *res; |
894 | 48 | int r; |
895 | | |
896 | 48 | res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); |
897 | 48 | if (res == NULL) |
898 | 0 | return -1; |
899 | | |
900 | 48 | r = PyObject_IsTrue(res); |
901 | 48 | Py_DECREF(res); |
902 | 48 | if (r == -1) |
903 | 0 | return -1; |
904 | | |
905 | 48 | if (r != 1) |
906 | 32 | return 0; |
907 | | |
908 | 16 | Py_CLEAR(self->decoder); |
909 | 16 | self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors); |
910 | 16 | if (self->decoder == NULL) |
911 | 0 | return -1; |
912 | | |
913 | 16 | if (self->readuniversal) { |
914 | 0 | _PyIO_State *state = self->state; |
915 | 0 | PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs( |
916 | 0 | (PyObject *)state->PyIncrementalNewlineDecoder_Type, |
917 | 0 | self->decoder, self->readtranslate ? Py_True : Py_False, NULL); |
918 | 0 | if (incrementalDecoder == NULL) |
919 | 0 | return -1; |
920 | 0 | Py_XSETREF(self->decoder, incrementalDecoder); |
921 | 0 | } |
922 | | |
923 | 16 | return 0; |
924 | 16 | } |
925 | | |
926 | | static PyObject* |
927 | | _textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes, |
928 | | int eof) |
929 | 0 | { |
930 | 0 | PyObject *chars; |
931 | |
|
932 | 0 | if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type)) |
933 | 0 | chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof); |
934 | 0 | else |
935 | 0 | chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes, |
936 | 0 | eof ? Py_True : Py_False, NULL); |
937 | |
|
938 | 0 | if (check_decoded(chars) < 0) |
939 | | // check_decoded already decreases refcount |
940 | 0 | return NULL; |
941 | | |
942 | 0 | return chars; |
943 | 0 | } |
944 | | |
945 | | static int |
946 | | _textiowrapper_set_encoder(textio *self, PyObject *codec_info, |
947 | | const char *errors) |
948 | 48 | { |
949 | 48 | PyObject *res; |
950 | 48 | int r; |
951 | | |
952 | 48 | res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); |
953 | 48 | if (res == NULL) |
954 | 0 | return -1; |
955 | | |
956 | 48 | r = PyObject_IsTrue(res); |
957 | 48 | Py_DECREF(res); |
958 | 48 | if (r == -1) |
959 | 0 | return -1; |
960 | | |
961 | 48 | if (r != 1) |
962 | 16 | return 0; |
963 | | |
964 | 32 | Py_CLEAR(self->encoder); |
965 | 32 | self->encodefunc = NULL; |
966 | 32 | self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors); |
967 | 32 | if (self->encoder == NULL) |
968 | 0 | return -1; |
969 | | |
970 | | /* Get the normalized named of the codec */ |
971 | 32 | if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) { |
972 | 0 | return -1; |
973 | 0 | } |
974 | 32 | if (res != NULL && PyUnicode_Check(res)) { |
975 | 32 | const encodefuncentry *e = encodefuncs; |
976 | 32 | while (e->name != NULL) { |
977 | 32 | if (_PyUnicode_EqualToASCIIString(res, e->name)) { |
978 | 32 | self->encodefunc = e->encodefunc; |
979 | 32 | break; |
980 | 32 | } |
981 | 0 | e++; |
982 | 0 | } |
983 | 32 | } |
984 | 32 | Py_XDECREF(res); |
985 | | |
986 | 32 | return 0; |
987 | 32 | } |
988 | | |
989 | | static int |
990 | | _textiowrapper_fix_encoder_state(textio *self) |
991 | 48 | { |
992 | 48 | if (!self->seekable || !self->encoder) { |
993 | 16 | return 0; |
994 | 16 | } |
995 | | |
996 | 32 | self->encoding_start_of_stream = 1; |
997 | | |
998 | 32 | PyObject *cookieObj = PyObject_CallMethodNoArgs( |
999 | 32 | self->buffer, &_Py_ID(tell)); |
1000 | 32 | if (cookieObj == NULL) { |
1001 | 0 | return -1; |
1002 | 0 | } |
1003 | | |
1004 | 32 | int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ); |
1005 | 32 | Py_DECREF(cookieObj); |
1006 | 32 | if (cmp < 0) { |
1007 | 0 | return -1; |
1008 | 0 | } |
1009 | | |
1010 | 32 | if (cmp == 0) { |
1011 | 16 | self->encoding_start_of_stream = 0; |
1012 | 16 | PyObject *res = PyObject_CallMethodOneArg( |
1013 | 16 | self->encoder, &_Py_ID(setstate), _PyLong_GetZero()); |
1014 | 16 | if (res == NULL) { |
1015 | 0 | return -1; |
1016 | 0 | } |
1017 | 16 | Py_DECREF(res); |
1018 | 16 | } |
1019 | | |
1020 | 32 | return 0; |
1021 | 32 | } |
1022 | | |
1023 | | static int |
1024 | | io_check_errors(PyObject *errors) |
1025 | 48 | { |
1026 | 48 | assert(errors != NULL && errors != Py_None); |
1027 | | |
1028 | 48 | PyInterpreterState *interp = _PyInterpreterState_GET(); |
1029 | 48 | #ifndef Py_DEBUG |
1030 | | /* In release mode, only check in development mode (-X dev) */ |
1031 | 48 | if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { |
1032 | 48 | return 0; |
1033 | 48 | } |
1034 | | #else |
1035 | | /* Always check in debug mode */ |
1036 | | #endif |
1037 | | |
1038 | | /* Avoid calling PyCodec_LookupError() before the codec registry is ready: |
1039 | | before_PyUnicode_InitEncodings() is called. */ |
1040 | 0 | if (!interp->unicode.fs_codec.encoding) { |
1041 | 0 | return 0; |
1042 | 0 | } |
1043 | | |
1044 | 0 | const char *name = _PyUnicode_AsUTF8NoNUL(errors); |
1045 | 0 | if (name == NULL) { |
1046 | 0 | return -1; |
1047 | 0 | } |
1048 | 0 | PyObject *handler = PyCodec_LookupError(name); |
1049 | 0 | if (handler != NULL) { |
1050 | 0 | Py_DECREF(handler); |
1051 | 0 | return 0; |
1052 | 0 | } |
1053 | 0 | return -1; |
1054 | 0 | } |
1055 | | |
1056 | | |
1057 | | |
1058 | | /*[clinic input] |
1059 | | _io.TextIOWrapper.__init__ |
1060 | | buffer: object |
1061 | | encoding: str(accept={str, NoneType}) = None |
1062 | | errors: object = None |
1063 | | newline: str(accept={str, NoneType}) = None |
1064 | | line_buffering: bool = False |
1065 | | write_through: bool = False |
1066 | | |
1067 | | Character and line based layer over a BufferedIOBase object, buffer. |
1068 | | |
1069 | | encoding gives the name of the encoding that the stream will be |
1070 | | decoded or encoded with. It defaults to locale.getencoding(). |
1071 | | |
1072 | | errors determines the strictness of encoding and decoding (see |
1073 | | help(codecs.Codec) or the documentation for codecs.register) and |
1074 | | defaults to "strict". |
1075 | | |
1076 | | newline controls how line endings are handled. It can be None, '', |
1077 | | '\n', '\r', and '\r\n'. It works as follows: |
1078 | | |
1079 | | * On input, if newline is None, universal newlines mode is |
1080 | | enabled. Lines in the input can end in '\n', '\r', or '\r\n', and |
1081 | | these are translated into '\n' before being returned to the |
1082 | | caller. If it is '', universal newline mode is enabled, but line |
1083 | | endings are returned to the caller untranslated. If it has any of |
1084 | | the other legal values, input lines are only terminated by the given |
1085 | | string, and the line ending is returned to the caller untranslated. |
1086 | | |
1087 | | * On output, if newline is None, any '\n' characters written are |
1088 | | translated to the system default line separator, os.linesep. If |
1089 | | newline is '' or '\n', no translation takes place. If newline is any |
1090 | | of the other legal values, any '\n' characters written are translated |
1091 | | to the given string. |
1092 | | |
1093 | | If line_buffering is True, a call to flush is implied when a call to |
1094 | | write contains a newline character. |
1095 | | [clinic start generated code]*/ |
1096 | | |
static int
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
                                const char *encoding, PyObject *errors,
                                const char *newline, int line_buffering,
                                int write_through)
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
{
    /* Initialise (or re-initialise) a TextIOWrapper around *buffer*.
     *
     * Steps, in order:
     *   1. validate the arguments (encoding warning, errors, newline);
     *   2. drop every object held from a previous initialisation;
     *   3. resolve the encoding and look up its codec;
     *   4. install newline policy, decoder and encoder;
     *   5. cache properties of the buffer (raw FileIO, seekable, read1).
     *
     * Returns 0 on success and -1 with an exception set on failure.
     * self->ok is 0 on entry to every failure path and only becomes 1
     * once everything succeeded.
     */
    PyObject *raw, *codec_info = NULL;
    PyObject *res;
    int r;

    /* Mark the object unusable until initialisation completes. */
    self->ok = 0;
    self->detached = 0;

    /* Emit EncodingWarning when the interpreter configuration asks for
       it and no encoding was given. */
    if (encoding == NULL) {
        PyInterpreterState *interp = _PyInterpreterState_GET();
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
            if (PyErr_WarnEx(PyExc_EncodingWarning,
                             "'encoding' argument not specified", 1)) {
                return -1;
            }
        }
    }

    /* errors=None means "strict"; only an explicitly passed str goes
       through io_check_errors(). */
    if (errors == Py_None) {
        errors = &_Py_ID(strict);
    }
    else if (!PyUnicode_Check(errors)) {
        // Check 'errors' argument here because Argument Clinic doesn't support
        // 'str(accept={str, NoneType})' converter.
        PyErr_Format(
            PyExc_TypeError,
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
            Py_TYPE(errors)->tp_name);
        return -1;
    }
    else if (io_check_errors(errors)) {
        return -1;
    }
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
    if (errors_str == NULL) {
        return -1;
    }

    if (validate_newline(newline) < 0) {
        return -1;
    }

    /* All argument checks passed: reset state left over from a previous
       __init__ call on the same object. */
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);
    self->decoded_chars_used = 0;
    self->pending_bytes_count = 0;
    self->encodefunc = NULL;
    self->b2cratio = 0.0;

    /* Resolve the default encoding: UTF-8 in UTF-8 mode, otherwise the
       locale encoding (also used for the explicit name "locale"). */
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
        _Py_DECLARE_STR(utf_8, "utf-8");
        self->encoding = &_Py_STR(utf_8);
    }
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
        self->encoding = _Py_GetLocaleEncodingObject();
        if (self->encoding == NULL) {
            goto error;
        }
        assert(PyUnicode_Check(self->encoding));
    }

    /* Keep the C string and the str object in sync: derive whichever of
       the two is still missing from the other. */
    if (self->encoding != NULL) {
        encoding = PyUnicode_AsUTF8(self->encoding);
        if (encoding == NULL)
            goto error;
    }
    else if (encoding != NULL) {
        self->encoding = PyUnicode_FromString(encoding);
        if (self->encoding == NULL)
            goto error;
    }
    else {
        PyErr_SetString(PyExc_OSError,
                        "could not determine default encoding");
        goto error;
    }

    /* Check we have been asked for a real text encoding */
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
    if (codec_info == NULL) {
        Py_CLEAR(self->encoding);
        goto error;
    }

    /* XXX: Failures beyond this point have the potential to leak elements
     * of the partially constructed object (like self->encoding)
     */

    self->errors = Py_NewRef(errors);
    self->chunk_size = 8192;
    self->line_buffering = line_buffering;
    self->write_through = write_through;
    if (set_newline(self, newline) < 0) {
        goto error;
    }

    /* self->buffer must be set before the decoder/encoder helpers run:
       they query buffer.readable() / buffer.writable(). */
    self->buffer = Py_NewRef(buffer);

    /* Build the decoder object */
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
    self->state = state;
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
        goto error;

    /* Build the encoder object */
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
        goto error;

    /* Finished sorting out the codec details */
    Py_CLEAR(codec_info);

    /* Only the exact built-in buffered types are inspected for a raw
       FileIO object. */
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
    {
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
            goto error;
        /* Cache the raw FileIO object to speed up 'closed' checks */
        if (raw != NULL) {
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
                self->raw = raw;  /* takes over the reference */
            else
                Py_DECREF(raw);
        }
    }

    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
    if (res == NULL)
        goto error;
    r = PyObject_IsTrue(res);
    Py_DECREF(res);
    if (r < 0)
        goto error;
    self->seekable = self->telling = r;

    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
    if (r < 0) {
        goto error;
    }
    self->has_read1 = r;

    self->encoding_start_of_stream = 0;
    if (_textiowrapper_fix_encoder_state(self) < 0) {
        goto error;
    }

    self->ok = 1;
    return 0;

  error:
    /* Only the local codec_info reference is released here; members
       already stored on self are left in place (see the XXX note). */
    Py_XDECREF(codec_info);
    return -1;
}
1264 | | |
1265 | | /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true, |
1266 | | * -1 on error. |
1267 | | */ |
1268 | | static int |
1269 | | convert_optional_bool(PyObject *obj, int default_value) |
1270 | 0 | { |
1271 | 0 | long v; |
1272 | 0 | if (obj == Py_None) { |
1273 | 0 | v = default_value; |
1274 | 0 | } |
1275 | 0 | else { |
1276 | 0 | v = PyLong_AsLong(obj); |
1277 | 0 | if (v == -1 && PyErr_Occurred()) |
1278 | 0 | return -1; |
1279 | 0 | } |
1280 | 0 | return v != 0; |
1281 | 0 | } |
1282 | | |
1283 | | static int |
1284 | | textiowrapper_change_encoding(textio *self, PyObject *encoding, |
1285 | | PyObject *errors, int newline_changed) |
1286 | 0 | { |
1287 | | /* Use existing settings where new settings are not specified */ |
1288 | 0 | if (encoding == Py_None && errors == Py_None && !newline_changed) { |
1289 | 0 | return 0; // no change |
1290 | 0 | } |
1291 | | |
1292 | 0 | if (encoding == Py_None) { |
1293 | 0 | encoding = self->encoding; |
1294 | 0 | if (errors == Py_None) { |
1295 | 0 | errors = self->errors; |
1296 | 0 | } |
1297 | 0 | Py_INCREF(encoding); |
1298 | 0 | } |
1299 | 0 | else { |
1300 | 0 | if (_PyUnicode_EqualToASCIIString(encoding, "locale")) { |
1301 | 0 | encoding = _Py_GetLocaleEncodingObject(); |
1302 | 0 | if (encoding == NULL) { |
1303 | 0 | return -1; |
1304 | 0 | } |
1305 | 0 | } else { |
1306 | 0 | Py_INCREF(encoding); |
1307 | 0 | } |
1308 | 0 | if (errors == Py_None) { |
1309 | 0 | errors = &_Py_ID(strict); |
1310 | 0 | } |
1311 | 0 | } |
1312 | 0 | Py_INCREF(errors); |
1313 | |
|
1314 | 0 | const char *c_encoding = PyUnicode_AsUTF8(encoding); |
1315 | 0 | if (c_encoding == NULL) { |
1316 | 0 | Py_DECREF(encoding); |
1317 | 0 | Py_DECREF(errors); |
1318 | 0 | return -1; |
1319 | 0 | } |
1320 | 0 | const char *c_errors = PyUnicode_AsUTF8(errors); |
1321 | 0 | if (c_errors == NULL) { |
1322 | 0 | Py_DECREF(encoding); |
1323 | 0 | Py_DECREF(errors); |
1324 | 0 | return -1; |
1325 | 0 | } |
1326 | | |
1327 | | // Create new encoder & decoder |
1328 | 0 | PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL); |
1329 | 0 | if (codec_info == NULL) { |
1330 | 0 | Py_DECREF(encoding); |
1331 | 0 | Py_DECREF(errors); |
1332 | 0 | return -1; |
1333 | 0 | } |
1334 | 0 | if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 || |
1335 | 0 | _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) { |
1336 | 0 | Py_DECREF(codec_info); |
1337 | 0 | Py_DECREF(encoding); |
1338 | 0 | Py_DECREF(errors); |
1339 | 0 | return -1; |
1340 | 0 | } |
1341 | 0 | Py_DECREF(codec_info); |
1342 | |
|
1343 | 0 | Py_SETREF(self->encoding, encoding); |
1344 | 0 | Py_SETREF(self->errors, errors); |
1345 | |
|
1346 | 0 | return _textiowrapper_fix_encoder_state(self); |
1347 | 0 | } |
1348 | | |
1349 | | /*[clinic input] |
1350 | | @critical_section |
1351 | | _io.TextIOWrapper.reconfigure |
1352 | | * |
1353 | | encoding: object = None |
1354 | | errors: object = None |
1355 | | newline as newline_obj: object(c_default="NULL") = None |
1356 | | line_buffering as line_buffering_obj: object = None |
1357 | | write_through as write_through_obj: object = None |
1358 | | |
1359 | | Reconfigure the text stream with new parameters. |
1360 | | |
1361 | | This also does an implicit stream flush. |
1362 | | |
1363 | | [clinic start generated code]*/ |
1364 | | |
1365 | | static PyObject * |
1366 | | _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, |
1367 | | PyObject *errors, PyObject *newline_obj, |
1368 | | PyObject *line_buffering_obj, |
1369 | | PyObject *write_through_obj) |
1370 | | /*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/ |
1371 | 0 | { |
1372 | 0 | int line_buffering; |
1373 | 0 | int write_through; |
1374 | 0 | const char *newline = NULL; |
1375 | |
|
1376 | 0 | if (encoding != Py_None && !PyUnicode_Check(encoding)) { |
1377 | 0 | PyErr_Format(PyExc_TypeError, |
1378 | 0 | "reconfigure() argument 'encoding' must be str or None, not %s", |
1379 | 0 | Py_TYPE(encoding)->tp_name); |
1380 | 0 | return NULL; |
1381 | 0 | } |
1382 | 0 | if (errors != Py_None && !PyUnicode_Check(errors)) { |
1383 | 0 | PyErr_Format(PyExc_TypeError, |
1384 | 0 | "reconfigure() argument 'errors' must be str or None, not %s", |
1385 | 0 | Py_TYPE(errors)->tp_name); |
1386 | 0 | return NULL; |
1387 | 0 | } |
1388 | 0 | if (newline_obj != NULL && newline_obj != Py_None && |
1389 | 0 | !PyUnicode_Check(newline_obj)) |
1390 | 0 | { |
1391 | 0 | PyErr_Format(PyExc_TypeError, |
1392 | 0 | "reconfigure() argument 'newline' must be str or None, not %s", |
1393 | 0 | Py_TYPE(newline_obj)->tp_name); |
1394 | 0 | return NULL; |
1395 | 0 | } |
1396 | | /* Check if something is in the read buffer */ |
1397 | 0 | if (self->decoded_chars != NULL) { |
1398 | 0 | if (encoding != Py_None || errors != Py_None || newline_obj != NULL) { |
1399 | 0 | _unsupported(self->state, |
1400 | 0 | "It is not possible to set the encoding or newline " |
1401 | 0 | "of stream after the first read"); |
1402 | 0 | return NULL; |
1403 | 0 | } |
1404 | 0 | } |
1405 | | |
1406 | 0 | if (newline_obj != NULL && newline_obj != Py_None) { |
1407 | 0 | newline = PyUnicode_AsUTF8(newline_obj); |
1408 | 0 | if (newline == NULL || validate_newline(newline) < 0) { |
1409 | 0 | return NULL; |
1410 | 0 | } |
1411 | 0 | } |
1412 | | |
1413 | 0 | line_buffering = convert_optional_bool(line_buffering_obj, |
1414 | 0 | self->line_buffering); |
1415 | 0 | if (line_buffering < 0) { |
1416 | 0 | return NULL; |
1417 | 0 | } |
1418 | 0 | write_through = convert_optional_bool(write_through_obj, |
1419 | 0 | self->write_through); |
1420 | 0 | if (write_through < 0) { |
1421 | 0 | return NULL; |
1422 | 0 | } |
1423 | | |
1424 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
1425 | 0 | return NULL; |
1426 | 0 | } |
1427 | 0 | self->b2cratio = 0; |
1428 | |
|
1429 | 0 | if (newline_obj != NULL && set_newline(self, newline) < 0) { |
1430 | 0 | return NULL; |
1431 | 0 | } |
1432 | | |
1433 | 0 | if (textiowrapper_change_encoding( |
1434 | 0 | self, encoding, errors, newline_obj != NULL) < 0) { |
1435 | 0 | return NULL; |
1436 | 0 | } |
1437 | | |
1438 | 0 | self->line_buffering = line_buffering; |
1439 | 0 | self->write_through = write_through; |
1440 | 0 | Py_RETURN_NONE; |
1441 | 0 | } |
1442 | | |
1443 | | static int |
1444 | | textiowrapper_clear(PyObject *op) |
1445 | 0 | { |
1446 | 0 | textio *self = textio_CAST(op); |
1447 | 0 | self->ok = 0; |
1448 | 0 | Py_CLEAR(self->buffer); |
1449 | 0 | Py_CLEAR(self->encoding); |
1450 | 0 | Py_CLEAR(self->encoder); |
1451 | 0 | Py_CLEAR(self->decoder); |
1452 | 0 | Py_CLEAR(self->readnl); |
1453 | 0 | Py_CLEAR(self->decoded_chars); |
1454 | 0 | Py_CLEAR(self->pending_bytes); |
1455 | 0 | Py_CLEAR(self->snapshot); |
1456 | 0 | Py_CLEAR(self->errors); |
1457 | 0 | Py_CLEAR(self->raw); |
1458 | |
|
1459 | 0 | Py_CLEAR(self->dict); |
1460 | 0 | return 0; |
1461 | 0 | } |
1462 | | |
/* Deallocator for TextIOWrapper instances.
 *
 * Runs the IOBase finalizer first, then untracks the object from the GC,
 * clears weak references and owned references, frees the memory and
 * finally releases the reference to the type.
 */
static void
textiowrapper_dealloc(PyObject *op)
{
    textio *self = textio_CAST(op);
    /* Saved up front: the type is still needed after the object memory
       has been released by tp_free(). */
    PyTypeObject *tp = Py_TYPE(self);
    self->finalizing = 1;
    /* NOTE(review): a negative result presumably means the finalizer
       resurrected the object, so it must not be freed -- confirm against
       _PyIOBase_finalize(). */
    if (_PyIOBase_finalize(op) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
    (void)textiowrapper_clear(op);
    tp->tp_free(self);
    Py_DECREF(tp);
}
1478 | | |
1479 | | static int |
1480 | | textiowrapper_traverse(PyObject *op, visitproc visit, void *arg) |
1481 | 16.7k | { |
1482 | 16.7k | textio *self = textio_CAST(op); |
1483 | 16.7k | Py_VISIT(Py_TYPE(self)); |
1484 | 16.7k | Py_VISIT(self->buffer); |
1485 | 16.7k | Py_VISIT(self->encoding); |
1486 | 16.7k | Py_VISIT(self->encoder); |
1487 | 16.7k | Py_VISIT(self->decoder); |
1488 | 16.7k | Py_VISIT(self->readnl); |
1489 | 16.7k | Py_VISIT(self->decoded_chars); |
1490 | 16.7k | Py_VISIT(self->pending_bytes); |
1491 | 16.7k | Py_VISIT(self->snapshot); |
1492 | 16.7k | Py_VISIT(self->errors); |
1493 | 16.7k | Py_VISIT(self->raw); |
1494 | | |
1495 | 16.7k | Py_VISIT(self->dict); |
1496 | 16.7k | return 0; |
1497 | 16.7k | } |
1498 | | |
1499 | | static PyObject * |
1500 | | _io_TextIOWrapper_closed_get_impl(textio *self); |
1501 | | |
/* Make the enclosing function return NULL with an exception set when the
 * stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster:
 * for an exact TextIOWrapper the closed state is read from the cached
 * raw FileIO object when there is one, and from the 'closed' getter
 * otherwise.  Any other type goes through _PyIOBase_check_closed().
 *
 * Only usable in functions returning a pointer.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = _io_TextIOWrapper_closed_get_impl(self); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)

/* Make the enclosing function return NULL with ValueError set unless
 * initialisation completed (self->ok > 0).
 *
 * Note: this expands to a bare 'if' statement, not a do/while(0) block.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED() plus a check that the underlying buffer has not
 * been detached.  Expands to two statements; for functions returning a
 * pointer.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED(), for functions that return an int:
 * the enclosing function returns -1 instead of NULL.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1554 | | |
1555 | | |
1556 | | /*[clinic input] |
1557 | | @critical_section |
1558 | | _io.TextIOWrapper.detach |
1559 | | [clinic start generated code]*/ |
1560 | | |
1561 | | static PyObject * |
1562 | | _io_TextIOWrapper_detach_impl(textio *self) |
1563 | | /*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/ |
1564 | 0 | { |
1565 | 0 | PyObject *buffer; |
1566 | 0 | CHECK_ATTACHED(self); |
1567 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
1568 | 0 | return NULL; |
1569 | 0 | } |
1570 | 0 | buffer = self->buffer; |
1571 | 0 | self->buffer = NULL; |
1572 | 0 | self->detached = 1; |
1573 | 0 | return buffer; |
1574 | 0 | } |
1575 | | |
/* Flush the internal write buffer. This doesn't explicitly flush the
   underlying buffered object, though.

   self->pending_bytes holds either a single bytes object, a single ASCII
   str, or a list of such items.  They are combined into one bytes object
   that is handed to buffer.write() in a single call.

   The object's critical section must be held by the caller.
   Returns 0 on success (or when nothing is pending) and -1 with an
   exception set on failure. */
static int
_textiowrapper_writeflush(textio *self)
{
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);

    if (self->pending_bytes == NULL)
        return 0;

    PyObject *pending = self->pending_bytes;
    PyObject *b;

    if (PyBytes_Check(pending)) {
        /* Single bytes object: write it as is. */
        b = Py_NewRef(pending);
    }
    else if (PyUnicode_Check(pending)) {
        /* Single ASCII str: its character data is copied into a bytes
           object. */
        assert(PyUnicode_IS_ASCII(pending));
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
        b = PyBytes_FromStringAndSize(
            PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
        if (b == NULL) {
            return -1;
        }
    }
    else {
        /* List of pieces: concatenate them into one preallocated bytes
           object of pending_bytes_count bytes. */
        assert(PyList_Check(pending));
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
        if (b == NULL) {
            return -1;
        }

        char *buf = PyBytes_AsString(b);
        Py_ssize_t pos = 0;

        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
            PyObject *obj = PyList_GET_ITEM(pending, i);
            char *src;
            Py_ssize_t len;
            if (PyUnicode_Check(obj)) {
                assert(PyUnicode_IS_ASCII(obj));
                src = PyUnicode_DATA(obj);
                len = PyUnicode_GET_LENGTH(obj);
            }
            else {
                assert(PyBytes_Check(obj));
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
                    Py_DECREF(b);
                    return -1;
                }
            }
            memcpy(buf + pos, src, len);
            pos += len;
        }
        assert(pos == self->pending_bytes_count);
    }

    /* Reset the pending buffer before calling buffer.write(), so that
       anything appended while that call runs starts a fresh buffer. */
    self->pending_bytes_count = 0;
    self->pending_bytes = NULL;
    Py_DECREF(pending);

    PyObject *ret;
    /* Retry for as long as _PyIO_trap_eintr() reports that the failure
       should be retried. */
    do {
        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
    } while (ret == NULL && _PyIO_trap_eintr());
    Py_DECREF(b);
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
    // when an error occurred.
    if (ret == NULL)
        return -1;
    Py_DECREF(ret);
    return 0;
}
1649 | | |
1650 | | /*[clinic input] |
1651 | | @critical_section |
1652 | | _io.TextIOWrapper.write |
1653 | | text: unicode |
1654 | | / |
1655 | | [clinic start generated code]*/ |
1656 | | |
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
{
    /* Encode *text* and append it to the pending write buffer, flushing
     * when the buffer is large enough or when line buffering /
     * write-through requires it.
     *
     * Returns the length of the original *text* as an int object
     * (measured before any newline translation), or NULL with an
     * exception set.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL) {
        return _unsupported(self->state, "not writable");
    }

    /* Own a reference: 'text' may be replaced by a translated copy. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Scan for '\n' only when the result is needed for newline
       translation or line buffering. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
                                                 "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* text_needflush: drain the pending bytes into the buffer object.
       needflush: additionally flush the buffer object itself. */
    if (self->write_through)
        text_needflush = 1;
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        if (PyUnicode_IS_ASCII(text) &&
                // See bpo-43260
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
                is_asciicompat_encoding(self->encodefunc)) {
            /* Fast path: keep the ASCII str itself in the pending
               buffer; _textiowrapper_writeflush() converts it later. */
            b = Py_NewRef(text);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else {
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
    }

    /* From here on 'text' is only compared by address; on the fast
       path 'b' holds its own reference to the same object. */
    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    Py_ssize_t bytes_len;
    if (b == text) {
        /* ASCII str: one byte per character. */
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    // We should avoid concatenating huge data.
    // Flush the buffer before adding b to the buffer if b is not small.
    // https://github.com/python/cpython/issues/87426
    if (bytes_len >= self->chunk_size) {
        // _textiowrapper_writeflush() calls buffer.write().
        // self->pending_bytes can be appended during buffer->write()
        // or other thread.
        // We need to loop until buffer becomes empty.
        // https://github.com/python/cpython/issues/118138
        // https://github.com/python/cpython/issues/119506
        while (self->pending_bytes != NULL) {
            if (_textiowrapper_writeflush(self) < 0) {
                Py_DECREF(b);
                return NULL;
            }
        }
    }

    /* Append b to the pending buffer: empty -> single item,
       single item -> two-element list, list -> append. */
    if (self->pending_bytes == NULL) {
        assert(self->pending_bytes_count == 0);
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        // Since Python 3.12, allocating GC object won't trigger GC and release
        // GIL. See https://github.com/python/cpython/issues/97922
        assert(!PyList_CheckExact(self->pending_bytes));
        /* The list takes over both references. */
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        /* PyList_Append() took its own reference. */
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        if (_PyFile_Flush(self->buffer) < 0) {
            return NULL;
        }
    }

    /* Discard read-side state (decoded characters and snapshot). */
    if (self->snapshot != NULL) {
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
    }

    if (self->decoder) {
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1804 | | |
1805 | | /* Steal a reference to chars and store it in the decoded_char buffer; |
1806 | | */ |
1807 | | static void |
1808 | | textiowrapper_set_decoded_chars(textio *self, PyObject *chars) |
1809 | 0 | { |
1810 | 0 | Py_XSETREF(self->decoded_chars, chars); |
1811 | 0 | self->decoded_chars_used = 0; |
1812 | 0 | } |
1813 | | |
1814 | | static PyObject * |
1815 | | textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) |
1816 | 0 | { |
1817 | 0 | PyObject *chars; |
1818 | 0 | Py_ssize_t avail; |
1819 | |
|
1820 | 0 | if (self->decoded_chars == NULL) |
1821 | 0 | return Py_GetConstant(Py_CONSTANT_EMPTY_STR); |
1822 | | |
1823 | 0 | avail = (PyUnicode_GET_LENGTH(self->decoded_chars) |
1824 | 0 | - self->decoded_chars_used); |
1825 | |
|
1826 | 0 | assert(avail >= 0); |
1827 | |
|
1828 | 0 | if (n < 0 || n > avail) |
1829 | 0 | n = avail; |
1830 | |
|
1831 | 0 | if (self->decoded_chars_used > 0 || n < avail) { |
1832 | 0 | chars = PyUnicode_Substring(self->decoded_chars, |
1833 | 0 | self->decoded_chars_used, |
1834 | 0 | self->decoded_chars_used + n); |
1835 | 0 | if (chars == NULL) |
1836 | 0 | return NULL; |
1837 | 0 | } |
1838 | 0 | else { |
1839 | 0 | chars = Py_NewRef(self->decoded_chars); |
1840 | 0 | } |
1841 | | |
1842 | 0 | self->decoded_chars_used += n; |
1843 | 0 | return chars; |
1844 | 0 | } |
1845 | | |
1846 | | /* Read and decode the next chunk of data from the BufferedReader. |
1847 | | */ |
1848 | | static int |
1849 | | textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint) |
1850 | 0 | { |
1851 | 0 | PyObject *dec_buffer = NULL; |
1852 | 0 | PyObject *dec_flags = NULL; |
1853 | 0 | PyObject *input_chunk = NULL; |
1854 | 0 | Py_buffer input_chunk_buf; |
1855 | 0 | PyObject *decoded_chars, *chunk_size; |
1856 | 0 | Py_ssize_t nbytes, nchars; |
1857 | 0 | int eof; |
1858 | | |
1859 | | /* The return value is True unless EOF was reached. The decoded string is |
1860 | | * placed in self._decoded_chars (replacing its previous value). The |
1861 | | * entire input chunk is sent to the decoder, though some of it may remain |
1862 | | * buffered in the decoder, yet to be converted. |
1863 | | */ |
1864 | |
|
1865 | 0 | if (self->decoder == NULL) { |
1866 | 0 | _unsupported(self->state, "not readable"); |
1867 | 0 | return -1; |
1868 | 0 | } |
1869 | | |
1870 | 0 | if (self->telling) { |
1871 | | /* To prepare for tell(), we need to snapshot a point in the file |
1872 | | * where the decoder's input buffer is empty. |
1873 | | */ |
1874 | 0 | PyObject *state = PyObject_CallMethodNoArgs(self->decoder, |
1875 | 0 | &_Py_ID(getstate)); |
1876 | 0 | if (state == NULL) |
1877 | 0 | return -1; |
1878 | | /* Given this, we know there was a valid snapshot point |
1879 | | * len(dec_buffer) bytes ago with decoder state (b'', dec_flags). |
1880 | | */ |
1881 | 0 | if (!PyTuple_Check(state)) { |
1882 | 0 | PyErr_SetString(PyExc_TypeError, |
1883 | 0 | "illegal decoder state"); |
1884 | 0 | Py_DECREF(state); |
1885 | 0 | return -1; |
1886 | 0 | } |
1887 | 0 | if (!PyArg_ParseTuple(state, |
1888 | 0 | "OO;illegal decoder state", &dec_buffer, &dec_flags)) |
1889 | 0 | { |
1890 | 0 | Py_DECREF(state); |
1891 | 0 | return -1; |
1892 | 0 | } |
1893 | | |
1894 | 0 | if (!PyBytes_Check(dec_buffer)) { |
1895 | 0 | PyErr_Format(PyExc_TypeError, |
1896 | 0 | "illegal decoder state: the first item should be a " |
1897 | 0 | "bytes object, not '%.200s'", |
1898 | 0 | Py_TYPE(dec_buffer)->tp_name); |
1899 | 0 | Py_DECREF(state); |
1900 | 0 | return -1; |
1901 | 0 | } |
1902 | 0 | Py_INCREF(dec_buffer); |
1903 | 0 | Py_INCREF(dec_flags); |
1904 | 0 | Py_DECREF(state); |
1905 | 0 | } |
1906 | | |
1907 | | /* Read a chunk, decode it, and put the result in self._decoded_chars. */ |
1908 | 0 | if (size_hint > 0) { |
1909 | 0 | size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint); |
1910 | 0 | } |
1911 | 0 | chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint)); |
1912 | 0 | if (chunk_size == NULL) |
1913 | 0 | goto fail; |
1914 | | |
1915 | 0 | input_chunk = PyObject_CallMethodOneArg(self->buffer, |
1916 | 0 | (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)), |
1917 | 0 | chunk_size); |
1918 | 0 | Py_DECREF(chunk_size); |
1919 | 0 | if (input_chunk == NULL) |
1920 | 0 | goto fail; |
1921 | | |
1922 | 0 | if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) { |
1923 | 0 | PyErr_Format(PyExc_TypeError, |
1924 | 0 | "underlying %s() should have returned a bytes-like object, " |
1925 | 0 | "not '%.200s'", (self->has_read1 ? "read1": "read"), |
1926 | 0 | Py_TYPE(input_chunk)->tp_name); |
1927 | 0 | goto fail; |
1928 | 0 | } |
1929 | | |
1930 | 0 | nbytes = input_chunk_buf.len; |
1931 | 0 | eof = (nbytes == 0); |
1932 | |
|
1933 | 0 | decoded_chars = _textiowrapper_decode(self->state, self->decoder, |
1934 | 0 | input_chunk, eof); |
1935 | 0 | PyBuffer_Release(&input_chunk_buf); |
1936 | 0 | if (decoded_chars == NULL) |
1937 | 0 | goto fail; |
1938 | | |
1939 | 0 | textiowrapper_set_decoded_chars(self, decoded_chars); |
1940 | 0 | nchars = PyUnicode_GET_LENGTH(decoded_chars); |
1941 | 0 | if (nchars > 0) |
1942 | 0 | self->b2cratio = (double) nbytes / nchars; |
1943 | 0 | else |
1944 | 0 | self->b2cratio = 0.0; |
1945 | 0 | if (nchars > 0) |
1946 | 0 | eof = 0; |
1947 | |
|
1948 | 0 | if (self->telling) { |
1949 | | /* At the snapshot point, len(dec_buffer) bytes before the read, the |
1950 | | * next input to be decoded is dec_buffer + input_chunk. |
1951 | | */ |
1952 | 0 | PyObject *next_input = dec_buffer; |
1953 | 0 | PyBytes_Concat(&next_input, input_chunk); |
1954 | 0 | dec_buffer = NULL; /* Reference lost to PyBytes_Concat */ |
1955 | 0 | if (next_input == NULL) { |
1956 | 0 | goto fail; |
1957 | 0 | } |
1958 | 0 | PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input); |
1959 | 0 | if (snapshot == NULL) { |
1960 | 0 | dec_flags = NULL; |
1961 | 0 | goto fail; |
1962 | 0 | } |
1963 | 0 | Py_XSETREF(self->snapshot, snapshot); |
1964 | 0 | } |
1965 | 0 | Py_DECREF(input_chunk); |
1966 | |
|
1967 | 0 | return (eof == 0); |
1968 | | |
1969 | 0 | fail: |
1970 | 0 | Py_XDECREF(dec_buffer); |
1971 | 0 | Py_XDECREF(dec_flags); |
1972 | 0 | Py_XDECREF(input_chunk); |
1973 | 0 | return -1; |
1974 | 0 | } |
1975 | | |
1976 | | /*[clinic input] |
1977 | | @critical_section |
1978 | | _io.TextIOWrapper.read |
1979 | | size as n: Py_ssize_t(accept={int, NoneType}) = -1 |
1980 | | / |
1981 | | [clinic start generated code]*/ |
1982 | | |
1983 | | static PyObject * |
1984 | | _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) |
1985 | | /*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/ |
1986 | 0 | { |
1987 | 0 | PyObject *result = NULL, *chunks = NULL; |
1988 | |
|
1989 | 0 | CHECK_ATTACHED(self); |
1990 | 0 | CHECK_CLOSED(self); |
1991 | | |
1992 | 0 | if (self->decoder == NULL) { |
1993 | 0 | return _unsupported(self->state, "not readable"); |
1994 | 0 | } |
1995 | | |
1996 | 0 | if (_textiowrapper_writeflush(self) < 0) |
1997 | 0 | return NULL; |
1998 | | |
1999 | 0 | if (n < 0) { |
2000 | | /* Read everything */ |
2001 | 0 | PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read)); |
2002 | 0 | PyObject *decoded; |
2003 | 0 | if (bytes == NULL) |
2004 | 0 | goto fail; |
2005 | | |
2006 | 0 | if (bytes == Py_None){ |
2007 | 0 | Py_DECREF(bytes); |
2008 | 0 | PyErr_SetString(PyExc_BlockingIOError, "Read returned None."); |
2009 | 0 | return NULL; |
2010 | 0 | } |
2011 | | |
2012 | 0 | _PyIO_State *state = self->state; |
2013 | 0 | if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type)) |
2014 | 0 | decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, |
2015 | 0 | bytes, 1); |
2016 | 0 | else |
2017 | 0 | decoded = PyObject_CallMethodObjArgs( |
2018 | 0 | self->decoder, &_Py_ID(decode), bytes, Py_True, NULL); |
2019 | 0 | Py_DECREF(bytes); |
2020 | 0 | if (check_decoded(decoded) < 0) |
2021 | 0 | goto fail; |
2022 | | |
2023 | 0 | result = textiowrapper_get_decoded_chars(self, -1); |
2024 | |
|
2025 | 0 | if (result == NULL) { |
2026 | 0 | Py_DECREF(decoded); |
2027 | 0 | return NULL; |
2028 | 0 | } |
2029 | | |
2030 | 0 | PyUnicode_AppendAndDel(&result, decoded); |
2031 | 0 | if (result == NULL) |
2032 | 0 | goto fail; |
2033 | | |
2034 | 0 | if (self->snapshot != NULL) { |
2035 | 0 | textiowrapper_set_decoded_chars(self, NULL); |
2036 | 0 | Py_CLEAR(self->snapshot); |
2037 | 0 | } |
2038 | 0 | return result; |
2039 | 0 | } |
2040 | 0 | else { |
2041 | 0 | int res = 1; |
2042 | 0 | Py_ssize_t remaining = n; |
2043 | |
|
2044 | 0 | result = textiowrapper_get_decoded_chars(self, n); |
2045 | 0 | if (result == NULL) |
2046 | 0 | goto fail; |
2047 | 0 | remaining -= PyUnicode_GET_LENGTH(result); |
2048 | | |
2049 | | /* Keep reading chunks until we have n characters to return */ |
2050 | 0 | while (remaining > 0) { |
2051 | 0 | res = textiowrapper_read_chunk(self, remaining); |
2052 | 0 | if (res < 0) { |
2053 | | /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() |
2054 | | when EINTR occurs so we needn't do it ourselves. */ |
2055 | 0 | if (_PyIO_trap_eintr()) { |
2056 | 0 | continue; |
2057 | 0 | } |
2058 | 0 | goto fail; |
2059 | 0 | } |
2060 | 0 | if (res == 0) /* EOF */ |
2061 | 0 | break; |
2062 | 0 | if (chunks == NULL) { |
2063 | 0 | chunks = PyList_New(0); |
2064 | 0 | if (chunks == NULL) |
2065 | 0 | goto fail; |
2066 | 0 | } |
2067 | 0 | if (PyUnicode_GET_LENGTH(result) > 0 && |
2068 | 0 | PyList_Append(chunks, result) < 0) |
2069 | 0 | goto fail; |
2070 | 0 | Py_DECREF(result); |
2071 | 0 | result = textiowrapper_get_decoded_chars(self, remaining); |
2072 | 0 | if (result == NULL) |
2073 | 0 | goto fail; |
2074 | 0 | remaining -= PyUnicode_GET_LENGTH(result); |
2075 | 0 | } |
2076 | 0 | if (chunks != NULL) { |
2077 | 0 | if (result != NULL && PyList_Append(chunks, result) < 0) |
2078 | 0 | goto fail; |
2079 | 0 | _Py_DECLARE_STR(empty, ""); |
2080 | 0 | Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks)); |
2081 | 0 | if (result == NULL) |
2082 | 0 | goto fail; |
2083 | 0 | Py_CLEAR(chunks); |
2084 | 0 | } |
2085 | 0 | return result; |
2086 | 0 | } |
2087 | 0 | fail: |
2088 | 0 | Py_XDECREF(result); |
2089 | 0 | Py_XDECREF(chunks); |
2090 | 0 | return NULL; |
2091 | 0 | } |
2092 | | |
2093 | | |
2094 | | /* NOTE: `end` must point to the real end of the Py_UCS4 storage, |
2095 | | that is to the NUL character. Otherwise the function will produce |
2096 | | incorrect results. */ |
2097 | | static const char * |
2098 | | find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch) |
2099 | 0 | { |
2100 | 0 | if (kind == PyUnicode_1BYTE_KIND) { |
2101 | 0 | assert(ch < 256); |
2102 | 0 | return (char *) memchr((const void *) s, (char) ch, end - s); |
2103 | 0 | } |
2104 | 0 | for (;;) { |
2105 | 0 | while (PyUnicode_READ(kind, s, 0) > ch) |
2106 | 0 | s += kind; |
2107 | 0 | if (PyUnicode_READ(kind, s, 0) == ch) |
2108 | 0 | return s; |
2109 | 0 | if (s == end) |
2110 | 0 | return NULL; |
2111 | 0 | s += kind; |
2112 | 0 | } |
2113 | 0 | } |
2114 | | |
2115 | | Py_ssize_t |
2116 | | _PyIO_find_line_ending( |
2117 | | int translated, int universal, PyObject *readnl, |
2118 | | int kind, const char *start, const char *end, Py_ssize_t *consumed) |
2119 | 20.8M | { |
2120 | 20.8M | Py_ssize_t len = (end - start)/kind; |
2121 | | |
2122 | 20.8M | if (translated) { |
2123 | | /* Newlines are already translated, only search for \n */ |
2124 | 0 | const char *pos = find_control_char(kind, start, end, '\n'); |
2125 | 0 | if (pos != NULL) |
2126 | 0 | return (pos - start)/kind + 1; |
2127 | 0 | else { |
2128 | 0 | *consumed = len; |
2129 | 0 | return -1; |
2130 | 0 | } |
2131 | 0 | } |
2132 | 20.8M | else if (universal) { |
2133 | | /* Universal newline search. Find any of \r, \r\n, \n |
2134 | | * The decoder ensures that \r\n are not split in two pieces |
2135 | | */ |
2136 | 20.8M | const char *s = start; |
2137 | 83.3M | for (;;) { |
2138 | 83.3M | Py_UCS4 ch; |
2139 | | /* Fast path for non-control chars. The loop always ends |
2140 | | since the Unicode string is NUL-terminated. */ |
2141 | 257M | while (PyUnicode_READ(kind, s, 0) > '\r') |
2142 | 174M | s += kind; |
2143 | 83.3M | if (s >= end) { |
2144 | 33.4k | *consumed = len; |
2145 | 33.4k | return -1; |
2146 | 33.4k | } |
2147 | 83.3M | ch = PyUnicode_READ(kind, s, 0); |
2148 | 83.3M | s += kind; |
2149 | 83.3M | if (ch == '\n') |
2150 | 5.19M | return (s - start)/kind; |
2151 | 78.1M | if (ch == '\r') { |
2152 | 15.6M | if (PyUnicode_READ(kind, s, 0) == '\n') |
2153 | 439k | return (s - start)/kind + 1; |
2154 | 15.1M | else |
2155 | 15.1M | return (s - start)/kind; |
2156 | 15.6M | } |
2157 | 78.1M | } |
2158 | 20.8M | } |
2159 | 0 | else { |
2160 | | /* Non-universal mode. */ |
2161 | 0 | Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl); |
2162 | 0 | const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl); |
2163 | | /* Assume that readnl is an ASCII character. */ |
2164 | 0 | assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND); |
2165 | 0 | if (readnl_len == 1) { |
2166 | 0 | const char *pos = find_control_char(kind, start, end, nl[0]); |
2167 | 0 | if (pos != NULL) |
2168 | 0 | return (pos - start)/kind + 1; |
2169 | 0 | *consumed = len; |
2170 | 0 | return -1; |
2171 | 0 | } |
2172 | 0 | else { |
2173 | 0 | const char *s = start; |
2174 | 0 | const char *e = end - (readnl_len - 1)*kind; |
2175 | 0 | const char *pos; |
2176 | 0 | if (e < s) |
2177 | 0 | e = s; |
2178 | 0 | while (s < e) { |
2179 | 0 | Py_ssize_t i; |
2180 | 0 | const char *pos = find_control_char(kind, s, end, nl[0]); |
2181 | 0 | if (pos == NULL || pos >= e) |
2182 | 0 | break; |
2183 | 0 | for (i = 1; i < readnl_len; i++) { |
2184 | 0 | if (PyUnicode_READ(kind, pos, i) != nl[i]) |
2185 | 0 | break; |
2186 | 0 | } |
2187 | 0 | if (i == readnl_len) |
2188 | 0 | return (pos - start)/kind + readnl_len; |
2189 | 0 | s = pos + kind; |
2190 | 0 | } |
2191 | 0 | pos = find_control_char(kind, e, end, nl[0]); |
2192 | 0 | if (pos == NULL) |
2193 | 0 | *consumed = len; |
2194 | 0 | else |
2195 | 0 | *consumed = (pos - start)/kind; |
2196 | 0 | return -1; |
2197 | 0 | } |
2198 | 0 | } |
2199 | 20.8M | } |
2200 | | |
2201 | | static PyObject * |
2202 | | _textiowrapper_readline(textio *self, Py_ssize_t limit) |
2203 | 0 | { |
2204 | 0 | PyObject *line = NULL, *chunks = NULL, *remaining = NULL; |
2205 | 0 | Py_ssize_t start, endpos, chunked, offset_to_buffer; |
2206 | 0 | int res; |
2207 | |
|
2208 | 0 | CHECK_CLOSED(self); |
2209 | | |
2210 | 0 | if (_textiowrapper_writeflush(self) < 0) |
2211 | 0 | return NULL; |
2212 | | |
2213 | 0 | chunked = 0; |
2214 | |
|
2215 | 0 | while (1) { |
2216 | 0 | const char *ptr; |
2217 | 0 | Py_ssize_t line_len; |
2218 | 0 | int kind; |
2219 | 0 | Py_ssize_t consumed = 0; |
2220 | | |
2221 | | /* First, get some data if necessary */ |
2222 | 0 | res = 1; |
2223 | 0 | while (!self->decoded_chars || |
2224 | 0 | !PyUnicode_GET_LENGTH(self->decoded_chars)) { |
2225 | 0 | res = textiowrapper_read_chunk(self, 0); |
2226 | 0 | if (res < 0) { |
2227 | | /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals() |
2228 | | when EINTR occurs so we needn't do it ourselves. */ |
2229 | 0 | if (_PyIO_trap_eintr()) { |
2230 | 0 | continue; |
2231 | 0 | } |
2232 | 0 | goto error; |
2233 | 0 | } |
2234 | 0 | if (res == 0) |
2235 | 0 | break; |
2236 | 0 | } |
2237 | 0 | if (res == 0) { |
2238 | | /* end of file */ |
2239 | 0 | textiowrapper_set_decoded_chars(self, NULL); |
2240 | 0 | Py_CLEAR(self->snapshot); |
2241 | 0 | start = endpos = offset_to_buffer = 0; |
2242 | 0 | break; |
2243 | 0 | } |
2244 | | |
2245 | 0 | if (remaining == NULL) { |
2246 | 0 | line = Py_NewRef(self->decoded_chars); |
2247 | 0 | start = self->decoded_chars_used; |
2248 | 0 | offset_to_buffer = 0; |
2249 | 0 | } |
2250 | 0 | else { |
2251 | 0 | assert(self->decoded_chars_used == 0); |
2252 | 0 | line = PyUnicode_Concat(remaining, self->decoded_chars); |
2253 | 0 | start = 0; |
2254 | 0 | offset_to_buffer = PyUnicode_GET_LENGTH(remaining); |
2255 | 0 | Py_CLEAR(remaining); |
2256 | 0 | if (line == NULL) |
2257 | 0 | goto error; |
2258 | 0 | } |
2259 | | |
2260 | 0 | ptr = PyUnicode_DATA(line); |
2261 | 0 | line_len = PyUnicode_GET_LENGTH(line); |
2262 | 0 | kind = PyUnicode_KIND(line); |
2263 | |
|
2264 | 0 | endpos = _PyIO_find_line_ending( |
2265 | 0 | self->readtranslate, self->readuniversal, self->readnl, |
2266 | 0 | kind, |
2267 | 0 | ptr + kind * start, |
2268 | 0 | ptr + kind * line_len, |
2269 | 0 | &consumed); |
2270 | 0 | if (endpos >= 0) { |
2271 | 0 | endpos += start; |
2272 | 0 | if (limit >= 0 && (endpos - start) + chunked >= limit) |
2273 | 0 | endpos = start + limit - chunked; |
2274 | 0 | break; |
2275 | 0 | } |
2276 | | |
2277 | | /* We can put aside up to `endpos` */ |
2278 | 0 | endpos = consumed + start; |
2279 | 0 | if (limit >= 0 && (endpos - start) + chunked >= limit) { |
2280 | | /* Didn't find line ending, but reached length limit */ |
2281 | 0 | endpos = start + limit - chunked; |
2282 | 0 | break; |
2283 | 0 | } |
2284 | | |
2285 | 0 | if (endpos > start) { |
2286 | | /* No line ending seen yet - put aside current data */ |
2287 | 0 | PyObject *s; |
2288 | 0 | if (chunks == NULL) { |
2289 | 0 | chunks = PyList_New(0); |
2290 | 0 | if (chunks == NULL) |
2291 | 0 | goto error; |
2292 | 0 | } |
2293 | 0 | s = PyUnicode_Substring(line, start, endpos); |
2294 | 0 | if (s == NULL) |
2295 | 0 | goto error; |
2296 | 0 | if (PyList_Append(chunks, s) < 0) { |
2297 | 0 | Py_DECREF(s); |
2298 | 0 | goto error; |
2299 | 0 | } |
2300 | 0 | chunked += PyUnicode_GET_LENGTH(s); |
2301 | 0 | Py_DECREF(s); |
2302 | 0 | } |
2303 | | /* There may be some remaining bytes we'll have to prepend to the |
2304 | | next chunk of data */ |
2305 | 0 | if (endpos < line_len) { |
2306 | 0 | remaining = PyUnicode_Substring(line, endpos, line_len); |
2307 | 0 | if (remaining == NULL) |
2308 | 0 | goto error; |
2309 | 0 | } |
2310 | 0 | Py_CLEAR(line); |
2311 | | /* We have consumed the buffer */ |
2312 | 0 | textiowrapper_set_decoded_chars(self, NULL); |
2313 | 0 | } |
2314 | | |
2315 | 0 | if (line != NULL) { |
2316 | | /* Our line ends in the current buffer */ |
2317 | 0 | self->decoded_chars_used = endpos - offset_to_buffer; |
2318 | 0 | if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) { |
2319 | 0 | PyObject *s = PyUnicode_Substring(line, start, endpos); |
2320 | 0 | Py_CLEAR(line); |
2321 | 0 | if (s == NULL) |
2322 | 0 | goto error; |
2323 | 0 | line = s; |
2324 | 0 | } |
2325 | 0 | } |
2326 | 0 | if (remaining != NULL) { |
2327 | 0 | if (chunks == NULL) { |
2328 | 0 | chunks = PyList_New(0); |
2329 | 0 | if (chunks == NULL) |
2330 | 0 | goto error; |
2331 | 0 | } |
2332 | 0 | if (PyList_Append(chunks, remaining) < 0) |
2333 | 0 | goto error; |
2334 | 0 | Py_CLEAR(remaining); |
2335 | 0 | } |
2336 | 0 | if (chunks != NULL) { |
2337 | 0 | if (line != NULL) { |
2338 | 0 | if (PyList_Append(chunks, line) < 0) |
2339 | 0 | goto error; |
2340 | 0 | Py_DECREF(line); |
2341 | 0 | } |
2342 | 0 | line = PyUnicode_Join(&_Py_STR(empty), chunks); |
2343 | 0 | if (line == NULL) |
2344 | 0 | goto error; |
2345 | 0 | Py_CLEAR(chunks); |
2346 | 0 | } |
2347 | 0 | if (line == NULL) { |
2348 | 0 | line = &_Py_STR(empty); |
2349 | 0 | } |
2350 | |
|
2351 | 0 | return line; |
2352 | | |
2353 | 0 | error: |
2354 | 0 | Py_XDECREF(chunks); |
2355 | 0 | Py_XDECREF(remaining); |
2356 | 0 | Py_XDECREF(line); |
2357 | 0 | return NULL; |
2358 | 0 | } |
2359 | | |
2360 | | /*[clinic input] |
2361 | | @critical_section |
2362 | | _io.TextIOWrapper.readline |
2363 | | size: Py_ssize_t = -1 |
2364 | | / |
2365 | | [clinic start generated code]*/ |
2366 | | |
2367 | | static PyObject * |
2368 | | _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size) |
2369 | | /*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/ |
2370 | 0 | { |
2371 | 0 | CHECK_ATTACHED(self); |
2372 | 0 | return _textiowrapper_readline(self, size); |
2373 | 0 | } |
2374 | | |
2375 | | /* Seek and Tell */ |
2376 | | |
/* Unpacked form of the integer cookie exchanged with seek(); see
   textiowrapper_parse_cookie() and textiowrapper_build_cookie(). */
typedef struct {
    Py_off_t start_pos;     /* position seek() asks the underlying buffer for */
    int dec_flags;          /* flags handed to decoder.setstate() */
    int bytes_to_feed;      /* size of the read fed to the decoder by seek() */
    int chars_to_skip;      /* decoded characters to skip after that read */
    char need_eof;          /* nonzero: that decode() call is the final one */
} cookie_type;
2384 | | |
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).  The following macros define at which offsets in that
   intermediary byte string the various cookie_type fields are stored.
*/

/* Total size of the intermediary byte string: one slot per cookie_type
   field, with no padding between them. */
#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(char))
# define OFF_NEED_EOF 0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS 0
# define OFF_DEC_FLAGS (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2416 | | |
2417 | | static int |
2418 | | textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj) |
2419 | 0 | { |
2420 | 0 | unsigned char buffer[COOKIE_BUF_LEN]; |
2421 | 0 | PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj); |
2422 | 0 | if (cookieLong == NULL) |
2423 | 0 | return -1; |
2424 | | |
2425 | 0 | if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer), |
2426 | 0 | PY_LITTLE_ENDIAN, 0, 1) < 0) { |
2427 | 0 | Py_DECREF(cookieLong); |
2428 | 0 | return -1; |
2429 | 0 | } |
2430 | 0 | Py_DECREF(cookieLong); |
2431 | |
|
2432 | 0 | memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos)); |
2433 | 0 | memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags)); |
2434 | 0 | memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed)); |
2435 | 0 | memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip)); |
2436 | 0 | memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof)); |
2437 | |
|
2438 | 0 | return 0; |
2439 | 0 | } |
2440 | | |
2441 | | static PyObject * |
2442 | | textiowrapper_build_cookie(cookie_type *cookie) |
2443 | 0 | { |
2444 | 0 | unsigned char buffer[COOKIE_BUF_LEN]; |
2445 | |
|
2446 | 0 | memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos)); |
2447 | 0 | memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags)); |
2448 | 0 | memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed)); |
2449 | 0 | memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip)); |
2450 | 0 | memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof)); |
2451 | |
|
2452 | 0 | return _PyLong_FromByteArray(buffer, sizeof(buffer), |
2453 | 0 | PY_LITTLE_ENDIAN, 0); |
2454 | 0 | } |
2455 | | |
2456 | | static int |
2457 | | _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie) |
2458 | 0 | { |
2459 | 0 | PyObject *res; |
2460 | | /* When seeking to the start of the stream, we call decoder.reset() |
2461 | | rather than decoder.getstate(). |
2462 | | This is for a few decoders such as utf-16 for which the state value |
2463 | | at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of |
2464 | | utf-16, that we are expecting a BOM). |
2465 | | */ |
2466 | 0 | if (cookie->start_pos == 0 && cookie->dec_flags == 0) { |
2467 | 0 | res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset)); |
2468 | 0 | } |
2469 | 0 | else { |
2470 | 0 | res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate), |
2471 | 0 | "((yi))", "", cookie->dec_flags); |
2472 | 0 | } |
2473 | 0 | if (res == NULL) { |
2474 | 0 | return -1; |
2475 | 0 | } |
2476 | 0 | Py_DECREF(res); |
2477 | 0 | return 0; |
2478 | 0 | } |
2479 | | |
2480 | | static int |
2481 | | _textiowrapper_encoder_reset(textio *self, int start_of_stream) |
2482 | 0 | { |
2483 | 0 | PyObject *res; |
2484 | 0 | if (start_of_stream) { |
2485 | 0 | res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset)); |
2486 | 0 | self->encoding_start_of_stream = 1; |
2487 | 0 | } |
2488 | 0 | else { |
2489 | 0 | res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate), |
2490 | 0 | _PyLong_GetZero()); |
2491 | 0 | self->encoding_start_of_stream = 0; |
2492 | 0 | } |
2493 | 0 | if (res == NULL) |
2494 | 0 | return -1; |
2495 | 0 | Py_DECREF(res); |
2496 | 0 | return 0; |
2497 | 0 | } |
2498 | | |
2499 | | static int |
2500 | | _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie) |
2501 | 0 | { |
2502 | | /* Same as _textiowrapper_decoder_setstate() above. */ |
2503 | 0 | return _textiowrapper_encoder_reset( |
2504 | 0 | self, cookie->start_pos == 0 && cookie->dec_flags == 0); |
2505 | 0 | } |
2506 | | |
2507 | | /*[clinic input] |
2508 | | @critical_section |
2509 | | _io.TextIOWrapper.seek |
2510 | | cookie as cookieObj: object |
2511 | | Zero or an opaque number returned by tell(). |
2512 | | whence: int(c_default='0') = os.SEEK_SET |
2513 | | The relative position to seek from. |
2514 | | / |
2515 | | |
2516 | | Set the stream position, and return the new stream position. |
2517 | | |
2518 | | Four operations are supported, given by the following argument |
2519 | | combinations: |
2520 | | |
2521 | | - seek(0, SEEK_SET): Rewind to the start of the stream. |
2522 | | - seek(cookie, SEEK_SET): Restore a previous position; |
2523 | | 'cookie' must be a number returned by tell(). |
2524 | | - seek(0, SEEK_END): Fast-forward to the end of the stream. |
2525 | | - seek(0, SEEK_CUR): Leave the current stream position unchanged. |
2526 | | |
2527 | | Any other argument combinations are invalid, |
2528 | | and may raise exceptions. |
2529 | | [clinic start generated code]*/ |
2530 | | |
2531 | | static PyObject * |
2532 | | _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence) |
2533 | | /*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/ |
2534 | 0 | { |
2535 | 0 | PyObject *posobj; |
2536 | 0 | cookie_type cookie; |
2537 | 0 | PyObject *res; |
2538 | 0 | int cmp; |
2539 | 0 | PyObject *snapshot; |
2540 | |
|
2541 | 0 | CHECK_ATTACHED(self); |
2542 | 0 | CHECK_CLOSED(self); |
2543 | | |
2544 | 0 | Py_INCREF(cookieObj); |
2545 | |
|
2546 | 0 | if (!self->seekable) { |
2547 | 0 | _unsupported(self->state, "underlying stream is not seekable"); |
2548 | 0 | goto fail; |
2549 | 0 | } |
2550 | | |
2551 | 0 | PyObject *zero = _PyLong_GetZero(); // borrowed reference |
2552 | |
|
2553 | 0 | switch (whence) { |
2554 | 0 | case SEEK_CUR: |
2555 | | /* seek relative to current position */ |
2556 | 0 | cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ); |
2557 | 0 | if (cmp < 0) |
2558 | 0 | goto fail; |
2559 | | |
2560 | 0 | if (cmp == 0) { |
2561 | 0 | _unsupported(self->state, "can't do nonzero cur-relative seeks"); |
2562 | 0 | goto fail; |
2563 | 0 | } |
2564 | | |
2565 | | /* Seeking to the current position should attempt to |
2566 | | * sync the underlying buffer with the current position. |
2567 | | */ |
2568 | 0 | Py_DECREF(cookieObj); |
2569 | 0 | cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell)); |
2570 | 0 | if (cookieObj == NULL) |
2571 | 0 | goto fail; |
2572 | 0 | break; |
2573 | | |
2574 | 0 | case SEEK_END: |
2575 | | /* seek relative to end of file */ |
2576 | 0 | cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ); |
2577 | 0 | if (cmp < 0) |
2578 | 0 | goto fail; |
2579 | | |
2580 | 0 | if (cmp == 0) { |
2581 | 0 | _unsupported(self->state, "can't do nonzero end-relative seeks"); |
2582 | 0 | goto fail; |
2583 | 0 | } |
2584 | | |
2585 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
2586 | 0 | goto fail; |
2587 | 0 | } |
2588 | | |
2589 | 0 | textiowrapper_set_decoded_chars(self, NULL); |
2590 | 0 | Py_CLEAR(self->snapshot); |
2591 | 0 | if (self->decoder) { |
2592 | 0 | res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset)); |
2593 | 0 | if (res == NULL) |
2594 | 0 | goto fail; |
2595 | 0 | Py_DECREF(res); |
2596 | 0 | } |
2597 | | |
2598 | 0 | res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2); |
2599 | 0 | Py_CLEAR(cookieObj); |
2600 | 0 | if (res == NULL) |
2601 | 0 | goto fail; |
2602 | 0 | if (self->encoder) { |
2603 | | /* If seek() == 0, we are at the start of stream, otherwise not */ |
2604 | 0 | cmp = PyObject_RichCompareBool(res, zero, Py_EQ); |
2605 | 0 | if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) { |
2606 | 0 | Py_DECREF(res); |
2607 | 0 | goto fail; |
2608 | 0 | } |
2609 | 0 | } |
2610 | 0 | return res; |
2611 | | |
2612 | 0 | case SEEK_SET: |
2613 | 0 | break; |
2614 | | |
2615 | 0 | default: |
2616 | 0 | PyErr_Format(PyExc_ValueError, |
2617 | 0 | "invalid whence (%d, should be %d, %d or %d)", whence, |
2618 | 0 | SEEK_SET, SEEK_CUR, SEEK_END); |
2619 | 0 | goto fail; |
2620 | 0 | } |
2621 | | |
2622 | 0 | cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT); |
2623 | 0 | if (cmp < 0) |
2624 | 0 | goto fail; |
2625 | | |
2626 | 0 | if (cmp == 1) { |
2627 | 0 | PyErr_Format(PyExc_ValueError, |
2628 | 0 | "negative seek position %R", cookieObj); |
2629 | 0 | goto fail; |
2630 | 0 | } |
2631 | | |
2632 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
2633 | 0 | goto fail; |
2634 | 0 | } |
2635 | | |
2636 | | /* The strategy of seek() is to go back to the safe start point |
2637 | | * and replay the effect of read(chars_to_skip) from there. |
2638 | | */ |
2639 | 0 | if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0) |
2640 | 0 | goto fail; |
2641 | | |
2642 | | /* Seek back to the safe start point. */ |
2643 | 0 | posobj = PyLong_FromOff_t(cookie.start_pos); |
2644 | 0 | if (posobj == NULL) |
2645 | 0 | goto fail; |
2646 | 0 | res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj); |
2647 | 0 | Py_DECREF(posobj); |
2648 | 0 | if (res == NULL) |
2649 | 0 | goto fail; |
2650 | 0 | Py_DECREF(res); |
2651 | |
|
2652 | 0 | textiowrapper_set_decoded_chars(self, NULL); |
2653 | 0 | Py_CLEAR(self->snapshot); |
2654 | | |
2655 | | /* Restore the decoder to its state from the safe start point. */ |
2656 | 0 | if (self->decoder) { |
2657 | 0 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2658 | 0 | goto fail; |
2659 | 0 | } |
2660 | | |
2661 | 0 | if (cookie.chars_to_skip) { |
2662 | | /* Just like _read_chunk, feed the decoder and save a snapshot. */ |
2663 | 0 | PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read), |
2664 | 0 | "i", cookie.bytes_to_feed); |
2665 | 0 | PyObject *decoded; |
2666 | |
|
2667 | 0 | if (input_chunk == NULL) |
2668 | 0 | goto fail; |
2669 | | |
2670 | 0 | if (!PyBytes_Check(input_chunk)) { |
2671 | 0 | PyErr_Format(PyExc_TypeError, |
2672 | 0 | "underlying read() should have returned a bytes " |
2673 | 0 | "object, not '%.200s'", |
2674 | 0 | Py_TYPE(input_chunk)->tp_name); |
2675 | 0 | Py_DECREF(input_chunk); |
2676 | 0 | goto fail; |
2677 | 0 | } |
2678 | | |
2679 | 0 | snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk); |
2680 | 0 | if (snapshot == NULL) { |
2681 | 0 | goto fail; |
2682 | 0 | } |
2683 | 0 | Py_XSETREF(self->snapshot, snapshot); |
2684 | |
|
2685 | 0 | decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode), |
2686 | 0 | input_chunk, cookie.need_eof ? Py_True : Py_False, NULL); |
2687 | |
|
2688 | 0 | if (check_decoded(decoded) < 0) |
2689 | 0 | goto fail; |
2690 | | |
2691 | 0 | textiowrapper_set_decoded_chars(self, decoded); |
2692 | | |
2693 | | /* Skip chars_to_skip of the decoded characters. */ |
2694 | 0 | if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) { |
2695 | 0 | PyErr_SetString(PyExc_OSError, "can't restore logical file position"); |
2696 | 0 | goto fail; |
2697 | 0 | } |
2698 | 0 | self->decoded_chars_used = cookie.chars_to_skip; |
2699 | 0 | } |
2700 | 0 | else { |
2701 | 0 | snapshot = Py_BuildValue("iy", cookie.dec_flags, ""); |
2702 | 0 | if (snapshot == NULL) |
2703 | 0 | goto fail; |
2704 | 0 | Py_XSETREF(self->snapshot, snapshot); |
2705 | 0 | } |
2706 | | |
2707 | | /* Finally, reset the encoder (merely useful for proper BOM handling) */ |
2708 | 0 | if (self->encoder) { |
2709 | 0 | if (_textiowrapper_encoder_setstate(self, &cookie) < 0) |
2710 | 0 | goto fail; |
2711 | 0 | } |
2712 | 0 | return cookieObj; |
2713 | 0 | fail: |
2714 | 0 | Py_XDECREF(cookieObj); |
2715 | 0 | return NULL; |
2716 | |
|
2717 | 0 | } |
2718 | | |
2719 | | /*[clinic input] |
2720 | | @critical_section |
2721 | | _io.TextIOWrapper.tell |
2722 | | |
2723 | | Return the stream position as an opaque number. |
2724 | | |
2725 | | The return value of tell() can be given as input to seek(), to restore a |
2726 | | previous stream position. |
2727 | | [clinic start generated code]*/ |
2728 | | |
2729 | | static PyObject * |
2730 | | _io_TextIOWrapper_tell_impl(textio *self) |
2731 | | /*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/ |
2732 | 0 | { |
2733 | 0 | PyObject *res; |
2734 | 0 | PyObject *posobj = NULL; |
2735 | 0 | cookie_type cookie = {0,0,0,0,0}; |
2736 | 0 | PyObject *next_input; |
2737 | 0 | Py_ssize_t chars_to_skip, chars_decoded; |
2738 | 0 | Py_ssize_t skip_bytes, skip_back; |
2739 | 0 | PyObject *saved_state = NULL; |
2740 | 0 | const char *input, *input_end; |
2741 | 0 | Py_ssize_t dec_buffer_len; |
2742 | 0 | int dec_flags; |
2743 | |
|
2744 | 0 | CHECK_ATTACHED(self); |
2745 | 0 | CHECK_CLOSED(self); |
2746 | | |
2747 | 0 | if (!self->seekable) { |
2748 | 0 | _unsupported(self->state, "underlying stream is not seekable"); |
2749 | 0 | goto fail; |
2750 | 0 | } |
2751 | 0 | if (!self->telling) { |
2752 | 0 | PyErr_SetString(PyExc_OSError, |
2753 | 0 | "telling position disabled by next() call"); |
2754 | 0 | goto fail; |
2755 | 0 | } |
2756 | | |
2757 | 0 | if (_textiowrapper_writeflush(self) < 0) |
2758 | 0 | return NULL; |
2759 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
2760 | 0 | goto fail; |
2761 | 0 | } |
2762 | | |
2763 | 0 | posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell)); |
2764 | 0 | if (posobj == NULL) |
2765 | 0 | goto fail; |
2766 | | |
2767 | 0 | if (self->decoder == NULL || self->snapshot == NULL) { |
2768 | 0 | assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0); |
2769 | 0 | return posobj; |
2770 | 0 | } |
2771 | | |
2772 | | #if defined(HAVE_LARGEFILE_SUPPORT) |
2773 | | cookie.start_pos = PyLong_AsLongLong(posobj); |
2774 | | #else |
2775 | 0 | cookie.start_pos = PyLong_AsLong(posobj); |
2776 | 0 | #endif |
2777 | 0 | Py_DECREF(posobj); |
2778 | 0 | if (PyErr_Occurred()) |
2779 | 0 | goto fail; |
2780 | | |
2781 | | /* Skip backward to the snapshot point (see _read_chunk). */ |
2782 | 0 | assert(PyTuple_Check(self->snapshot)); |
2783 | 0 | if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input)) |
2784 | 0 | goto fail; |
2785 | | |
2786 | 0 | assert (PyBytes_Check(next_input)); |
2787 | |
|
2788 | 0 | cookie.start_pos -= PyBytes_GET_SIZE(next_input); |
2789 | | |
2790 | | /* How many decoded characters have been used up since the snapshot? */ |
2791 | 0 | if (self->decoded_chars_used == 0) { |
2792 | | /* We haven't moved from the snapshot point. */ |
2793 | 0 | return textiowrapper_build_cookie(&cookie); |
2794 | 0 | } |
2795 | | |
2796 | 0 | chars_to_skip = self->decoded_chars_used; |
2797 | | |
2798 | | /* Decoder state will be restored at the end */ |
2799 | 0 | saved_state = PyObject_CallMethodNoArgs(self->decoder, |
2800 | 0 | &_Py_ID(getstate)); |
2801 | 0 | if (saved_state == NULL) |
2802 | 0 | goto fail; |
2803 | | |
2804 | 0 | #define DECODER_GETSTATE() do { \ |
2805 | 0 | PyObject *dec_buffer; \ |
2806 | 0 | PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \ |
2807 | 0 | &_Py_ID(getstate)); \ |
2808 | 0 | if (_state == NULL) \ |
2809 | 0 | goto fail; \ |
2810 | 0 | if (!PyTuple_Check(_state)) { \ |
2811 | 0 | PyErr_SetString(PyExc_TypeError, \ |
2812 | 0 | "illegal decoder state"); \ |
2813 | 0 | Py_DECREF(_state); \ |
2814 | 0 | goto fail; \ |
2815 | 0 | } \ |
2816 | 0 | if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \ |
2817 | 0 | &dec_buffer, &dec_flags)) \ |
2818 | 0 | { \ |
2819 | 0 | Py_DECREF(_state); \ |
2820 | 0 | goto fail; \ |
2821 | 0 | } \ |
2822 | 0 | if (!PyBytes_Check(dec_buffer)) { \ |
2823 | 0 | PyErr_Format(PyExc_TypeError, \ |
2824 | 0 | "illegal decoder state: the first item should be a " \ |
2825 | 0 | "bytes object, not '%.200s'", \ |
2826 | 0 | Py_TYPE(dec_buffer)->tp_name); \ |
2827 | 0 | Py_DECREF(_state); \ |
2828 | 0 | goto fail; \ |
2829 | 0 | } \ |
2830 | 0 | dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \ |
2831 | 0 | Py_DECREF(_state); \ |
2832 | 0 | } while (0) |
2833 | | |
2834 | 0 | #define DECODER_DECODE(start, len, res) do { \ |
2835 | 0 | PyObject *_decoded = _PyObject_CallMethod( \ |
2836 | 0 | self->decoder, &_Py_ID(decode), "y#", start, len); \ |
2837 | 0 | if (check_decoded(_decoded) < 0) \ |
2838 | 0 | goto fail; \ |
2839 | 0 | res = PyUnicode_GET_LENGTH(_decoded); \ |
2840 | 0 | Py_DECREF(_decoded); \ |
2841 | 0 | } while (0) |
2842 | | |
2843 | | /* Fast search for an acceptable start point, close to our |
2844 | | current pos */ |
2845 | 0 | skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); |
2846 | 0 | skip_back = 1; |
2847 | 0 | assert(skip_back <= PyBytes_GET_SIZE(next_input)); |
2848 | 0 | input = PyBytes_AS_STRING(next_input); |
2849 | 0 | while (skip_bytes > 0) { |
2850 | | /* Decode up to temptative start point */ |
2851 | 0 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2852 | 0 | goto fail; |
2853 | 0 | DECODER_DECODE(input, skip_bytes, chars_decoded); |
2854 | 0 | if (chars_decoded <= chars_to_skip) { |
2855 | 0 | DECODER_GETSTATE(); |
2856 | 0 | if (dec_buffer_len == 0) { |
2857 | | /* Before pos and no bytes buffered in decoder => OK */ |
2858 | 0 | cookie.dec_flags = dec_flags; |
2859 | 0 | chars_to_skip -= chars_decoded; |
2860 | 0 | break; |
2861 | 0 | } |
2862 | | /* Skip back by buffered amount and reset heuristic */ |
2863 | 0 | skip_bytes -= dec_buffer_len; |
2864 | 0 | skip_back = 1; |
2865 | 0 | } |
2866 | 0 | else { |
2867 | | /* We're too far ahead, skip back a bit */ |
2868 | 0 | skip_bytes -= skip_back; |
2869 | 0 | skip_back *= 2; |
2870 | 0 | } |
2871 | 0 | } |
2872 | 0 | if (skip_bytes <= 0) { |
2873 | 0 | skip_bytes = 0; |
2874 | 0 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2875 | 0 | goto fail; |
2876 | 0 | } |
2877 | | |
2878 | | /* Note our initial start point. */ |
2879 | 0 | cookie.start_pos += skip_bytes; |
2880 | 0 | cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); |
2881 | 0 | if (chars_to_skip == 0) |
2882 | 0 | goto finally; |
2883 | | |
2884 | | /* We should be close to the desired position. Now feed the decoder one |
2885 | | * byte at a time until we reach the `chars_to_skip` target. |
2886 | | * As we go, note the nearest "safe start point" before the current |
2887 | | * location (a point where the decoder has nothing buffered, so seek() |
2888 | | * can safely start from there and advance to this location). |
2889 | | */ |
2890 | 0 | chars_decoded = 0; |
2891 | 0 | input = PyBytes_AS_STRING(next_input); |
2892 | 0 | input_end = input + PyBytes_GET_SIZE(next_input); |
2893 | 0 | input += skip_bytes; |
2894 | 0 | while (input < input_end) { |
2895 | 0 | Py_ssize_t n; |
2896 | |
|
2897 | 0 | DECODER_DECODE(input, (Py_ssize_t)1, n); |
2898 | | /* We got n chars for 1 byte */ |
2899 | 0 | chars_decoded += n; |
2900 | 0 | cookie.bytes_to_feed += 1; |
2901 | 0 | DECODER_GETSTATE(); |
2902 | | |
2903 | 0 | if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { |
2904 | | /* Decoder buffer is empty, so this is a safe start point. */ |
2905 | 0 | cookie.start_pos += cookie.bytes_to_feed; |
2906 | 0 | chars_to_skip -= chars_decoded; |
2907 | 0 | cookie.dec_flags = dec_flags; |
2908 | 0 | cookie.bytes_to_feed = 0; |
2909 | 0 | chars_decoded = 0; |
2910 | 0 | } |
2911 | 0 | if (chars_decoded >= chars_to_skip) |
2912 | 0 | break; |
2913 | 0 | input++; |
2914 | 0 | } |
2915 | 0 | if (input == input_end) { |
2916 | | /* We didn't get enough decoded data; signal EOF to get more. */ |
2917 | 0 | PyObject *decoded = _PyObject_CallMethod( |
2918 | 0 | self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True); |
2919 | 0 | if (check_decoded(decoded) < 0) |
2920 | 0 | goto fail; |
2921 | 0 | chars_decoded += PyUnicode_GET_LENGTH(decoded); |
2922 | 0 | Py_DECREF(decoded); |
2923 | 0 | cookie.need_eof = 1; |
2924 | |
|
2925 | 0 | if (chars_decoded < chars_to_skip) { |
2926 | 0 | PyErr_SetString(PyExc_OSError, |
2927 | 0 | "can't reconstruct logical file position"); |
2928 | 0 | goto fail; |
2929 | 0 | } |
2930 | 0 | } |
2931 | | |
2932 | 0 | finally: |
2933 | 0 | res = PyObject_CallMethodOneArg( |
2934 | 0 | self->decoder, &_Py_ID(setstate), saved_state); |
2935 | 0 | Py_DECREF(saved_state); |
2936 | 0 | if (res == NULL) |
2937 | 0 | return NULL; |
2938 | 0 | Py_DECREF(res); |
2939 | | |
2940 | | /* The returned cookie corresponds to the last safe start point. */ |
2941 | 0 | cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); |
2942 | 0 | return textiowrapper_build_cookie(&cookie); |
2943 | | |
2944 | 0 | fail: |
2945 | 0 | if (saved_state) { |
2946 | 0 | PyObject *exc = PyErr_GetRaisedException(); |
2947 | 0 | res = PyObject_CallMethodOneArg( |
2948 | 0 | self->decoder, &_Py_ID(setstate), saved_state); |
2949 | 0 | _PyErr_ChainExceptions1(exc); |
2950 | 0 | Py_DECREF(saved_state); |
2951 | 0 | Py_XDECREF(res); |
2952 | 0 | } |
2953 | 0 | return NULL; |
2954 | 0 | } |
2955 | | |
2956 | | /*[clinic input] |
2957 | | @critical_section |
2958 | | _io.TextIOWrapper.truncate |
2959 | | pos: object = None |
2960 | | / |
2961 | | [clinic start generated code]*/ |
2962 | | |
2963 | | static PyObject * |
2964 | | _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) |
2965 | | /*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/ |
2966 | 0 | { |
2967 | 0 | CHECK_ATTACHED(self) |
2968 | | |
2969 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
2970 | 0 | return NULL; |
2971 | 0 | } |
2972 | | |
2973 | 0 | return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos); |
2974 | 0 | } |
2975 | | |
2976 | | static PyObject * |
2977 | | textiowrapper_repr(PyObject *op) |
2978 | 0 | { |
2979 | 0 | PyObject *nameobj, *modeobj, *res, *s; |
2980 | 0 | int status; |
2981 | 0 | textio *self = textio_CAST(op); |
2982 | 0 | const char *type_name = Py_TYPE(self)->tp_name; |
2983 | |
|
2984 | 0 | CHECK_INITIALIZED(self); |
2985 | |
|
2986 | 0 | res = PyUnicode_FromFormat("<%.100s", type_name); |
2987 | 0 | if (res == NULL) |
2988 | 0 | return NULL; |
2989 | | |
2990 | 0 | status = Py_ReprEnter(op); |
2991 | 0 | if (status != 0) { |
2992 | 0 | if (status > 0) { |
2993 | 0 | PyErr_Format(PyExc_RuntimeError, |
2994 | 0 | "reentrant call inside %.100s.__repr__", |
2995 | 0 | type_name); |
2996 | 0 | } |
2997 | 0 | goto error; |
2998 | 0 | } |
2999 | 0 | if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &nameobj) < 0) { |
3000 | 0 | if (!PyErr_ExceptionMatches(PyExc_ValueError)) { |
3001 | 0 | goto error; |
3002 | 0 | } |
3003 | | /* Ignore ValueError raised if the underlying stream was detached */ |
3004 | 0 | PyErr_Clear(); |
3005 | 0 | } |
3006 | 0 | if (nameobj != NULL) { |
3007 | 0 | s = PyUnicode_FromFormat(" name=%R", nameobj); |
3008 | 0 | Py_DECREF(nameobj); |
3009 | 0 | if (s == NULL) |
3010 | 0 | goto error; |
3011 | 0 | PyUnicode_AppendAndDel(&res, s); |
3012 | 0 | if (res == NULL) |
3013 | 0 | goto error; |
3014 | 0 | } |
3015 | 0 | if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &modeobj) < 0) { |
3016 | 0 | goto error; |
3017 | 0 | } |
3018 | 0 | if (modeobj != NULL) { |
3019 | 0 | s = PyUnicode_FromFormat(" mode=%R", modeobj); |
3020 | 0 | Py_DECREF(modeobj); |
3021 | 0 | if (s == NULL) |
3022 | 0 | goto error; |
3023 | 0 | PyUnicode_AppendAndDel(&res, s); |
3024 | 0 | if (res == NULL) |
3025 | 0 | goto error; |
3026 | 0 | } |
3027 | 0 | s = PyUnicode_FromFormat("%U encoding=%R>", |
3028 | 0 | res, self->encoding); |
3029 | 0 | Py_DECREF(res); |
3030 | 0 | if (status == 0) { |
3031 | 0 | Py_ReprLeave(op); |
3032 | 0 | } |
3033 | 0 | return s; |
3034 | | |
3035 | 0 | error: |
3036 | 0 | Py_XDECREF(res); |
3037 | 0 | if (status == 0) { |
3038 | 0 | Py_ReprLeave(op); |
3039 | 0 | } |
3040 | 0 | return NULL; |
3041 | 0 | } |
3042 | | |
3043 | | |
3044 | | /* Inquiries */ |
3045 | | |
3046 | | /*[clinic input] |
3047 | | @critical_section |
3048 | | _io.TextIOWrapper.fileno |
3049 | | [clinic start generated code]*/ |
3050 | | |
3051 | | static PyObject * |
3052 | | _io_TextIOWrapper_fileno_impl(textio *self) |
3053 | | /*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/ |
3054 | 0 | { |
3055 | 0 | CHECK_ATTACHED(self); |
3056 | 0 | return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno)); |
3057 | 0 | } |
3058 | | |
3059 | | /*[clinic input] |
3060 | | @critical_section |
3061 | | _io.TextIOWrapper.seekable |
3062 | | [clinic start generated code]*/ |
3063 | | |
3064 | | static PyObject * |
3065 | | _io_TextIOWrapper_seekable_impl(textio *self) |
3066 | | /*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/ |
3067 | 0 | { |
3068 | 0 | CHECK_ATTACHED(self); |
3069 | 0 | return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable)); |
3070 | 0 | } |
3071 | | |
3072 | | /*[clinic input] |
3073 | | @critical_section |
3074 | | _io.TextIOWrapper.readable |
3075 | | [clinic start generated code]*/ |
3076 | | |
3077 | | static PyObject * |
3078 | | _io_TextIOWrapper_readable_impl(textio *self) |
3079 | | /*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/ |
3080 | 0 | { |
3081 | 0 | CHECK_ATTACHED(self); |
3082 | 0 | return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable)); |
3083 | 0 | } |
3084 | | |
3085 | | /*[clinic input] |
3086 | | @critical_section |
3087 | | _io.TextIOWrapper.writable |
3088 | | [clinic start generated code]*/ |
3089 | | |
3090 | | static PyObject * |
3091 | | _io_TextIOWrapper_writable_impl(textio *self) |
3092 | | /*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/ |
3093 | 0 | { |
3094 | 0 | CHECK_ATTACHED(self); |
3095 | 0 | return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable)); |
3096 | 0 | } |
3097 | | |
3098 | | /*[clinic input] |
3099 | | @critical_section |
3100 | | _io.TextIOWrapper.isatty |
3101 | | [clinic start generated code]*/ |
3102 | | |
3103 | | static PyObject * |
3104 | | _io_TextIOWrapper_isatty_impl(textio *self) |
3105 | | /*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/ |
3106 | 0 | { |
3107 | 0 | CHECK_ATTACHED(self); |
3108 | 0 | return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty)); |
3109 | 0 | } |
3110 | | |
3111 | | /*[clinic input] |
3112 | | @critical_section |
3113 | | _io.TextIOWrapper.flush |
3114 | | [clinic start generated code]*/ |
3115 | | |
3116 | | static PyObject * |
3117 | | _io_TextIOWrapper_flush_impl(textio *self) |
3118 | | /*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/ |
3119 | 0 | { |
3120 | 0 | CHECK_ATTACHED(self); |
3121 | 0 | CHECK_CLOSED(self); |
3122 | 0 | self->telling = self->seekable; |
3123 | 0 | if (_textiowrapper_writeflush(self) < 0) |
3124 | 0 | return NULL; |
3125 | 0 | return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush)); |
3126 | 0 | } |
3127 | | |
3128 | | /*[clinic input] |
3129 | | @critical_section |
3130 | | _io.TextIOWrapper.close |
3131 | | [clinic start generated code]*/ |
3132 | | |
3133 | | static PyObject * |
3134 | | _io_TextIOWrapper_close_impl(textio *self) |
3135 | | /*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/ |
3136 | 0 | { |
3137 | 0 | PyObject *res; |
3138 | 0 | int r; |
3139 | 0 | CHECK_ATTACHED(self); |
3140 | |
|
3141 | 0 | res = _io_TextIOWrapper_closed_get_impl(self); |
3142 | 0 | if (res == NULL) |
3143 | 0 | return NULL; |
3144 | 0 | r = PyObject_IsTrue(res); |
3145 | 0 | Py_DECREF(res); |
3146 | 0 | if (r < 0) |
3147 | 0 | return NULL; |
3148 | | |
3149 | 0 | if (r > 0) { |
3150 | 0 | Py_RETURN_NONE; /* stream already closed */ |
3151 | 0 | } |
3152 | 0 | else { |
3153 | 0 | PyObject *exc = NULL; |
3154 | 0 | if (self->finalizing) { |
3155 | 0 | res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn), |
3156 | 0 | (PyObject *)self); |
3157 | 0 | if (res) { |
3158 | 0 | Py_DECREF(res); |
3159 | 0 | } |
3160 | 0 | else { |
3161 | 0 | PyErr_Clear(); |
3162 | 0 | } |
3163 | 0 | } |
3164 | 0 | if (_PyFile_Flush((PyObject *)self) < 0) { |
3165 | 0 | exc = PyErr_GetRaisedException(); |
3166 | 0 | } |
3167 | |
|
3168 | 0 | res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close)); |
3169 | 0 | if (exc != NULL) { |
3170 | 0 | _PyErr_ChainExceptions1(exc); |
3171 | 0 | Py_CLEAR(res); |
3172 | 0 | } |
3173 | 0 | return res; |
3174 | 0 | } |
3175 | 0 | } |
3176 | | |
3177 | | static PyObject * |
3178 | | textiowrapper_iternext_lock_held(PyObject *op) |
3179 | 0 | { |
3180 | 0 | _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op); |
3181 | 0 | PyObject *line; |
3182 | 0 | textio *self = textio_CAST(op); |
3183 | |
|
3184 | 0 | CHECK_ATTACHED(self); |
3185 | |
|
3186 | 0 | self->telling = 0; |
3187 | 0 | if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { |
3188 | | /* Skip method call overhead for speed */ |
3189 | 0 | line = _textiowrapper_readline(self, -1); |
3190 | 0 | } |
3191 | 0 | else { |
3192 | 0 | line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline)); |
3193 | 0 | if (line && !PyUnicode_Check(line)) { |
3194 | 0 | PyErr_Format(PyExc_OSError, |
3195 | 0 | "readline() should have returned a str object, " |
3196 | 0 | "not '%.200s'", Py_TYPE(line)->tp_name); |
3197 | 0 | Py_DECREF(line); |
3198 | 0 | return NULL; |
3199 | 0 | } |
3200 | 0 | } |
3201 | | |
3202 | 0 | if (line == NULL) |
3203 | 0 | return NULL; |
3204 | | |
3205 | 0 | if (PyUnicode_GET_LENGTH(line) == 0) { |
3206 | | /* Reached EOF or would have blocked */ |
3207 | 0 | Py_DECREF(line); |
3208 | 0 | Py_CLEAR(self->snapshot); |
3209 | 0 | self->telling = self->seekable; |
3210 | 0 | return NULL; |
3211 | 0 | } |
3212 | | |
3213 | 0 | return line; |
3214 | 0 | } |
3215 | | |
3216 | | static PyObject * |
3217 | | textiowrapper_iternext(PyObject *op) |
3218 | 0 | { |
3219 | 0 | PyObject *result; |
3220 | 0 | Py_BEGIN_CRITICAL_SECTION(op); |
3221 | 0 | result = textiowrapper_iternext_lock_held(op); |
3222 | 0 | Py_END_CRITICAL_SECTION(); |
3223 | 0 | return result; |
3224 | 0 | } |
3225 | | |
3226 | | /*[clinic input] |
3227 | | @critical_section |
3228 | | @getter |
3229 | | _io.TextIOWrapper.name |
3230 | | [clinic start generated code]*/ |
3231 | | |
3232 | | static PyObject * |
3233 | | _io_TextIOWrapper_name_get_impl(textio *self) |
3234 | | /*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/ |
3235 | 0 | { |
3236 | 0 | CHECK_ATTACHED(self); |
3237 | 0 | return PyObject_GetAttr(self->buffer, &_Py_ID(name)); |
3238 | 0 | } |
3239 | | |
3240 | | /*[clinic input] |
3241 | | @critical_section |
3242 | | @getter |
3243 | | _io.TextIOWrapper.closed |
3244 | | [clinic start generated code]*/ |
3245 | | |
3246 | | static PyObject * |
3247 | | _io_TextIOWrapper_closed_get_impl(textio *self) |
3248 | | /*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/ |
3249 | 0 | { |
3250 | 0 | CHECK_ATTACHED(self); |
3251 | 0 | return PyObject_GetAttr(self->buffer, &_Py_ID(closed)); |
3252 | 0 | } |
3253 | | |
3254 | | /*[clinic input] |
3255 | | @critical_section |
3256 | | @getter |
3257 | | _io.TextIOWrapper.newlines |
3258 | | [clinic start generated code]*/ |
3259 | | |
3260 | | static PyObject * |
3261 | | _io_TextIOWrapper_newlines_get_impl(textio *self) |
3262 | | /*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/ |
3263 | 0 | { |
3264 | 0 | PyObject *res; |
3265 | 0 | CHECK_ATTACHED(self); |
3266 | 0 | if (self->decoder == NULL || |
3267 | 0 | PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0) |
3268 | 0 | { |
3269 | 0 | Py_RETURN_NONE; |
3270 | 0 | } |
3271 | 0 | return res; |
3272 | 0 | } |
3273 | | |
3274 | | /*[clinic input] |
3275 | | @critical_section |
3276 | | @getter |
3277 | | _io.TextIOWrapper.errors |
3278 | | [clinic start generated code]*/ |
3279 | | |
3280 | | static PyObject * |
3281 | | _io_TextIOWrapper_errors_get_impl(textio *self) |
3282 | | /*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/ |
3283 | 0 | { |
3284 | 0 | CHECK_INITIALIZED(self); |
3285 | 0 | return Py_NewRef(self->errors); |
3286 | 0 | } |
3287 | | |
3288 | | /*[clinic input] |
3289 | | @critical_section |
3290 | | @getter |
3291 | | _io.TextIOWrapper._CHUNK_SIZE |
3292 | | [clinic start generated code]*/ |
3293 | | |
3294 | | static PyObject * |
3295 | | _io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self) |
3296 | | /*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/ |
3297 | 0 | { |
3298 | 0 | CHECK_ATTACHED(self); |
3299 | 0 | return PyLong_FromSsize_t(self->chunk_size); |
3300 | 0 | } |
3301 | | |
3302 | | /*[clinic input] |
3303 | | @critical_section |
3304 | | @setter |
3305 | | _io.TextIOWrapper._CHUNK_SIZE |
3306 | | [clinic start generated code]*/ |
3307 | | |
3308 | | static int |
3309 | | _io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value) |
3310 | | /*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/ |
3311 | 0 | { |
3312 | 0 | Py_ssize_t n; |
3313 | 0 | CHECK_ATTACHED_INT(self); |
3314 | 0 | if (value == NULL) { |
3315 | 0 | PyErr_SetString(PyExc_AttributeError, "cannot delete attribute"); |
3316 | 0 | return -1; |
3317 | 0 | } |
3318 | 0 | n = PyNumber_AsSsize_t(value, PyExc_ValueError); |
3319 | 0 | if (n == -1 && PyErr_Occurred()) |
3320 | 0 | return -1; |
3321 | 0 | if (n <= 0) { |
3322 | 0 | PyErr_SetString(PyExc_ValueError, |
3323 | 0 | "a strictly positive integer is required"); |
3324 | 0 | return -1; |
3325 | 0 | } |
3326 | 0 | self->chunk_size = n; |
3327 | 0 | return 0; |
3328 | 0 | } |
3329 | | |
3330 | | static PyMethodDef incrementalnewlinedecoder_methods[] = { |
3331 | | _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF |
3332 | | _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF |
3333 | | _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF |
3334 | | _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF |
3335 | | {NULL} |
3336 | | }; |
3337 | | |
3338 | | static PyGetSetDef incrementalnewlinedecoder_getset[] = { |
3339 | | {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL}, |
3340 | | {NULL} |
3341 | | }; |
3342 | | |
3343 | | static PyType_Slot nldecoder_slots[] = { |
3344 | | {Py_tp_dealloc, incrementalnewlinedecoder_dealloc}, |
3345 | | {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__}, |
3346 | | {Py_tp_methods, incrementalnewlinedecoder_methods}, |
3347 | | {Py_tp_getset, incrementalnewlinedecoder_getset}, |
3348 | | {Py_tp_traverse, incrementalnewlinedecoder_traverse}, |
3349 | | {Py_tp_clear, incrementalnewlinedecoder_clear}, |
3350 | | {Py_tp_init, _io_IncrementalNewlineDecoder___init__}, |
3351 | | {0, NULL}, |
3352 | | }; |
3353 | | |
3354 | | PyType_Spec nldecoder_spec = { |
3355 | | .name = "_io.IncrementalNewlineDecoder", |
3356 | | .basicsize = sizeof(nldecoder_object), |
3357 | | .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | |
3358 | | Py_TPFLAGS_IMMUTABLETYPE), |
3359 | | .slots = nldecoder_slots, |
3360 | | }; |
3361 | | |
3362 | | |
3363 | | static PyMethodDef textiowrapper_methods[] = { |
3364 | | _IO_TEXTIOWRAPPER_DETACH_METHODDEF |
3365 | | _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF |
3366 | | _IO_TEXTIOWRAPPER_WRITE_METHODDEF |
3367 | | _IO_TEXTIOWRAPPER_READ_METHODDEF |
3368 | | _IO_TEXTIOWRAPPER_READLINE_METHODDEF |
3369 | | _IO_TEXTIOWRAPPER_FLUSH_METHODDEF |
3370 | | _IO_TEXTIOWRAPPER_CLOSE_METHODDEF |
3371 | | |
3372 | | _IO_TEXTIOWRAPPER_FILENO_METHODDEF |
3373 | | _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF |
3374 | | _IO_TEXTIOWRAPPER_READABLE_METHODDEF |
3375 | | _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF |
3376 | | _IO_TEXTIOWRAPPER_ISATTY_METHODDEF |
3377 | | |
3378 | | _IO_TEXTIOWRAPPER_SEEK_METHODDEF |
3379 | | _IO_TEXTIOWRAPPER_TELL_METHODDEF |
3380 | | _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF |
3381 | | |
3382 | | {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS}, |
3383 | | {NULL, NULL} |
3384 | | }; |
3385 | | |
3386 | | static PyMemberDef textiowrapper_members[] = { |
3387 | | {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY}, |
3388 | | {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY}, |
3389 | | {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY}, |
3390 | | {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY}, |
3391 | | {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0}, |
3392 | | {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY}, |
3393 | | {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY}, |
3394 | | {NULL} |
3395 | | }; |
3396 | | |
3397 | | static PyGetSetDef textiowrapper_getset[] = { |
3398 | | _IO_TEXTIOWRAPPER_NAME_GETSETDEF |
3399 | | _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF |
3400 | | _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF |
3401 | | _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF |
3402 | | _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF |
3403 | | {NULL} |
3404 | | }; |
3405 | | |
3406 | | PyType_Slot textiowrapper_slots[] = { |
3407 | | {Py_tp_dealloc, textiowrapper_dealloc}, |
3408 | | {Py_tp_repr, textiowrapper_repr}, |
3409 | | {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__}, |
3410 | | {Py_tp_traverse, textiowrapper_traverse}, |
3411 | | {Py_tp_clear, textiowrapper_clear}, |
3412 | | {Py_tp_iternext, textiowrapper_iternext}, |
3413 | | {Py_tp_methods, textiowrapper_methods}, |
3414 | | {Py_tp_members, textiowrapper_members}, |
3415 | | {Py_tp_getset, textiowrapper_getset}, |
3416 | | {Py_tp_init, _io_TextIOWrapper___init__}, |
3417 | | {0, NULL}, |
3418 | | }; |
3419 | | |
3420 | | PyType_Spec textiowrapper_spec = { |
3421 | | .name = "_io.TextIOWrapper", |
3422 | | .basicsize = sizeof(textio), |
3423 | | .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC | |
3424 | | Py_TPFLAGS_IMMUTABLETYPE), |
3425 | | .slots = textiowrapper_slots, |
3426 | | }; |