/src/Python-3.8.3/Modules/_io/textio.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | An implementation of Text I/O as defined by PEP 3116 - "New I/O" |
3 | | |
4 | | Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper. |
5 | | |
6 | | Written by Amaury Forgeot d'Arc and Antoine Pitrou |
7 | | */ |
8 | | |
9 | | #define PY_SSIZE_T_CLEAN |
10 | | #include "Python.h" |
11 | | #include "pycore_object.h" |
12 | | #include "structmember.h" |
13 | | #include "_iomodule.h" |
14 | | |
15 | | /*[clinic input] |
16 | | module _io |
17 | | class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type" |
18 | | class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe" |
19 | | [clinic start generated code]*/ |
20 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/ |
21 | | |
22 | | _Py_IDENTIFIER(close); |
23 | | _Py_IDENTIFIER(_dealloc_warn); |
24 | | _Py_IDENTIFIER(decode); |
25 | | _Py_IDENTIFIER(fileno); |
26 | | _Py_IDENTIFIER(flush); |
27 | | _Py_IDENTIFIER(getpreferredencoding); |
28 | | _Py_IDENTIFIER(isatty); |
29 | | _Py_IDENTIFIER(mode); |
30 | | _Py_IDENTIFIER(name); |
31 | | _Py_IDENTIFIER(raw); |
32 | | _Py_IDENTIFIER(read); |
33 | | _Py_IDENTIFIER(readable); |
34 | | _Py_IDENTIFIER(replace); |
35 | | _Py_IDENTIFIER(reset); |
36 | | _Py_IDENTIFIER(seek); |
37 | | _Py_IDENTIFIER(seekable); |
38 | | _Py_IDENTIFIER(setstate); |
39 | | _Py_IDENTIFIER(strict); |
40 | | _Py_IDENTIFIER(tell); |
41 | | _Py_IDENTIFIER(writable); |
42 | | |
43 | | /* TextIOBase */ |
44 | | |
45 | | PyDoc_STRVAR(textiobase_doc, |
46 | | "Base class for text I/O.\n" |
47 | | "\n" |
48 | | "This class provides a character and line based interface to stream\n" |
49 | | "I/O. There is no readinto method because Python's character strings\n" |
50 | | "are immutable. There is no public constructor.\n" |
51 | | ); |
52 | | |
53 | | static PyObject * |
54 | | _unsupported(const char *message) |
55 | 0 | { |
56 | 0 | _PyIO_State *state = IO_STATE(); |
57 | 0 | if (state != NULL) |
58 | 0 | PyErr_SetString(state->unsupported_operation, message); |
59 | 0 | return NULL; |
60 | 0 | } |
61 | | |
62 | | PyDoc_STRVAR(textiobase_detach_doc, |
63 | | "Separate the underlying buffer from the TextIOBase and return it.\n" |
64 | | "\n" |
65 | | "After the underlying buffer has been detached, the TextIO is in an\n" |
66 | | "unusable state.\n" |
67 | | ); |
68 | | |
69 | | static PyObject * |
70 | | textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored)) |
71 | 0 | { |
72 | 0 | return _unsupported("detach"); |
73 | 0 | } |
74 | | |
75 | | PyDoc_STRVAR(textiobase_read_doc, |
76 | | "Read at most n characters from stream.\n" |
77 | | "\n" |
78 | | "Read from underlying buffer until we have n characters or we hit EOF.\n" |
79 | | "If n is negative or omitted, read until EOF.\n" |
80 | | ); |
81 | | |
82 | | static PyObject * |
83 | | textiobase_read(PyObject *self, PyObject *args) |
84 | 0 | { |
85 | 0 | return _unsupported("read"); |
86 | 0 | } |
87 | | |
88 | | PyDoc_STRVAR(textiobase_readline_doc, |
89 | | "Read until newline or EOF.\n" |
90 | | "\n" |
91 | | "Returns an empty string if EOF is hit immediately.\n" |
92 | | ); |
93 | | |
94 | | static PyObject * |
95 | | textiobase_readline(PyObject *self, PyObject *args) |
96 | 0 | { |
97 | 0 | return _unsupported("readline"); |
98 | 0 | } |
99 | | |
100 | | PyDoc_STRVAR(textiobase_write_doc, |
101 | | "Write string to stream.\n" |
102 | | "Returns the number of characters written (which is always equal to\n" |
103 | | "the length of the string).\n" |
104 | | ); |
105 | | |
106 | | static PyObject * |
107 | | textiobase_write(PyObject *self, PyObject *args) |
108 | 0 | { |
109 | 0 | return _unsupported("write"); |
110 | 0 | } |
111 | | |
112 | | PyDoc_STRVAR(textiobase_encoding_doc, |
113 | | "Encoding of the text stream.\n" |
114 | | "\n" |
115 | | "Subclasses should override.\n" |
116 | | ); |
117 | | |
118 | | static PyObject * |
119 | | textiobase_encoding_get(PyObject *self, void *context) |
120 | 0 | { |
121 | 0 | Py_RETURN_NONE; |
122 | 0 | } |
123 | | |
124 | | PyDoc_STRVAR(textiobase_newlines_doc, |
125 | | "Line endings translated so far.\n" |
126 | | "\n" |
127 | | "Only line endings translated during reading are considered.\n" |
128 | | "\n" |
129 | | "Subclasses should override.\n" |
130 | | ); |
131 | | |
132 | | static PyObject * |
133 | | textiobase_newlines_get(PyObject *self, void *context) |
134 | 0 | { |
135 | 0 | Py_RETURN_NONE; |
136 | 0 | } |
137 | | |
138 | | PyDoc_STRVAR(textiobase_errors_doc, |
139 | | "The error setting of the decoder or encoder.\n" |
140 | | "\n" |
141 | | "Subclasses should override.\n" |
142 | | ); |
143 | | |
144 | | static PyObject * |
145 | | textiobase_errors_get(PyObject *self, void *context) |
146 | 0 | { |
147 | 0 | Py_RETURN_NONE; |
148 | 0 | } |
149 | | |
150 | | |
/* Method table for _TextIOBase.  Every entry points at a stub that
   reports the operation as unsupported; detach takes no arguments, the
   others receive an argument tuple that the stubs ignore. */
static PyMethodDef textiobase_methods[] = {
    {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}  /* sentinel */
};
158 | | |
/* Attribute table for _TextIOBase.  All three attributes have a getter
   only (the setter slot is NULL) and each getter returns None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}  /* sentinel */
};
165 | | |
/* Type object for _io._TextIOBase.

   The type derives from PyIOBase_Type, may itself be subclassed
   (Py_TPFLAGS_BASETYPE), and only fills in the doc string, the method
   table and the getset table; every other slot is left at 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
216 | | |
217 | | |
218 | | /* IncrementalNewlineDecoder */ |
219 | | |
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or None; NULL until __init__ ran */
    PyObject *errors;           /* error handler name given to __init__ ("strict" by default) */
    unsigned int pendingcr: 1;  /* a trailing \r was withheld from the previous result */
    unsigned int translate: 1;  /* rewrite \r\n and \r to \n while decoding */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
228 | | |
229 | | /*[clinic input] |
230 | | _io.IncrementalNewlineDecoder.__init__ |
231 | | decoder: object |
232 | | translate: int |
233 | | errors: object(c_default="NULL") = "strict" |
234 | | |
235 | | Codec used when reading a file in universal newlines mode. |
236 | | |
237 | | It wraps another incremental decoder, translating \r\n and \r into \n. |
238 | | It also records the types of newlines encountered. When used with |
239 | | translate=False, it ensures that the newline sequence is returned in |
240 | | one piece. When used with decoder=None, it expects unicode strings as |
241 | | decode input and translates newlines without first invoking an external |
242 | | decoder. |
243 | | [clinic start generated code]*/ |
244 | | |
245 | | static int |
246 | | _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self, |
247 | | PyObject *decoder, int translate, |
248 | | PyObject *errors) |
249 | | /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/ |
250 | 1 | { |
251 | 1 | self->decoder = decoder; |
252 | 1 | Py_INCREF(decoder); |
253 | | |
254 | 1 | if (errors == NULL) { |
255 | 1 | self->errors = _PyUnicode_FromId(&PyId_strict); |
256 | 1 | if (self->errors == NULL) |
257 | 0 | return -1; |
258 | 1 | } |
259 | 0 | else { |
260 | 0 | self->errors = errors; |
261 | 0 | } |
262 | 1 | Py_INCREF(self->errors); |
263 | | |
264 | 1 | self->translate = translate ? 1 : 0; |
265 | 1 | self->seennl = 0; |
266 | 1 | self->pendingcr = 0; |
267 | | |
268 | 1 | return 0; |
269 | 1 | } |
270 | | |
271 | | static void |
272 | | incrementalnewlinedecoder_dealloc(nldecoder_object *self) |
273 | 1 | { |
274 | 1 | Py_CLEAR(self->decoder); |
275 | 1 | Py_CLEAR(self->errors); |
276 | 1 | Py_TYPE(self)->tp_free((PyObject *)self); |
277 | 1 | } |
278 | | |
279 | | static int |
280 | | check_decoded(PyObject *decoded) |
281 | 6 | { |
282 | 6 | if (decoded == NULL) |
283 | 0 | return -1; |
284 | 6 | if (!PyUnicode_Check(decoded)) { |
285 | 0 | PyErr_Format(PyExc_TypeError, |
286 | 0 | "decoder should return a string result, not '%.200s'", |
287 | 0 | Py_TYPE(decoded)->tp_name); |
288 | 0 | Py_DECREF(decoded); |
289 | 0 | return -1; |
290 | 0 | } |
291 | 6 | if (PyUnicode_READY(decoded) < 0) { |
292 | 0 | Py_DECREF(decoded); |
293 | 0 | return -1; |
294 | 0 | } |
295 | 6 | return 0; |
296 | 6 | } |
297 | | |
/* Bit flags stored in nldecoder_object.seennl, one per newline style
   encountered so far.  SEEN_ALL is the mask with all three set. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302 | | |
/* Decode `input` and apply newline handling.

   `myself` must be an nldecoder_object.  The input is passed to the
   wrapped decoder's decode() method together with `final` (as a bool),
   or used as-is when the wrapped decoder is None.  The result must be a
   str.  Afterwards:

     - a \r withheld by a previous call (self->pendingcr) is put back in
       front of the text;
     - unless `final` is true, a trailing \r is stripped and remembered in
       self->pendingcr;
     - the newline styles present are OR'ed into self->seennl, and when
       self->translate is set, \r\n and \r are rewritten to \n.

   Returns a new reference to the resulting str, or NULL on failure. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder stays NULL until __init__ has stored something in it */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` doubles as the per-character byte width here */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* For wider kinds a 0x0A byte may belong to another
                       character, so confirm with a per-character scan. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no bound check on i in this inner
                           loop; presumably it relies on a terminating NUL
                           after the string data -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    /* \r\n collapses to one \n: skip the \n here */
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character lay beyond the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503 | | |
504 | | /*[clinic input] |
505 | | _io.IncrementalNewlineDecoder.decode |
506 | | input: object |
507 | | final: bool(accept={int}) = False |
508 | | [clinic start generated code]*/ |
509 | | |
510 | | static PyObject * |
511 | | _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self, |
512 | | PyObject *input, int final) |
513 | | /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/ |
514 | 0 | { |
515 | 0 | return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final); |
516 | 0 | } |
517 | | |
518 | | /*[clinic input] |
519 | | _io.IncrementalNewlineDecoder.getstate |
520 | | [clinic start generated code]*/ |
521 | | |
522 | | static PyObject * |
523 | | _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self) |
524 | | /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/ |
525 | 0 | { |
526 | 0 | PyObject *buffer; |
527 | 0 | unsigned long long flag; |
528 | |
|
529 | 0 | if (self->decoder != Py_None) { |
530 | 0 | PyObject *state = PyObject_CallMethodObjArgs(self->decoder, |
531 | 0 | _PyIO_str_getstate, NULL); |
532 | 0 | if (state == NULL) |
533 | 0 | return NULL; |
534 | 0 | if (!PyTuple_Check(state)) { |
535 | 0 | PyErr_SetString(PyExc_TypeError, |
536 | 0 | "illegal decoder state"); |
537 | 0 | Py_DECREF(state); |
538 | 0 | return NULL; |
539 | 0 | } |
540 | 0 | if (!PyArg_ParseTuple(state, "OK;illegal decoder state", |
541 | 0 | &buffer, &flag)) |
542 | 0 | { |
543 | 0 | Py_DECREF(state); |
544 | 0 | return NULL; |
545 | 0 | } |
546 | 0 | Py_INCREF(buffer); |
547 | 0 | Py_DECREF(state); |
548 | 0 | } |
549 | 0 | else { |
550 | 0 | buffer = PyBytes_FromString(""); |
551 | 0 | flag = 0; |
552 | 0 | } |
553 | 0 | flag <<= 1; |
554 | 0 | if (self->pendingcr) |
555 | 0 | flag |= 1; |
556 | 0 | return Py_BuildValue("NK", buffer, flag); |
557 | 0 | } |
558 | | |
559 | | /*[clinic input] |
560 | | _io.IncrementalNewlineDecoder.setstate |
561 | | state: object |
562 | | / |
563 | | [clinic start generated code]*/ |
564 | | |
565 | | static PyObject * |
566 | | _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self, |
567 | | PyObject *state) |
568 | | /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/ |
569 | 0 | { |
570 | 0 | PyObject *buffer; |
571 | 0 | unsigned long long flag; |
572 | |
|
573 | 0 | if (!PyTuple_Check(state)) { |
574 | 0 | PyErr_SetString(PyExc_TypeError, "state argument must be a tuple"); |
575 | 0 | return NULL; |
576 | 0 | } |
577 | 0 | if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument", |
578 | 0 | &buffer, &flag)) |
579 | 0 | { |
580 | 0 | return NULL; |
581 | 0 | } |
582 | | |
583 | 0 | self->pendingcr = (int) (flag & 1); |
584 | 0 | flag >>= 1; |
585 | |
|
586 | 0 | if (self->decoder != Py_None) |
587 | 0 | return _PyObject_CallMethodId(self->decoder, |
588 | 0 | &PyId_setstate, "((OK))", buffer, flag); |
589 | 0 | else |
590 | 0 | Py_RETURN_NONE; |
591 | 0 | } |
592 | | |
593 | | /*[clinic input] |
594 | | _io.IncrementalNewlineDecoder.reset |
595 | | [clinic start generated code]*/ |
596 | | |
597 | | static PyObject * |
598 | | _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self) |
599 | | /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/ |
600 | 0 | { |
601 | 0 | self->seennl = 0; |
602 | 0 | self->pendingcr = 0; |
603 | 0 | if (self->decoder != Py_None) |
604 | 0 | return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL); |
605 | 0 | else |
606 | 0 | Py_RETURN_NONE; |
607 | 0 | } |
608 | | |
609 | | static PyObject * |
610 | | incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context) |
611 | 0 | { |
612 | 0 | switch (self->seennl) { |
613 | 0 | case SEEN_CR: |
614 | 0 | return PyUnicode_FromString("\r"); |
615 | 0 | case SEEN_LF: |
616 | 0 | return PyUnicode_FromString("\n"); |
617 | 0 | case SEEN_CRLF: |
618 | 0 | return PyUnicode_FromString("\r\n"); |
619 | 0 | case SEEN_CR | SEEN_LF: |
620 | 0 | return Py_BuildValue("ss", "\r", "\n"); |
621 | 0 | case SEEN_CR | SEEN_CRLF: |
622 | 0 | return Py_BuildValue("ss", "\r", "\r\n"); |
623 | 0 | case SEEN_LF | SEEN_CRLF: |
624 | 0 | return Py_BuildValue("ss", "\n", "\r\n"); |
625 | 0 | case SEEN_CR | SEEN_LF | SEEN_CRLF: |
626 | 0 | return Py_BuildValue("sss", "\r", "\n", "\r\n"); |
627 | 0 | default: |
628 | 0 | Py_RETURN_NONE; |
629 | 0 | } |
630 | |
|
631 | 0 | } |
632 | | |
633 | | /* TextIOWrapper */ |
634 | | |
/* Signature shared by the specialized encoders: takes two objects and
   returns an object.  The encoders in this file are declared with a
   `textio *` first parameter and cast to this type when stored. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637 | | |
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
696 | | |
697 | | static void |
698 | | textiowrapper_set_decoded_chars(textio *self, PyObject *chars); |
699 | | |
700 | | /* A couple of specialized cases in order to bypass the slow incremental |
701 | | encoding methods for the most popular encodings. */ |
702 | | |
703 | | static PyObject * |
704 | | ascii_encode(textio *self, PyObject *text) |
705 | 0 | { |
706 | 0 | return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors)); |
707 | 0 | } |
708 | | |
709 | | static PyObject * |
710 | | utf16be_encode(textio *self, PyObject *text) |
711 | 0 | { |
712 | 0 | return _PyUnicode_EncodeUTF16(text, |
713 | 0 | PyUnicode_AsUTF8(self->errors), 1); |
714 | 0 | } |
715 | | |
716 | | static PyObject * |
717 | | utf16le_encode(textio *self, PyObject *text) |
718 | 0 | { |
719 | 0 | return _PyUnicode_EncodeUTF16(text, |
720 | 0 | PyUnicode_AsUTF8(self->errors), -1); |
721 | 0 | } |
722 | | |
723 | | static PyObject * |
724 | | utf16_encode(textio *self, PyObject *text) |
725 | 0 | { |
726 | 0 | if (!self->encoding_start_of_stream) { |
727 | | /* Skip the BOM and use native byte ordering */ |
728 | | #if PY_BIG_ENDIAN |
729 | | return utf16be_encode(self, text); |
730 | | #else |
731 | 0 | return utf16le_encode(self, text); |
732 | 0 | #endif |
733 | 0 | } |
734 | 0 | return _PyUnicode_EncodeUTF16(text, |
735 | 0 | PyUnicode_AsUTF8(self->errors), 0); |
736 | 0 | } |
737 | | |
738 | | static PyObject * |
739 | | utf32be_encode(textio *self, PyObject *text) |
740 | 0 | { |
741 | 0 | return _PyUnicode_EncodeUTF32(text, |
742 | 0 | PyUnicode_AsUTF8(self->errors), 1); |
743 | 0 | } |
744 | | |
745 | | static PyObject * |
746 | | utf32le_encode(textio *self, PyObject *text) |
747 | 0 | { |
748 | 0 | return _PyUnicode_EncodeUTF32(text, |
749 | 0 | PyUnicode_AsUTF8(self->errors), -1); |
750 | 0 | } |
751 | | |
752 | | static PyObject * |
753 | | utf32_encode(textio *self, PyObject *text) |
754 | 0 | { |
755 | 0 | if (!self->encoding_start_of_stream) { |
756 | | /* Skip the BOM and use native byte ordering */ |
757 | | #if PY_BIG_ENDIAN |
758 | | return utf32be_encode(self, text); |
759 | | #else |
760 | 0 | return utf32le_encode(self, text); |
761 | 0 | #endif |
762 | 0 | } |
763 | 0 | return _PyUnicode_EncodeUTF32(text, |
764 | 0 | PyUnicode_AsUTF8(self->errors), 0); |
765 | 0 | } |
766 | | |
767 | | static PyObject * |
768 | | utf8_encode(textio *self, PyObject *text) |
769 | 0 | { |
770 | 0 | return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors)); |
771 | 0 | } |
772 | | |
773 | | static PyObject * |
774 | | latin1_encode(textio *self, PyObject *text) |
775 | 0 | { |
776 | 0 | return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors)); |
777 | 0 | } |
778 | | |
779 | | // Return true when encoding can be skipped when text is ascii. |
780 | | static inline int |
781 | | is_asciicompat_encoding(encodefunc_t f) |
782 | 14 | { |
783 | 14 | return f == (encodefunc_t) ascii_encode |
784 | 14 | || f == (encodefunc_t) latin1_encode |
785 | 14 | || f == (encodefunc_t) utf8_encode; |
786 | 14 | } |
787 | | |
788 | | /* Map normalized encoding names onto the specialized encoding funcs */ |
789 | | |
/* One row of the encodefuncs table: a codec name and the specialized
   encoder to use for it. */
typedef struct {
    const char *name;           /* normalized codec name to match against */
    encodefunc_t encodefunc;    /* specialized encoder for that codec */
} encodefuncentry;
794 | | |
/* Lookup table from codec name to specialized encoder.  It is searched
   linearly and terminated by an entry whose name is NULL. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}  /* sentinel */
};
807 | | |
808 | | static int |
809 | | validate_newline(const char *newline) |
810 | 43 | { |
811 | 43 | if (newline && newline[0] != '\0' |
812 | 43 | && !(newline[0] == '\n' && newline[1] == '\0') |
813 | 43 | && !(newline[0] == '\r' && newline[1] == '\0') |
814 | 43 | && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { |
815 | 0 | PyErr_Format(PyExc_ValueError, |
816 | 0 | "illegal newline value: %s", newline); |
817 | 0 | return -1; |
818 | 0 | } |
819 | 43 | return 0; |
820 | 43 | } |
821 | | |
822 | | static int |
823 | | set_newline(textio *self, const char *newline) |
824 | 43 | { |
825 | 43 | PyObject *old = self->readnl; |
826 | 43 | if (newline == NULL) { |
827 | 1 | self->readnl = NULL; |
828 | 1 | } |
829 | 42 | else { |
830 | 42 | self->readnl = PyUnicode_FromString(newline); |
831 | 42 | if (self->readnl == NULL) { |
832 | 0 | self->readnl = old; |
833 | 0 | return -1; |
834 | 0 | } |
835 | 42 | } |
836 | 43 | self->readuniversal = (newline == NULL || newline[0] == '\0'); |
837 | 43 | self->readtranslate = (newline == NULL); |
838 | 43 | self->writetranslate = (newline == NULL || newline[0] != '\0'); |
839 | 43 | if (!self->readuniversal && self->readnl != NULL) { |
840 | | // validate_newline() accepts only ASCII newlines. |
841 | 42 | assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND); |
842 | 42 | self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl); |
843 | 42 | if (strcmp(self->writenl, "\n") == 0) { |
844 | 42 | self->writenl = NULL; |
845 | 42 | } |
846 | 42 | } |
847 | 1 | else { |
848 | | #ifdef MS_WINDOWS |
849 | | self->writenl = "\r\n"; |
850 | | #else |
851 | 1 | self->writenl = NULL; |
852 | 1 | #endif |
853 | 1 | } |
854 | 43 | Py_XDECREF(old); |
855 | 43 | return 0; |
856 | 43 | } |
857 | | |
858 | | static int |
859 | | _textiowrapper_set_decoder(textio *self, PyObject *codec_info, |
860 | | const char *errors) |
861 | 43 | { |
862 | 43 | PyObject *res; |
863 | 43 | int r; |
864 | | |
865 | 43 | res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL); |
866 | 43 | if (res == NULL) |
867 | 0 | return -1; |
868 | | |
869 | 43 | r = PyObject_IsTrue(res); |
870 | 43 | Py_DECREF(res); |
871 | 43 | if (r == -1) |
872 | 0 | return -1; |
873 | | |
874 | 43 | if (r != 1) |
875 | 28 | return 0; |
876 | | |
877 | 15 | Py_CLEAR(self->decoder); |
878 | 15 | self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors); |
879 | 15 | if (self->decoder == NULL) |
880 | 0 | return -1; |
881 | | |
882 | 15 | if (self->readuniversal) { |
883 | 1 | PyObject *incrementalDecoder = PyObject_CallFunction( |
884 | 1 | (PyObject *)&PyIncrementalNewlineDecoder_Type, |
885 | 1 | "Oi", self->decoder, (int)self->readtranslate); |
886 | 1 | if (incrementalDecoder == NULL) |
887 | 0 | return -1; |
888 | 1 | Py_CLEAR(self->decoder); |
889 | 1 | self->decoder = incrementalDecoder; |
890 | 1 | } |
891 | | |
892 | 15 | return 0; |
893 | 15 | } |
894 | | |
895 | | static PyObject* |
896 | | _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof) |
897 | 3 | { |
898 | 3 | PyObject *chars; |
899 | | |
900 | 3 | if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type) |
901 | 3 | chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof); |
902 | 0 | else |
903 | 0 | chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes, |
904 | 0 | eof ? Py_True : Py_False, NULL); |
905 | | |
906 | 3 | if (check_decoded(chars) < 0) |
907 | | // check_decoded already decreases refcount |
908 | 0 | return NULL; |
909 | | |
910 | 3 | return chars; |
911 | 3 | } |
912 | | |
913 | | static int |
914 | | _textiowrapper_set_encoder(textio *self, PyObject *codec_info, |
915 | | const char *errors) |
916 | 43 | { |
917 | 43 | PyObject *res; |
918 | 43 | int r; |
919 | | |
920 | 43 | res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL); |
921 | 43 | if (res == NULL) |
922 | 0 | return -1; |
923 | | |
924 | 43 | r = PyObject_IsTrue(res); |
925 | 43 | Py_DECREF(res); |
926 | 43 | if (r == -1) |
927 | 0 | return -1; |
928 | | |
929 | 43 | if (r != 1) |
930 | 15 | return 0; |
931 | | |
932 | 28 | Py_CLEAR(self->encoder); |
933 | 28 | self->encodefunc = NULL; |
934 | 28 | self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors); |
935 | 28 | if (self->encoder == NULL) |
936 | 0 | return -1; |
937 | | |
938 | | /* Get the normalized named of the codec */ |
939 | 28 | if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) { |
940 | 0 | return -1; |
941 | 0 | } |
942 | 28 | if (res != NULL && PyUnicode_Check(res)) { |
943 | 28 | const encodefuncentry *e = encodefuncs; |
944 | 28 | while (e->name != NULL) { |
945 | 28 | if (_PyUnicode_EqualToASCIIString(res, e->name)) { |
946 | 28 | self->encodefunc = e->encodefunc; |
947 | 28 | break; |
948 | 28 | } |
949 | 0 | e++; |
950 | 0 | } |
951 | 28 | } |
952 | 28 | Py_XDECREF(res); |
953 | | |
954 | 28 | return 0; |
955 | 28 | } |
956 | | |
957 | | static int |
958 | | _textiowrapper_fix_encoder_state(textio *self) |
959 | 43 | { |
960 | 43 | if (!self->seekable || !self->encoder) { |
961 | 15 | return 0; |
962 | 15 | } |
963 | | |
964 | 28 | self->encoding_start_of_stream = 1; |
965 | | |
966 | 28 | PyObject *cookieObj = PyObject_CallMethodObjArgs( |
967 | 28 | self->buffer, _PyIO_str_tell, NULL); |
968 | 28 | if (cookieObj == NULL) { |
969 | 0 | return -1; |
970 | 0 | } |
971 | | |
972 | 28 | int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ); |
973 | 28 | Py_DECREF(cookieObj); |
974 | 28 | if (cmp < 0) { |
975 | 0 | return -1; |
976 | 0 | } |
977 | | |
978 | 28 | if (cmp == 0) { |
979 | 14 | self->encoding_start_of_stream = 0; |
980 | 14 | PyObject *res = PyObject_CallMethodObjArgs( |
981 | 14 | self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL); |
982 | 14 | if (res == NULL) { |
983 | 0 | return -1; |
984 | 0 | } |
985 | 14 | Py_DECREF(res); |
986 | 14 | } |
987 | | |
988 | 28 | return 0; |
989 | 28 | } |
990 | | |
991 | | /*[clinic input] |
992 | | _io.TextIOWrapper.__init__ |
993 | | buffer: object |
994 | | encoding: str(accept={str, NoneType}) = None |
995 | | errors: object = None |
996 | | newline: str(accept={str, NoneType}) = None |
997 | | line_buffering: bool(accept={int}) = False |
998 | | write_through: bool(accept={int}) = False |
999 | | |
1000 | | Character and line based layer over a BufferedIOBase object, buffer. |
1001 | | |
1002 | | encoding gives the name of the encoding that the stream will be |
1003 | | decoded or encoded with. It defaults to locale.getpreferredencoding(False). |
1004 | | |
1005 | | errors determines the strictness of encoding and decoding (see |
1006 | | help(codecs.Codec) or the documentation for codecs.register) and |
1007 | | defaults to "strict". |
1008 | | |
1009 | | newline controls how line endings are handled. It can be None, '', |
1010 | | '\n', '\r', and '\r\n'. It works as follows: |
1011 | | |
1012 | | * On input, if newline is None, universal newlines mode is |
1013 | | enabled. Lines in the input can end in '\n', '\r', or '\r\n', and |
1014 | | these are translated into '\n' before being returned to the |
1015 | | caller. If it is '', universal newline mode is enabled, but line |
1016 | | endings are returned to the caller untranslated. If it has any of |
1017 | | the other legal values, input lines are only terminated by the given |
1018 | | string, and the line ending is returned to the caller untranslated. |
1019 | | |
1020 | | * On output, if newline is None, any '\n' characters written are |
1021 | | translated to the system default line separator, os.linesep. If |
1022 | | newline is '' or '\n', no translation takes place. If newline is any |
1023 | | of the other legal values, any '\n' characters written are translated |
1024 | | to the given string. |
1025 | | |
1026 | | If line_buffering is True, a call to flush is implied when a call to |
1027 | | write contains a newline character. |
1028 | | [clinic start generated code]*/ |
1029 | | |
1030 | | static int |
1031 | | _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, |
1032 | | const char *encoding, PyObject *errors, |
1033 | | const char *newline, int line_buffering, |
1034 | | int write_through) |
1035 | | /*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/ |
1036 | 43 | { |
1037 | 43 | PyObject *raw, *codec_info = NULL; |
1038 | 43 | _PyIO_State *state = NULL; |
1039 | 43 | PyObject *res; |
1040 | 43 | int r; |
1041 | | |
1042 | 43 | self->ok = 0; |
1043 | 43 | self->detached = 0; |
1044 | | |
1045 | 43 | if (errors == Py_None) { |
1046 | 1 | errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */ |
1047 | 1 | if (errors == NULL) { |
1048 | 0 | return -1; |
1049 | 0 | } |
1050 | 1 | } |
1051 | 42 | else if (!PyUnicode_Check(errors)) { |
1052 | | // Check 'errors' argument here because Argument Clinic doesn't support |
1053 | | // 'str(accept={str, NoneType})' converter. |
1054 | 0 | PyErr_Format( |
1055 | 0 | PyExc_TypeError, |
1056 | 0 | "TextIOWrapper() argument 'errors' must be str or None, not %.50s", |
1057 | 0 | errors->ob_type->tp_name); |
1058 | 0 | return -1; |
1059 | 0 | } |
1060 | | |
1061 | 43 | if (validate_newline(newline) < 0) { |
1062 | 0 | return -1; |
1063 | 0 | } |
1064 | | |
1065 | 43 | Py_CLEAR(self->buffer); |
1066 | 43 | Py_CLEAR(self->encoding); |
1067 | 43 | Py_CLEAR(self->encoder); |
1068 | 43 | Py_CLEAR(self->decoder); |
1069 | 43 | Py_CLEAR(self->readnl); |
1070 | 43 | Py_CLEAR(self->decoded_chars); |
1071 | 43 | Py_CLEAR(self->pending_bytes); |
1072 | 43 | Py_CLEAR(self->snapshot); |
1073 | 43 | Py_CLEAR(self->errors); |
1074 | 43 | Py_CLEAR(self->raw); |
1075 | 43 | self->decoded_chars_used = 0; |
1076 | 43 | self->pending_bytes_count = 0; |
1077 | 43 | self->encodefunc = NULL; |
1078 | 43 | self->b2cratio = 0.0; |
1079 | | |
1080 | 43 | if (encoding == NULL) { |
1081 | | /* Try os.device_encoding(fileno) */ |
1082 | 0 | PyObject *fileno; |
1083 | 0 | state = IO_STATE(); |
1084 | 0 | if (state == NULL) |
1085 | 0 | goto error; |
1086 | 0 | fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL); |
1087 | | /* Ignore only AttributeError and UnsupportedOperation */ |
1088 | 0 | if (fileno == NULL) { |
1089 | 0 | if (PyErr_ExceptionMatches(PyExc_AttributeError) || |
1090 | 0 | PyErr_ExceptionMatches(state->unsupported_operation)) { |
1091 | 0 | PyErr_Clear(); |
1092 | 0 | } |
1093 | 0 | else { |
1094 | 0 | goto error; |
1095 | 0 | } |
1096 | 0 | } |
1097 | 0 | else { |
1098 | 0 | int fd = _PyLong_AsInt(fileno); |
1099 | 0 | Py_DECREF(fileno); |
1100 | 0 | if (fd == -1 && PyErr_Occurred()) { |
1101 | 0 | goto error; |
1102 | 0 | } |
1103 | | |
1104 | 0 | self->encoding = _Py_device_encoding(fd); |
1105 | 0 | if (self->encoding == NULL) |
1106 | 0 | goto error; |
1107 | 0 | else if (!PyUnicode_Check(self->encoding)) |
1108 | 0 | Py_CLEAR(self->encoding); |
1109 | 0 | } |
1110 | 0 | } |
1111 | 43 | if (encoding == NULL && self->encoding == NULL) { |
1112 | 0 | PyObject *locale_module = _PyIO_get_locale_module(state); |
1113 | 0 | if (locale_module == NULL) |
1114 | 0 | goto catch_ImportError; |
1115 | 0 | self->encoding = _PyObject_CallMethodIdObjArgs( |
1116 | 0 | locale_module, &PyId_getpreferredencoding, Py_False, NULL); |
1117 | 0 | Py_DECREF(locale_module); |
1118 | 0 | if (self->encoding == NULL) { |
1119 | 0 | catch_ImportError: |
1120 | | /* |
1121 | | Importing locale can raise an ImportError because of |
1122 | | _functools, and locale.getpreferredencoding can raise an |
1123 | | ImportError if _locale is not available. These will happen |
1124 | | during module building. |
1125 | | */ |
1126 | 0 | if (PyErr_ExceptionMatches(PyExc_ImportError)) { |
1127 | 0 | PyErr_Clear(); |
1128 | 0 | self->encoding = PyUnicode_FromString("ascii"); |
1129 | 0 | } |
1130 | 0 | else |
1131 | 0 | goto error; |
1132 | 0 | } |
1133 | 0 | else if (!PyUnicode_Check(self->encoding)) |
1134 | 0 | Py_CLEAR(self->encoding); |
1135 | 0 | } |
1136 | 43 | if (self->encoding != NULL) { |
1137 | 0 | encoding = PyUnicode_AsUTF8(self->encoding); |
1138 | 0 | if (encoding == NULL) |
1139 | 0 | goto error; |
1140 | 0 | } |
1141 | 43 | else if (encoding != NULL) { |
1142 | 43 | self->encoding = PyUnicode_FromString(encoding); |
1143 | 43 | if (self->encoding == NULL) |
1144 | 0 | goto error; |
1145 | 43 | } |
1146 | 0 | else { |
1147 | 0 | PyErr_SetString(PyExc_OSError, |
1148 | 0 | "could not determine default encoding"); |
1149 | 0 | goto error; |
1150 | 0 | } |
1151 | | |
1152 | | /* Check we have been asked for a real text encoding */ |
1153 | 43 | codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()"); |
1154 | 43 | if (codec_info == NULL) { |
1155 | 0 | Py_CLEAR(self->encoding); |
1156 | 0 | goto error; |
1157 | 0 | } |
1158 | | |
1159 | | /* XXX: Failures beyond this point have the potential to leak elements |
1160 | | * of the partially constructed object (like self->encoding) |
1161 | | */ |
1162 | | |
1163 | 43 | Py_INCREF(errors); |
1164 | 43 | self->errors = errors; |
1165 | 43 | self->chunk_size = 8192; |
1166 | 43 | self->line_buffering = line_buffering; |
1167 | 43 | self->write_through = write_through; |
1168 | 43 | if (set_newline(self, newline) < 0) { |
1169 | 0 | goto error; |
1170 | 0 | } |
1171 | | |
1172 | 43 | self->buffer = buffer; |
1173 | 43 | Py_INCREF(buffer); |
1174 | | |
1175 | | /* Build the decoder object */ |
1176 | 43 | if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) |
1177 | 0 | goto error; |
1178 | | |
1179 | | /* Build the encoder object */ |
1180 | 43 | if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) |
1181 | 0 | goto error; |
1182 | | |
1183 | | /* Finished sorting out the codec details */ |
1184 | 43 | Py_CLEAR(codec_info); |
1185 | | |
1186 | 43 | if (Py_TYPE(buffer) == &PyBufferedReader_Type || |
1187 | 43 | Py_TYPE(buffer) == &PyBufferedWriter_Type || |
1188 | 43 | Py_TYPE(buffer) == &PyBufferedRandom_Type) |
1189 | 43 | { |
1190 | 43 | if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0) |
1191 | 0 | goto error; |
1192 | | /* Cache the raw FileIO object to speed up 'closed' checks */ |
1193 | 43 | if (raw != NULL) { |
1194 | 43 | if (Py_TYPE(raw) == &PyFileIO_Type) |
1195 | 43 | self->raw = raw; |
1196 | 0 | else |
1197 | 0 | Py_DECREF(raw); |
1198 | 43 | } |
1199 | 43 | } |
1200 | | |
1201 | 43 | res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL); |
1202 | 43 | if (res == NULL) |
1203 | 0 | goto error; |
1204 | 43 | r = PyObject_IsTrue(res); |
1205 | 43 | Py_DECREF(res); |
1206 | 43 | if (r < 0) |
1207 | 0 | goto error; |
1208 | 43 | self->seekable = self->telling = r; |
1209 | | |
1210 | 43 | r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res); |
1211 | 43 | if (r < 0) { |
1212 | 0 | goto error; |
1213 | 0 | } |
1214 | 43 | Py_XDECREF(res); |
1215 | 43 | self->has_read1 = r; |
1216 | | |
1217 | 43 | self->encoding_start_of_stream = 0; |
1218 | 43 | if (_textiowrapper_fix_encoder_state(self) < 0) { |
1219 | 0 | goto error; |
1220 | 0 | } |
1221 | | |
1222 | 43 | self->ok = 1; |
1223 | 43 | return 0; |
1224 | | |
1225 | 0 | error: |
1226 | 0 | Py_XDECREF(codec_info); |
1227 | 0 | return -1; |
1228 | 43 | } |
1229 | | |
1230 | | /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true, |
1231 | | * -1 on error. |
1232 | | */ |
1233 | | static int |
1234 | | convert_optional_bool(PyObject *obj, int default_value) |
1235 | 0 | { |
1236 | 0 | long v; |
1237 | 0 | if (obj == Py_None) { |
1238 | 0 | v = default_value; |
1239 | 0 | } |
1240 | 0 | else { |
1241 | 0 | v = PyLong_AsLong(obj); |
1242 | 0 | if (v == -1 && PyErr_Occurred()) |
1243 | 0 | return -1; |
1244 | 0 | } |
1245 | 0 | return v != 0; |
1246 | 0 | } |
1247 | | |
1248 | | static int |
1249 | | textiowrapper_change_encoding(textio *self, PyObject *encoding, |
1250 | | PyObject *errors, int newline_changed) |
1251 | 0 | { |
1252 | | /* Use existing settings where new settings are not specified */ |
1253 | 0 | if (encoding == Py_None && errors == Py_None && !newline_changed) { |
1254 | 0 | return 0; // no change |
1255 | 0 | } |
1256 | | |
1257 | 0 | if (encoding == Py_None) { |
1258 | 0 | encoding = self->encoding; |
1259 | 0 | if (errors == Py_None) { |
1260 | 0 | errors = self->errors; |
1261 | 0 | } |
1262 | 0 | } |
1263 | 0 | else if (errors == Py_None) { |
1264 | 0 | errors = _PyUnicode_FromId(&PyId_strict); |
1265 | 0 | if (errors == NULL) { |
1266 | 0 | return -1; |
1267 | 0 | } |
1268 | 0 | } |
1269 | | |
1270 | 0 | const char *c_errors = PyUnicode_AsUTF8(errors); |
1271 | 0 | if (c_errors == NULL) { |
1272 | 0 | return -1; |
1273 | 0 | } |
1274 | | |
1275 | | // Create new encoder & decoder |
1276 | 0 | PyObject *codec_info = _PyCodec_LookupTextEncoding( |
1277 | 0 | PyUnicode_AsUTF8(encoding), "codecs.open()"); |
1278 | 0 | if (codec_info == NULL) { |
1279 | 0 | return -1; |
1280 | 0 | } |
1281 | 0 | if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 || |
1282 | 0 | _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) { |
1283 | 0 | Py_DECREF(codec_info); |
1284 | 0 | return -1; |
1285 | 0 | } |
1286 | 0 | Py_DECREF(codec_info); |
1287 | |
|
1288 | 0 | Py_INCREF(encoding); |
1289 | 0 | Py_INCREF(errors); |
1290 | 0 | Py_SETREF(self->encoding, encoding); |
1291 | 0 | Py_SETREF(self->errors, errors); |
1292 | |
|
1293 | 0 | return _textiowrapper_fix_encoder_state(self); |
1294 | 0 | } |
1295 | | |
1296 | | /*[clinic input] |
1297 | | _io.TextIOWrapper.reconfigure |
1298 | | * |
1299 | | encoding: object = None |
1300 | | errors: object = None |
1301 | | newline as newline_obj: object(c_default="NULL") = None |
1302 | | line_buffering as line_buffering_obj: object = None |
1303 | | write_through as write_through_obj: object = None |
1304 | | |
1305 | | Reconfigure the text stream with new parameters. |
1306 | | |
1307 | | This also does an implicit stream flush. |
1308 | | |
1309 | | [clinic start generated code]*/ |
1310 | | |
1311 | | static PyObject * |
1312 | | _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding, |
1313 | | PyObject *errors, PyObject *newline_obj, |
1314 | | PyObject *line_buffering_obj, |
1315 | | PyObject *write_through_obj) |
1316 | | /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/ |
1317 | 0 | { |
1318 | 0 | int line_buffering; |
1319 | 0 | int write_through; |
1320 | 0 | const char *newline = NULL; |
1321 | | |
1322 | | /* Check if something is in the read buffer */ |
1323 | 0 | if (self->decoded_chars != NULL) { |
1324 | 0 | if (encoding != Py_None || errors != Py_None || newline_obj != NULL) { |
1325 | 0 | _unsupported("It is not possible to set the encoding or newline " |
1326 | 0 | "of stream after the first read"); |
1327 | 0 | return NULL; |
1328 | 0 | } |
1329 | 0 | } |
1330 | | |
1331 | 0 | if (newline_obj != NULL && newline_obj != Py_None) { |
1332 | 0 | newline = PyUnicode_AsUTF8(newline_obj); |
1333 | 0 | if (newline == NULL || validate_newline(newline) < 0) { |
1334 | 0 | return NULL; |
1335 | 0 | } |
1336 | 0 | } |
1337 | | |
1338 | 0 | line_buffering = convert_optional_bool(line_buffering_obj, |
1339 | 0 | self->line_buffering); |
1340 | 0 | write_through = convert_optional_bool(write_through_obj, |
1341 | 0 | self->write_through); |
1342 | 0 | if (line_buffering < 0 || write_through < 0) { |
1343 | 0 | return NULL; |
1344 | 0 | } |
1345 | | |
1346 | 0 | PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); |
1347 | 0 | if (res == NULL) { |
1348 | 0 | return NULL; |
1349 | 0 | } |
1350 | 0 | Py_DECREF(res); |
1351 | 0 | self->b2cratio = 0; |
1352 | |
|
1353 | 0 | if (newline_obj != NULL && set_newline(self, newline) < 0) { |
1354 | 0 | return NULL; |
1355 | 0 | } |
1356 | | |
1357 | 0 | if (textiowrapper_change_encoding( |
1358 | 0 | self, encoding, errors, newline_obj != NULL) < 0) { |
1359 | 0 | return NULL; |
1360 | 0 | } |
1361 | | |
1362 | 0 | self->line_buffering = line_buffering; |
1363 | 0 | self->write_through = write_through; |
1364 | 0 | Py_RETURN_NONE; |
1365 | 0 | } |
1366 | | |
/* Release every object reference held by the wrapper and mark it as not
 * initialised (self->ok = 0).  Each field is reset to NULL by Py_CLEAR, so
 * calling this more than once is harmless.  Always returns 0.
 * Also called from textiowrapper_dealloc().
 */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1385 | | |
/* Deallocator: run the IOBase finalizer, then untrack the object from the
 * GC, clear weak references, drop all owned references and free the memory.
 */
static void
textiowrapper_dealloc(textio *self)
{
    self->finalizing = 1;
    /* A negative result aborts the deallocation and leaves the object
       intact.  NOTE(review): presumably this means the finalizer
       resurrected the object -- confirm against _PyIOBase_finalize(). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1399 | | |
/* GC traversal: report every object reference stored on the wrapper to
 * `visit`.  The fields visited here are the same ones released by
 * textiowrapper_clear().  Py_VISIT returns early with the visitor's result
 * if it is non-zero; otherwise 0 is returned.
 */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1417 | | |
/* Forward declaration: CHECK_CLOSED below calls the `closed` getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* This macro takes some shortcuts to make the common case faster.
 *
 * It raises ValueError("I/O operation on closed file.") and executes
 * `return NULL` in the enclosing function when the stream is closed, so it
 * may only be used in functions returning a pointer.
 *
 *  - Exact TextIOWrapper with a cached raw FileIO object: ask the FileIO
 *    object directly through _PyFileIO_closed().
 *  - Exact TextIOWrapper without a cached raw object: evaluate the `closed`
 *    getter and test its truth value.
 *  - Any other type (i.e. a subclass): defer to _PyIOBase_check_closed().
 *
 * `self` is expanded several times; pass a plain variable, not an
 * expression with side effects.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1447 | | |
/* Raise ValueError and `return NULL` from the enclosing function when the
 * object has not been successfully initialised (self->ok <= 0).
 *
 * NOTE: this expands to a bare `if` statement rather than a
 * do { ... } while (0) block, so it must be used as a statement of its own
 * and never as the unbraced body of another if/else.
 */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED plus a check that the underlying buffer has not been
 * detached; raises ValueError and `return NULL`s otherwise.
 *
 * NOTE: expands to two consecutive statements, so the same restriction as
 * for CHECK_INITIALIZED applies.
 */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions that report errors by
 * returning -1 instead of NULL.
 */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1473 | | |
1474 | | |
1475 | | /*[clinic input] |
1476 | | _io.TextIOWrapper.detach |
1477 | | [clinic start generated code]*/ |
1478 | | |
1479 | | static PyObject * |
1480 | | _io_TextIOWrapper_detach_impl(textio *self) |
1481 | | /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/ |
1482 | 0 | { |
1483 | 0 | PyObject *buffer, *res; |
1484 | 0 | CHECK_ATTACHED(self); |
1485 | 0 | res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL); |
1486 | 0 | if (res == NULL) |
1487 | 0 | return NULL; |
1488 | 0 | Py_DECREF(res); |
1489 | 0 | buffer = self->buffer; |
1490 | 0 | self->buffer = NULL; |
1491 | 0 | self->detached = 1; |
1492 | 0 | return buffer; |
1493 | 0 | } |
1494 | | |
1495 | | /* Flush the internal write buffer. This doesn't explicitly flush the |
1496 | | underlying buffered object, though. */ |
1497 | | static int |
1498 | | _textiowrapper_writeflush(textio *self) |
1499 | 275 | { |
1500 | 275 | if (self->pending_bytes == NULL) |
1501 | 261 | return 0; |
1502 | | |
1503 | 14 | PyObject *pending = self->pending_bytes; |
1504 | 14 | PyObject *b; |
1505 | | |
1506 | 14 | if (PyBytes_Check(pending)) { |
1507 | 0 | b = pending; |
1508 | 0 | Py_INCREF(b); |
1509 | 0 | } |
1510 | 14 | else if (PyUnicode_Check(pending)) { |
1511 | 14 | assert(PyUnicode_IS_ASCII(pending)); |
1512 | 14 | assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count); |
1513 | 14 | b = PyBytes_FromStringAndSize( |
1514 | 14 | PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending)); |
1515 | 14 | if (b == NULL) { |
1516 | 0 | return -1; |
1517 | 0 | } |
1518 | 14 | } |
1519 | 0 | else { |
1520 | 0 | assert(PyList_Check(pending)); |
1521 | 0 | b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count); |
1522 | 0 | if (b == NULL) { |
1523 | 0 | return -1; |
1524 | 0 | } |
1525 | | |
1526 | 0 | char *buf = PyBytes_AsString(b); |
1527 | 0 | Py_ssize_t pos = 0; |
1528 | |
|
1529 | 0 | for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) { |
1530 | 0 | PyObject *obj = PyList_GET_ITEM(pending, i); |
1531 | 0 | char *src; |
1532 | 0 | Py_ssize_t len; |
1533 | 0 | if (PyUnicode_Check(obj)) { |
1534 | 0 | assert(PyUnicode_IS_ASCII(obj)); |
1535 | 0 | src = PyUnicode_DATA(obj); |
1536 | 0 | len = PyUnicode_GET_LENGTH(obj); |
1537 | 0 | } |
1538 | 0 | else { |
1539 | 0 | assert(PyBytes_Check(obj)); |
1540 | 0 | if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) { |
1541 | 0 | Py_DECREF(b); |
1542 | 0 | return -1; |
1543 | 0 | } |
1544 | 0 | } |
1545 | 0 | memcpy(buf + pos, src, len); |
1546 | 0 | pos += len; |
1547 | 0 | } |
1548 | 0 | assert(pos == self->pending_bytes_count); |
1549 | 0 | } |
1550 | | |
1551 | 14 | self->pending_bytes_count = 0; |
1552 | 14 | self->pending_bytes = NULL; |
1553 | 14 | Py_DECREF(pending); |
1554 | | |
1555 | 14 | PyObject *ret; |
1556 | 14 | do { |
1557 | 14 | ret = PyObject_CallMethodObjArgs(self->buffer, |
1558 | 14 | _PyIO_str_write, b, NULL); |
1559 | 14 | } while (ret == NULL && _PyIO_trap_eintr()); |
1560 | 14 | Py_DECREF(b); |
1561 | 14 | if (ret == NULL) |
1562 | 0 | return -1; |
1563 | 14 | Py_DECREF(ret); |
1564 | 14 | return 0; |
1565 | 14 | } |
1566 | | |
1567 | | /*[clinic input] |
1568 | | _io.TextIOWrapper.write |
1569 | | text: unicode |
1570 | | / |
1571 | | [clinic start generated code]*/ |
1572 | | |
static PyObject *
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
{
    /* Encode `text` and queue it in self->pending_bytes, flushing to the
     * buffer when the queue exceeds chunk_size, when write_through is set,
     * or when line buffering applies and the text contains a line break.
     *
     * Returns the length of the text as passed in (measured before any
     * newline translation) as an int object, or NULL with an exception set.
     */
    PyObject *ret;
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0, text_needflush = 0;

    if (PyUnicode_READY(text) == -1)
        return NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->encoder == NULL)
        return _unsupported("not writable");

    /* Own a reference to `text` from here on: it may be replaced by a
       translated copy below, and is released after encoding. */
    Py_INCREF(text);

    textlen = PyUnicode_GET_LENGTH(text);

    /* Only scan for '\n' when the answer matters (newline translation or
       line buffering). */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        /* text = text.replace("\n", writenl) */
        PyObject *newtext = _PyObject_CallMethodId(
            text, &PyId_replace, "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    if (self->write_through)
        text_needflush = 1;
    /* With line buffering, a '\n' or a '\r' triggers a flush of the
       underlying buffer as well. */
    if (self->line_buffering &&
        (haslf ||
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
            /* Shortcut: queue the ASCII str itself instead of encoding it;
               _textiowrapper_writeflush() copies its data as bytes later. */
            b = text;
            Py_INCREF(b);
        }
        else {
            b = (*self->encodefunc)((PyObject *) self, text);
        }
        self->encoding_start_of_stream = 0;
    }
    else
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);

    /* Release our reference to `text`.  Below, `text` is only compared by
       pointer against `b` to detect the ASCII shortcut. */
    Py_DECREF(text);
    if (b == NULL)
        return NULL;
    if (b != text && !PyBytes_Check(b)) {
        PyErr_Format(PyExc_TypeError,
                     "encoder should return a bytes object, not '%.200s'",
                     Py_TYPE(b)->tp_name);
        Py_DECREF(b);
        return NULL;
    }

    Py_ssize_t bytes_len;
    if (b == text) {
        /* ASCII str: one byte per character. */
        bytes_len = PyUnicode_GET_LENGTH(b);
    }
    else {
        bytes_len = PyBytes_GET_SIZE(b);
    }

    /* Queue `b`; in every branch the reference to `b` is consumed. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes_count = 0;
        self->pending_bytes = b;
    }
    else if (!PyList_CheckExact(self->pending_bytes)) {
        /* Second chunk: promote the single pending object to a list.
           PyList_SET_ITEM steals both references. */
        PyObject *list = PyList_New(2);
        if (list == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        PyList_SET_ITEM(list, 0, self->pending_bytes);
        PyList_SET_ITEM(list, 1, b);
        self->pending_bytes = list;
    }
    else {
        if (PyList_Append(self->pending_bytes, b) < 0) {
            Py_DECREF(b);
            return NULL;
        }
        Py_DECREF(b);
    }

    self->pending_bytes_count += bytes_len;
    if (self->pending_bytes_count > self->chunk_size || needflush ||
        text_needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* A write discards any buffered decoded input and the tell() snapshot,
       and resets the decoder. */
    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1698 | | |
1699 | | /* Steal a reference to chars and store it in the decoded_char buffer; |
1700 | | */ |
1701 | | static void |
1702 | | textiowrapper_set_decoded_chars(textio *self, PyObject *chars) |
1703 | 20 | { |
1704 | 20 | Py_XSETREF(self->decoded_chars, chars); |
1705 | 20 | self->decoded_chars_used = 0; |
1706 | 20 | } |
1707 | | |
1708 | | static PyObject * |
1709 | | textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n) |
1710 | 0 | { |
1711 | 0 | PyObject *chars; |
1712 | 0 | Py_ssize_t avail; |
1713 | |
|
1714 | 0 | if (self->decoded_chars == NULL) |
1715 | 0 | return PyUnicode_FromStringAndSize(NULL, 0); |
1716 | | |
1717 | | /* decoded_chars is guaranteed to be "ready". */ |
1718 | 0 | avail = (PyUnicode_GET_LENGTH(self->decoded_chars) |
1719 | 0 | - self->decoded_chars_used); |
1720 | |
|
1721 | 0 | assert(avail >= 0); |
1722 | |
|
1723 | 0 | if (n < 0 || n > avail) |
1724 | 0 | n = avail; |
1725 | |
|
1726 | 0 | if (self->decoded_chars_used > 0 || n < avail) { |
1727 | 0 | chars = PyUnicode_Substring(self->decoded_chars, |
1728 | 0 | self->decoded_chars_used, |
1729 | 0 | self->decoded_chars_used + n); |
1730 | 0 | if (chars == NULL) |
1731 | 0 | return NULL; |
1732 | 0 | } |
1733 | 0 | else { |
1734 | 0 | chars = self->decoded_chars; |
1735 | 0 | Py_INCREF(chars); |
1736 | 0 | } |
1737 | | |
1738 | 0 | self->decoded_chars_used += n; |
1739 | 0 | return chars; |
1740 | 0 | } |
1741 | | |
/* Read and decode the next chunk of data from the BufferedReader.
 *
 * `size_hint` is a number of characters the caller would like; when
 * positive it is scaled by the last observed bytes-per-character ratio
 * (at least 1.0) and the read size is the larger of that and
 * self->chunk_size.
 *
 * Returns 1 if characters were decoded or the read returned data, 0 at
 * EOF (empty read and nothing decoded), and -1 with an exception set on
 * error.
 */
static int
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
{
    PyObject *dec_buffer = NULL;
    PyObject *dec_flags = NULL;
    PyObject *input_chunk = NULL;
    Py_buffer input_chunk_buf;
    PyObject *decoded_chars, *chunk_size;
    Py_ssize_t nbytes, nchars;
    int eof;

    /* The return value is True unless EOF was reached.  The decoded string is
     * placed in self._decoded_chars (replacing its previous value).  The
     * entire input chunk is sent to the decoder, though some of it may remain
     * buffered in the decoder, yet to be converted.
     */

    if (self->decoder == NULL) {
        _unsupported("not readable");
        return -1;
    }

    if (self->telling) {
        /* To prepare for tell(), we need to snapshot a point in the file
         * where the decoder's input buffer is empty.
         */

        PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
                                                     _PyIO_str_getstate, NULL);
        if (state == NULL)
            return -1;
        /* Given this, we know there was a valid snapshot point
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
         */
        if (!PyTuple_Check(state)) {
            PyErr_SetString(PyExc_TypeError,
                            "illegal decoder state");
            Py_DECREF(state);
            return -1;
        }
        /* dec_buffer / dec_flags are borrowed from `state` until the
           Py_INCREF calls below, hence the plain `return -1` (not
           `goto fail`) on the error paths in between. */
        if (!PyArg_ParseTuple(state,
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
        {
            Py_DECREF(state);
            return -1;
        }

        if (!PyBytes_Check(dec_buffer)) {
            PyErr_Format(PyExc_TypeError,
                         "illegal decoder state: the first item should be a "
                         "bytes object, not '%.200s'",
                         Py_TYPE(dec_buffer)->tp_name);
            Py_DECREF(state);
            return -1;
        }
        Py_INCREF(dec_buffer);
        Py_INCREF(dec_flags);
        Py_DECREF(state);
    }

    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
    if (size_hint > 0) {
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
    }
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
    if (chunk_size == NULL)
        goto fail;

    /* Use read1() when the buffer provides it, read() otherwise. */
    input_chunk = PyObject_CallMethodObjArgs(self->buffer,
        (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
        chunk_size, NULL);
    Py_DECREF(chunk_size);
    if (input_chunk == NULL)
        goto fail;

    /* The buffer view is only used to validate the type and obtain the
       byte count; the decoder receives the object itself. */
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
        PyErr_Format(PyExc_TypeError,
                     "underlying %s() should have returned a bytes-like object, "
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
                     Py_TYPE(input_chunk)->tp_name);
        goto fail;
    }

    nbytes = input_chunk_buf.len;
    eof = (nbytes == 0);

    decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
    PyBuffer_Release(&input_chunk_buf);
    if (decoded_chars == NULL)
        goto fail;

    /* set_decoded_chars steals the reference; self keeps the object alive
       for the length query below. */
    textiowrapper_set_decoded_chars(self, decoded_chars);
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
    if (nchars > 0)
        self->b2cratio = (double) nbytes / nchars;
    else
        self->b2cratio = 0.0;
    /* An empty read that still produced characters is not reported as EOF. */
    if (nchars > 0)
        eof = 0;

    if (self->telling) {
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
         * next input to be decoded is dec_buffer + input_chunk.
         */
        PyObject *next_input = dec_buffer;
        PyBytes_Concat(&next_input, input_chunk);
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
        if (next_input == NULL) {
            goto fail;
        }
        /* "N" hands both references over to Py_BuildValue, so dec_flags
           must not be released again in the failure path. */
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
        if (snapshot == NULL) {
            dec_flags = NULL;
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);
    }
    Py_DECREF(input_chunk);

    return (eof == 0);

  fail:
    Py_XDECREF(dec_buffer);
    Py_XDECREF(dec_flags);
    Py_XDECREF(input_chunk);
    return -1;
}
1871 | | |
1872 | | /*[clinic input] |
1873 | | _io.TextIOWrapper.read |
1874 | | size as n: Py_ssize_t(accept={int, NoneType}) = -1 |
1875 | | / |
1876 | | [clinic start generated code]*/ |
1877 | | |
/* TextIOWrapper.read() implementation: return up to n decoded characters.
 *
 * n < 0:  call buffer.read() once without a size, decode the bytes with the
 *         decoder's last argument set to true (presumably its "final" flag),
 *         and return the still-pending decoded characters followed by the
 *         newly decoded text.  The decoded buffer and snapshot are cleared.
 * n >= 0: take what is already decoded, then keep calling
 *         textiowrapper_read_chunk() until n characters were gathered or it
 *         reports EOF (returns 0).
 *
 * Returns a new reference, or NULL with an exception set.
 */
1878 | | static PyObject *
1879 | | _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1880 | | /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1881 | 0 | {
1882 | 0 | PyObject *result = NULL, *chunks = NULL;
1883 |

1884 | 0 | CHECK_ATTACHED(self);
1885 | 0 | CHECK_CLOSED(self);
1886 | |
/* Without a decoder this wrapper cannot produce text. */
1887 | 0 | if (self->decoder == NULL)
1888 | 0 | return _unsupported("not readable");
1889 | |
1890 | 0 | if (_textiowrapper_writeflush(self) < 0)
1891 | 0 | return NULL;
1892 | |
1893 | 0 | if (n < 0) {
1894 | | /* Read everything */
1895 | 0 | PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
1896 | 0 | PyObject *decoded;
1897 | 0 | if (bytes == NULL)
1898 | 0 | goto fail;
1899 | |
/* Use the C entry point directly when the decoder is exactly the built-in
 * newline decoder; otherwise go through a regular method call. */
1900 | 0 | if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1901 | 0 | decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1902 | 0 | bytes, 1);
1903 | 0 | else
1904 | 0 | decoded = PyObject_CallMethodObjArgs(
1905 | 0 | self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
1906 | 0 | Py_DECREF(bytes);
/* NOTE(review): on failure `decoded` is not released here; presumably
 * check_decoded() handles NULL and disposes of a bad object -- confirm. */
1907 | 0 | if (check_decoded(decoded) < 0)
1908 | 0 | goto fail;
1909 | |
/* -1: take everything still pending in the decoded-chars buffer. */
1910 | 0 | result = textiowrapper_get_decoded_chars(self, -1);
1911 |

1912 | 0 | if (result == NULL) {
1913 | 0 | Py_DECREF(decoded);
1914 | 0 | return NULL;
1915 | 0 | }
1916 | |
/* Consumes the reference to `decoded`; `result` is NULL on failure. */
1917 | 0 | PyUnicode_AppendAndDel(&result, decoded);
1918 | 0 | if (result == NULL)
1919 | 0 | goto fail;
1920 | |
1921 | 0 | textiowrapper_set_decoded_chars(self, NULL);
1922 | 0 | Py_CLEAR(self->snapshot);
1923 | 0 | return result;
1924 | 0 | }
1925 | 0 | else {
1926 | 0 | int res = 1;
/* Number of characters still missing to reach n. */
1927 | 0 | Py_ssize_t remaining = n;
1928 |

1929 | 0 | result = textiowrapper_get_decoded_chars(self, n);
1930 | 0 | if (result == NULL)
1931 | 0 | goto fail;
1932 | 0 | if (PyUnicode_READY(result) == -1)
1933 | 0 | goto fail;
1934 | 0 | remaining -= PyUnicode_GET_LENGTH(result);
1935 | |
1936 | | /* Keep reading chunks until we have n characters to return */
1937 | 0 | while (remaining > 0) {
1938 | 0 | res = textiowrapper_read_chunk(self, remaining);
1939 | 0 | if (res < 0) {
1940 | | /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1941 | | when EINTR occurs so we needn't do it ourselves. */
1942 | 0 | if (_PyIO_trap_eintr()) {
1943 | 0 | continue;
1944 | 0 | }
1945 | 0 | goto fail;
1946 | 0 | }
1947 | 0 | if (res == 0) /* EOF */
1948 | 0 | break;
/* The list is created lazily, only once a second piece is needed. */
1949 | 0 | if (chunks == NULL) {
1950 | 0 | chunks = PyList_New(0);
1951 | 0 | if (chunks == NULL)
1952 | 0 | goto fail;
1953 | 0 | }
/* Empty pieces are not stored; the list holds its own reference, so the
 * local one is dropped right after. */
1954 | 0 | if (PyUnicode_GET_LENGTH(result) > 0 &&
1955 | 0 | PyList_Append(chunks, result) < 0)
1956 | 0 | goto fail;
1957 | 0 | Py_DECREF(result);
1958 | 0 | result = textiowrapper_get_decoded_chars(self, remaining);
1959 | 0 | if (result == NULL)
1960 | 0 | goto fail;
1961 | 0 | remaining -= PyUnicode_GET_LENGTH(result);
1962 | 0 | }
/* If pieces were set aside, append the last one and join them all. */
1963 | 0 | if (chunks != NULL) {
1964 | 0 | if (result != NULL && PyList_Append(chunks, result) < 0)
1965 | 0 | goto fail;
1966 | 0 | Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
1967 | 0 | if (result == NULL)
1968 | 0 | goto fail;
1969 | 0 | Py_CLEAR(chunks);
1970 | 0 | }
1971 | 0 | return result;
1972 | 0 | }
1973 | 0 | fail:
1974 | 0 | Py_XDECREF(result);
1975 | 0 | Py_XDECREF(chunks);
1976 | 0 | return NULL;
1977 | 0 | }
1978 | | |
1979 | | |
/* Return a pointer to the first code unit equal to `ch` at or after `s`,
 * or NULL if there is none before `end`.
 *
 * kind is the PyUnicode_*_KIND of the storage, i.e. the size in bytes of one
 * code unit; `s` and `end` are byte pointers into that storage.
 * For 1-byte storage this is a plain memchr() over end - s bytes.  For wider
 * storage the inner loop skips every code unit greater than `ch` WITHOUT a
 * bounds check, and `s == end` is only tested on units <= ch; the scan
 * therefore relies on a terminator at `end` to stop.
 */
1980 | | /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
1981 | | that is to the NUL character. Otherwise the function will produce
1982 | | incorrect results. */
1983 | | static const char *
1984 | | find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
1985 | 261 | {
1986 | 261 | if (kind == PyUnicode_1BYTE_KIND) {
1987 | 261 | assert(ch < 256);
1988 | 261 | return (char *) memchr((const void *) s, (char) ch, end - s);
1989 | 261 | }
1990 | 0 | for (;;) {
/* Fast skip over code units that cannot be the (small) searched value. */
1991 | 0 | while (PyUnicode_READ(kind, s, 0) > ch)
1992 | 0 | s += kind;
1993 | 0 | if (PyUnicode_READ(kind, s, 0) == ch)
1994 | 0 | return s;
/* A unit <= ch that is not ch: either the terminator or an ordinary
 * smaller character that must be stepped over. */
1995 | 0 | if (s == end)
1996 | 0 | return NULL;
1997 | 0 | s += kind;
1998 | 0 | }
1999 | 0 | }
2000 | | |
/* Locate the first line ending in the text [start, end).
 *
 * translated: newlines were already translated, only '\n' is searched.
 * universal:  any of "\r", "\r\n" or "\n" ends a line.
 * readnl:     otherwise, the exact line-ending string to look for (asserted
 *             to use 1-byte storage).
 * kind:       size in bytes of one code unit of the text.
 * start, end: byte pointers; `end` must be the real end of the storage
 *             because the scans rely on the terminator found there.
 *
 * Returns the number of characters from `start` up to and including the line
 * ending.  If no complete line ending is found, returns -1 and stores in
 * *consumed the number of leading characters that can be set aside by the
 * caller; *consumed is written only in that case.
 */
2001 | | Py_ssize_t
2002 | | _PyIO_find_line_ending(
2003 | | int translated, int universal, PyObject *readnl,
2004 | | int kind, const char *start, const char *end, Py_ssize_t *consumed)
2005 | 261 | {
2006 | 261 | Py_ssize_t len = (end - start)/kind;
2007 | |
2008 | 261 | if (translated) {
2009 | | /* Newlines are already translated, only search for \n */
2010 | 261 | const char *pos = find_control_char(kind, start, end, '\n');
2011 | 261 | if (pos != NULL)
2012 | 259 | return (pos - start)/kind + 1;
2013 | 2 | else {
2014 | 2 | *consumed = len;
2015 | 2 | return -1;
2016 | 2 | }
2017 | 261 | }
2018 | 0 | else if (universal) {
2019 | | /* Universal newline search. Find any of \r, \r\n, \n
2020 | | * The decoder ensures that \r\n are not split in two pieces
2021 | | */
2022 | 0 | const char *s = start;
2023 | 0 | for (;;) {
2024 | 0 | Py_UCS4 ch;
2025 | | /* Fast path for non-control chars. The loop always ends
2026 | | since the Unicode string is NUL-terminated. */
2027 | 0 | while (PyUnicode_READ(kind, s, 0) > '\r')
2028 | 0 | s += kind;
2029 | 0 | if (s >= end) {
2030 | 0 | *consumed = len;
2031 | 0 | return -1;
2032 | 0 | }
2033 | 0 | ch = PyUnicode_READ(kind, s, 0);
2034 | 0 | s += kind;
2035 | 0 | if (ch == '\n')
2036 | 0 | return (s - start)/kind;
/* s already points past the '\r'; a directly following '\n' belongs to the
 * same line ending and is included in the returned length. */
2037 | 0 | if (ch == '\r') {
2038 | 0 | if (PyUnicode_READ(kind, s, 0) == '\n')
2039 | 0 | return (s - start)/kind + 1;
2040 | 0 | else
2041 | 0 | return (s - start)/kind;
2042 | 0 | }
2043 | 0 | }
2044 | 0 | }
2045 | 0 | else {
2046 | | /* Non-universal mode. */
2047 | 0 | Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2048 | 0 | Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2049 | | /* Assume that readnl is an ASCII character. */
2050 | 0 | assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2051 | 0 | if (readnl_len == 1) {
2052 | 0 | const char *pos = find_control_char(kind, start, end, nl[0]);
2053 | 0 | if (pos != NULL)
2054 | 0 | return (pos - start)/kind + 1;
2055 | 0 | *consumed = len;
2056 | 0 | return -1;
2057 | 0 | }
2058 | 0 | else {
2059 | 0 | const char *s = start;
/* A complete readnl can only start strictly before e. */
2060 | 0 | const char *e = end - (readnl_len - 1)*kind;
2061 | 0 | const char *pos;
2062 | 0 | if (e < s)
2063 | 0 | e = s;
2064 | 0 | while (s < e) {
2065 | 0 | Py_ssize_t i;
/* This `pos` shadows the outer one for the duration of the loop body. */
2066 | 0 | const char *pos = find_control_char(kind, s, end, nl[0]);
2067 | 0 | if (pos == NULL || pos >= e)
2068 | 0 | break;
/* First character matched; compare the rest of readnl. */
2069 | 0 | for (i = 1; i < readnl_len; i++) {
2070 | 0 | if (PyUnicode_READ(kind, pos, i) != nl[i])
2071 | 0 | break;
2072 | 0 | }
2073 | 0 | if (i == readnl_len)
2074 | 0 | return (pos - start)/kind + readnl_len;
2075 | 0 | s = pos + kind;
2076 | 0 | }
/* No full match.  If the first character of readnl occurs in the tail
 * [e, end) it may be the start of a line ending that continues in the next
 * chunk, so only the characters before it are reported as consumed. */
2077 | 0 | pos = find_control_char(kind, e, end, nl[0]);
2078 | 0 | if (pos == NULL)
2079 | 0 | *consumed = len;
2080 | 0 | else
2081 | 0 | *consumed = (pos - start)/kind;
2082 | 0 | return -1;
2083 | 0 | }
2084 | 0 | }
2085 | 261 | }
2086 | | |
/* Core of readline(): return the next line as a new str reference.
 *
 * limit < 0 means no length cap; otherwise at most `limit` characters are
 * returned (the cap is tracked across chunk boundaries through `chunked`).
 * At EOF with nothing gathered the shared empty string is returned.
 * Returns NULL with an exception set on error.
 *
 * Locals:
 *   line             text being scanned: either decoded_chars itself or
 *                    `remaining` + decoded_chars
 *   start, endpos    character offsets into `line`
 *   chunks           list of pieces set aside while no line ending was found
 *   chunked          total length of the pieces stored in `chunks`
 *   remaining        unconsumed tail of the previous buffer (left over when
 *                    _PyIO_find_line_ending() reports fewer consumed
 *                    characters than the buffer holds)
 *   offset_to_buffer length of the prefix of `line` that did not come from
 *                    the current decoded_chars buffer
 */
2087 | | static PyObject *
2088 | | _textiowrapper_readline(textio *self, Py_ssize_t limit)
2089 | 260 | {
2090 | 260 | PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2091 | 260 | Py_ssize_t start, endpos, chunked, offset_to_buffer;
2092 | 260 | int res;
2093 | |
2094 | 260 | CHECK_CLOSED(self);
2095 | |
2096 | 260 | if (_textiowrapper_writeflush(self) < 0)
2097 | 0 | return NULL;
2098 | |
2099 | 260 | chunked = 0;
2100 | |
2101 | 262 | while (1) {
2102 | 262 | char *ptr;
2103 | 262 | Py_ssize_t line_len;
2104 | 262 | int kind;
2105 | 262 | Py_ssize_t consumed = 0;
2106 | |
2107 | | /* First, get some data if necessary */
2108 | 262 | res = 1;
2109 | 264 | while (!self->decoded_chars ||
2110 | 264 | !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2111 | 3 | res = textiowrapper_read_chunk(self, 0);
2112 | 3 | if (res < 0) {
2113 | | /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2114 | | when EINTR occurs so we needn't do it ourselves. */
2115 | 0 | if (_PyIO_trap_eintr()) {
2116 | 0 | continue;
2117 | 0 | }
2118 | 0 | goto error;
2119 | 0 | }
2120 | 3 | if (res == 0)
2121 | 1 | break;
2122 | 3 | }
2123 | 262 | if (res == 0) {
2124 | | /* end of file */
2125 | 1 | textiowrapper_set_decoded_chars(self, NULL);
2126 | 1 | Py_CLEAR(self->snapshot);
2127 | 1 | start = endpos = offset_to_buffer = 0;
2128 | 1 | break;
2129 | 1 | }
2130 | |
/* Scan the decoded buffer directly when nothing was carried over;
 * otherwise scan the carried-over tail glued to the fresh buffer. */
2131 | 261 | if (remaining == NULL) {
2132 | 261 | line = self->decoded_chars;
2133 | 261 | start = self->decoded_chars_used;
2134 | 261 | offset_to_buffer = 0;
2135 | 261 | Py_INCREF(line);
2136 | 261 | }
2137 | 0 | else {
2138 | 0 | assert(self->decoded_chars_used == 0);
2139 | 0 | line = PyUnicode_Concat(remaining, self->decoded_chars);
2140 | 0 | start = 0;
2141 | 0 | offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2142 | 0 | Py_CLEAR(remaining);
2143 | 0 | if (line == NULL)
2144 | 0 | goto error;
2145 | 0 | if (PyUnicode_READY(line) == -1)
2146 | 0 | goto error;
2147 | 0 | }
2148 | |
2149 | 261 | ptr = PyUnicode_DATA(line);
2150 | 261 | line_len = PyUnicode_GET_LENGTH(line);
2151 | 261 | kind = PyUnicode_KIND(line);
2152 | |
/* Search from character `start` to the real end of the storage. */
2153 | 261 | endpos = _PyIO_find_line_ending(
2154 | 261 | self->readtranslate, self->readuniversal, self->readnl,
2155 | 261 | kind,
2156 | 261 | ptr + kind * start,
2157 | 261 | ptr + kind * line_len,
2158 | 261 | &consumed);
2159 | 261 | if (endpos >= 0) {
/* Line ending found: make endpos absolute, then apply the length cap. */
2160 | 259 | endpos += start;
2161 | 259 | if (limit >= 0 && (endpos - start) + chunked >= limit)
2162 | 0 | endpos = start + limit - chunked;
2163 | 259 | break;
2164 | 259 | }
2165 | |
2166 | | /* We can put aside up to `endpos` */
2167 | 2 | endpos = consumed + start;
2168 | 2 | if (limit >= 0 && (endpos - start) + chunked >= limit) {
2169 | | /* Didn't find line ending, but reached length limit */
2170 | 0 | endpos = start + limit - chunked;
2171 | 0 | break;
2172 | 0 | }
2173 | |
2174 | 2 | if (endpos > start) {
2175 | | /* No line ending seen yet - put aside current data */
2176 | 1 | PyObject *s;
2177 | 1 | if (chunks == NULL) {
2178 | 1 | chunks = PyList_New(0);
2179 | 1 | if (chunks == NULL)
2180 | 0 | goto error;
2181 | 1 | }
2182 | 1 | s = PyUnicode_Substring(line, start, endpos);
2183 | 1 | if (s == NULL)
2184 | 0 | goto error;
2185 | 1 | if (PyList_Append(chunks, s) < 0) {
2186 | 0 | Py_DECREF(s);
2187 | 0 | goto error;
2188 | 0 | }
2189 | 1 | chunked += PyUnicode_GET_LENGTH(s);
2190 | 1 | Py_DECREF(s);
2191 | 1 | }
2192 | | /* There may be some remaining bytes we'll have to prepend to the
2193 | | next chunk of data */
2194 | 2 | if (endpos < line_len) {
2195 | 0 | remaining = PyUnicode_Substring(line, endpos, line_len);
2196 | 0 | if (remaining == NULL)
2197 | 0 | goto error;
2198 | 0 | }
2199 | 2 | Py_CLEAR(line);
2200 | | /* We have consumed the buffer */
2201 | 2 | textiowrapper_set_decoded_chars(self, NULL);
2202 | 2 | }
2203 | |
/* Here `line` is non-NULL only when the loop ended inside a buffer (line
 * ending found or limit reached); at EOF it is NULL. */
2204 | 260 | if (line != NULL) {
2205 | | /* Our line ends in the current buffer */
/* Convert the offset in `line` back to an offset in decoded_chars. */
2206 | 259 | self->decoded_chars_used = endpos - offset_to_buffer;
/* Only copy when the result is a proper slice of `line`. */
2207 | 259 | if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2208 | 259 | PyObject *s = PyUnicode_Substring(line, start, endpos);
2209 | 259 | Py_CLEAR(line);
2210 | 259 | if (s == NULL)
2211 | 0 | goto error;
2212 | 259 | line = s;
2213 | 259 | }
2214 | 259 | }
/* A tail still pending at this point (EOF reached) is part of the result. */
2215 | 260 | if (remaining != NULL) {
2216 | 0 | if (chunks == NULL) {
2217 | 0 | chunks = PyList_New(0);
2218 | 0 | if (chunks == NULL)
2219 | 0 | goto error;
2220 | 0 | }
2221 | 0 | if (PyList_Append(chunks, remaining) < 0)
2222 | 0 | goto error;
2223 | 0 | Py_CLEAR(remaining);
2224 | 0 | }
/* Glue the set-aside pieces and the final piece together. */
2225 | 260 | if (chunks != NULL) {
2226 | 1 | if (line != NULL) {
2227 | 1 | if (PyList_Append(chunks, line) < 0)
2228 | 0 | goto error;
2229 | 1 | Py_DECREF(line);
2230 | 1 | }
2231 | 1 | line = PyUnicode_Join(_PyIO_empty_str, chunks);
2232 | 1 | if (line == NULL)
2233 | 0 | goto error;
2234 | 1 | Py_CLEAR(chunks);
2235 | 1 | }
/* Nothing at all was read: return the shared empty string. */
2236 | 260 | if (line == NULL) {
2237 | 1 | Py_INCREF(_PyIO_empty_str);
2238 | 1 | line = _PyIO_empty_str;
2239 | 1 | }
2240 | |
2241 | 260 | return line;
2242 | |
2243 | 0 | error:
2244 | 0 | Py_XDECREF(chunks);
2245 | 0 | Py_XDECREF(remaining);
2246 | 0 | Py_XDECREF(line);
2247 | 0 | return NULL;
2248 | 260 | }
2249 | | |
2250 | | /*[clinic input] |
2251 | | _io.TextIOWrapper.readline |
2252 | | size: Py_ssize_t = -1 |
2253 | | / |
2254 | | [clinic start generated code]*/ |
2255 | | |
/* TextIOWrapper.readline(size) entry point: after the CHECK_ATTACHED guard,
 * delegates to _textiowrapper_readline() with `size` as the character
 * limit (the clinic default is -1, i.e. no limit). */
2256 | | static PyObject *
2257 | | _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2258 | | /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2259 | 0 | {
2260 | 0 | CHECK_ATTACHED(self);
2261 | 0 | return _textiowrapper_readline(self, size);
2262 | 0 | }
2263 | | |
2264 | | /* Seek and Tell */ |
2265 | | |
/* Unpacked form of the integer cookie exchanged by tell() and seek().
 *
 *   start_pos      position passed to buffer.seek() (the safe start point)
 *   dec_flags      flags value handed to decoder.setstate((b"", dec_flags))
 *   bytes_to_feed  number of bytes seek() reads from the buffer and feeds
 *                  to the decoder
 *   chars_to_skip  number of decoded characters seek() then skips
 *   need_eof       passed by seek() as the second argument of
 *                  decoder.decode()
 */
2266 | | typedef struct {
2267 | | Py_off_t start_pos;
2268 | | int dec_flags;
2269 | | int bytes_to_feed;
2270 | | int chars_to_skip;
2271 | | char need_eof;
2272 | | } cookie_type;
2273 | | |
2274 | | /*
2275 | | To speed up cookie packing/unpacking, we store the fields in a temporary
2276 | | string and call _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
2277 | | The following macros define at which offsets in the intermediary byte
2278 | | string the various cookie_type fields will be stored.
2279 | | */
2280 | |
/* Sum of the field sizes of cookie_type: the fields are packed back to back
   in the byte string, without any struct padding. */
2281 | | #define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2282 | |
2283 | | #if PY_BIG_ENDIAN
2284 | | /* We want the least significant byte of start_pos to also be the least
2285 | | significant byte of the cookie, which means that in big-endian mode we
2286 | | must copy the fields in reverse order. */
2287 | |
2288 | | # define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
2289 | | # define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
2290 | | # define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
2291 | | # define OFF_CHARS_TO_SKIP (sizeof(char))
2292 | | # define OFF_NEED_EOF 0
2293 | |
2294 | | #else
2295 | | /* Little-endian mode: the least significant byte of start_pos will
2296 | | naturally end up the least significant byte of the cookie. */
2297 | |
2298 | 0 | # define OFF_START_POS 0
2299 | 0 | # define OFF_DEC_FLAGS (sizeof(Py_off_t))
2300 | 0 | # define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
2301 | 0 | # define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
2302 | 0 | # define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
2303 | |
2304 | | #endif
2305 | | |
/* Unpack the Python-level cookie `cookieObj` into *cookie.
 *
 * The object is converted with PyNumber_Long(), written out as an unsigned
 * integer of COOKIE_BUF_LEN bytes in native byte order, and each field is
 * then copied from its OFF_* offset.
 * Returns 0 on success, -1 with an exception set if the conversion or
 * _PyLong_AsByteArray() fails.
 *
 * NOTE(review): the int fields are copied bit-for-bit from caller-supplied
 * data, so for arbitrary integers they can come out negative.
 */
2306 | | static int
2307 | | textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2308 | 0 | {
2309 | 0 | unsigned char buffer[COOKIE_BUF_LEN];
2310 | 0 | PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2311 | 0 | if (cookieLong == NULL)
2312 | 0 | return -1;
2313 | |
2314 | 0 | if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2315 | 0 | PY_LITTLE_ENDIAN, 0) < 0) {
2316 | 0 | Py_DECREF(cookieLong);
2317 | 0 | return -1;
2318 | 0 | }
2319 | 0 | Py_DECREF(cookieLong);
2320 |

/* memcpy rather than pointer casts: the offsets inside `buffer` are not
 * necessarily aligned for the field types. */
2321 | 0 | memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2322 | 0 | memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2323 | 0 | memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2324 | 0 | memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2325 | 0 | memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2326 |

2327 | 0 | return 0;
2328 | 0 | }
2329 | | |
/* Inverse of textiowrapper_parse_cookie(): pack the fields of *cookie into a
 * COOKIE_BUF_LEN-byte array at their OFF_* offsets and turn that array into
 * an unsigned Python int (native byte order).
 * Returns a new reference, or NULL if _PyLong_FromByteArray() fails.
 */
2330 | | static PyObject *
2331 | | textiowrapper_build_cookie(cookie_type *cookie)
2332 | 0 | {
2333 | 0 | unsigned char buffer[COOKIE_BUF_LEN];
2334 |

2335 | 0 | memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2336 | 0 | memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2337 | 0 | memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2338 | 0 | memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2339 | 0 | memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2340 |

2341 | 0 | return _PyLong_FromByteArray(buffer, sizeof(buffer),
2342 | 0 | PY_LITTLE_ENDIAN, 0);
2343 | 0 | }
2344 | | |
/* Put self->decoder into the state described by `cookie`.
 *
 * Calls decoder.reset() when the cookie designates the start of the stream
 * (start_pos == 0 and dec_flags == 0); otherwise calls
 * decoder.setstate((b"", dec_flags)).
 * Returns 0 on success, -1 with an exception set if the call fails.
 */
2345 | | static int
2346 | | _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2347 | 0 | {
2348 | 0 | PyObject *res;
2349 | | /* When seeking to the start of the stream, we call decoder.reset()
2350 | | rather than decoder.setstate().
2351 | | This is for a few decoders such as utf-16 for which the state value
2352 | | at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2353 | | utf-16, that we are expecting a BOM).
2354 | | */
2355 | 0 | if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2356 | 0 | res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2357 | 0 | else
2358 | 0 | res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2359 | 0 | "((yi))", "", cookie->dec_flags);
2360 | 0 | if (res == NULL)
2361 | 0 | return -1;
2362 | 0 | Py_DECREF(res);
2363 | 0 | return 0;
2364 | 0 | }
2365 | | |
/* Reset self->encoder and record whether we are at the start of the stream.
 *
 * start_of_stream != 0: call encoder.reset(), set encoding_start_of_stream
 *                       to 1.
 * start_of_stream == 0: call encoder.setstate(0), set
 *                       encoding_start_of_stream to 0.
 * Note that encoding_start_of_stream is updated before the result of the
 * call is checked, so it changes even when the call fails.
 * Returns 0 on success, -1 with an exception set if the call fails.
 */
2366 | | static int
2367 | | _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2368 | 0 | {
2369 | 0 | PyObject *res;
2370 | 0 | if (start_of_stream) {
2371 | 0 | res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2372 | 0 | self->encoding_start_of_stream = 1;
2373 | 0 | }
2374 | 0 | else {
2375 | 0 | res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2376 | 0 | _PyLong_Zero, NULL);
2377 | 0 | self->encoding_start_of_stream = 0;
2378 | 0 | }
2379 | 0 | if (res == NULL)
2380 | 0 | return -1;
2381 | 0 | Py_DECREF(res);
2382 | 0 | return 0;
2383 | 0 | }
2384 | | |
/* Encoder counterpart of _textiowrapper_decoder_setstate(): the cookie is
 * treated as "start of stream" when start_pos == 0 and dec_flags == 0, and
 * the encoder is reset accordingly through _textiowrapper_encoder_reset().
 * Returns that function's result (0 on success, -1 on error).
 */
2385 | | static int
2386 | | _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2387 | 0 | {
2388 | | /* Same as _textiowrapper_decoder_setstate() above. */
2389 | 0 | return _textiowrapper_encoder_reset(
2390 | 0 | self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2391 | 0 | }
2392 | | |
2393 | | /*[clinic input] |
2394 | | _io.TextIOWrapper.seek |
2395 | | cookie as cookieObj: object |
2396 | | whence: int = 0 |
2397 | | / |
2398 | | [clinic start generated code]*/ |
2399 | | |
/* TextIOWrapper.seek(cookie, whence) implementation.
 *
 * whence == SEEK_CUR: only a zero offset is accepted; the cookie is replaced
 *     by the result of self.tell() and then handled like SEEK_SET.
 * whence == SEEK_END: only a zero offset is accepted; flushes, drops the
 *     decoded data and the snapshot, resets the decoder, seeks the buffer to
 *     its end and returns the buffer's result.
 * whence == SEEK_SET: cookieObj is unpacked with textiowrapper_parse_cookie()
 *     (it is normally a value returned by tell()); the buffer is moved to
 *     start_pos, the decoder state restored, and bytes_to_feed bytes are
 *     re-decoded so that chars_to_skip characters can be skipped.
 * Any other whence raises ValueError.
 *
 * Returns a new reference (the cookie, or the buffer's result for SEEK_END),
 * or NULL with an exception set.  The function owns one reference to
 * cookieObj for its whole duration; that reference is either returned or
 * released at `fail`.
 */
2400 | | static PyObject *
2401 | | _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2402 | | /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2403 | 0 | {
2404 | 0 | PyObject *posobj;
2405 | 0 | cookie_type cookie;
2406 | 0 | PyObject *res;
2407 | 0 | int cmp;
2408 | 0 | PyObject *snapshot;
2409 |

2410 | 0 | CHECK_ATTACHED(self);
2411 | 0 | CHECK_CLOSED(self);
2412 | |
2413 | 0 | Py_INCREF(cookieObj);
2414 |

2415 | 0 | if (!self->seekable) {
2416 | 0 | _unsupported("underlying stream is not seekable");
2417 | 0 | goto fail;
2418 | 0 | }
2419 | |
2420 | 0 | switch (whence) {
2421 | 0 | case SEEK_CUR:
2422 | | /* seek relative to current position */
2423 | 0 | cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2424 | 0 | if (cmp < 0)
2425 | 0 | goto fail;
2426 | |
2427 | 0 | if (cmp == 0) {
2428 | 0 | _unsupported("can't do nonzero cur-relative seeks");
2429 | 0 | goto fail;
2430 | 0 | }
2431 | |
2432 | | /* Seeking to the current position should attempt to
2433 | | * sync the underlying buffer with the current position.
2434 | | */
2435 | 0 | Py_DECREF(cookieObj);
2436 | 0 | cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
2437 | 0 | if (cookieObj == NULL)
2438 | 0 | goto fail;
2439 | 0 | break;
2440 | |
2441 | 0 | case SEEK_END:
2442 | | /* seek relative to end of file */
2443 | 0 | cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
2444 | 0 | if (cmp < 0)
2445 | 0 | goto fail;
2446 | |
2447 | 0 | if (cmp == 0) {
2448 | 0 | _unsupported("can't do nonzero end-relative seeks");
2449 | 0 | goto fail;
2450 | 0 | }
2451 | |
2452 | 0 | res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
2453 | 0 | if (res == NULL)
2454 | 0 | goto fail;
2455 | 0 | Py_DECREF(res);
2456 |

2457 | 0 | textiowrapper_set_decoded_chars(self, NULL);
2458 | 0 | Py_CLEAR(self->snapshot);
2459 | 0 | if (self->decoder) {
2460 | 0 | res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
2461 | 0 | if (res == NULL)
2462 | 0 | goto fail;
2463 | 0 | Py_DECREF(res);
2464 | 0 | }
2465 | |
/* buffer.seek(0, 2): move to the end of the underlying stream.  The cookie
 * is no longer needed; clearing it keeps `fail` from touching it again. */
2466 | 0 | res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
2467 | 0 | Py_CLEAR(cookieObj);
2468 | 0 | if (res == NULL)
2469 | 0 | goto fail;
2470 | 0 | if (self->encoder) {
2471 | | /* If seek() == 0, we are at the start of stream, otherwise not */
2472 | 0 | cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
2473 | 0 | if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2474 | 0 | Py_DECREF(res);
2475 | 0 | goto fail;
2476 | 0 | }
2477 | 0 | }
2478 | 0 | return res;
2479 | |
2480 | 0 | case SEEK_SET:
2481 | 0 | break;
2482 | |
2483 | 0 | default:
2484 | 0 | PyErr_Format(PyExc_ValueError,
2485 | 0 | "invalid whence (%d, should be %d, %d or %d)", whence,
2486 | 0 | SEEK_SET, SEEK_CUR, SEEK_END);
2487 | 0 | goto fail;
2488 | 0 | }
2489 | |
/* From here on: absolute seek to the position encoded in cookieObj. */
2490 | 0 | cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
2491 | 0 | if (cmp < 0)
2492 | 0 | goto fail;
2493 | |
2494 | 0 | if (cmp == 1) {
2495 | 0 | PyErr_Format(PyExc_ValueError,
2496 | 0 | "negative seek position %R", cookieObj);
2497 | 0 | goto fail;
2498 | 0 | }
2499 | |
2500 | 0 | res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2501 | 0 | if (res == NULL)
2502 | 0 | goto fail;
2503 | 0 | Py_DECREF(res);
2504 | |
2505 | | /* The strategy of seek() is to go back to the safe start point
2506 | | * and replay the effect of read(chars_to_skip) from there.
2507 | | */
2508 | 0 | if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2509 | 0 | goto fail;
2510 | |
2511 | | /* Seek back to the safe start point. */
2512 | 0 | posobj = PyLong_FromOff_t(cookie.start_pos);
2513 | 0 | if (posobj == NULL)
2514 | 0 | goto fail;
2515 | 0 | res = PyObject_CallMethodObjArgs(self->buffer,
2516 | 0 | _PyIO_str_seek, posobj, NULL);
2517 | 0 | Py_DECREF(posobj);
2518 | 0 | if (res == NULL)
2519 | 0 | goto fail;
2520 | 0 | Py_DECREF(res);
2521 |

2522 | 0 | textiowrapper_set_decoded_chars(self, NULL);
2523 | 0 | Py_CLEAR(self->snapshot);
2524 | |
2525 | | /* Restore the decoder to its state from the safe start point. */
2526 | 0 | if (self->decoder) {
2527 | 0 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2528 | 0 | goto fail;
2529 | 0 | }
2530 | |
2531 | 0 | if (cookie.chars_to_skip) {
2532 | | /* Just like _read_chunk, feed the decoder and save a snapshot. */
2533 | 0 | PyObject *input_chunk = _PyObject_CallMethodId(
2534 | 0 | self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
2535 | 0 | PyObject *decoded;
2536 |

2537 | 0 | if (input_chunk == NULL)
2538 | 0 | goto fail;
2539 | |
2540 | 0 | if (!PyBytes_Check(input_chunk)) {
2541 | 0 | PyErr_Format(PyExc_TypeError,
2542 | 0 | "underlying read() should have returned a bytes "
2543 | 0 | "object, not '%.200s'",
2544 | 0 | Py_TYPE(input_chunk)->tp_name);
2545 | 0 | Py_DECREF(input_chunk);
2546 | 0 | goto fail;
2547 | 0 | }
2548 | |
/* "N" hands our reference to input_chunk over to the tuple (also on
 * failure); afterwards the chunk is kept alive by self->snapshot only. */
2549 | 0 | snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2550 | 0 | if (snapshot == NULL) {
2551 | 0 | goto fail;
2552 | 0 | }
2553 | 0 | Py_XSETREF(self->snapshot, snapshot);
2554 |

2555 | 0 | decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2556 | 0 | "Oi", input_chunk, (int)cookie.need_eof);
2557 |

2558 | 0 | if (check_decoded(decoded) < 0)
2559 | 0 | goto fail;
2560 | |
2561 | 0 | textiowrapper_set_decoded_chars(self, decoded);
2562 | |
2563 | | /* Skip chars_to_skip of the decoded characters. */
/* chars_to_skip is a signed int copied verbatim from the caller-supplied
 * cookie, so it can be negative.  Reject that as well: a negative value
 * would otherwise be stored in decoded_chars_used and later be used as a
 * (negative) character offset into the decoded buffer. */
2564 | 0 | if (cookie.chars_to_skip < 0 || PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2565 | 0 | PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2566 | 0 | goto fail;
2567 | 0 | }
2568 | 0 | self->decoded_chars_used = cookie.chars_to_skip;
2569 | 0 | }
2570 | 0 | else {
/* Nothing to replay: the snapshot is (dec_flags, b""). */
2571 | 0 | snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2572 | 0 | if (snapshot == NULL)
2573 | 0 | goto fail;
2574 | 0 | Py_XSETREF(self->snapshot, snapshot);
2575 | 0 | }
2576 | |
2577 | | /* Finally, reset the encoder (merely useful for proper BOM handling) */
2578 | 0 | if (self->encoder) {
2579 | 0 | if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2580 | 0 | goto fail;
2581 | 0 | }
2582 | 0 | return cookieObj;
2583 | 0 | fail:
2584 | 0 | Py_XDECREF(cookieObj);
2585 | 0 | return NULL;
2586 |

2587 | 0 | }
2588 | | |
2589 | | /*[clinic input] |
2590 | | _io.TextIOWrapper.tell |
2591 | | [clinic start generated code]*/ |
2592 | | |
2593 | | static PyObject * |
2594 | | _io_TextIOWrapper_tell_impl(textio *self) |
2595 | | /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/ |
2596 | 0 | { |
2597 | 0 | PyObject *res; |
2598 | 0 | PyObject *posobj = NULL; |
2599 | 0 | cookie_type cookie = {0,0,0,0,0}; |
2600 | 0 | PyObject *next_input; |
2601 | 0 | Py_ssize_t chars_to_skip, chars_decoded; |
2602 | 0 | Py_ssize_t skip_bytes, skip_back; |
2603 | 0 | PyObject *saved_state = NULL; |
2604 | 0 | char *input, *input_end; |
2605 | 0 | Py_ssize_t dec_buffer_len; |
2606 | 0 | int dec_flags; |
2607 | |
|
2608 | 0 | CHECK_ATTACHED(self); |
2609 | 0 | CHECK_CLOSED(self); |
2610 | | |
2611 | 0 | if (!self->seekable) { |
2612 | 0 | _unsupported("underlying stream is not seekable"); |
2613 | 0 | goto fail; |
2614 | 0 | } |
2615 | 0 | if (!self->telling) { |
2616 | 0 | PyErr_SetString(PyExc_OSError, |
2617 | 0 | "telling position disabled by next() call"); |
2618 | 0 | goto fail; |
2619 | 0 | } |
2620 | | |
2621 | 0 | if (_textiowrapper_writeflush(self) < 0) |
2622 | 0 | return NULL; |
2623 | 0 | res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); |
2624 | 0 | if (res == NULL) |
2625 | 0 | goto fail; |
2626 | 0 | Py_DECREF(res); |
2627 | |
|
2628 | 0 | posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL); |
2629 | 0 | if (posobj == NULL) |
2630 | 0 | goto fail; |
2631 | | |
2632 | 0 | if (self->decoder == NULL || self->snapshot == NULL) { |
2633 | 0 | assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0); |
2634 | 0 | return posobj; |
2635 | 0 | } |
2636 | | |
2637 | | #if defined(HAVE_LARGEFILE_SUPPORT) |
2638 | | cookie.start_pos = PyLong_AsLongLong(posobj); |
2639 | | #else |
2640 | 0 | cookie.start_pos = PyLong_AsLong(posobj); |
2641 | 0 | #endif |
2642 | 0 | Py_DECREF(posobj); |
2643 | 0 | if (PyErr_Occurred()) |
2644 | 0 | goto fail; |
2645 | | |
2646 | | /* Skip backward to the snapshot point (see _read_chunk). */ |
2647 | 0 | assert(PyTuple_Check(self->snapshot)); |
2648 | 0 | if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input)) |
2649 | 0 | goto fail; |
2650 | | |
2651 | 0 | assert (PyBytes_Check(next_input)); |
2652 | |
|
2653 | 0 | cookie.start_pos -= PyBytes_GET_SIZE(next_input); |
2654 | | |
2655 | | /* How many decoded characters have been used up since the snapshot? */ |
2656 | 0 | if (self->decoded_chars_used == 0) { |
2657 | | /* We haven't moved from the snapshot point. */ |
2658 | 0 | return textiowrapper_build_cookie(&cookie); |
2659 | 0 | } |
2660 | | |
2661 | 0 | chars_to_skip = self->decoded_chars_used; |
2662 | | |
2663 | | /* Decoder state will be restored at the end */ |
2664 | 0 | saved_state = PyObject_CallMethodObjArgs(self->decoder, |
2665 | 0 | _PyIO_str_getstate, NULL); |
2666 | 0 | if (saved_state == NULL) |
2667 | 0 | goto fail; |
2668 | | |
2669 | 0 | #define DECODER_GETSTATE() do { \ |
2670 | 0 | PyObject *dec_buffer; \ |
2671 | 0 | PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \ |
2672 | 0 | _PyIO_str_getstate, NULL); \ |
2673 | 0 | if (_state == NULL) \ |
2674 | 0 | goto fail; \ |
2675 | 0 | if (!PyTuple_Check(_state)) { \ |
2676 | 0 | PyErr_SetString(PyExc_TypeError, \ |
2677 | 0 | "illegal decoder state"); \ |
2678 | 0 | Py_DECREF(_state); \ |
2679 | 0 | goto fail; \ |
2680 | 0 | } \ |
2681 | 0 | if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \ |
2682 | 0 | &dec_buffer, &dec_flags)) \ |
2683 | 0 | { \ |
2684 | 0 | Py_DECREF(_state); \ |
2685 | 0 | goto fail; \ |
2686 | 0 | } \ |
2687 | 0 | if (!PyBytes_Check(dec_buffer)) { \ |
2688 | 0 | PyErr_Format(PyExc_TypeError, \ |
2689 | 0 | "illegal decoder state: the first item should be a " \ |
2690 | 0 | "bytes object, not '%.200s'", \ |
2691 | 0 | Py_TYPE(dec_buffer)->tp_name); \ |
2692 | 0 | Py_DECREF(_state); \ |
2693 | 0 | goto fail; \ |
2694 | 0 | } \ |
2695 | 0 | dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \ |
2696 | 0 | Py_DECREF(_state); \ |
2697 | 0 | } while (0) |
2698 | | |
2699 | 0 | #define DECODER_DECODE(start, len, res) do { \ |
2700 | 0 | PyObject *_decoded = _PyObject_CallMethodId( \ |
2701 | 0 | self->decoder, &PyId_decode, "y#", start, len); \ |
2702 | 0 | if (check_decoded(_decoded) < 0) \ |
2703 | 0 | goto fail; \ |
2704 | 0 | res = PyUnicode_GET_LENGTH(_decoded); \ |
2705 | 0 | Py_DECREF(_decoded); \ |
2706 | 0 | } while (0) |
2707 | | |
2708 | | /* Fast search for an acceptable start point, close to our |
2709 | | current pos */ |
2710 | 0 | skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip); |
2711 | 0 | skip_back = 1; |
2712 | 0 | assert(skip_back <= PyBytes_GET_SIZE(next_input)); |
2713 | 0 | input = PyBytes_AS_STRING(next_input); |
2714 | 0 | while (skip_bytes > 0) { |
2715 | | /* Decode up to tentative start point */ |
2716 | 0 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2717 | 0 | goto fail; |
2718 | 0 | DECODER_DECODE(input, skip_bytes, chars_decoded); |
2719 | 0 | if (chars_decoded <= chars_to_skip) { |
2720 | 0 | DECODER_GETSTATE(); |
2721 | 0 | if (dec_buffer_len == 0) { |
2722 | | /* Before pos and no bytes buffered in decoder => OK */ |
2723 | 0 | cookie.dec_flags = dec_flags; |
2724 | 0 | chars_to_skip -= chars_decoded; |
2725 | 0 | break; |
2726 | 0 | } |
2727 | | /* Skip back by buffered amount and reset heuristic */ |
2728 | 0 | skip_bytes -= dec_buffer_len; |
2729 | 0 | skip_back = 1; |
2730 | 0 | } |
2731 | 0 | else { |
2732 | | /* We're too far ahead, skip back a bit */ |
2733 | 0 | skip_bytes -= skip_back; |
2734 | 0 | skip_back *= 2; |
2735 | 0 | } |
2736 | 0 | } |
2737 | 0 | if (skip_bytes <= 0) { |
2738 | 0 | skip_bytes = 0; |
2739 | 0 | if (_textiowrapper_decoder_setstate(self, &cookie) < 0) |
2740 | 0 | goto fail; |
2741 | 0 | } |
2742 | | |
2743 | | /* Note our initial start point. */ |
2744 | 0 | cookie.start_pos += skip_bytes; |
2745 | 0 | cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); |
2746 | 0 | if (chars_to_skip == 0) |
2747 | 0 | goto finally; |
2748 | | |
2749 | | /* We should be close to the desired position. Now feed the decoder one |
2750 | | * byte at a time until we reach the `chars_to_skip` target. |
2751 | | * As we go, note the nearest "safe start point" before the current |
2752 | | * location (a point where the decoder has nothing buffered, so seek() |
2753 | | * can safely start from there and advance to this location). |
2754 | | */ |
2755 | 0 | chars_decoded = 0; |
2756 | 0 | input = PyBytes_AS_STRING(next_input); |
2757 | 0 | input_end = input + PyBytes_GET_SIZE(next_input); |
2758 | 0 | input += skip_bytes; |
2759 | 0 | while (input < input_end) { |
2760 | 0 | Py_ssize_t n; |
2761 | |
|
2762 | 0 | DECODER_DECODE(input, (Py_ssize_t)1, n); |
2763 | | /* We got n chars for 1 byte */ |
2764 | 0 | chars_decoded += n; |
2765 | 0 | cookie.bytes_to_feed += 1; |
2766 | 0 | DECODER_GETSTATE(); |
2767 | | |
2768 | 0 | if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) { |
2769 | | /* Decoder buffer is empty, so this is a safe start point. */ |
2770 | 0 | cookie.start_pos += cookie.bytes_to_feed; |
2771 | 0 | chars_to_skip -= chars_decoded; |
2772 | 0 | cookie.dec_flags = dec_flags; |
2773 | 0 | cookie.bytes_to_feed = 0; |
2774 | 0 | chars_decoded = 0; |
2775 | 0 | } |
2776 | 0 | if (chars_decoded >= chars_to_skip) |
2777 | 0 | break; |
2778 | 0 | input++; |
2779 | 0 | } |
2780 | 0 | if (input == input_end) { |
2781 | | /* We didn't get enough decoded data; signal EOF to get more. */ |
2782 | 0 | PyObject *decoded = _PyObject_CallMethodId( |
2783 | 0 | self->decoder, &PyId_decode, "yi", "", /* final = */ 1); |
2784 | 0 | if (check_decoded(decoded) < 0) |
2785 | 0 | goto fail; |
2786 | 0 | chars_decoded += PyUnicode_GET_LENGTH(decoded); |
2787 | 0 | Py_DECREF(decoded); |
2788 | 0 | cookie.need_eof = 1; |
2789 | |
|
2790 | 0 | if (chars_decoded < chars_to_skip) { |
2791 | 0 | PyErr_SetString(PyExc_OSError, |
2792 | 0 | "can't reconstruct logical file position"); |
2793 | 0 | goto fail; |
2794 | 0 | } |
2795 | 0 | } |
2796 | | |
2797 | 0 | finally: |
2798 | 0 | res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL); |
2799 | 0 | Py_DECREF(saved_state); |
2800 | 0 | if (res == NULL) |
2801 | 0 | return NULL; |
2802 | 0 | Py_DECREF(res); |
2803 | | |
2804 | | /* The returned cookie corresponds to the last safe start point. */ |
2805 | 0 | cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int); |
2806 | 0 | return textiowrapper_build_cookie(&cookie); |
2807 | | |
2808 | 0 | fail: |
2809 | 0 | if (saved_state) { |
2810 | 0 | PyObject *type, *value, *traceback; |
2811 | 0 | PyErr_Fetch(&type, &value, &traceback); |
2812 | 0 | res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL); |
2813 | 0 | _PyErr_ChainExceptions(type, value, traceback); |
2814 | 0 | Py_DECREF(saved_state); |
2815 | 0 | Py_XDECREF(res); |
2816 | 0 | } |
2817 | 0 | return NULL; |
2818 | 0 | } |
2819 | | |
2820 | | /*[clinic input] |
2821 | | _io.TextIOWrapper.truncate |
2822 | | pos: object = None |
2823 | | / |
2824 | | [clinic start generated code]*/ |
2825 | | |
2826 | | static PyObject * |
2827 | | _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos) |
2828 | | /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/ |
2829 | 0 | { |
2830 | 0 | PyObject *res; |
2831 | |
|
2832 | 0 | CHECK_ATTACHED(self) |
2833 | | |
2834 | 0 | res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL); |
2835 | 0 | if (res == NULL) |
2836 | 0 | return NULL; |
2837 | 0 | Py_DECREF(res); |
2838 | |
|
2839 | 0 | return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL); |
2840 | 0 | } |
2841 | | |
2842 | | static PyObject * |
2843 | | textiowrapper_repr(textio *self) |
2844 | 0 | { |
2845 | 0 | PyObject *nameobj, *modeobj, *res, *s; |
2846 | 0 | int status; |
2847 | |
|
2848 | 0 | CHECK_INITIALIZED(self); |
2849 | |
|
2850 | 0 | res = PyUnicode_FromString("<_io.TextIOWrapper"); |
2851 | 0 | if (res == NULL) |
2852 | 0 | return NULL; |
2853 | | |
2854 | 0 | status = Py_ReprEnter((PyObject *)self); |
2855 | 0 | if (status != 0) { |
2856 | 0 | if (status > 0) { |
2857 | 0 | PyErr_Format(PyExc_RuntimeError, |
2858 | 0 | "reentrant call inside %s.__repr__", |
2859 | 0 | Py_TYPE(self)->tp_name); |
2860 | 0 | } |
2861 | 0 | goto error; |
2862 | 0 | } |
2863 | 0 | if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) { |
2864 | 0 | if (!PyErr_ExceptionMatches(PyExc_ValueError)) { |
2865 | 0 | goto error; |
2866 | 0 | } |
2867 | | /* Ignore ValueError raised if the underlying stream was detached */ |
2868 | 0 | PyErr_Clear(); |
2869 | 0 | } |
2870 | 0 | if (nameobj != NULL) { |
2871 | 0 | s = PyUnicode_FromFormat(" name=%R", nameobj); |
2872 | 0 | Py_DECREF(nameobj); |
2873 | 0 | if (s == NULL) |
2874 | 0 | goto error; |
2875 | 0 | PyUnicode_AppendAndDel(&res, s); |
2876 | 0 | if (res == NULL) |
2877 | 0 | goto error; |
2878 | 0 | } |
2879 | 0 | if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) { |
2880 | 0 | goto error; |
2881 | 0 | } |
2882 | 0 | if (modeobj != NULL) { |
2883 | 0 | s = PyUnicode_FromFormat(" mode=%R", modeobj); |
2884 | 0 | Py_DECREF(modeobj); |
2885 | 0 | if (s == NULL) |
2886 | 0 | goto error; |
2887 | 0 | PyUnicode_AppendAndDel(&res, s); |
2888 | 0 | if (res == NULL) |
2889 | 0 | goto error; |
2890 | 0 | } |
2891 | 0 | s = PyUnicode_FromFormat("%U encoding=%R>", |
2892 | 0 | res, self->encoding); |
2893 | 0 | Py_DECREF(res); |
2894 | 0 | if (status == 0) { |
2895 | 0 | Py_ReprLeave((PyObject *)self); |
2896 | 0 | } |
2897 | 0 | return s; |
2898 | | |
2899 | 0 | error: |
2900 | 0 | Py_XDECREF(res); |
2901 | 0 | if (status == 0) { |
2902 | 0 | Py_ReprLeave((PyObject *)self); |
2903 | 0 | } |
2904 | 0 | return NULL; |
2905 | 0 | } |
2906 | | |
2907 | | |
2908 | | /* Inquiries */ |
2909 | | |
2910 | | /*[clinic input] |
2911 | | _io.TextIOWrapper.fileno |
2912 | | [clinic start generated code]*/ |
2913 | | |
2914 | | static PyObject * |
2915 | | _io_TextIOWrapper_fileno_impl(textio *self) |
2916 | | /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/ |
2917 | 0 | { |
2918 | 0 | CHECK_ATTACHED(self); |
2919 | 0 | return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL); |
2920 | 0 | } |
2921 | | |
2922 | | /*[clinic input] |
2923 | | _io.TextIOWrapper.seekable |
2924 | | [clinic start generated code]*/ |
2925 | | |
2926 | | static PyObject * |
2927 | | _io_TextIOWrapper_seekable_impl(textio *self) |
2928 | | /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/ |
2929 | 0 | { |
2930 | 0 | CHECK_ATTACHED(self); |
2931 | 0 | return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL); |
2932 | 0 | } |
2933 | | |
2934 | | /*[clinic input] |
2935 | | _io.TextIOWrapper.readable |
2936 | | [clinic start generated code]*/ |
2937 | | |
2938 | | static PyObject * |
2939 | | _io_TextIOWrapper_readable_impl(textio *self) |
2940 | | /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/ |
2941 | 0 | { |
2942 | 0 | CHECK_ATTACHED(self); |
2943 | 0 | return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL); |
2944 | 0 | } |
2945 | | |
2946 | | /*[clinic input] |
2947 | | _io.TextIOWrapper.writable |
2948 | | [clinic start generated code]*/ |
2949 | | |
2950 | | static PyObject * |
2951 | | _io_TextIOWrapper_writable_impl(textio *self) |
2952 | | /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/ |
2953 | 0 | { |
2954 | 0 | CHECK_ATTACHED(self); |
2955 | 0 | return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL); |
2956 | 0 | } |
2957 | | |
2958 | | /*[clinic input] |
2959 | | _io.TextIOWrapper.isatty |
2960 | | [clinic start generated code]*/ |
2961 | | |
2962 | | static PyObject * |
2963 | | _io_TextIOWrapper_isatty_impl(textio *self) |
2964 | | /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/ |
2965 | 0 | { |
2966 | 0 | CHECK_ATTACHED(self); |
2967 | 0 | return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL); |
2968 | 0 | } |
2969 | | |
2970 | | /*[clinic input] |
2971 | | _io.TextIOWrapper.flush |
2972 | | [clinic start generated code]*/ |
2973 | | |
2974 | | static PyObject * |
2975 | | _io_TextIOWrapper_flush_impl(textio *self) |
2976 | | /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/ |
2977 | 15 | { |
2978 | 15 | CHECK_ATTACHED(self); |
2979 | 15 | CHECK_CLOSED(self); |
2980 | 15 | self->telling = self->seekable; |
2981 | 15 | if (_textiowrapper_writeflush(self) < 0) |
2982 | 0 | return NULL; |
2983 | 15 | return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL); |
2984 | 15 | } |
2985 | | |
2986 | | /*[clinic input] |
2987 | | _io.TextIOWrapper.close |
2988 | | [clinic start generated code]*/ |
2989 | | |
2990 | | static PyObject * |
2991 | | _io_TextIOWrapper_close_impl(textio *self) |
2992 | | /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/ |
2993 | 1 | { |
2994 | 1 | PyObject *res; |
2995 | 1 | int r; |
2996 | 1 | CHECK_ATTACHED(self); |
2997 | | |
2998 | 1 | res = textiowrapper_closed_get(self, NULL); |
2999 | 1 | if (res == NULL) |
3000 | 0 | return NULL; |
3001 | 1 | r = PyObject_IsTrue(res); |
3002 | 1 | Py_DECREF(res); |
3003 | 1 | if (r < 0) |
3004 | 0 | return NULL; |
3005 | | |
3006 | 1 | if (r > 0) { |
3007 | 0 | Py_RETURN_NONE; /* stream already closed */ |
3008 | 0 | } |
3009 | 1 | else { |
3010 | 1 | PyObject *exc = NULL, *val, *tb; |
3011 | 1 | if (self->finalizing) { |
3012 | 0 | res = _PyObject_CallMethodIdObjArgs(self->buffer, |
3013 | 0 | &PyId__dealloc_warn, |
3014 | 0 | self, NULL); |
3015 | 0 | if (res) |
3016 | 0 | Py_DECREF(res); |
3017 | 0 | else |
3018 | 0 | PyErr_Clear(); |
3019 | 0 | } |
3020 | 1 | res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL); |
3021 | 1 | if (res == NULL) |
3022 | 0 | PyErr_Fetch(&exc, &val, &tb); |
3023 | 1 | else |
3024 | 1 | Py_DECREF(res); |
3025 | | |
3026 | 1 | res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL); |
3027 | 1 | if (exc != NULL) { |
3028 | 0 | _PyErr_ChainExceptions(exc, val, tb); |
3029 | 0 | Py_CLEAR(res); |
3030 | 0 | } |
3031 | 1 | return res; |
3032 | 1 | } |
3033 | 1 | } |
3034 | | |
3035 | | static PyObject * |
3036 | | textiowrapper_iternext(textio *self) |
3037 | 260 | { |
3038 | 260 | PyObject *line; |
3039 | | |
3040 | 260 | CHECK_ATTACHED(self); |
3041 | | |
3042 | 260 | self->telling = 0; |
3043 | 260 | if (Py_TYPE(self) == &PyTextIOWrapper_Type) { |
3044 | | /* Skip method call overhead for speed */ |
3045 | 260 | line = _textiowrapper_readline(self, -1); |
3046 | 260 | } |
3047 | 0 | else { |
3048 | 0 | line = PyObject_CallMethodObjArgs((PyObject *)self, |
3049 | 0 | _PyIO_str_readline, NULL); |
3050 | 0 | if (line && !PyUnicode_Check(line)) { |
3051 | 0 | PyErr_Format(PyExc_OSError, |
3052 | 0 | "readline() should have returned a str object, " |
3053 | 0 | "not '%.200s'", Py_TYPE(line)->tp_name); |
3054 | 0 | Py_DECREF(line); |
3055 | 0 | return NULL; |
3056 | 0 | } |
3057 | 0 | } |
3058 | | |
3059 | 260 | if (line == NULL || PyUnicode_READY(line) == -1) |
3060 | 0 | return NULL; |
3061 | | |
3062 | 260 | if (PyUnicode_GET_LENGTH(line) == 0) { |
3063 | | /* Reached EOF or would have blocked */ |
3064 | 1 | Py_DECREF(line); |
3065 | 1 | Py_CLEAR(self->snapshot); |
3066 | 1 | self->telling = self->seekable; |
3067 | 1 | return NULL; |
3068 | 1 | } |
3069 | | |
3070 | 259 | return line; |
3071 | 260 | } |
3072 | | |
3073 | | static PyObject * |
3074 | | textiowrapper_name_get(textio *self, void *context) |
3075 | 0 | { |
3076 | 0 | CHECK_ATTACHED(self); |
3077 | 0 | return _PyObject_GetAttrId(self->buffer, &PyId_name); |
3078 | 0 | } |
3079 | | |
3080 | | static PyObject * |
3081 | | textiowrapper_closed_get(textio *self, void *context) |
3082 | 4 | { |
3083 | 4 | CHECK_ATTACHED(self); |
3084 | 4 | return PyObject_GetAttr(self->buffer, _PyIO_str_closed); |
3085 | 4 | } |
3086 | | |
3087 | | static PyObject * |
3088 | | textiowrapper_newlines_get(textio *self, void *context) |
3089 | 0 | { |
3090 | 0 | PyObject *res; |
3091 | 0 | CHECK_ATTACHED(self); |
3092 | 0 | if (self->decoder == NULL || |
3093 | 0 | _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0) |
3094 | 0 | { |
3095 | 0 | Py_RETURN_NONE; |
3096 | 0 | } |
3097 | 0 | return res; |
3098 | 0 | } |
3099 | | |
3100 | | static PyObject * |
3101 | | textiowrapper_errors_get(textio *self, void *context) |
3102 | 0 | { |
3103 | 0 | CHECK_INITIALIZED(self); |
3104 | 0 | Py_INCREF(self->errors); |
3105 | 0 | return self->errors; |
3106 | 0 | } |
3107 | | |
3108 | | static PyObject * |
3109 | | textiowrapper_chunk_size_get(textio *self, void *context) |
3110 | 0 | { |
3111 | 0 | CHECK_ATTACHED(self); |
3112 | 0 | return PyLong_FromSsize_t(self->chunk_size); |
3113 | 0 | } |
3114 | | |
3115 | | static int |
3116 | | textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context) |
3117 | 0 | { |
3118 | 0 | Py_ssize_t n; |
3119 | 0 | CHECK_ATTACHED_INT(self); |
3120 | 0 | if (arg == NULL) { |
3121 | 0 | PyErr_SetString(PyExc_AttributeError, "cannot delete attribute"); |
3122 | 0 | return -1; |
3123 | 0 | } |
3124 | 0 | n = PyNumber_AsSsize_t(arg, PyExc_ValueError); |
3125 | 0 | if (n == -1 && PyErr_Occurred()) |
3126 | 0 | return -1; |
3127 | 0 | if (n <= 0) { |
3128 | 0 | PyErr_SetString(PyExc_ValueError, |
3129 | 0 | "a strictly positive integer is required"); |
3130 | 0 | return -1; |
3131 | 0 | } |
3132 | 0 | self->chunk_size = n; |
3133 | 0 | return 0; |
3134 | 0 | } |
3135 | | |
3136 | | #include "clinic/textio.c.h" |
3137 | | |
/* Method table installed as tp_methods of PyIncrementalNewlineDecoder_Type.
   Each entry is a *_METHODDEF macro (presumably provided by the Argument
   Clinic header "clinic/textio.c.h" -- confirm); the table is terminated
   by a {NULL} sentinel. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3145 | | |
3146 | | static PyGetSetDef incrementalnewlinedecoder_getset[] = { |
3147 | | {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL}, |
3148 | | {NULL} |
3149 | | }; |
3150 | | |
3151 | | PyTypeObject PyIncrementalNewlineDecoder_Type = { |
3152 | | PyVarObject_HEAD_INIT(NULL, 0) |
3153 | | "_io.IncrementalNewlineDecoder", /*tp_name*/ |
3154 | | sizeof(nldecoder_object), /*tp_basicsize*/ |
3155 | | 0, /*tp_itemsize*/ |
3156 | | (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/ |
3157 | | 0, /*tp_vectorcall_offset*/ |
3158 | | 0, /*tp_getattr*/ |
3159 | | 0, /*tp_setattr*/ |
3160 | | 0, /*tp_as_async*/ |
3161 | | 0, /*tp_repr*/ |
3162 | | 0, /*tp_as_number*/ |
3163 | | 0, /*tp_as_sequence*/ |
3164 | | 0, /*tp_as_mapping*/ |
3165 | | 0, /*tp_hash */ |
3166 | | 0, /*tp_call*/ |
3167 | | 0, /*tp_str*/ |
3168 | | 0, /*tp_getattro*/ |
3169 | | 0, /*tp_setattro*/ |
3170 | | 0, /*tp_as_buffer*/ |
3171 | | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ |
3172 | | _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */ |
3173 | | 0, /* tp_traverse */ |
3174 | | 0, /* tp_clear */ |
3175 | | 0, /* tp_richcompare */ |
3176 | | 0, /*tp_weaklistoffset*/ |
3177 | | 0, /* tp_iter */ |
3178 | | 0, /* tp_iternext */ |
3179 | | incrementalnewlinedecoder_methods, /* tp_methods */ |
3180 | | 0, /* tp_members */ |
3181 | | incrementalnewlinedecoder_getset, /* tp_getset */ |
3182 | | 0, /* tp_base */ |
3183 | | 0, /* tp_dict */ |
3184 | | 0, /* tp_descr_get */ |
3185 | | 0, /* tp_descr_set */ |
3186 | | 0, /* tp_dictoffset */ |
3187 | | _io_IncrementalNewlineDecoder___init__, /* tp_init */ |
3188 | | 0, /* tp_alloc */ |
3189 | | PyType_GenericNew, /* tp_new */ |
3190 | | }; |
3191 | | |
3192 | | |
/* Method table installed as tp_methods of PyTextIOWrapper_Type.  Each
   entry is a *_METHODDEF macro (presumably provided by the Argument Clinic
   header "clinic/textio.c.h" -- confirm); the table is terminated by a
   {NULL, NULL} sentinel. */
static PyMethodDef textiowrapper_methods[] = {
    /* Detach/reconfigure, reading, writing, flushing and closing */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3213 | | |
3214 | | static PyMemberDef textiowrapper_members[] = { |
3215 | | {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY}, |
3216 | | {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY}, |
3217 | | {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY}, |
3218 | | {"write_through", T_BOOL, offsetof(textio, write_through), READONLY}, |
3219 | | {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0}, |
3220 | | {NULL} |
3221 | | }; |
3222 | | |
3223 | | static PyGetSetDef textiowrapper_getset[] = { |
3224 | | {"name", (getter)textiowrapper_name_get, NULL, NULL}, |
3225 | | {"closed", (getter)textiowrapper_closed_get, NULL, NULL}, |
3226 | | /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL}, |
3227 | | */ |
3228 | | {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL}, |
3229 | | {"errors", (getter)textiowrapper_errors_get, NULL, NULL}, |
3230 | | {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get, |
3231 | | (setter)textiowrapper_chunk_size_set, NULL}, |
3232 | | {NULL} |
3233 | | }; |
3234 | | |
3235 | | PyTypeObject PyTextIOWrapper_Type = { |
3236 | | PyVarObject_HEAD_INIT(NULL, 0) |
3237 | | "_io.TextIOWrapper", /*tp_name*/ |
3238 | | sizeof(textio), /*tp_basicsize*/ |
3239 | | 0, /*tp_itemsize*/ |
3240 | | (destructor)textiowrapper_dealloc, /*tp_dealloc*/ |
3241 | | 0, /*tp_vectorcall_offset*/ |
3242 | | 0, /*tp_getattr*/ |
3243 | | 0, /*tps_etattr*/ |
3244 | | 0, /*tp_as_async*/ |
3245 | | (reprfunc)textiowrapper_repr,/*tp_repr*/ |
3246 | | 0, /*tp_as_number*/ |
3247 | | 0, /*tp_as_sequence*/ |
3248 | | 0, /*tp_as_mapping*/ |
3249 | | 0, /*tp_hash */ |
3250 | | 0, /*tp_call*/ |
3251 | | 0, /*tp_str*/ |
3252 | | 0, /*tp_getattro*/ |
3253 | | 0, /*tp_setattro*/ |
3254 | | 0, /*tp_as_buffer*/ |
3255 | | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
3256 | | | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ |
3257 | | _io_TextIOWrapper___init____doc__, /* tp_doc */ |
3258 | | (traverseproc)textiowrapper_traverse, /* tp_traverse */ |
3259 | | (inquiry)textiowrapper_clear, /* tp_clear */ |
3260 | | 0, /* tp_richcompare */ |
3261 | | offsetof(textio, weakreflist), /*tp_weaklistoffset*/ |
3262 | | 0, /* tp_iter */ |
3263 | | (iternextfunc)textiowrapper_iternext, /* tp_iternext */ |
3264 | | textiowrapper_methods, /* tp_methods */ |
3265 | | textiowrapper_members, /* tp_members */ |
3266 | | textiowrapper_getset, /* tp_getset */ |
3267 | | 0, /* tp_base */ |
3268 | | 0, /* tp_dict */ |
3269 | | 0, /* tp_descr_get */ |
3270 | | 0, /* tp_descr_set */ |
3271 | | offsetof(textio, dict), /*tp_dictoffset*/ |
3272 | | _io_TextIOWrapper___init__, /* tp_init */ |
3273 | | 0, /* tp_alloc */ |
3274 | | PyType_GenericNew, /* tp_new */ |
3275 | | 0, /* tp_free */ |
3276 | | 0, /* tp_is_gc */ |
3277 | | 0, /* tp_bases */ |
3278 | | 0, /* tp_mro */ |
3279 | | 0, /* tp_cache */ |
3280 | | 0, /* tp_subclasses */ |
3281 | | 0, /* tp_weaklist */ |
3282 | | 0, /* tp_del */ |
3283 | | 0, /* tp_version_tag */ |
3284 | | 0, /* tp_finalize */ |
3285 | | }; |