Coverage Report

Created: 2026-02-09 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_io/textio.c
Line
Count
Source
1
/*
2
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h"          // _PyObject_CallMethod()
11
#include "pycore_codecs.h"        // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
13
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
14
#include "pycore_long.h"          // _PyLong_GetZero()
15
#include "pycore_object.h"        // _PyObject_GC_UNTRACK()
16
#include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
17
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
18
#include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString()
19
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
20
21
#include "_iomodule.h"
22
23
/*[clinic input]
24
module _io
25
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
26
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
27
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
30
31
/* Forward declarations of the two object structs defined later in this file.
   NOTE(review): presumably needed because the generated clinic header below
   names these types in its prototypes -- confirm against clinic/textio.c.h. */
typedef struct nldecoder_object nldecoder_object;
32
typedef struct textio textio;
33
34
/* clinic_state() is defined only for the duration of the generated-header
   include and removed again right after it. */
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
35
#include "clinic/textio.c.h"
36
#undef clinic_state
37
38
/* TextIOBase */
39
40
/* Docstring for _io._TextIOBase; installed through the Py_tp_doc slot in
   textiobase_slots. */
PyDoc_STRVAR(textiobase_doc,
41
    "Base class for text I/O.\n"
42
    "\n"
43
    "This class provides a character and line based interface to stream\n"
44
    "I/O. There is no readinto method because Python's character strings\n"
45
    "are immutable.\n"
46
    );
47
48
static PyObject *
49
_unsupported(_PyIO_State *state, const char *message)
50
0
{
51
0
    PyErr_SetString(state->unsupported_operation, message);
52
0
    return NULL;
53
0
}
54
55
/*[clinic input]
56
@permit_long_docstring_body
57
_io._TextIOBase.detach
58
    cls: defining_class
59
    /
60
61
Separate the underlying buffer from the TextIOBase and return it.
62
63
After the underlying buffer has been detached, the TextIO is in an unusable state.
64
[clinic start generated code]*/
65
66
static PyObject *
67
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
68
/*[clinic end generated code: output=50915f40c609eaa4 input=8cd0652c17d7f015]*/
69
0
{
70
0
    _PyIO_State *state = get_io_state_by_cls(cls);
71
0
    return _unsupported(state, "detach");
72
0
}
73
74
/*[clinic input]
75
_io._TextIOBase.read
76
    cls: defining_class
77
    size: int(unused=True) = -1
78
    /
79
80
Read at most size characters from stream.
81
82
Read from underlying buffer until we have size characters or we hit EOF.
83
If size is negative or omitted, read until EOF.
84
[clinic start generated code]*/
85
86
static PyObject *
87
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
88
                          int Py_UNUSED(size))
89
/*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
90
0
{
91
0
    _PyIO_State *state = get_io_state_by_cls(cls);
92
0
    return _unsupported(state, "read");
93
0
}
94
95
/*[clinic input]
96
_io._TextIOBase.readline
97
    cls: defining_class
98
    size: int(unused=True) = -1
99
    /
100
101
Read until newline or EOF.
102
103
Return an empty string if EOF is hit immediately.
104
If size is specified, at most size characters will be read.
105
[clinic start generated code]*/
106
107
static PyObject *
108
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
109
                              int Py_UNUSED(size))
110
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
111
0
{
112
0
    _PyIO_State *state = get_io_state_by_cls(cls);
113
0
    return _unsupported(state, "readline");
114
0
}
115
116
/*[clinic input]
117
_io._TextIOBase.write
118
    cls: defining_class
119
    s: str(unused=True)
120
    /
121
122
Write string s to stream.
123
124
Return the number of characters written
125
(which is always equal to the length of the string).
126
[clinic start generated code]*/
127
128
static PyObject *
129
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
130
                           const char *Py_UNUSED(s))
131
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
132
0
{
133
0
    _PyIO_State *state = get_io_state_by_cls(cls);
134
0
    return _unsupported(state, "write");
135
0
}
136
137
/*[clinic input]
138
@getter
139
_io._TextIOBase.encoding
140
141
Encoding of the text stream.
142
143
Subclasses should override.
144
[clinic start generated code]*/
145
146
static PyObject *
_io__TextIOBase_encoding_get_impl(PyObject *self)
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
{
    /* The base class has no encoding of its own: the getter yields None. */
    return Py_NewRef(Py_None);
}
152
153
/*[clinic input]
154
@getter
155
_io._TextIOBase.newlines
156
157
Line endings translated so far.
158
159
Only line endings translated during reading are considered.
160
161
Subclasses should override.
162
[clinic start generated code]*/
163
164
static PyObject *
_io__TextIOBase_newlines_get_impl(PyObject *self)
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
{
    /* The base class tracks no newlines: the getter yields None. */
    return Py_NewRef(Py_None);
}
170
171
/*[clinic input]
172
@getter
173
_io._TextIOBase.errors
174
175
The error setting of the decoder or encoder.
176
177
Subclasses should override.
178
[clinic start generated code]*/
179
180
static PyObject *
_io__TextIOBase_errors_get_impl(PyObject *self)
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
{
    /* The base class has no error handler setting: the getter yields None. */
    return Py_NewRef(Py_None);
}
186
187
188
/* Method table for _io._TextIOBase (detach, read, readline, write).
   Each entry is a *_METHODDEF macro -- presumably supplied by the included
   clinic/textio.c.h -- and the table ends with a NULL sentinel. */
static PyMethodDef textiobase_methods[] = {
189
    _IO__TEXTIOBASE_DETACH_METHODDEF
190
    _IO__TEXTIOBASE_READ_METHODDEF
191
    _IO__TEXTIOBASE_READLINE_METHODDEF
192
    _IO__TEXTIOBASE_WRITE_METHODDEF
193
    {NULL, NULL}
194
};
195
196
/* Attribute table for _io._TextIOBase (encoding, newlines, errors).
   Each entry is a *_GETSETDEF macro -- presumably supplied by the included
   clinic/textio.c.h -- and the table ends with a NULL sentinel. */
static PyGetSetDef textiobase_getset[] = {
197
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
198
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
199
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
200
    {NULL}
201
};
202
203
/* Type slots for _io._TextIOBase: docstring, methods and getset tables.
   Terminated by the {0, NULL} sentinel. */
static PyType_Slot textiobase_slots[] = {
204
    {Py_tp_doc, (void *)textiobase_doc},
205
    {Py_tp_methods, textiobase_methods},
206
    {Py_tp_getset, textiobase_getset},
207
    {0, NULL},
208
};
209
210
/* Type specification for _io._TextIOBase (non-static: it has external
   linkage).  The type is subclassable (BASETYPE) and immutable.

   Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are
   inherited. */
211
PyType_Spec _Py_textiobase_spec = {
212
    .name = "_io._TextIOBase",
213
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
214
              Py_TPFLAGS_IMMUTABLETYPE),
215
    .slots = textiobase_slots,
216
};
217
218
/* IncrementalNewlineDecoder */
219
220
/* Instance layout of _io.IncrementalNewlineDecoder. */
struct nldecoder_object {
221
    PyObject_HEAD
222
    PyObject *decoder;          /* wrapped decoder; Py_None means input is used as-is */
223
    PyObject *errors;           /* set by __init__; NULL means __init__ was not called */
224
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back from the last decode() */
225
    unsigned int translate: 1;  /* rewrite "\r\n" and "\r" to "\n" while decoding */
226
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
227
};
228
229
95.5k
/* Plain (unchecked) cast from a generic object pointer to nldecoder_object *. */
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
230
231
/*[clinic input]
232
_io.IncrementalNewlineDecoder.__init__
233
    decoder: object
234
    translate: bool
235
    errors: object(c_default="NULL") = "strict"
236
237
Codec used when reading a file in universal newlines mode.
238
239
It wraps another incremental decoder, translating \r\n and \r into \n.
240
It also records the types of newlines encountered.  When used with
241
translate=False, it ensures that the newline sequence is returned in
242
one piece. When used with decoder=None, it expects unicode strings as
243
decode input and translates newlines without first invoking an external
244
decoder.
245
[clinic start generated code]*/
246
247
static int
248
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249
                                            PyObject *decoder, int translate,
250
                                            PyObject *errors)
251
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
252
16.0k
{
253
254
16.0k
    if (errors == NULL) {
255
16.0k
        errors = &_Py_ID(strict);
256
16.0k
    }
257
0
    else {
258
0
        errors = Py_NewRef(errors);
259
0
    }
260
261
16.0k
    Py_XSETREF(self->errors, errors);
262
16.0k
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
263
16.0k
    self->translate = translate ? 1 : 0;
264
16.0k
    self->seennl = 0;
265
16.0k
    self->pendingcr = 0;
266
267
16.0k
    return 0;
268
16.0k
}
269
270
static int
271
incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg)
272
1.81k
{
273
1.81k
    nldecoder_object *self = nldecoder_object_CAST(op);
274
1.81k
    Py_VISIT(Py_TYPE(self));
275
1.81k
    Py_VISIT(self->decoder);
276
1.81k
    Py_VISIT(self->errors);
277
1.81k
    return 0;
278
1.81k
}
279
280
static int
281
incrementalnewlinedecoder_clear(PyObject *op)
282
16.0k
{
283
16.0k
    nldecoder_object *self = nldecoder_object_CAST(op);
284
16.0k
    Py_CLEAR(self->decoder);
285
16.0k
    Py_CLEAR(self->errors);
286
16.0k
    return 0;
287
16.0k
}
288
289
static void
290
incrementalnewlinedecoder_dealloc(PyObject *op)
291
16.0k
{
292
16.0k
    nldecoder_object *self = nldecoder_object_CAST(op);
293
16.0k
    PyTypeObject *tp = Py_TYPE(self);
294
16.0k
    _PyObject_GC_UNTRACK(self);
295
16.0k
    (void)incrementalnewlinedecoder_clear(op);
296
16.0k
    tp->tp_free(self);
297
16.0k
    Py_DECREF(tp);
298
16.0k
}
299
300
static int
301
check_decoded(PyObject *decoded)
302
61.6k
{
303
61.6k
    if (decoded == NULL)
304
0
        return -1;
305
61.6k
    if (!PyUnicode_Check(decoded)) {
306
0
        PyErr_Format(PyExc_TypeError,
307
0
                     "decoder should return a string result, not '%.200s'",
308
0
                     Py_TYPE(decoded)->tp_name);
309
0
        Py_DECREF(decoded);
310
0
        return -1;
311
0
    }
312
61.6k
    return 0;
313
61.6k
}
314
315
/* Guard for IncrementalNewlineDecoder entry points: if `self->errors` is
   still NULL (it is assigned by __init__), set ValueError and return NULL
   from the *calling* function.  Only usable in functions that return a
   pointer.

   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe in unbraced if/else, no stray empty statement after the caller's
   semicolon); the argument is parenthesized so any pointer expression
   works. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
321
322
29.3M
/* Bit flags for the newline conventions observed so far; they are OR-ed
   together into nldecoder_object.seennl (a 3-bit field). */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
326
327
/* Core of IncrementalNewlineDecoder.decode().

   Runs `input` through the wrapped decoder (or takes it unchanged when the
   decoder is None), records which newline kinds occur in the result and,
   in translate mode, rewrites "\r\n" and "\r" to "\n".  Unless `final` is
   true, a trailing "\r" is held back (pendingcr) and re-attached to the
   next chunk so that a "\r\n" pair is never split across two results.

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    nldecoder_object *self = nldecoder_object_CAST(myself);
    PyObject *output;
    Py_ssize_t output_len;

    CHECK_INITIALIZED_DECODER(self);

    /* Step 1: obtain the decoded text. */
    if (self->decoder == Py_None) {
        output = Py_NewRef(input);
    }
    else {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }

    /* check_decoded() releases `output` itself when it is not a str. */
    if (check_decoded(output) < 0) {
        return NULL;
    }

    /* Step 2: re-attach the "\r" held back by the previous call. */
    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        PyObject *with_cr;
        int cr_kind;
        char *dest;

        with_cr = PyUnicode_New(output_len + 1,
                                PyUnicode_MAX_CHAR_VALUE(output));
        if (with_cr == NULL) {
            goto error;
        }
        cr_kind = PyUnicode_KIND(with_cr);
        dest = PyUnicode_DATA(with_cr);
        PyUnicode_WRITE(cr_kind, dest, 0, '\r');
        /* `cr_kind` doubles as the code unit size in bytes */
        memcpy(dest + cr_kind, PyUnicode_DATA(output), cr_kind * output_len);
        Py_SETREF(output, with_cr);
        self->pendingcr = 0;
        output_len++;
    }

    /* Step 3: hold back a trailing "\r" even when not translating, so that
       readline() is sure to get "\r\n" in one pass. */
    if (!final
        && output_len > 0
        && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
    {
        PyObject *trimmed = PyUnicode_Substring(output, 0, output_len - 1);
        if (trimmed == NULL) {
            goto error;
        }
        Py_SETREF(output, trimmed);
        self->pendingcr = 1;
    }

    /* Step 4: record the newlines seen and translate if requested, all in
       one pass.  The scanning loops below stop on the string's
       terminating NUL, which compares <= '\n'. */
    {
        const void *in_str = PyUnicode_DATA(output);
        Py_ssize_t len = PyUnicode_GET_LENGTH(output);
        int kind = PyUnicode_KIND(output);
        int seennl = self->seennl;
        int only_lf = 0;

        if (len == 0) {
            return output;
        }

        /* If, up to now, newlines are consistently "\n", do a quick check
           for the '\r' *byte* with libc's optimized memchr. */
        if (seennl == 0 || seennl == SEEN_LF) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* Nothing needs translating; at most we still have to learn
               whether a "\n" character is present. */
            if (seennl == 0
                && memchr(in_str, '\n', kind * len) != NULL)
            {
                if (kind == PyUnicode_1BYTE_KIND) {
                    seennl |= SEEN_LF;
                }
                else {
                    /* A '\n' byte in wider data may belong to another
                       code point: confirm character by character. */
                    Py_ssize_t pos = 0;
                    for (;;) {
                        Py_UCS4 ch;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, pos) > '\n') {
                            pos++;
                        }
                        ch = PyUnicode_READ(kind, in_str, pos++);
                        if (ch == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (pos >= len) {
                            break;
                        }
                    }
                }
            }
        }
        else if (!self->translate) {
            /* Record newline kinds only; once all three have been seen
               there is no need to scan again. */
            if (seennl != SEEN_ALL) {
                Py_ssize_t pos = 0;
                for (;;) {
                    Py_UCS4 ch;
                    /* Fast loop for non-control characters */
                    while (PyUnicode_READ(kind, in_str, pos) > '\r') {
                        pos++;
                    }
                    ch = PyUnicode_READ(kind, in_str, pos++);
                    if (ch == '\n') {
                        seennl |= SEEN_LF;
                    }
                    else if (ch == '\r') {
                        if (PyUnicode_READ(kind, in_str, pos) == '\n') {
                            seennl |= SEEN_CRLF;
                            pos++;
                        }
                        else {
                            seennl |= SEEN_CR;
                        }
                    }
                    if (pos >= len || seennl == SEEN_ALL) {
                        break;
                    }
                }
            }
        }
        else {
            /* Translate into a scratch buffer.  The result is never longer
               than the input, so kind * len bytes always suffice. */
            Py_ssize_t src = 0;
            Py_ssize_t dst = 0;
            void *translated = PyMem_Malloc(kind * len);

            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            for (;;) {
                Py_UCS4 ch;
                /* Fast loop for non-control characters */
                while ((ch = PyUnicode_READ(kind, in_str, src++)) > '\r') {
                    PyUnicode_WRITE(kind, translated, dst++, ch);
                }
                if (ch == '\n') {
                    PyUnicode_WRITE(kind, translated, dst++, ch);
                    seennl |= SEEN_LF;
                }
                else if (ch == '\r') {
                    if (PyUnicode_READ(kind, in_str, src) == '\n') {
                        src++;
                        seennl |= SEEN_CRLF;
                    }
                    else {
                        seennl |= SEEN_CR;
                    }
                    PyUnicode_WRITE(kind, translated, dst++, '\n');
                }
                else {
                    /* src has moved past the terminator: end of input */
                    if (src > len) {
                        break;
                    }
                    PyUnicode_WRITE(kind, translated, dst++, ch);
                }
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, dst);
            PyMem_Free(translated);
            if (!output) {
                return NULL;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
520
521
/*[clinic input]
522
_io.IncrementalNewlineDecoder.decode
523
    input: object
524
    final: bool = False
525
[clinic start generated code]*/
526
527
static PyObject *
528
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
529
                                          PyObject *input, int final)
530
/*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
531
0
{
532
0
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
533
0
}
534
535
/*[clinic input]
536
_io.IncrementalNewlineDecoder.getstate
537
[clinic start generated code]*/
538
539
static PyObject *
540
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
541
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
542
0
{
543
0
    PyObject *buffer;
544
0
    unsigned long long flag;
545
546
0
    CHECK_INITIALIZED_DECODER(self);
547
548
0
    if (self->decoder != Py_None) {
549
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
550
0
           &_Py_ID(getstate));
551
0
        if (state == NULL)
552
0
            return NULL;
553
0
        if (!PyTuple_Check(state)) {
554
0
            PyErr_SetString(PyExc_TypeError,
555
0
                            "illegal decoder state");
556
0
            Py_DECREF(state);
557
0
            return NULL;
558
0
        }
559
0
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
560
0
                              &buffer, &flag))
561
0
        {
562
0
            Py_DECREF(state);
563
0
            return NULL;
564
0
        }
565
0
        Py_INCREF(buffer);
566
0
        Py_DECREF(state);
567
0
    }
568
0
    else {
569
0
        buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
570
0
        flag = 0;
571
0
    }
572
0
    flag <<= 1;
573
0
    if (self->pendingcr)
574
0
        flag |= 1;
575
0
    return Py_BuildValue("NK", buffer, flag);
576
0
}
577
578
/*[clinic input]
579
_io.IncrementalNewlineDecoder.setstate
580
    state: object
581
    /
582
[clinic start generated code]*/
583
584
static PyObject *
585
_io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
586
                                            PyObject *state)
587
/*[clinic end generated code: output=09135cb6e78a1dc8 input=c53fb505a76dbbe2]*/
588
0
{
589
0
    PyObject *buffer;
590
0
    unsigned long long flag;
591
592
0
    CHECK_INITIALIZED_DECODER(self);
593
594
0
    if (!PyTuple_Check(state)) {
595
0
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
596
0
        return NULL;
597
0
    }
598
0
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
599
0
                          &buffer, &flag))
600
0
    {
601
0
        return NULL;
602
0
    }
603
604
0
    self->pendingcr = (int) (flag & 1);
605
0
    flag >>= 1;
606
607
0
    if (self->decoder != Py_None) {
608
0
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
609
0
                                    "((OK))", buffer, flag);
610
0
    }
611
0
    else {
612
0
        Py_RETURN_NONE;
613
0
    }
614
0
}
615
616
/*[clinic input]
617
_io.IncrementalNewlineDecoder.reset
618
[clinic start generated code]*/
619
620
static PyObject *
621
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
622
/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
623
0
{
624
0
    CHECK_INITIALIZED_DECODER(self);
625
626
0
    self->seennl = 0;
627
0
    self->pendingcr = 0;
628
0
    if (self->decoder != Py_None)
629
0
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
630
0
    else
631
0
        Py_RETURN_NONE;
632
0
}
633
634
/* Getter for the `newlines` attribute.

   Returns None when no newline has been seen, a single str when exactly
   one kind has been seen, or a tuple of str otherwise.  Entries always
   appear in the order "\r", "\n", "\r\n". */
static PyObject *
incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context))
{
    nldecoder_object *self = nldecoder_object_CAST(op);
    const char *seen[3];
    int count = 0;
    unsigned int mask;

    CHECK_INITIALIZED_DECODER(self);

    mask = self->seennl;
    if (mask & SEEN_CR) {
        seen[count++] = "\r";
    }
    if (mask & SEEN_LF) {
        seen[count++] = "\n";
    }
    if (mask & SEEN_CRLF) {
        seen[count++] = "\r\n";
    }

    switch (count) {
    case 1:
        return PyUnicode_FromString(seen[0]);
    case 2:
        return Py_BuildValue("ss", seen[0], seen[1]);
    case 3:
        return Py_BuildValue("sss", seen[0], seen[1], seen[2]);
    default:
        Py_RETURN_NONE;
    }
}
660
661
/* TextIOWrapper */
662
663
/* Signature of the specialized encoders defined in this file: the first
   argument is the TextIOWrapper object, the second the text to encode. */
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
664
665
/* Instance layout of _io.TextIOWrapper. */
struct textio
666
{
667
    PyObject_HEAD
668
    int ok; /* initialized? */
669
    int detached;
670
    Py_ssize_t chunk_size;
671
    PyObject *buffer;   /* underlying stream; queried via readable()/writable() */
672
    PyObject *encoding;
673
    PyObject *encoder;  /* incremental encoder; created only if buffer is writable */
674
    PyObject *decoder;  /* incremental decoder; created only if buffer is readable */
675
    PyObject *readnl;   /* str built from the newline argument, or NULL */
676
    PyObject *errors;
677
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
678
    char line_buffering;
679
    char write_through;
680
    char readuniversal;  /* newline argument was NULL or "" */
681
    char readtranslate;  /* newline argument was NULL */
682
    char writetranslate; /* newline argument was NULL or non-empty */
683
    char seekable;
684
    char has_read1;
685
    char telling;
686
    char finalizing;
687
    /* Specialized encoding func (see below) */
688
    encodefunc_t encodefunc;
689
    /* Whether or not it's the start of the stream */
690
    char encoding_start_of_stream;
691
692
    /* Reads and writes are internally buffered in order to speed things up.
693
       However, any read will first flush the write buffer if it isn't empty.
694
695
       Please also note that text to be written is first encoded before being
696
       buffered. This is necessary so that encoding errors are immediately
697
       reported to the caller, but it unfortunately means that the
698
       IncrementalEncoder (whose encode() method is always written in Python)
699
       becomes a bottleneck for small writes.
700
    */
701
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
702
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
703
    PyObject *pending_bytes;       // data waiting to be written.
704
                                   // ascii unicode, bytes, or list of them.
705
    Py_ssize_t pending_bytes_count;
706
707
    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
708
     * dec_flags is the second (integer) item of the decoder state and
709
     * next_input is the chunk of input bytes that comes next after the
710
     * snapshot point.  We use this to reconstruct decoder states in tell().
711
     */
712
    PyObject *snapshot;
713
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
714
       the heuristic in tell(). */
715
    double b2cratio;
716
717
    /* Cache raw object if it's a FileIO object */
718
    PyObject *raw;
719
720
    PyObject *weakreflist;
721
    PyObject *dict;
722
723
    _PyIO_State *state;
724
};
725
726
8.98k
/* Plain (unchecked) cast from a generic object pointer to textio *. */
#define textio_CAST(op) ((textio *)(op))
727
728
static void
729
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
730
731
/* A couple of specialized cases in order to bypass the slow incremental
732
   encoding methods for the most popular encodings. */
733
734
/* Specialized encoder: ASCII, using the wrapper's error handler name. */
static PyObject *
ascii_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsASCIIString(text, errors);
}
740
741
/* Specialized encoder: UTF-16 with byteorder argument 1, using the
   wrapper's error handler name. */
static PyObject *
utf16be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, 1);
}
747
748
/* Specialized encoder: UTF-16 with byteorder argument -1, using the
   wrapper's error handler name. */
static PyObject *
utf16le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, -1);
}
754
755
/* Specialized encoder: UTF-16.  At the start of the stream the text is
   encoded with byteorder argument 0; afterwards the BOM is skipped by
   delegating to the encoder matching the platform's byte order. */
static PyObject *
utf16_encode(PyObject *op, PyObject *text)
{
    textio *self = textio_CAST(op);

    if (self->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF16(text,
                                      PyUnicode_AsUTF8(self->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf16be_encode(op, text);
#else
    return utf16le_encode(op, text);
#endif
}
769
770
/* Specialized encoder: UTF-32 with byteorder argument 1, using the
   wrapper's error handler name. */
static PyObject *
utf32be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, 1);
}
776
777
/* Specialized encoder: UTF-32 with byteorder argument -1, using the
   wrapper's error handler name. */
static PyObject *
utf32le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, -1);
}
783
784
/* Specialized encoder: UTF-32.  At the start of the stream the text is
   encoded with byteorder argument 0; afterwards the BOM is skipped by
   delegating to the encoder matching the platform's byte order. */
static PyObject *
utf32_encode(PyObject *op, PyObject *text)
{
    textio *self = textio_CAST(op);

    if (self->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF32(text,
                                      PyUnicode_AsUTF8(self->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf32be_encode(op, text);
#else
    return utf32le_encode(op, text);
#endif
}
798
799
/* Specialized encoder: UTF-8, using the wrapper's error handler name. */
static PyObject *
utf8_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsUTF8String(text, errors);
}
805
806
/* Specialized encoder: Latin-1, using the wrapper's error handler name. */
static PyObject *
latin1_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsLatin1String(text, errors);
}
812
813
// Return true when encoding can be skipped when text is ascii.
814
static inline int
815
is_asciicompat_encoding(encodefunc_t f)
816
40.0k
{
817
40.0k
    return f == ascii_encode || f == latin1_encode || f == utf8_encode;
818
40.0k
}
819
820
/* Map normalized encoding names onto the specialized encoding funcs */
821
822
/* One row of the encodefuncs table: a codec name and the specialized
   encoder to use for it. */
typedef struct {
823
    const char *name;
824
    encodefunc_t encodefunc;
825
} encodefuncentry;
826
827
/* Codec name -> specialized encoder.  _textiowrapper_set_encoder() walks
   the rows in order, comparing against the codec's `name` attribute, and
   stops at the {NULL, NULL} sentinel. */
static const encodefuncentry encodefuncs[] = {
828
    {"ascii",       ascii_encode},
829
    {"iso8859-1",   latin1_encode},
830
    {"utf-8",       utf8_encode},
831
    {"utf-16-be",   utf16be_encode},
832
    {"utf-16-le",   utf16le_encode},
833
    {"utf-16",      utf16_encode},
834
    {"utf-32-be",   utf32be_encode},
835
    {"utf-32-le",   utf32le_encode},
836
    {"utf-32",      utf32_encode},
837
    {NULL, NULL}
838
};
839
840
static int
841
validate_newline(const char *newline)
842
102
{
843
102
    if (newline && newline[0] != '\0'
844
96
        && !(newline[0] == '\n' && newline[1] == '\0')
845
0
        && !(newline[0] == '\r' && newline[1] == '\0')
846
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847
0
        PyErr_Format(PyExc_ValueError,
848
0
                     "illegal newline value: %s", newline);
849
0
        return -1;
850
0
    }
851
102
    return 0;
852
102
}
853
854
static int
855
set_newline(textio *self, const char *newline)
856
102
{
857
102
    PyObject *old = self->readnl;
858
102
    if (newline == NULL) {
859
6
        self->readnl = NULL;
860
6
    }
861
96
    else {
862
96
        self->readnl = PyUnicode_FromString(newline);
863
96
        if (self->readnl == NULL) {
864
0
            self->readnl = old;
865
0
            return -1;
866
0
        }
867
96
    }
868
102
    self->readuniversal = (newline == NULL || newline[0] == '\0');
869
102
    self->readtranslate = (newline == NULL);
870
102
    self->writetranslate = (newline == NULL || newline[0] != '\0');
871
102
    if (!self->readuniversal && self->readnl != NULL) {
872
        // validate_newline() accepts only ASCII newlines.
873
96
        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
874
96
        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
875
96
        if (strcmp(self->writenl, "\n") == 0) {
876
96
            self->writenl = NULL;
877
96
        }
878
96
    }
879
6
    else {
880
#ifdef MS_WINDOWS
881
        self->writenl = "\r\n";
882
#else
883
6
        self->writenl = NULL;
884
6
#endif
885
6
    }
886
102
    Py_XDECREF(old);
887
102
    return 0;
888
102
}
889
890
static int
891
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
892
                           const char *errors)
893
102
{
894
102
    PyObject *res;
895
102
    int r;
896
897
102
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
898
102
    if (res == NULL)
899
0
        return -1;
900
901
102
    r = PyObject_IsTrue(res);
902
102
    Py_DECREF(res);
903
102
    if (r == -1)
904
0
        return -1;
905
906
102
    if (r != 1)
907
64
        return 0;
908
909
38
    Py_CLEAR(self->decoder);
910
38
    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
911
38
    if (self->decoder == NULL)
912
0
        return -1;
913
914
38
    if (self->readuniversal) {
915
6
        _PyIO_State *state = self->state;
916
6
        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
917
6
            (PyObject *)state->PyIncrementalNewlineDecoder_Type,
918
6
            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
919
6
        if (incrementalDecoder == NULL)
920
0
            return -1;
921
6
        Py_XSETREF(self->decoder, incrementalDecoder);
922
6
    }
923
924
38
    return 0;
925
38
}
926
927
static PyObject*
928
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
929
                      int eof)
930
30
{
931
30
    PyObject *chars;
932
933
30
    if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
934
30
        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
935
0
    else
936
0
        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
937
0
                                           eof ? Py_True : Py_False, NULL);
938
939
30
    if (check_decoded(chars) < 0)
940
        // check_decoded already decreases refcount
941
0
        return NULL;
942
943
30
    return chars;
944
30
}
945
946
static int
947
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
948
                           const char *errors)
949
102
{
950
102
    PyObject *res;
951
102
    int r;
952
953
102
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
954
102
    if (res == NULL)
955
0
        return -1;
956
957
102
    r = PyObject_IsTrue(res);
958
102
    Py_DECREF(res);
959
102
    if (r == -1)
960
0
        return -1;
961
962
102
    if (r != 1)
963
38
        return 0;
964
965
64
    Py_CLEAR(self->encoder);
966
64
    self->encodefunc = NULL;
967
64
    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
968
64
    if (self->encoder == NULL)
969
0
        return -1;
970
971
    /* Get the normalized named of the codec */
972
64
    if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
973
0
        return -1;
974
0
    }
975
64
    if (res != NULL && PyUnicode_Check(res)) {
976
64
        const encodefuncentry *e = encodefuncs;
977
192
        while (e->name != NULL) {
978
192
            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
979
64
                self->encodefunc = e->encodefunc;
980
64
                break;
981
64
            }
982
128
            e++;
983
128
        }
984
64
    }
985
64
    Py_XDECREF(res);
986
987
64
    return 0;
988
64
}
989
990
static int
991
_textiowrapper_fix_encoder_state(textio *self)
992
102
{
993
102
    if (!self->seekable || !self->encoder) {
994
38
        return 0;
995
38
    }
996
997
64
    self->encoding_start_of_stream = 1;
998
999
64
    PyObject *cookieObj = PyObject_CallMethodNoArgs(
1000
64
        self->buffer, &_Py_ID(tell));
1001
64
    if (cookieObj == NULL) {
1002
0
        return -1;
1003
0
    }
1004
1005
64
    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
1006
64
    Py_DECREF(cookieObj);
1007
64
    if (cmp < 0) {
1008
0
        return -1;
1009
0
    }
1010
1011
64
    if (cmp == 0) {
1012
30
        self->encoding_start_of_stream = 0;
1013
30
        PyObject *res = PyObject_CallMethodOneArg(
1014
30
            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1015
30
        if (res == NULL) {
1016
0
            return -1;
1017
0
        }
1018
30
        Py_DECREF(res);
1019
30
    }
1020
1021
64
    return 0;
1022
64
}
1023
1024
static int
1025
io_check_errors(PyObject *errors)
1026
96
{
1027
96
    assert(errors != NULL && errors != Py_None);
1028
1029
96
    PyInterpreterState *interp = _PyInterpreterState_GET();
1030
96
#ifndef Py_DEBUG
1031
    /* In release mode, only check in development mode (-X dev) */
1032
96
    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1033
96
        return 0;
1034
96
    }
1035
#else
1036
    /* Always check in debug mode */
1037
#endif
1038
1039
    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1040
       before_PyUnicode_InitEncodings() is called. */
1041
0
    if (!interp->unicode.fs_codec.encoding) {
1042
0
        return 0;
1043
0
    }
1044
1045
0
    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1046
0
    if (name == NULL) {
1047
0
        return -1;
1048
0
    }
1049
0
    PyObject *handler = PyCodec_LookupError(name);
1050
0
    if (handler != NULL) {
1051
0
        Py_DECREF(handler);
1052
0
        return 0;
1053
0
    }
1054
0
    return -1;
1055
0
}
1056
1057
1058
1059
/*[clinic input]
1060
_io.TextIOWrapper.__init__
1061
    buffer: object
1062
    encoding: str(accept={str, NoneType}) = None
1063
    errors: object = None
1064
    newline: str(accept={str, NoneType}) = None
1065
    line_buffering: bool = False
1066
    write_through: bool = False
1067
1068
Character and line based layer over a BufferedIOBase object, buffer.
1069
1070
encoding gives the name of the encoding that the stream will be
1071
decoded or encoded with. It defaults to locale.getencoding().
1072
1073
errors determines the strictness of encoding and decoding (see
1074
help(codecs.Codec) or the documentation for codecs.register) and
1075
defaults to "strict".
1076
1077
newline controls how line endings are handled. It can be None, '',
1078
'\n', '\r', and '\r\n'.  It works as follows:
1079
1080
* On input, if newline is None, universal newlines mode is
1081
  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1082
  these are translated into '\n' before being returned to the
1083
  caller. If it is '', universal newline mode is enabled, but line
1084
  endings are returned to the caller untranslated. If it has any of
1085
  the other legal values, input lines are only terminated by the given
1086
  string, and the line ending is returned to the caller untranslated.
1087
1088
* On output, if newline is None, any '\n' characters written are
1089
  translated to the system default line separator, os.linesep. If
1090
  newline is '' or '\n', no translation takes place. If newline is any
1091
  of the other legal values, any '\n' characters written are translated
1092
  to the given string.
1093
1094
If line_buffering is True, a call to flush is implied when a call to
1095
write contains a newline character.
1096
[clinic start generated code]*/
1097
1098
static int
1099
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1100
                                const char *encoding, PyObject *errors,
1101
                                const char *newline, int line_buffering,
1102
                                int write_through)
1103
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1104
102
{
1105
102
    PyObject *raw, *codec_info = NULL;
1106
102
    PyObject *res;
1107
102
    int r;
1108
1109
102
    self->ok = 0;
1110
102
    self->detached = 0;
1111
1112
102
    if (encoding == NULL) {
1113
0
        PyInterpreterState *interp = _PyInterpreterState_GET();
1114
0
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1115
0
            if (PyErr_WarnEx(PyExc_EncodingWarning,
1116
0
                             "'encoding' argument not specified", 1)) {
1117
0
                return -1;
1118
0
            }
1119
0
        }
1120
0
    }
1121
1122
102
    if (errors == Py_None) {
1123
6
        errors = &_Py_ID(strict);
1124
6
    }
1125
96
    else if (!PyUnicode_Check(errors)) {
1126
        // Check 'errors' argument here because Argument Clinic doesn't support
1127
        // 'str(accept={str, NoneType})' converter.
1128
0
        PyErr_Format(
1129
0
            PyExc_TypeError,
1130
0
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1131
0
            Py_TYPE(errors)->tp_name);
1132
0
        return -1;
1133
0
    }
1134
96
    else if (io_check_errors(errors)) {
1135
0
        return -1;
1136
0
    }
1137
102
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1138
102
    if (errors_str == NULL) {
1139
0
        return -1;
1140
0
    }
1141
1142
102
    if (validate_newline(newline) < 0) {
1143
0
        return -1;
1144
0
    }
1145
1146
102
    Py_CLEAR(self->buffer);
1147
102
    Py_CLEAR(self->encoding);
1148
102
    Py_CLEAR(self->encoder);
1149
102
    Py_CLEAR(self->decoder);
1150
102
    Py_CLEAR(self->readnl);
1151
102
    Py_CLEAR(self->decoded_chars);
1152
102
    Py_CLEAR(self->pending_bytes);
1153
102
    Py_CLEAR(self->snapshot);
1154
102
    Py_CLEAR(self->errors);
1155
102
    Py_CLEAR(self->raw);
1156
102
    self->decoded_chars_used = 0;
1157
102
    self->pending_bytes_count = 0;
1158
102
    self->encodefunc = NULL;
1159
102
    self->b2cratio = 0.0;
1160
1161
102
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1162
0
        _Py_DECLARE_STR(utf_8, "utf-8");
1163
0
        self->encoding = &_Py_STR(utf_8);
1164
0
    }
1165
102
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1166
0
        self->encoding = _Py_GetLocaleEncodingObject();
1167
0
        if (self->encoding == NULL) {
1168
0
            goto error;
1169
0
        }
1170
0
        assert(PyUnicode_Check(self->encoding));
1171
0
    }
1172
1173
102
    if (self->encoding != NULL) {
1174
0
        encoding = PyUnicode_AsUTF8(self->encoding);
1175
0
        if (encoding == NULL)
1176
0
            goto error;
1177
0
    }
1178
102
    else if (encoding != NULL) {
1179
102
        self->encoding = PyUnicode_FromString(encoding);
1180
102
        if (self->encoding == NULL)
1181
0
            goto error;
1182
102
    }
1183
0
    else {
1184
0
        PyErr_SetString(PyExc_OSError,
1185
0
                        "could not determine default encoding");
1186
0
        goto error;
1187
0
    }
1188
1189
    /* Check we have been asked for a real text encoding */
1190
102
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
1191
102
    if (codec_info == NULL) {
1192
0
        Py_CLEAR(self->encoding);
1193
0
        goto error;
1194
0
    }
1195
1196
    /* XXX: Failures beyond this point have the potential to leak elements
1197
     * of the partially constructed object (like self->encoding)
1198
     */
1199
1200
102
    self->errors = Py_NewRef(errors);
1201
102
    self->chunk_size = 8192;
1202
102
    self->line_buffering = line_buffering;
1203
102
    self->write_through = write_through;
1204
102
    if (set_newline(self, newline) < 0) {
1205
0
        goto error;
1206
0
    }
1207
1208
102
    self->buffer = Py_NewRef(buffer);
1209
1210
    /* Build the decoder object */
1211
102
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1212
102
    self->state = state;
1213
102
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1214
0
        goto error;
1215
1216
    /* Build the encoder object */
1217
102
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1218
0
        goto error;
1219
1220
    /* Finished sorting out the codec details */
1221
102
    Py_CLEAR(codec_info);
1222
1223
102
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1224
64
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1225
0
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1226
102
    {
1227
102
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1228
0
            goto error;
1229
        /* Cache the raw FileIO object to speed up 'closed' checks */
1230
102
        if (raw != NULL) {
1231
102
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1232
102
                self->raw = raw;
1233
0
            else
1234
0
                Py_DECREF(raw);
1235
102
        }
1236
102
    }
1237
1238
102
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1239
102
    if (res == NULL)
1240
0
        goto error;
1241
102
    r = PyObject_IsTrue(res);
1242
102
    Py_DECREF(res);
1243
102
    if (r < 0)
1244
0
        goto error;
1245
102
    self->seekable = self->telling = r;
1246
1247
102
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1248
102
    if (r < 0) {
1249
0
        goto error;
1250
0
    }
1251
102
    self->has_read1 = r;
1252
1253
102
    self->encoding_start_of_stream = 0;
1254
102
    if (_textiowrapper_fix_encoder_state(self) < 0) {
1255
0
        goto error;
1256
0
    }
1257
1258
102
    self->ok = 1;
1259
102
    return 0;
1260
1261
0
  error:
1262
0
    Py_XDECREF(codec_info);
1263
0
    return -1;
1264
102
}
1265
1266
/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1267
 * -1 on error.
1268
 */
1269
static int
1270
convert_optional_bool(PyObject *obj, int default_value)
1271
0
{
1272
0
    long v;
1273
0
    if (obj == Py_None) {
1274
0
        v = default_value;
1275
0
    }
1276
0
    else {
1277
0
        v = PyLong_AsLong(obj);
1278
0
        if (v == -1 && PyErr_Occurred())
1279
0
            return -1;
1280
0
    }
1281
0
    return v != 0;
1282
0
}
1283
1284
static int
1285
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1286
                              PyObject *errors, int newline_changed)
1287
0
{
1288
    /* Use existing settings where new settings are not specified */
1289
0
    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1290
0
        return 0;  // no change
1291
0
    }
1292
1293
0
    if (encoding == Py_None) {
1294
0
        encoding = self->encoding;
1295
0
        if (errors == Py_None) {
1296
0
            errors = self->errors;
1297
0
        }
1298
0
        Py_INCREF(encoding);
1299
0
    }
1300
0
    else {
1301
0
        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1302
0
            encoding = _Py_GetLocaleEncodingObject();
1303
0
            if (encoding == NULL) {
1304
0
                return -1;
1305
0
            }
1306
0
        } else {
1307
0
            Py_INCREF(encoding);
1308
0
        }
1309
0
        if (errors == Py_None) {
1310
0
            errors = &_Py_ID(strict);
1311
0
        }
1312
0
    }
1313
0
    Py_INCREF(errors);
1314
1315
0
    const char *c_encoding = PyUnicode_AsUTF8(encoding);
1316
0
    if (c_encoding == NULL) {
1317
0
        Py_DECREF(encoding);
1318
0
        Py_DECREF(errors);
1319
0
        return -1;
1320
0
    }
1321
0
    const char *c_errors = PyUnicode_AsUTF8(errors);
1322
0
    if (c_errors == NULL) {
1323
0
        Py_DECREF(encoding);
1324
0
        Py_DECREF(errors);
1325
0
        return -1;
1326
0
    }
1327
1328
    // Create new encoder & decoder
1329
0
    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
1330
0
    if (codec_info == NULL) {
1331
0
        Py_DECREF(encoding);
1332
0
        Py_DECREF(errors);
1333
0
        return -1;
1334
0
    }
1335
0
    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1336
0
            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1337
0
        Py_DECREF(codec_info);
1338
0
        Py_DECREF(encoding);
1339
0
        Py_DECREF(errors);
1340
0
        return -1;
1341
0
    }
1342
0
    Py_DECREF(codec_info);
1343
1344
0
    Py_SETREF(self->encoding, encoding);
1345
0
    Py_SETREF(self->errors, errors);
1346
1347
0
    return _textiowrapper_fix_encoder_state(self);
1348
0
}
1349
1350
/*[clinic input]
1351
@critical_section
1352
_io.TextIOWrapper.reconfigure
1353
    *
1354
    encoding: object = None
1355
    errors: object = None
1356
    newline as newline_obj: object(c_default="NULL") = None
1357
    line_buffering as line_buffering_obj: object = None
1358
    write_through as write_through_obj: object = None
1359
1360
Reconfigure the text stream with new parameters.
1361
1362
This also does an implicit stream flush.
1363
1364
[clinic start generated code]*/
1365
1366
static PyObject *
1367
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1368
                                   PyObject *errors, PyObject *newline_obj,
1369
                                   PyObject *line_buffering_obj,
1370
                                   PyObject *write_through_obj)
1371
/*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1372
0
{
1373
0
    int line_buffering;
1374
0
    int write_through;
1375
0
    const char *newline = NULL;
1376
1377
0
    if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1378
0
        PyErr_Format(PyExc_TypeError,
1379
0
                "reconfigure() argument 'encoding' must be str or None, not %s",
1380
0
                Py_TYPE(encoding)->tp_name);
1381
0
        return NULL;
1382
0
    }
1383
0
    if (errors != Py_None && !PyUnicode_Check(errors)) {
1384
0
        PyErr_Format(PyExc_TypeError,
1385
0
                "reconfigure() argument 'errors' must be str or None, not %s",
1386
0
                Py_TYPE(errors)->tp_name);
1387
0
        return NULL;
1388
0
    }
1389
0
    if (newline_obj != NULL && newline_obj != Py_None &&
1390
0
        !PyUnicode_Check(newline_obj))
1391
0
    {
1392
0
        PyErr_Format(PyExc_TypeError,
1393
0
                "reconfigure() argument 'newline' must be str or None, not %s",
1394
0
                Py_TYPE(newline_obj)->tp_name);
1395
0
        return NULL;
1396
0
    }
1397
    /* Check if something is in the read buffer */
1398
0
    if (self->decoded_chars != NULL) {
1399
0
        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1400
0
            _unsupported(self->state,
1401
0
                         "It is not possible to set the encoding or newline "
1402
0
                         "of stream after the first read");
1403
0
            return NULL;
1404
0
        }
1405
0
    }
1406
1407
0
    if (newline_obj != NULL && newline_obj != Py_None) {
1408
0
        newline = PyUnicode_AsUTF8(newline_obj);
1409
0
        if (newline == NULL || validate_newline(newline) < 0) {
1410
0
            return NULL;
1411
0
        }
1412
0
    }
1413
1414
0
    line_buffering = convert_optional_bool(line_buffering_obj,
1415
0
                                           self->line_buffering);
1416
0
    if (line_buffering < 0) {
1417
0
        return NULL;
1418
0
    }
1419
0
    write_through = convert_optional_bool(write_through_obj,
1420
0
                                          self->write_through);
1421
0
    if (write_through < 0) {
1422
0
        return NULL;
1423
0
    }
1424
1425
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1426
0
        return NULL;
1427
0
    }
1428
0
    self->b2cratio = 0;
1429
1430
0
    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1431
0
        return NULL;
1432
0
    }
1433
1434
0
    if (textiowrapper_change_encoding(
1435
0
            self, encoding, errors, newline_obj != NULL) < 0) {
1436
0
        return NULL;
1437
0
    }
1438
1439
0
    self->line_buffering = line_buffering;
1440
0
    self->write_through = write_through;
1441
0
    Py_RETURN_NONE;
1442
0
}
1443
1444
static int
1445
textiowrapper_clear(PyObject *op)
1446
6
{
1447
6
    textio *self = textio_CAST(op);
1448
6
    self->ok = 0;
1449
6
    Py_CLEAR(self->buffer);
1450
6
    Py_CLEAR(self->encoding);
1451
6
    Py_CLEAR(self->encoder);
1452
6
    Py_CLEAR(self->decoder);
1453
6
    Py_CLEAR(self->readnl);
1454
6
    Py_CLEAR(self->decoded_chars);
1455
6
    Py_CLEAR(self->pending_bytes);
1456
6
    Py_CLEAR(self->snapshot);
1457
6
    Py_CLEAR(self->errors);
1458
6
    Py_CLEAR(self->raw);
1459
1460
6
    Py_CLEAR(self->dict);
1461
6
    return 0;
1462
6
}
1463
1464
static void
1465
textiowrapper_dealloc(PyObject *op)
1466
6
{
1467
6
    textio *self = textio_CAST(op);
1468
6
    PyTypeObject *tp = Py_TYPE(self);
1469
6
    self->finalizing = 1;
1470
6
    if (_PyIOBase_finalize(op) < 0)
1471
0
        return;
1472
6
    self->ok = 0;
1473
6
    _PyObject_GC_UNTRACK(self);
1474
6
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
1475
6
    (void)textiowrapper_clear(op);
1476
6
    tp->tp_free(self);
1477
6
    Py_DECREF(tp);
1478
6
}
1479
1480
static int
1481
textiowrapper_traverse(PyObject *op, visitproc visit, void *arg)
1482
3.51k
{
1483
3.51k
    textio *self = textio_CAST(op);
1484
3.51k
    Py_VISIT(Py_TYPE(self));
1485
3.51k
    Py_VISIT(self->buffer);
1486
3.51k
    Py_VISIT(self->encoding);
1487
3.51k
    Py_VISIT(self->encoder);
1488
3.51k
    Py_VISIT(self->decoder);
1489
3.51k
    Py_VISIT(self->readnl);
1490
3.51k
    Py_VISIT(self->decoded_chars);
1491
3.51k
    Py_VISIT(self->pending_bytes);
1492
3.51k
    Py_VISIT(self->snapshot);
1493
3.51k
    Py_VISIT(self->errors);
1494
3.51k
    Py_VISIT(self->raw);
1495
1496
3.51k
    Py_VISIT(self->dict);
1497
3.51k
    return 0;
1498
3.51k
}
1499
1500
static PyObject *
1501
_io_TextIOWrapper_closed_get_impl(textio *self);
1502
1503
/* This macro takes some shortcuts to make the common case faster. */
1504
#define CHECK_CLOSED(self) \
1505
45.5k
    do { \
1506
45.5k
        int r; \
1507
45.5k
        PyObject *_res; \
1508
45.5k
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
1509
45.5k
            if (self->raw != NULL) \
1510
45.5k
                r = _PyFileIO_closed(self->raw); \
1511
45.5k
            else { \
1512
0
                _res = _io_TextIOWrapper_closed_get_impl(self); \
1513
0
                if (_res == NULL) \
1514
0
                    return NULL; \
1515
0
                r = PyObject_IsTrue(_res); \
1516
0
                Py_DECREF(_res); \
1517
0
                if (r < 0) \
1518
0
                    return NULL; \
1519
0
            } \
1520
45.5k
            if (r > 0) { \
1521
0
                PyErr_SetString(PyExc_ValueError, \
1522
0
                                "I/O operation on closed file."); \
1523
0
                return NULL; \
1524
0
            } \
1525
45.5k
        } \
1526
45.5k
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1527
0
            return NULL; \
1528
45.5k
    } while (0)
1529
1530
/* Guard macros for TextIOWrapper entry points.

   Each one is a single statement (wrapped in do { } while (0)) and must be
   followed by a semicolon, so it composes safely with a surrounding
   if/else.  On failure a ValueError is set and the enclosing function
   returns.  The argument is parenthesized at every use. */

/* Return NULL from the enclosing function unless self->ok is positive. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Like CHECK_INITIALIZED(), and additionally return NULL when the
   underlying buffer has been detached. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Same checks as CHECK_ATTACHED(), for functions that report errors by
   returning -1. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1555
1556
1557
/*[clinic input]
1558
@critical_section
1559
_io.TextIOWrapper.detach
1560
[clinic start generated code]*/
1561
1562
static PyObject *
1563
_io_TextIOWrapper_detach_impl(textio *self)
1564
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1565
0
{
1566
0
    PyObject *buffer;
1567
0
    CHECK_ATTACHED(self);
1568
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1569
0
        return NULL;
1570
0
    }
1571
0
    buffer = self->buffer;
1572
0
    self->buffer = NULL;
1573
0
    self->detached = 1;
1574
0
    return buffer;
1575
0
}
1576
1577
/* Flush the internal write buffer. This doesn't explicitly flush the
1578
   underlying buffered object, though. */
1579
static int
1580
_textiowrapper_writeflush(textio *self)
1581
45.5k
{
1582
45.5k
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
1583
1584
45.5k
    if (self->pending_bytes == NULL)
1585
5.46k
        return 0;
1586
1587
40.0k
    PyObject *pending = self->pending_bytes;
1588
40.0k
    PyObject *b;
1589
1590
40.0k
    if (PyBytes_Check(pending)) {
1591
0
        b = Py_NewRef(pending);
1592
0
    }
1593
40.0k
    else if (PyUnicode_Check(pending)) {
1594
40.0k
        assert(PyUnicode_IS_ASCII(pending));
1595
40.0k
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1596
40.0k
        b = PyBytes_FromStringAndSize(
1597
40.0k
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1598
40.0k
        if (b == NULL) {
1599
0
            return -1;
1600
0
        }
1601
40.0k
    }
1602
0
    else {
1603
0
        assert(PyList_Check(pending));
1604
0
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1605
0
        if (b == NULL) {
1606
0
            return -1;
1607
0
        }
1608
1609
0
        char *buf = PyBytes_AsString(b);
1610
0
        Py_ssize_t pos = 0;
1611
1612
0
        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1613
0
            PyObject *obj = PyList_GET_ITEM(pending, i);
1614
0
            char *src;
1615
0
            Py_ssize_t len;
1616
0
            if (PyUnicode_Check(obj)) {
1617
0
                assert(PyUnicode_IS_ASCII(obj));
1618
0
                src = PyUnicode_DATA(obj);
1619
0
                len = PyUnicode_GET_LENGTH(obj);
1620
0
            }
1621
0
            else {
1622
0
                assert(PyBytes_Check(obj));
1623
0
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1624
0
                    Py_DECREF(b);
1625
0
                    return -1;
1626
0
                }
1627
0
            }
1628
0
            memcpy(buf + pos, src, len);
1629
0
            pos += len;
1630
0
        }
1631
0
        assert(pos == self->pending_bytes_count);
1632
0
    }
1633
1634
40.0k
    self->pending_bytes_count = 0;
1635
40.0k
    self->pending_bytes = NULL;
1636
40.0k
    Py_DECREF(pending);
1637
1638
40.0k
    PyObject *ret;
1639
40.0k
    do {
1640
40.0k
        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1641
40.0k
    } while (ret == NULL && _PyIO_trap_eintr());
1642
40.0k
    Py_DECREF(b);
1643
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
1644
    // when an error occurred.
1645
40.0k
    if (ret == NULL)
1646
0
        return -1;
1647
40.0k
    Py_DECREF(ret);
1648
40.0k
    return 0;
1649
40.0k
}
1650
1651
/*[clinic input]
1652
@critical_section
1653
_io.TextIOWrapper.write
1654
    text: unicode
1655
    /
1656
[clinic start generated code]*/
1657
1658
static PyObject *
1659
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1660
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1661
40.0k
{
1662
40.0k
    PyObject *ret;
1663
40.0k
    PyObject *b;
1664
40.0k
    Py_ssize_t textlen;
1665
40.0k
    int haslf = 0;
1666
40.0k
    int needflush = 0, text_needflush = 0;
1667
1668
40.0k
    CHECK_ATTACHED(self);
1669
40.0k
    CHECK_CLOSED(self);
1670
1671
40.0k
    if (self->encoder == NULL) {
1672
0
        return _unsupported(self->state, "not writable");
1673
0
    }
1674
1675
40.0k
    Py_INCREF(text);
1676
1677
40.0k
    textlen = PyUnicode_GET_LENGTH(text);
1678
1679
40.0k
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1680
40.0k
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1681
40.0k
            haslf = 1;
1682
1683
40.0k
    if (haslf && self->writetranslate && self->writenl != NULL) {
1684
0
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1685
0
                                                 "ss", "\n", self->writenl);
1686
0
        Py_DECREF(text);
1687
0
        if (newtext == NULL)
1688
0
            return NULL;
1689
0
        text = newtext;
1690
0
    }
1691
1692
40.0k
    if (self->write_through)
1693
0
        text_needflush = 1;
1694
40.0k
    if (self->line_buffering &&
1695
40.0k
        (haslf ||
1696
0
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1697
40.0k
        needflush = 1;
1698
1699
    /* XXX What if we were just reading? */
1700
40.0k
    if (self->encodefunc != NULL) {
1701
40.0k
        if (PyUnicode_IS_ASCII(text) &&
1702
                // See bpo-43260
1703
40.0k
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1704
40.0k
                is_asciicompat_encoding(self->encodefunc)) {
1705
40.0k
            b = Py_NewRef(text);
1706
40.0k
        }
1707
0
        else {
1708
0
            b = (*self->encodefunc)((PyObject *) self, text);
1709
0
        }
1710
40.0k
        self->encoding_start_of_stream = 0;
1711
40.0k
    }
1712
0
    else {
1713
0
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1714
0
    }
1715
1716
40.0k
    Py_DECREF(text);
1717
40.0k
    if (b == NULL)
1718
0
        return NULL;
1719
40.0k
    if (b != text && !PyBytes_Check(b)) {
1720
0
        PyErr_Format(PyExc_TypeError,
1721
0
                     "encoder should return a bytes object, not '%.200s'",
1722
0
                     Py_TYPE(b)->tp_name);
1723
0
        Py_DECREF(b);
1724
0
        return NULL;
1725
0
    }
1726
1727
40.0k
    Py_ssize_t bytes_len;
1728
40.0k
    if (b == text) {
1729
40.0k
        bytes_len = PyUnicode_GET_LENGTH(b);
1730
40.0k
    }
1731
0
    else {
1732
0
        bytes_len = PyBytes_GET_SIZE(b);
1733
0
    }
1734
1735
    // We should avoid concatenating huge data.
1736
    // Flush the buffer before adding b to the buffer if b is not small.
1737
    // https://github.com/python/cpython/issues/87426
1738
40.0k
    if (bytes_len >= self->chunk_size) {
1739
        // _textiowrapper_writeflush() calls buffer.write().
1740
        // self->pending_bytes can be appended during buffer->write()
1741
        // or other thread.
1742
        // We need to loop until buffer becomes empty.
1743
        // https://github.com/python/cpython/issues/118138
1744
        // https://github.com/python/cpython/issues/119506
1745
0
        while (self->pending_bytes != NULL) {
1746
0
            if (_textiowrapper_writeflush(self) < 0) {
1747
0
                Py_DECREF(b);
1748
0
                return NULL;
1749
0
            }
1750
0
        }
1751
0
    }
1752
1753
40.0k
    if (self->pending_bytes == NULL) {
1754
40.0k
        assert(self->pending_bytes_count == 0);
1755
40.0k
        self->pending_bytes = b;
1756
40.0k
    }
1757
0
    else if (!PyList_CheckExact(self->pending_bytes)) {
1758
0
        PyObject *list = PyList_New(2);
1759
0
        if (list == NULL) {
1760
0
            Py_DECREF(b);
1761
0
            return NULL;
1762
0
        }
1763
        // Since Python 3.12, allocating GC object won't trigger GC and release
1764
        // GIL. See https://github.com/python/cpython/issues/97922
1765
0
        assert(!PyList_CheckExact(self->pending_bytes));
1766
0
        PyList_SET_ITEM(list, 0, self->pending_bytes);
1767
0
        PyList_SET_ITEM(list, 1, b);
1768
0
        self->pending_bytes = list;
1769
0
    }
1770
0
    else {
1771
0
        if (PyList_Append(self->pending_bytes, b) < 0) {
1772
0
            Py_DECREF(b);
1773
0
            return NULL;
1774
0
        }
1775
0
        Py_DECREF(b);
1776
0
    }
1777
1778
40.0k
    self->pending_bytes_count += bytes_len;
1779
40.0k
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1780
40.0k
        text_needflush) {
1781
40.0k
        if (_textiowrapper_writeflush(self) < 0)
1782
0
            return NULL;
1783
40.0k
    }
1784
1785
40.0k
    if (needflush) {
1786
40.0k
        if (_PyFile_Flush(self->buffer) < 0) {
1787
0
            return NULL;
1788
0
        }
1789
40.0k
    }
1790
1791
40.0k
    if (self->snapshot != NULL) {
1792
0
        textiowrapper_set_decoded_chars(self, NULL);
1793
0
        Py_CLEAR(self->snapshot);
1794
0
    }
1795
1796
40.0k
    if (self->decoder) {
1797
0
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1798
0
        if (ret == NULL)
1799
0
            return NULL;
1800
0
        Py_DECREF(ret);
1801
0
    }
1802
1803
40.0k
    return PyLong_FromSsize_t(textlen);
1804
40.0k
}
1805
1806
/* Steal a reference to chars and store it in the decoded_char buffer;
1807
 */
1808
static void
1809
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1810
60
{
1811
60
    Py_XSETREF(self->decoded_chars, chars);
1812
60
    self->decoded_chars_used = 0;
1813
60
}
1814
1815
static PyObject *
1816
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1817
4
{
1818
4
    PyObject *chars;
1819
4
    Py_ssize_t avail;
1820
1821
4
    if (self->decoded_chars == NULL)
1822
4
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1823
1824
0
    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1825
0
             - self->decoded_chars_used);
1826
1827
0
    assert(avail >= 0);
1828
1829
0
    if (n < 0 || n > avail)
1830
0
        n = avail;
1831
1832
0
    if (self->decoded_chars_used > 0 || n < avail) {
1833
0
        chars = PyUnicode_Substring(self->decoded_chars,
1834
0
                                    self->decoded_chars_used,
1835
0
                                    self->decoded_chars_used + n);
1836
0
        if (chars == NULL)
1837
0
            return NULL;
1838
0
    }
1839
0
    else {
1840
0
        chars = Py_NewRef(self->decoded_chars);
1841
0
    }
1842
1843
0
    self->decoded_chars_used += n;
1844
0
    return chars;
1845
0
}
1846
1847
/* Read and decode the next chunk of data from the BufferedReader.
1848
 */
1849
static int
1850
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1851
30
{
1852
30
    PyObject *dec_buffer = NULL;
1853
30
    PyObject *dec_flags = NULL;
1854
30
    PyObject *input_chunk = NULL;
1855
30
    Py_buffer input_chunk_buf;
1856
30
    PyObject *decoded_chars, *chunk_size;
1857
30
    Py_ssize_t nbytes, nchars;
1858
30
    int eof;
1859
1860
    /* The return value is True unless EOF was reached.  The decoded string is
1861
     * placed in self._decoded_chars (replacing its previous value).  The
1862
     * entire input chunk is sent to the decoder, though some of it may remain
1863
     * buffered in the decoder, yet to be converted.
1864
     */
1865
1866
30
    if (self->decoder == NULL) {
1867
0
        _unsupported(self->state, "not readable");
1868
0
        return -1;
1869
0
    }
1870
1871
30
    if (self->telling) {
1872
        /* To prepare for tell(), we need to snapshot a point in the file
1873
         * where the decoder's input buffer is empty.
1874
         */
1875
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1876
0
                                                     &_Py_ID(getstate));
1877
0
        if (state == NULL)
1878
0
            return -1;
1879
        /* Given this, we know there was a valid snapshot point
1880
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1881
         */
1882
0
        if (!PyTuple_Check(state)) {
1883
0
            PyErr_SetString(PyExc_TypeError,
1884
0
                            "illegal decoder state");
1885
0
            Py_DECREF(state);
1886
0
            return -1;
1887
0
        }
1888
0
        if (!PyArg_ParseTuple(state,
1889
0
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1890
0
        {
1891
0
            Py_DECREF(state);
1892
0
            return -1;
1893
0
        }
1894
1895
0
        if (!PyBytes_Check(dec_buffer)) {
1896
0
            PyErr_Format(PyExc_TypeError,
1897
0
                         "illegal decoder state: the first item should be a "
1898
0
                         "bytes object, not '%.200s'",
1899
0
                         Py_TYPE(dec_buffer)->tp_name);
1900
0
            Py_DECREF(state);
1901
0
            return -1;
1902
0
        }
1903
0
        Py_INCREF(dec_buffer);
1904
0
        Py_INCREF(dec_flags);
1905
0
        Py_DECREF(state);
1906
0
    }
1907
1908
    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1909
30
    if (size_hint > 0) {
1910
0
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1911
0
    }
1912
30
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1913
30
    if (chunk_size == NULL)
1914
0
        goto fail;
1915
1916
30
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
1917
30
        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1918
30
        chunk_size);
1919
30
    Py_DECREF(chunk_size);
1920
30
    if (input_chunk == NULL)
1921
0
        goto fail;
1922
1923
30
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1924
0
        PyErr_Format(PyExc_TypeError,
1925
0
                     "underlying %s() should have returned a bytes-like object, "
1926
0
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1927
0
                     Py_TYPE(input_chunk)->tp_name);
1928
0
        goto fail;
1929
0
    }
1930
1931
30
    nbytes = input_chunk_buf.len;
1932
30
    eof = (nbytes == 0);
1933
1934
30
    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1935
30
                                          input_chunk, eof);
1936
30
    PyBuffer_Release(&input_chunk_buf);
1937
30
    if (decoded_chars == NULL)
1938
0
        goto fail;
1939
1940
30
    textiowrapper_set_decoded_chars(self, decoded_chars);
1941
30
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
1942
30
    if (nchars > 0)
1943
28
        self->b2cratio = (double) nbytes / nchars;
1944
2
    else
1945
2
        self->b2cratio = 0.0;
1946
30
    if (nchars > 0)
1947
28
        eof = 0;
1948
1949
30
    if (self->telling) {
1950
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1951
         * next input to be decoded is dec_buffer + input_chunk.
1952
         */
1953
0
        PyObject *next_input = dec_buffer;
1954
0
        PyBytes_Concat(&next_input, input_chunk);
1955
0
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1956
0
        if (next_input == NULL) {
1957
0
            goto fail;
1958
0
        }
1959
0
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1960
0
        if (snapshot == NULL) {
1961
0
            dec_flags = NULL;
1962
0
            goto fail;
1963
0
        }
1964
0
        Py_XSETREF(self->snapshot, snapshot);
1965
0
    }
1966
30
    Py_DECREF(input_chunk);
1967
1968
30
    return (eof == 0);
1969
1970
0
  fail:
1971
0
    Py_XDECREF(dec_buffer);
1972
0
    Py_XDECREF(dec_flags);
1973
0
    Py_XDECREF(input_chunk);
1974
0
    return -1;
1975
30
}
1976
1977
/*[clinic input]
1978
@critical_section
1979
_io.TextIOWrapper.read
1980
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
1981
    /
1982
[clinic start generated code]*/
1983
1984
static PyObject *
1985
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1986
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
1987
4
{
1988
4
    PyObject *result = NULL, *chunks = NULL;
1989
1990
4
    CHECK_ATTACHED(self);
1991
4
    CHECK_CLOSED(self);
1992
1993
4
    if (self->decoder == NULL) {
1994
0
        return _unsupported(self->state, "not readable");
1995
0
    }
1996
1997
4
    if (_textiowrapper_writeflush(self) < 0)
1998
0
        return NULL;
1999
2000
4
    if (n < 0) {
2001
        /* Read everything */
2002
4
        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
2003
4
        PyObject *decoded;
2004
4
        if (bytes == NULL)
2005
0
            goto fail;
2006
2007
4
        if (bytes == Py_None){
2008
0
            Py_DECREF(bytes);
2009
0
            PyErr_SetString(PyExc_BlockingIOError, "Read returned None.");
2010
0
            return NULL;
2011
0
        }
2012
2013
4
        _PyIO_State *state = self->state;
2014
4
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
2015
4
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
2016
4
                                                          bytes, 1);
2017
0
        else
2018
0
            decoded = PyObject_CallMethodObjArgs(
2019
0
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2020
4
        Py_DECREF(bytes);
2021
4
        if (check_decoded(decoded) < 0)
2022
0
            goto fail;
2023
2024
4
        result = textiowrapper_get_decoded_chars(self, -1);
2025
2026
4
        if (result == NULL) {
2027
0
            Py_DECREF(decoded);
2028
0
            return NULL;
2029
0
        }
2030
2031
4
        PyUnicode_AppendAndDel(&result, decoded);
2032
4
        if (result == NULL)
2033
0
            goto fail;
2034
2035
4
        if (self->snapshot != NULL) {
2036
0
            textiowrapper_set_decoded_chars(self, NULL);
2037
0
            Py_CLEAR(self->snapshot);
2038
0
        }
2039
4
        return result;
2040
4
    }
2041
0
    else {
2042
0
        int res = 1;
2043
0
        Py_ssize_t remaining = n;
2044
2045
0
        result = textiowrapper_get_decoded_chars(self, n);
2046
0
        if (result == NULL)
2047
0
            goto fail;
2048
0
        remaining -= PyUnicode_GET_LENGTH(result);
2049
2050
        /* Keep reading chunks until we have n characters to return */
2051
0
        while (remaining > 0) {
2052
0
            res = textiowrapper_read_chunk(self, remaining);
2053
0
            if (res < 0) {
2054
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2055
                   when EINTR occurs so we needn't do it ourselves. */
2056
0
                if (_PyIO_trap_eintr()) {
2057
0
                    continue;
2058
0
                }
2059
0
                goto fail;
2060
0
            }
2061
0
            if (res == 0)  /* EOF */
2062
0
                break;
2063
0
            if (chunks == NULL) {
2064
0
                chunks = PyList_New(0);
2065
0
                if (chunks == NULL)
2066
0
                    goto fail;
2067
0
            }
2068
0
            if (PyUnicode_GET_LENGTH(result) > 0 &&
2069
0
                PyList_Append(chunks, result) < 0)
2070
0
                goto fail;
2071
0
            Py_DECREF(result);
2072
0
            result = textiowrapper_get_decoded_chars(self, remaining);
2073
0
            if (result == NULL)
2074
0
                goto fail;
2075
0
            remaining -= PyUnicode_GET_LENGTH(result);
2076
0
        }
2077
0
        if (chunks != NULL) {
2078
0
            if (result != NULL && PyList_Append(chunks, result) < 0)
2079
0
                goto fail;
2080
0
            _Py_DECLARE_STR(empty, "");
2081
0
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2082
0
            if (result == NULL)
2083
0
                goto fail;
2084
0
            Py_CLEAR(chunks);
2085
0
        }
2086
0
        return result;
2087
0
    }
2088
0
  fail:
2089
0
    Py_XDECREF(result);
2090
0
    Py_XDECREF(chunks);
2091
0
    return NULL;
2092
4
}
2093
2094
2095
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2096
   that is to the NUL character. Otherwise the function will produce
2097
   incorrect results. */
2098
static const char *
2099
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2100
5.00M
{
2101
5.00M
    if (kind == PyUnicode_1BYTE_KIND) {
2102
5.12k
        assert(ch < 256);
2103
5.12k
        return (char *) memchr((const void *) s, (char) ch, end - s);
2104
5.12k
    }
2105
91.3M
    for (;;) {
2106
153M
        while (PyUnicode_READ(kind, s, 0) > ch)
2107
61.9M
            s += kind;
2108
91.3M
        if (PyUnicode_READ(kind, s, 0) == ch)
2109
4.99M
            return s;
2110
86.3M
        if (s == end)
2111
4.65k
            return NULL;
2112
86.3M
        s += kind;
2113
86.3M
    }
2114
4.99M
}
2115
2116
Py_ssize_t
2117
_PyIO_find_line_ending(
2118
    int translated, int universal, PyObject *readnl,
2119
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2120
21.8M
{
2121
21.8M
    Py_ssize_t len = (end - start)/kind;
2122
2123
21.8M
    if (translated) {
2124
        /* Newlines are already translated, only search for \n */
2125
5.48k
        const char *pos = find_control_char(kind, start, end, '\n');
2126
5.48k
        if (pos != NULL)
2127
5.45k
            return (pos - start)/kind + 1;
2128
28
        else {
2129
28
            *consumed = len;
2130
28
            return -1;
2131
28
        }
2132
5.48k
    }
2133
21.8M
    else if (universal) {
2134
        /* Universal newline search. Find any of \r, \r\n, \n
2135
         * The decoder ensures that \r\n are not split in two pieces
2136
         */
2137
16.8M
        const char *s = start;
2138
76.0M
        for (;;) {
2139
76.0M
            Py_UCS4 ch;
2140
            /* Fast path for non-control chars. The loop always ends
2141
               since the Unicode string is NUL-terminated. */
2142
234M
            while (PyUnicode_READ(kind, s, 0) > '\r')
2143
158M
                s += kind;
2144
76.0M
            if (s >= end) {
2145
32.1k
                *consumed = len;
2146
32.1k
                return -1;
2147
32.1k
            }
2148
76.0M
            ch = PyUnicode_READ(kind, s, 0);
2149
76.0M
            s += kind;
2150
76.0M
            if (ch == '\n')
2151
3.94M
                return (s - start)/kind;
2152
72.0M
            if (ch == '\r') {
2153
12.8M
                if (PyUnicode_READ(kind, s, 0) == '\n')
2154
441k
                    return (s - start)/kind + 1;
2155
12.3M
                else
2156
12.3M
                    return (s - start)/kind;
2157
12.8M
            }
2158
72.0M
        }
2159
16.8M
    }
2160
4.99M
    else {
2161
        /* Non-universal mode. */
2162
4.99M
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2163
4.99M
        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2164
        /* Assume that readnl is an ASCII character. */
2165
4.99M
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2166
4.99M
        if (readnl_len == 1) {
2167
4.99M
            const char *pos = find_control_char(kind, start, end, nl[0]);
2168
4.99M
            if (pos != NULL)
2169
4.99M
                return (pos - start)/kind + 1;
2170
4.64k
            *consumed = len;
2171
4.64k
            return -1;
2172
4.99M
        }
2173
0
        else {
2174
0
            const char *s = start;
2175
0
            const char *e = end - (readnl_len - 1)*kind;
2176
0
            const char *pos;
2177
0
            if (e < s)
2178
0
                e = s;
2179
0
            while (s < e) {
2180
0
                Py_ssize_t i;
2181
0
                const char *pos = find_control_char(kind, s, end, nl[0]);
2182
0
                if (pos == NULL || pos >= e)
2183
0
                    break;
2184
0
                for (i = 1; i < readnl_len; i++) {
2185
0
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2186
0
                        break;
2187
0
                }
2188
0
                if (i == readnl_len)
2189
0
                    return (pos - start)/kind + readnl_len;
2190
0
                s = pos + kind;
2191
0
            }
2192
0
            pos = find_control_char(kind, e, end, nl[0]);
2193
0
            if (pos == NULL)
2194
0
                *consumed = len;
2195
0
            else
2196
0
                *consumed = (pos - start)/kind;
2197
0
            return -1;
2198
0
        }
2199
4.99M
    }
2200
21.8M
}
2201
2202
static PyObject *
2203
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2204
5.45k
{
2205
5.45k
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2206
5.45k
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
2207
5.45k
    int res;
2208
2209
5.45k
    CHECK_CLOSED(self);
2210
2211
5.45k
    if (_textiowrapper_writeflush(self) < 0)
2212
0
        return NULL;
2213
2214
5.45k
    chunked = 0;
2215
2216
5.48k
    while (1) {
2217
5.48k
        const char *ptr;
2218
5.48k
        Py_ssize_t line_len;
2219
5.48k
        int kind;
2220
5.48k
        Py_ssize_t consumed = 0;
2221
2222
        /* First, get some data if necessary */
2223
5.48k
        res = 1;
2224
5.51k
        while (!self->decoded_chars ||
2225
5.48k
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2226
30
            res = textiowrapper_read_chunk(self, 0);
2227
30
            if (res < 0) {
2228
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2229
                   when EINTR occurs so we needn't do it ourselves. */
2230
0
                if (_PyIO_trap_eintr()) {
2231
0
                    continue;
2232
0
                }
2233
0
                goto error;
2234
0
            }
2235
30
            if (res == 0)
2236
2
                break;
2237
30
        }
2238
5.48k
        if (res == 0) {
2239
            /* end of file */
2240
2
            textiowrapper_set_decoded_chars(self, NULL);
2241
2
            Py_CLEAR(self->snapshot);
2242
2
            start = endpos = offset_to_buffer = 0;
2243
2
            break;
2244
2
        }
2245
2246
5.48k
        if (remaining == NULL) {
2247
5.48k
            line = Py_NewRef(self->decoded_chars);
2248
5.48k
            start = self->decoded_chars_used;
2249
5.48k
            offset_to_buffer = 0;
2250
5.48k
        }
2251
0
        else {
2252
0
            assert(self->decoded_chars_used == 0);
2253
0
            line = PyUnicode_Concat(remaining, self->decoded_chars);
2254
0
            start = 0;
2255
0
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2256
0
            Py_CLEAR(remaining);
2257
0
            if (line == NULL)
2258
0
                goto error;
2259
0
        }
2260
2261
5.48k
        ptr = PyUnicode_DATA(line);
2262
5.48k
        line_len = PyUnicode_GET_LENGTH(line);
2263
5.48k
        kind = PyUnicode_KIND(line);
2264
2265
5.48k
        endpos = _PyIO_find_line_ending(
2266
5.48k
            self->readtranslate, self->readuniversal, self->readnl,
2267
5.48k
            kind,
2268
5.48k
            ptr + kind * start,
2269
5.48k
            ptr + kind * line_len,
2270
5.48k
            &consumed);
2271
5.48k
        if (endpos >= 0) {
2272
5.45k
            endpos += start;
2273
5.45k
            if (limit >= 0 && (endpos - start) + chunked >= limit)
2274
0
                endpos = start + limit - chunked;
2275
5.45k
            break;
2276
5.45k
        }
2277
2278
        /* We can put aside up to `endpos` */
2279
28
        endpos = consumed + start;
2280
28
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
2281
            /* Didn't find line ending, but reached length limit */
2282
0
            endpos = start + limit - chunked;
2283
0
            break;
2284
0
        }
2285
2286
28
        if (endpos > start) {
2287
            /* No line ending seen yet - put aside current data */
2288
26
            PyObject *s;
2289
26
            if (chunks == NULL) {
2290
26
                chunks = PyList_New(0);
2291
26
                if (chunks == NULL)
2292
0
                    goto error;
2293
26
            }
2294
26
            s = PyUnicode_Substring(line, start, endpos);
2295
26
            if (s == NULL)
2296
0
                goto error;
2297
26
            if (PyList_Append(chunks, s) < 0) {
2298
0
                Py_DECREF(s);
2299
0
                goto error;
2300
0
            }
2301
26
            chunked += PyUnicode_GET_LENGTH(s);
2302
26
            Py_DECREF(s);
2303
26
        }
2304
        /* There may be some remaining bytes we'll have to prepend to the
2305
           next chunk of data */
2306
28
        if (endpos < line_len) {
2307
0
            remaining = PyUnicode_Substring(line, endpos, line_len);
2308
0
            if (remaining == NULL)
2309
0
                goto error;
2310
0
        }
2311
28
        Py_CLEAR(line);
2312
        /* We have consumed the buffer */
2313
28
        textiowrapper_set_decoded_chars(self, NULL);
2314
28
    }
2315
2316
5.45k
    if (line != NULL) {
2317
        /* Our line ends in the current buffer */
2318
5.45k
        self->decoded_chars_used = endpos - offset_to_buffer;
2319
5.45k
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2320
5.45k
            PyObject *s = PyUnicode_Substring(line, start, endpos);
2321
5.45k
            Py_CLEAR(line);
2322
5.45k
            if (s == NULL)
2323
0
                goto error;
2324
5.45k
            line = s;
2325
5.45k
        }
2326
5.45k
    }
2327
5.45k
    if (remaining != NULL) {
2328
0
        if (chunks == NULL) {
2329
0
            chunks = PyList_New(0);
2330
0
            if (chunks == NULL)
2331
0
                goto error;
2332
0
        }
2333
0
        if (PyList_Append(chunks, remaining) < 0)
2334
0
            goto error;
2335
0
        Py_CLEAR(remaining);
2336
0
    }
2337
5.45k
    if (chunks != NULL) {
2338
26
        if (line != NULL) {
2339
26
            if (PyList_Append(chunks, line) < 0)
2340
0
                goto error;
2341
26
            Py_DECREF(line);
2342
26
        }
2343
26
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
2344
26
        if (line == NULL)
2345
0
            goto error;
2346
26
        Py_CLEAR(chunks);
2347
26
    }
2348
5.45k
    if (line == NULL) {
2349
2
        line = &_Py_STR(empty);
2350
2
    }
2351
2352
5.45k
    return line;
2353
2354
0
  error:
2355
0
    Py_XDECREF(chunks);
2356
0
    Py_XDECREF(remaining);
2357
0
    Py_XDECREF(line);
2358
0
    return NULL;
2359
5.45k
}
2360
2361
/*[clinic input]
2362
@critical_section
2363
_io.TextIOWrapper.readline
2364
    size: Py_ssize_t = -1
2365
    /
2366
[clinic start generated code]*/
2367
2368
static PyObject *
2369
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2370
/*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2371
0
{
2372
0
    CHECK_ATTACHED(self);
2373
0
    return _textiowrapper_readline(self, size);
2374
0
}
2375
2376
/* Seek and Tell */
2377
2378
typedef struct {
2379
    Py_off_t start_pos;
2380
    int dec_flags;
2381
    int bytes_to_feed;
2382
    int chars_to_skip;
2383
    char need_eof;
2384
} cookie_type;
2385
2386
/*
2387
   To speed up cookie packing/unpacking, we store the fields in a temporary
2388
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
2389
   The following macros define at which offsets in the intermediary byte
2390
   string the various CookieStruct fields will be stored.
2391
 */
2392
2393
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2394
2395
#if PY_BIG_ENDIAN
2396
/* We want the least significant byte of start_pos to also be the least
2397
   significant byte of the cookie, which means that in big-endian mode we
2398
   must copy the fields in reverse order. */
2399
2400
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2401
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2402
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2403
# define OFF_CHARS_TO_SKIP  (sizeof(char))
2404
# define OFF_NEED_EOF       0
2405
2406
#else
2407
/* Little-endian mode: the least significant byte of start_pos will
2408
   naturally end up the least significant byte of the cookie. */
2409
2410
0
# define OFF_START_POS      0
2411
0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2412
0
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2413
0
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2414
0
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2415
2416
#endif
2417
2418
static int
2419
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2420
0
{
2421
0
    unsigned char buffer[COOKIE_BUF_LEN];
2422
0
    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2423
0
    if (cookieLong == NULL)
2424
0
        return -1;
2425
2426
0
    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2427
0
                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
2428
0
        Py_DECREF(cookieLong);
2429
0
        return -1;
2430
0
    }
2431
0
    Py_DECREF(cookieLong);
2432
2433
0
    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2434
0
    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2435
0
    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2436
0
    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2437
0
    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2438
2439
0
    return 0;
2440
0
}
2441
2442
static PyObject *
2443
textiowrapper_build_cookie(cookie_type *cookie)
2444
0
{
2445
0
    unsigned char buffer[COOKIE_BUF_LEN];
2446
2447
0
    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2448
0
    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2449
0
    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2450
0
    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2451
0
    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2452
2453
0
    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2454
0
                                 PY_LITTLE_ENDIAN, 0);
2455
0
}
2456
2457
static int
2458
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2459
0
{
2460
0
    PyObject *res;
2461
    /* When seeking to the start of the stream, we call decoder.reset()
2462
       rather than decoder.getstate().
2463
       This is for a few decoders such as utf-16 for which the state value
2464
       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2465
       utf-16, that we are expecting a BOM).
2466
    */
2467
0
    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2468
0
        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2469
0
    }
2470
0
    else {
2471
0
        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2472
0
                                   "((yi))", "", cookie->dec_flags);
2473
0
    }
2474
0
    if (res == NULL) {
2475
0
        return -1;
2476
0
    }
2477
0
    Py_DECREF(res);
2478
0
    return 0;
2479
0
}
2480
2481
static int
2482
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2483
0
{
2484
0
    PyObject *res;
2485
0
    if (start_of_stream) {
2486
0
        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2487
0
        self->encoding_start_of_stream = 1;
2488
0
    }
2489
0
    else {
2490
0
        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2491
0
                                        _PyLong_GetZero());
2492
0
        self->encoding_start_of_stream = 0;
2493
0
    }
2494
0
    if (res == NULL)
2495
0
        return -1;
2496
0
    Py_DECREF(res);
2497
0
    return 0;
2498
0
}
2499
2500
static int
2501
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2502
0
{
2503
    /* Same as _textiowrapper_decoder_setstate() above. */
2504
0
    return _textiowrapper_encoder_reset(
2505
0
        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2506
0
}
2507
2508
/*[clinic input]
2509
@critical_section
2510
_io.TextIOWrapper.seek
2511
    cookie as cookieObj: object
2512
      Zero or an opaque number returned by tell().
2513
    whence: int(c_default='0') = os.SEEK_SET
2514
      The relative position to seek from.
2515
    /
2516
2517
Set the stream position, and return the new stream position.
2518
2519
Four operations are supported, given by the following argument
2520
combinations:
2521
2522
- seek(0, SEEK_SET): Rewind to the start of the stream.
2523
- seek(cookie, SEEK_SET): Restore a previous position;
2524
  'cookie' must be a number returned by tell().
2525
- seek(0, SEEK_END): Fast-forward to the end of the stream.
2526
- seek(0, SEEK_CUR): Leave the current stream position unchanged.
2527
2528
Any other argument combinations are invalid,
2529
and may raise exceptions.
2530
[clinic start generated code]*/
2531
2532
static PyObject *
2533
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2534
/*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2535
0
{
2536
0
    PyObject *posobj;
2537
0
    cookie_type cookie;
2538
0
    PyObject *res;
2539
0
    int cmp;
2540
0
    PyObject *snapshot;
2541
2542
0
    CHECK_ATTACHED(self);
2543
0
    CHECK_CLOSED(self);
2544
2545
0
    Py_INCREF(cookieObj);
2546
2547
0
    if (!self->seekable) {
2548
0
        _unsupported(self->state, "underlying stream is not seekable");
2549
0
        goto fail;
2550
0
    }
2551
2552
0
    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2553
2554
0
    switch (whence) {
2555
0
    case SEEK_CUR:
2556
        /* seek relative to current position */
2557
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2558
0
        if (cmp < 0)
2559
0
            goto fail;
2560
2561
0
        if (cmp == 0) {
2562
0
            _unsupported(self->state, "can't do nonzero cur-relative seeks");
2563
0
            goto fail;
2564
0
        }
2565
2566
        /* Seeking to the current position should attempt to
2567
         * sync the underlying buffer with the current position.
2568
         */
2569
0
        Py_DECREF(cookieObj);
2570
0
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2571
0
        if (cookieObj == NULL)
2572
0
            goto fail;
2573
0
        break;
2574
2575
0
    case SEEK_END:
2576
        /* seek relative to end of file */
2577
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2578
0
        if (cmp < 0)
2579
0
            goto fail;
2580
2581
0
        if (cmp == 0) {
2582
0
            _unsupported(self->state, "can't do nonzero end-relative seeks");
2583
0
            goto fail;
2584
0
        }
2585
2586
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
2587
0
            goto fail;
2588
0
        }
2589
2590
0
        textiowrapper_set_decoded_chars(self, NULL);
2591
0
        Py_CLEAR(self->snapshot);
2592
0
        if (self->decoder) {
2593
0
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2594
0
            if (res == NULL)
2595
0
                goto fail;
2596
0
            Py_DECREF(res);
2597
0
        }
2598
2599
0
        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2600
0
        Py_CLEAR(cookieObj);
2601
0
        if (res == NULL)
2602
0
            goto fail;
2603
0
        if (self->encoder) {
2604
            /* If seek() == 0, we are at the start of stream, otherwise not */
2605
0
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2606
0
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2607
0
                Py_DECREF(res);
2608
0
                goto fail;
2609
0
            }
2610
0
        }
2611
0
        return res;
2612
2613
0
    case SEEK_SET:
2614
0
        break;
2615
2616
0
    default:
2617
0
        PyErr_Format(PyExc_ValueError,
2618
0
                     "invalid whence (%d, should be %d, %d or %d)", whence,
2619
0
                     SEEK_SET, SEEK_CUR, SEEK_END);
2620
0
        goto fail;
2621
0
    }
2622
2623
0
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2624
0
    if (cmp < 0)
2625
0
        goto fail;
2626
2627
0
    if (cmp == 1) {
2628
0
        PyErr_Format(PyExc_ValueError,
2629
0
                     "negative seek position %R", cookieObj);
2630
0
        goto fail;
2631
0
    }
2632
2633
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2634
0
        goto fail;
2635
0
    }
2636
2637
    /* The strategy of seek() is to go back to the safe start point
2638
     * and replay the effect of read(chars_to_skip) from there.
2639
     */
2640
0
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2641
0
        goto fail;
2642
2643
    /* Seek back to the safe start point. */
2644
0
    posobj = PyLong_FromOff_t(cookie.start_pos);
2645
0
    if (posobj == NULL)
2646
0
        goto fail;
2647
0
    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2648
0
    Py_DECREF(posobj);
2649
0
    if (res == NULL)
2650
0
        goto fail;
2651
0
    Py_DECREF(res);
2652
2653
0
    textiowrapper_set_decoded_chars(self, NULL);
2654
0
    Py_CLEAR(self->snapshot);
2655
2656
    /* Restore the decoder to its state from the safe start point. */
2657
0
    if (self->decoder) {
2658
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2659
0
            goto fail;
2660
0
    }
2661
2662
0
    if (cookie.chars_to_skip) {
2663
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2664
0
        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2665
0
                                                     "i", cookie.bytes_to_feed);
2666
0
        PyObject *decoded;
2667
2668
0
        if (input_chunk == NULL)
2669
0
            goto fail;
2670
2671
0
        if (!PyBytes_Check(input_chunk)) {
2672
0
            PyErr_Format(PyExc_TypeError,
2673
0
                         "underlying read() should have returned a bytes "
2674
0
                         "object, not '%.200s'",
2675
0
                         Py_TYPE(input_chunk)->tp_name);
2676
0
            Py_DECREF(input_chunk);
2677
0
            goto fail;
2678
0
        }
2679
2680
0
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2681
0
        if (snapshot == NULL) {
2682
0
            goto fail;
2683
0
        }
2684
0
        Py_XSETREF(self->snapshot, snapshot);
2685
2686
0
        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2687
0
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2688
2689
0
        if (check_decoded(decoded) < 0)
2690
0
            goto fail;
2691
2692
0
        textiowrapper_set_decoded_chars(self, decoded);
2693
2694
        /* Skip chars_to_skip of the decoded characters. */
2695
0
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2696
0
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2697
0
            goto fail;
2698
0
        }
2699
0
        self->decoded_chars_used = cookie.chars_to_skip;
2700
0
    }
2701
0
    else {
2702
0
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2703
0
        if (snapshot == NULL)
2704
0
            goto fail;
2705
0
        Py_XSETREF(self->snapshot, snapshot);
2706
0
    }
2707
2708
    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2709
0
    if (self->encoder) {
2710
0
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2711
0
            goto fail;
2712
0
    }
2713
0
    return cookieObj;
2714
0
  fail:
2715
0
    Py_XDECREF(cookieObj);
2716
0
    return NULL;
2717
2718
0
}
2719
2720
/*[clinic input]
2721
@critical_section
2722
_io.TextIOWrapper.tell
2723
2724
Return the stream position as an opaque number.
2725
2726
The return value of tell() can be given as input to seek(), to restore a
2727
previous stream position.
2728
[clinic start generated code]*/
2729
2730
static PyObject *
2731
_io_TextIOWrapper_tell_impl(textio *self)
2732
/*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2733
0
{
2734
0
    PyObject *res;
2735
0
    PyObject *posobj = NULL;
2736
0
    cookie_type cookie = {0,0,0,0,0};
2737
0
    PyObject *next_input;
2738
0
    Py_ssize_t chars_to_skip, chars_decoded;
2739
0
    Py_ssize_t skip_bytes, skip_back;
2740
0
    PyObject *saved_state = NULL;
2741
0
    const char *input, *input_end;
2742
0
    Py_ssize_t dec_buffer_len;
2743
0
    int dec_flags;
2744
2745
0
    CHECK_ATTACHED(self);
2746
0
    CHECK_CLOSED(self);
2747
2748
0
    if (!self->seekable) {
2749
0
        _unsupported(self->state, "underlying stream is not seekable");
2750
0
        goto fail;
2751
0
    }
2752
0
    if (!self->telling) {
2753
0
        PyErr_SetString(PyExc_OSError,
2754
0
                        "telling position disabled by next() call");
2755
0
        goto fail;
2756
0
    }
2757
2758
0
    if (_textiowrapper_writeflush(self) < 0)
2759
0
        return NULL;
2760
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2761
0
        goto fail;
2762
0
    }
2763
2764
0
    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2765
0
    if (posobj == NULL)
2766
0
        goto fail;
2767
2768
0
    if (self->decoder == NULL || self->snapshot == NULL) {
2769
0
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2770
0
        return posobj;
2771
0
    }
2772
2773
#if defined(HAVE_LARGEFILE_SUPPORT)
2774
    cookie.start_pos = PyLong_AsLongLong(posobj);
2775
#else
2776
0
    cookie.start_pos = PyLong_AsLong(posobj);
2777
0
#endif
2778
0
    Py_DECREF(posobj);
2779
0
    if (PyErr_Occurred())
2780
0
        goto fail;
2781
2782
    /* Skip backward to the snapshot point (see _read_chunk). */
2783
0
    assert(PyTuple_Check(self->snapshot));
2784
0
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2785
0
        goto fail;
2786
2787
0
    assert (PyBytes_Check(next_input));
2788
2789
0
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2790
2791
    /* How many decoded characters have been used up since the snapshot? */
2792
0
    if (self->decoded_chars_used == 0)  {
2793
        /* We haven't moved from the snapshot point. */
2794
0
        return textiowrapper_build_cookie(&cookie);
2795
0
    }
2796
2797
0
    chars_to_skip = self->decoded_chars_used;
2798
2799
    /* Decoder state will be restored at the end */
2800
0
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2801
0
                                             &_Py_ID(getstate));
2802
0
    if (saved_state == NULL)
2803
0
        goto fail;
2804
2805
0
#define DECODER_GETSTATE() do { \
2806
0
        PyObject *dec_buffer; \
2807
0
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2808
0
            &_Py_ID(getstate)); \
2809
0
        if (_state == NULL) \
2810
0
            goto fail; \
2811
0
        if (!PyTuple_Check(_state)) { \
2812
0
            PyErr_SetString(PyExc_TypeError, \
2813
0
                            "illegal decoder state"); \
2814
0
            Py_DECREF(_state); \
2815
0
            goto fail; \
2816
0
        } \
2817
0
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2818
0
                              &dec_buffer, &dec_flags)) \
2819
0
        { \
2820
0
            Py_DECREF(_state); \
2821
0
            goto fail; \
2822
0
        } \
2823
0
        if (!PyBytes_Check(dec_buffer)) { \
2824
0
            PyErr_Format(PyExc_TypeError, \
2825
0
                         "illegal decoder state: the first item should be a " \
2826
0
                         "bytes object, not '%.200s'", \
2827
0
                         Py_TYPE(dec_buffer)->tp_name); \
2828
0
            Py_DECREF(_state); \
2829
0
            goto fail; \
2830
0
        } \
2831
0
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2832
0
        Py_DECREF(_state); \
2833
0
    } while (0)
2834
2835
0
#define DECODER_DECODE(start, len, res) do { \
2836
0
        PyObject *_decoded = _PyObject_CallMethod( \
2837
0
            self->decoder, &_Py_ID(decode), "y#", start, len); \
2838
0
        if (check_decoded(_decoded) < 0) \
2839
0
            goto fail; \
2840
0
        res = PyUnicode_GET_LENGTH(_decoded); \
2841
0
        Py_DECREF(_decoded); \
2842
0
    } while (0)
2843
2844
    /* Fast search for an acceptable start point, close to our
2845
       current pos */
2846
0
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2847
0
    skip_back = 1;
2848
0
    assert(skip_bytes <= PyBytes_GET_SIZE(next_input));
2849
0
    input = PyBytes_AS_STRING(next_input);
2850
0
    while (skip_bytes > 0) {
2851
        /* Decode up to temptative start point */
2852
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2853
0
            goto fail;
2854
0
        DECODER_DECODE(input, skip_bytes, chars_decoded);
2855
0
        if (chars_decoded <= chars_to_skip) {
2856
0
            DECODER_GETSTATE();
2857
0
            if (dec_buffer_len == 0) {
2858
                /* Before pos and no bytes buffered in decoder => OK */
2859
0
                cookie.dec_flags = dec_flags;
2860
0
                chars_to_skip -= chars_decoded;
2861
0
                break;
2862
0
            }
2863
            /* Skip back by buffered amount and reset heuristic */
2864
0
            skip_bytes -= dec_buffer_len;
2865
0
            skip_back = 1;
2866
0
        }
2867
0
        else {
2868
            /* We're too far ahead, skip back a bit */
2869
0
            skip_bytes -= skip_back;
2870
0
            skip_back *= 2;
2871
0
        }
2872
0
    }
2873
0
    if (skip_bytes <= 0) {
2874
0
        skip_bytes = 0;
2875
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2876
0
            goto fail;
2877
0
    }
2878
2879
    /* Note our initial start point. */
2880
0
    cookie.start_pos += skip_bytes;
2881
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2882
0
    if (chars_to_skip == 0)
2883
0
        goto finally;
2884
2885
    /* We should be close to the desired position.  Now feed the decoder one
2886
     * byte at a time until we reach the `chars_to_skip` target.
2887
     * As we go, note the nearest "safe start point" before the current
2888
     * location (a point where the decoder has nothing buffered, so seek()
2889
     * can safely start from there and advance to this location).
2890
     */
2891
0
    chars_decoded = 0;
2892
0
    input = PyBytes_AS_STRING(next_input);
2893
0
    input_end = input + PyBytes_GET_SIZE(next_input);
2894
0
    input += skip_bytes;
2895
0
    while (input < input_end) {
2896
0
        Py_ssize_t n;
2897
2898
0
        DECODER_DECODE(input, (Py_ssize_t)1, n);
2899
        /* We got n chars for 1 byte */
2900
0
        chars_decoded += n;
2901
0
        cookie.bytes_to_feed += 1;
2902
0
        DECODER_GETSTATE();
2903
2904
0
        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2905
            /* Decoder buffer is empty, so this is a safe start point. */
2906
0
            cookie.start_pos += cookie.bytes_to_feed;
2907
0
            chars_to_skip -= chars_decoded;
2908
0
            cookie.dec_flags = dec_flags;
2909
0
            cookie.bytes_to_feed = 0;
2910
0
            chars_decoded = 0;
2911
0
        }
2912
0
        if (chars_decoded >= chars_to_skip)
2913
0
            break;
2914
0
        input++;
2915
0
    }
2916
0
    if (input == input_end) {
2917
        /* We didn't get enough decoded data; signal EOF to get more. */
2918
0
        PyObject *decoded = _PyObject_CallMethod(
2919
0
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2920
0
        if (check_decoded(decoded) < 0)
2921
0
            goto fail;
2922
0
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
2923
0
        Py_DECREF(decoded);
2924
0
        cookie.need_eof = 1;
2925
2926
0
        if (chars_decoded < chars_to_skip) {
2927
0
            PyErr_SetString(PyExc_OSError,
2928
0
                            "can't reconstruct logical file position");
2929
0
            goto fail;
2930
0
        }
2931
0
    }
2932
2933
0
finally:
2934
0
    res = PyObject_CallMethodOneArg(
2935
0
            self->decoder, &_Py_ID(setstate), saved_state);
2936
0
    Py_DECREF(saved_state);
2937
0
    if (res == NULL)
2938
0
        return NULL;
2939
0
    Py_DECREF(res);
2940
2941
    /* The returned cookie corresponds to the last safe start point. */
2942
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2943
0
    return textiowrapper_build_cookie(&cookie);
2944
2945
0
fail:
2946
0
    if (saved_state) {
2947
0
        PyObject *exc = PyErr_GetRaisedException();
2948
0
        res = PyObject_CallMethodOneArg(
2949
0
                self->decoder, &_Py_ID(setstate), saved_state);
2950
0
        _PyErr_ChainExceptions1(exc);
2951
0
        Py_DECREF(saved_state);
2952
0
        Py_XDECREF(res);
2953
0
    }
2954
0
    return NULL;
2955
0
}
2956
2957
/*[clinic input]
2958
@critical_section
2959
_io.TextIOWrapper.truncate
2960
    pos: object = None
2961
    /
2962
[clinic start generated code]*/
2963
2964
static PyObject *
2965
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2966
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2967
0
{
2968
0
    CHECK_ATTACHED(self)
2969
2970
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2971
0
        return NULL;
2972
0
    }
2973
2974
0
    return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2975
0
}
2976
2977
static PyObject *
textiowrapper_repr(PyObject *op)
{
    /* Build "<TypeName[ name=...][ mode=...] encoding=...>".
     *
     * The "name" and "mode" parts are included only when the attribute
     * exists.  A ValueError raised while looking up "name" is ignored; any
     * other lookup error aborts the repr.  Py_ReprEnter()/Py_ReprLeave()
     * guard against re-entrant calls.
     */
    textio *self = textio_CAST(op);
    const char *type_name = Py_TYPE(self)->tp_name;
    PyObject *text;
    PyObject *attr;
    PyObject *piece;
    int guard;
    int entered = 0;    /* set once Py_ReprEnter() has succeeded */

    CHECK_INITIALIZED(self);

    text = PyUnicode_FromFormat("<%.100s", type_name);
    if (text == NULL) {
        return NULL;
    }

    guard = Py_ReprEnter(op);
    if (guard > 0) {
        PyErr_Format(PyExc_RuntimeError,
                     "reentrant call inside %.100s.__repr__",
                     type_name);
        goto error;
    }
    if (guard < 0) {
        goto error;
    }
    entered = 1;

    /* Optional " name=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &attr) < 0) {
        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
            goto error;
        }
        /* Ignore ValueError raised if the underlying stream was detached */
        PyErr_Clear();
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" name=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto error;
        }
    }

    /* Optional " mode=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &attr) < 0) {
        goto error;
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" mode=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto error;
        }
    }

    /* Closing part; the result may be NULL if formatting fails. */
    piece = PyUnicode_FromFormat("%U encoding=%R>",
                                 text, self->encoding);
    Py_DECREF(text);
    Py_ReprLeave(op);
    return piece;

  error:
    Py_XDECREF(text);
    if (entered) {
        Py_ReprLeave(op);
    }
    return NULL;
}
3043
3044
3045
/* Inquiries */
3046
3047
/*[clinic input]
3048
@critical_section
3049
_io.TextIOWrapper.fileno
3050
[clinic start generated code]*/
3051
3052
static PyObject *
3053
_io_TextIOWrapper_fileno_impl(textio *self)
3054
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3055
0
{
3056
0
    CHECK_ATTACHED(self);
3057
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3058
0
}
3059
3060
/*[clinic input]
3061
@critical_section
3062
_io.TextIOWrapper.seekable
3063
[clinic start generated code]*/
3064
3065
static PyObject *
3066
_io_TextIOWrapper_seekable_impl(textio *self)
3067
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3068
0
{
3069
0
    CHECK_ATTACHED(self);
3070
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3071
0
}
3072
3073
/*[clinic input]
3074
@critical_section
3075
_io.TextIOWrapper.readable
3076
[clinic start generated code]*/
3077
3078
static PyObject *
3079
_io_TextIOWrapper_readable_impl(textio *self)
3080
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3081
0
{
3082
0
    CHECK_ATTACHED(self);
3083
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3084
0
}
3085
3086
/*[clinic input]
3087
@critical_section
3088
_io.TextIOWrapper.writable
3089
[clinic start generated code]*/
3090
3091
static PyObject *
3092
_io_TextIOWrapper_writable_impl(textio *self)
3093
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3094
0
{
3095
0
    CHECK_ATTACHED(self);
3096
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3097
0
}
3098
3099
/*[clinic input]
3100
@critical_section
3101
_io.TextIOWrapper.isatty
3102
[clinic start generated code]*/
3103
3104
static PyObject *
3105
_io_TextIOWrapper_isatty_impl(textio *self)
3106
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3107
0
{
3108
0
    CHECK_ATTACHED(self);
3109
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3110
0
}
3111
3112
/*[clinic input]
3113
@critical_section
3114
_io.TextIOWrapper.flush
3115
[clinic start generated code]*/
3116
3117
static PyObject *
3118
_io_TextIOWrapper_flush_impl(textio *self)
3119
/*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3120
6
{
3121
6
    CHECK_ATTACHED(self);
3122
6
    CHECK_CLOSED(self);
3123
6
    self->telling = self->seekable;
3124
6
    if (_textiowrapper_writeflush(self) < 0)
3125
0
        return NULL;
3126
6
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3127
6
}
3128
3129
/*[clinic input]
3130
@critical_section
3131
_io.TextIOWrapper.close
3132
[clinic start generated code]*/
3133
3134
static PyObject *
3135
_io_TextIOWrapper_close_impl(textio *self)
3136
/*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3137
6
{
3138
6
    PyObject *res;
3139
6
    int r;
3140
6
    CHECK_ATTACHED(self);
3141
3142
6
    res = _io_TextIOWrapper_closed_get_impl(self);
3143
6
    if (res == NULL)
3144
0
        return NULL;
3145
6
    r = PyObject_IsTrue(res);
3146
6
    Py_DECREF(res);
3147
6
    if (r < 0)
3148
0
        return NULL;
3149
3150
6
    if (r > 0) {
3151
0
        Py_RETURN_NONE; /* stream already closed */
3152
0
    }
3153
6
    if (self->detached) {
3154
0
        Py_RETURN_NONE; /* gh-142594 null pointer issue */
3155
0
    }
3156
6
    else {
3157
6
        PyObject *exc = NULL;
3158
6
        if (self->finalizing) {
3159
0
            res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3160
0
                                            (PyObject *)self);
3161
0
            if (res) {
3162
0
                Py_DECREF(res);
3163
0
            }
3164
0
            else {
3165
0
                PyErr_Clear();
3166
0
            }
3167
0
        }
3168
6
        if (_PyFile_Flush((PyObject *)self) < 0) {
3169
0
            exc = PyErr_GetRaisedException();
3170
0
        }
3171
3172
6
        res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3173
6
        if (exc != NULL) {
3174
0
            _PyErr_ChainExceptions1(exc);
3175
0
            Py_CLEAR(res);
3176
0
        }
3177
6
        return res;
3178
6
    }
3179
6
}
3180
3181
static PyObject *
textiowrapper_iternext_lock_held(PyObject *op)
{
    /* Return the next line, or NULL at end of iteration / on error.
     *
     * Iterating disables tell() (self->telling = 0) until an empty line is
     * read; at that point the snapshot is dropped and telling is restored
     * to self->seekable.  Must be called with op's critical section held.
     */
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
    textio *self = textio_CAST(op);
    PyObject *line;

    CHECK_ATTACHED(self);

    self->telling = 0;
    if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
        /* Not the exact TextIOWrapper type: call readline() as a method. */
        line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        line = _textiowrapper_readline(self, -1);
    }

    if (line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(line) != 0) {
        return line;
    }

    /* Reached EOF or would have blocked */
    Py_DECREF(line);
    Py_CLEAR(self->snapshot);
    self->telling = self->seekable;
    return NULL;
}
3219
3220
static PyObject *
textiowrapper_iternext(PyObject *op)
{
    /* tp_iternext entry point: run the locked implementation inside a
       critical section on op. */
    PyObject *next_line;

    Py_BEGIN_CRITICAL_SECTION(op);
    next_line = textiowrapper_iternext_lock_held(op);
    Py_END_CRITICAL_SECTION();

    return next_line;
}
3229
3230
/*[clinic input]
3231
@critical_section
3232
@getter
3233
_io.TextIOWrapper.name
3234
[clinic start generated code]*/
3235
3236
static PyObject *
3237
_io_TextIOWrapper_name_get_impl(textio *self)
3238
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3239
0
{
3240
0
    CHECK_ATTACHED(self);
3241
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3242
0
}
3243
3244
/*[clinic input]
3245
@critical_section
3246
@getter
3247
_io.TextIOWrapper.closed
3248
[clinic start generated code]*/
3249
3250
static PyObject *
3251
_io_TextIOWrapper_closed_get_impl(textio *self)
3252
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3253
20
{
3254
20
    CHECK_ATTACHED(self);
3255
20
    return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3256
20
}
3257
3258
/*[clinic input]
3259
@critical_section
3260
@getter
3261
_io.TextIOWrapper.newlines
3262
[clinic start generated code]*/
3263
3264
static PyObject *
3265
_io_TextIOWrapper_newlines_get_impl(textio *self)
3266
/*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3267
0
{
3268
0
    PyObject *res;
3269
0
    CHECK_ATTACHED(self);
3270
0
    if (self->decoder == NULL ||
3271
0
        PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3272
0
    {
3273
0
        Py_RETURN_NONE;
3274
0
    }
3275
0
    return res;
3276
0
}
3277
3278
/*[clinic input]
3279
@critical_section
3280
@getter
3281
_io.TextIOWrapper.errors
3282
[clinic start generated code]*/
3283
3284
static PyObject *
3285
_io_TextIOWrapper_errors_get_impl(textio *self)
3286
/*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3287
0
{
3288
0
    CHECK_INITIALIZED(self);
3289
0
    return Py_NewRef(self->errors);
3290
0
}
3291
3292
/*[clinic input]
3293
@critical_section
3294
@getter
3295
_io.TextIOWrapper._CHUNK_SIZE
3296
[clinic start generated code]*/
3297
3298
static PyObject *
3299
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3300
/*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3301
0
{
3302
0
    CHECK_ATTACHED(self);
3303
0
    return PyLong_FromSsize_t(self->chunk_size);
3304
0
}
3305
3306
/*[clinic input]
3307
@critical_section
3308
@setter
3309
_io.TextIOWrapper._CHUNK_SIZE
3310
[clinic start generated code]*/
3311
3312
static int
3313
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3314
/*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3315
0
{
3316
0
    Py_ssize_t n;
3317
0
    CHECK_ATTACHED_INT(self);
3318
0
    if (value == NULL) {
3319
0
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3320
0
        return -1;
3321
0
    }
3322
0
    n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3323
0
    if (n == -1 && PyErr_Occurred())
3324
0
        return -1;
3325
0
    if (n <= 0) {
3326
0
        PyErr_SetString(PyExc_ValueError,
3327
0
                        "a strictly positive integer is required");
3328
0
        return -1;
3329
0
    }
3330
0
    self->chunk_size = n;
3331
0
    return 0;
3332
0
}
3333
3334
/* Method table for _io.IncrementalNewlineDecoder: decode, getstate,
   setstate and reset.  Each entry is a *_METHODDEF macro (presumably
   generated by Argument Clinic into clinic/textio.c.h); the table is
   terminated by a {NULL} sentinel. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3335
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3336
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3337
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3338
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3339
    {NULL}
3340
};
3341
3342
/* Computed attributes of _io.IncrementalNewlineDecoder: a single
   "newlines" entry with a getter and no setter. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3343
    {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL},
3344
    {NULL}
3345
};
3346
3347
/* Type slots for _io.IncrementalNewlineDecoder: dealloc, doc string,
   methods, getset, GC traverse/clear and __init__.  Terminated by the
   {0, NULL} sentinel. */
static PyType_Slot nldecoder_slots[] = {
3348
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3349
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3350
    {Py_tp_methods, incrementalnewlinedecoder_methods},
3351
    {Py_tp_getset, incrementalnewlinedecoder_getset},
3352
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3353
    {Py_tp_clear, incrementalnewlinedecoder_clear},
3354
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3355
    {0, NULL},
3356
};
3357
3358
/* Type spec for _io.IncrementalNewlineDecoder.  The flags make it a
   GC-enabled, subclassable (BASETYPE), immutable type; its slots come
   from nldecoder_slots. */
PyType_Spec _Py_nldecoder_spec = {
3359
    .name = "_io.IncrementalNewlineDecoder",
3360
    .basicsize = sizeof(nldecoder_object),
3361
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3362
              Py_TPFLAGS_IMMUTABLETYPE),
3363
    .slots = nldecoder_slots,
3364
};
3365
3366
3367
/* Method table for _io.TextIOWrapper.  All entries except __getstate__
   are *_METHODDEF macros (presumably generated by Argument Clinic into
   clinic/textio.c.h).  __getstate__ is routed to _PyIOBase_cannot_pickle.
   Terminated by a {NULL, NULL} sentinel. */
static PyMethodDef textiowrapper_methods[] = {
3368
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3369
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3370
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3371
    _IO_TEXTIOWRAPPER_READ_METHODDEF
3372
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3373
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3374
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3375
3376
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3377
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3378
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3379
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3380
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3381
3382
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3383
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3384
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3385
3386
    {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS},
3387
    {NULL, NULL}
3388
};
3389
3390
/* Struct members of textio exposed as Python attributes.  "encoding",
   "buffer", "line_buffering" and "write_through" are read-only;
   "_finalizing" has flags 0 (not marked Py_READONLY).  The
   __weaklistoffset__ and __dictoffset__ entries give the offsets of the
   weakreflist and dict fields. */
static PyMemberDef textiowrapper_members[] = {
3391
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3392
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3393
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3394
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3395
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3396
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3397
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3398
    {NULL}
3399
};
3400
3401
/* Computed attributes of _io.TextIOWrapper: name, closed, newlines,
   errors and _CHUNK_SIZE.  Each entry is a *_GETSETDEF macro (presumably
   generated by Argument Clinic into clinic/textio.c.h); terminated by a
   {NULL} sentinel. */
static PyGetSetDef textiowrapper_getset[] = {
3402
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3403
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3404
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3405
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3406
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3407
    {NULL}
3408
};
3409
3410
/* Type slots for _io.TextIOWrapper: dealloc, repr, doc string, GC
   traverse/clear, iternext, methods, members, getset and __init__.
   Terminated by the {0, NULL} sentinel. */
static PyType_Slot textiowrapper_slots[] = {
3411
    {Py_tp_dealloc, textiowrapper_dealloc},
3412
    {Py_tp_repr, textiowrapper_repr},
3413
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3414
    {Py_tp_traverse, textiowrapper_traverse},
3415
    {Py_tp_clear, textiowrapper_clear},
3416
    {Py_tp_iternext, textiowrapper_iternext},
3417
    {Py_tp_methods, textiowrapper_methods},
3418
    {Py_tp_members, textiowrapper_members},
3419
    {Py_tp_getset, textiowrapper_getset},
3420
    {Py_tp_init, _io_TextIOWrapper___init__},
3421
    {0, NULL},
3422
};
3423
3424
/* Type spec for _io.TextIOWrapper.  The flags make it a GC-enabled,
   subclassable (BASETYPE), immutable type; its slots come from
   textiowrapper_slots. */
PyType_Spec _Py_textiowrapper_spec = {
3425
    .name = "_io.TextIOWrapper",
3426
    .basicsize = sizeof(textio),
3427
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3428
              Py_TPFLAGS_IMMUTABLETYPE),
3429
    .slots = textiowrapper_slots,
3430
};