Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_io/textio.c
Line
Count
Source
1
/*
2
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h"          // _PyObject_CallMethod()
11
#include "pycore_codecs.h"        // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
13
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
14
#include "pycore_long.h"          // _PyLong_GetZero()
15
#include "pycore_object.h"        // _PyObject_GC_UNTRACK()
16
#include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
17
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
18
#include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString()
19
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
20
21
#include "_iomodule.h"
22
23
/*[clinic input]
24
module _io
25
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
26
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
27
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
30
31
typedef struct nldecoder_object nldecoder_object;
32
typedef struct textio textio;
33
34
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
35
#include "clinic/textio.c.h"
36
#undef clinic_state
37
38
/* TextIOBase */
39
40
PyDoc_STRVAR(textiobase_doc,
41
    "Base class for text I/O.\n"
42
    "\n"
43
    "This class provides a character and line based interface to stream\n"
44
    "I/O. There is no readinto method because Python's character strings\n"
45
    "are immutable.\n"
46
    );
47
48
static PyObject *
49
_unsupported(_PyIO_State *state, const char *message)
50
0
{
51
0
    PyErr_SetString(state->unsupported_operation, message);
52
0
    return NULL;
53
0
}
54
55
/*[clinic input]
56
_io._TextIOBase.detach
57
    cls: defining_class
58
    /
59
60
Separate the underlying buffer from the TextIOBase and return it.
61
62
After the underlying buffer has been detached, the TextIO is in
63
an unusable state.
64
[clinic start generated code]*/
65
66
static PyObject *
67
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
68
/*[clinic end generated code: output=50915f40c609eaa4 input=8099c088abcb87d8]*/
69
0
{
70
0
    _PyIO_State *state = get_io_state_by_cls(cls);
71
0
    return _unsupported(state, "detach");
72
0
}
73
74
/*[clinic input]
75
_io._TextIOBase.read
76
    cls: defining_class
77
    size: int(unused=True) = -1
78
    /
79
80
Read at most size characters from stream.
81
82
Read from underlying buffer until we have size characters or we hit
83
EOF.  If size is negative or omitted, read until EOF.
84
[clinic start generated code]*/
85
86
static PyObject *
87
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
88
                          int Py_UNUSED(size))
89
/*[clinic end generated code: output=51a5178a309ce647 input=c9fd4cc1cf1b4614]*/
90
0
{
91
0
    _PyIO_State *state = get_io_state_by_cls(cls);
92
0
    return _unsupported(state, "read");
93
0
}
94
95
/*[clinic input]
96
_io._TextIOBase.readline
97
    cls: defining_class
98
    size: int(unused=True) = -1
99
    /
100
101
Read until newline or EOF.
102
103
Return an empty string if EOF is hit immediately.
104
If size is specified, at most size characters will be read.
105
[clinic start generated code]*/
106
107
static PyObject *
108
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
109
                              int Py_UNUSED(size))
110
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
111
0
{
112
0
    _PyIO_State *state = get_io_state_by_cls(cls);
113
0
    return _unsupported(state, "readline");
114
0
}
115
116
/*[clinic input]
117
_io._TextIOBase.write
118
    cls: defining_class
119
    s: str(unused=True)
120
    /
121
122
Write string s to stream.
123
124
Return the number of characters written
125
(which is always equal to the length of the string).
126
[clinic start generated code]*/
127
128
static PyObject *
129
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
130
                           const char *Py_UNUSED(s))
131
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
132
0
{
133
0
    _PyIO_State *state = get_io_state_by_cls(cls);
134
0
    return _unsupported(state, "write");
135
0
}
136
137
/*[clinic input]
138
@getter
139
_io._TextIOBase.encoding
140
141
Encoding of the text stream.
142
143
Subclasses should override.
144
[clinic start generated code]*/
145
146
static PyObject *
147
_io__TextIOBase_encoding_get_impl(PyObject *self)
148
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
149
0
{
150
0
    Py_RETURN_NONE;
151
0
}
152
153
/*[clinic input]
154
@getter
155
_io._TextIOBase.newlines
156
157
Line endings translated so far.
158
159
Only line endings translated during reading are considered.
160
161
Subclasses should override.
162
[clinic start generated code]*/
163
164
static PyObject *
165
_io__TextIOBase_newlines_get_impl(PyObject *self)
166
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
167
0
{
168
0
    Py_RETURN_NONE;
169
0
}
170
171
/*[clinic input]
172
@getter
173
_io._TextIOBase.errors
174
175
The error setting of the decoder or encoder.
176
177
Subclasses should override.
178
[clinic start generated code]*/
179
180
static PyObject *
181
_io__TextIOBase_errors_get_impl(PyObject *self)
182
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
183
0
{
184
0
    Py_RETURN_NONE;
185
0
}
186
187
188
static PyMethodDef textiobase_methods[] = {
189
    _IO__TEXTIOBASE_DETACH_METHODDEF
190
    _IO__TEXTIOBASE_READ_METHODDEF
191
    _IO__TEXTIOBASE_READLINE_METHODDEF
192
    _IO__TEXTIOBASE_WRITE_METHODDEF
193
    {NULL, NULL}
194
};
195
196
static PyGetSetDef textiobase_getset[] = {
197
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
198
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
199
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
200
    {NULL}
201
};
202
203
static PyType_Slot textiobase_slots[] = {
204
    {Py_tp_doc, (void *)textiobase_doc},
205
    {Py_tp_methods, textiobase_methods},
206
    {Py_tp_getset, textiobase_getset},
207
    {0, NULL},
208
};
209
210
/* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
211
PyType_Spec _Py_textiobase_spec = {
212
    .name = "_io._TextIOBase",
213
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
214
              Py_TPFLAGS_IMMUTABLETYPE),
215
    .slots = textiobase_slots,
216
};
217
218
/* IncrementalNewlineDecoder */
219
220
struct nldecoder_object {
221
    PyObject_HEAD
222
    PyObject *decoder;
223
    PyObject *errors;
224
    unsigned int pendingcr: 1;
225
    unsigned int translate: 1;
226
    unsigned int seennl: 3;
227
};
228
229
99.2k
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
230
231
/*[clinic input]
232
_io.IncrementalNewlineDecoder.__init__
233
    decoder: object
234
    translate: bool
235
    errors: object(c_default="NULL") = "strict"
236
237
Codec used when reading a file in universal newlines mode.
238
239
It wraps another incremental decoder, translating \r\n and \r into \n.
240
It also records the types of newlines encountered.  When used with
241
translate=False, it ensures that the newline sequence is returned in
242
one piece. When used with decoder=None, it expects unicode strings as
243
decode input and translates newlines without first invoking an external
244
decoder.
245
[clinic start generated code]*/
246
247
static int
248
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249
                                            PyObject *decoder, int translate,
250
                                            PyObject *errors)
251
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
252
15.8k
{
253
254
15.8k
    if (errors == NULL) {
255
15.8k
        errors = &_Py_ID(strict);
256
15.8k
    }
257
0
    else {
258
0
        errors = Py_NewRef(errors);
259
0
    }
260
261
15.8k
    Py_XSETREF(self->errors, errors);
262
15.8k
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
263
15.8k
    self->translate = translate ? 1 : 0;
264
15.8k
    self->seennl = 0;
265
15.8k
    self->pendingcr = 0;
266
267
15.8k
    return 0;
268
15.8k
}
269
270
static int
271
incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg)
272
5.74k
{
273
5.74k
    nldecoder_object *self = nldecoder_object_CAST(op);
274
5.74k
    Py_VISIT(Py_TYPE(self));
275
5.74k
    Py_VISIT(self->decoder);
276
5.74k
    Py_VISIT(self->errors);
277
5.74k
    return 0;
278
5.74k
}
279
280
static int
281
incrementalnewlinedecoder_clear(PyObject *op)
282
15.8k
{
283
15.8k
    nldecoder_object *self = nldecoder_object_CAST(op);
284
15.8k
    Py_CLEAR(self->decoder);
285
15.8k
    Py_CLEAR(self->errors);
286
15.8k
    return 0;
287
15.8k
}
288
289
static void
290
incrementalnewlinedecoder_dealloc(PyObject *op)
291
15.8k
{
292
15.8k
    nldecoder_object *self = nldecoder_object_CAST(op);
293
15.8k
    PyTypeObject *tp = Py_TYPE(self);
294
15.8k
    _PyObject_GC_UNTRACK(self);
295
15.8k
    (void)incrementalnewlinedecoder_clear(op);
296
15.8k
    tp->tp_free(self);
297
15.8k
    Py_DECREF(tp);
298
15.8k
}
299
300
static int
301
check_decoded(PyObject *decoded)
302
61.8k
{
303
61.8k
    if (decoded == NULL)
304
0
        return -1;
305
61.8k
    if (!PyUnicode_Check(decoded)) {
306
0
        PyErr_Format(PyExc_TypeError,
307
0
                     "decoder should return a string result, not '%.200s'",
308
0
                     Py_TYPE(decoded)->tp_name);
309
0
        Py_DECREF(decoded);
310
0
        return -1;
311
0
    }
312
61.8k
    return 0;
313
61.8k
}
314
315
#define CHECK_INITIALIZED_DECODER(self) \
316
61.7k
    if (self->errors == NULL) { \
317
0
        PyErr_SetString(PyExc_ValueError, \
318
0
                        "IncrementalNewlineDecoder.__init__() not called"); \
319
0
        return NULL; \
320
0
    }
321
322
33.2M
#define SEEN_CR   1
323
27.1M
#define SEEN_LF   2
324
25.9M
#define SEEN_CRLF 4
325
25.9M
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
326
327
PyObject *
328
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
329
                                    PyObject *input, int final)
330
61.7k
{
331
61.7k
    PyObject *output;
332
61.7k
    Py_ssize_t output_len;
333
61.7k
    nldecoder_object *self = nldecoder_object_CAST(myself);
334
335
61.7k
    CHECK_INITIALIZED_DECODER(self);
336
337
    /* decode input (with the possible \r from a previous pass) */
338
61.7k
    if (self->decoder != Py_None) {
339
130
        output = PyObject_CallMethodObjArgs(self->decoder,
340
130
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
341
130
    }
342
61.5k
    else {
343
61.5k
        output = Py_NewRef(input);
344
61.5k
    }
345
346
61.7k
    if (check_decoded(output) < 0)
347
0
        return NULL;
348
349
61.7k
    output_len = PyUnicode_GET_LENGTH(output);
350
61.7k
    if (self->pendingcr && (final || output_len > 0)) {
351
        /* Prefix output with CR */
352
0
        int kind;
353
0
        PyObject *modified;
354
0
        char *out;
355
356
0
        modified = PyUnicode_New(output_len + 1,
357
0
                                 PyUnicode_MAX_CHAR_VALUE(output));
358
0
        if (modified == NULL)
359
0
            goto error;
360
0
        kind = PyUnicode_KIND(modified);
361
0
        out = PyUnicode_DATA(modified);
362
0
        PyUnicode_WRITE(kind, out, 0, '\r');
363
0
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
364
0
        Py_SETREF(output, modified);
365
0
        self->pendingcr = 0;
366
0
        output_len++;
367
0
    }
368
369
    /* retain last \r even when not translating data:
370
     * then readline() is sure to get \r\n in one pass
371
     */
372
61.7k
    if (!final) {
373
112
        if (output_len > 0
374
112
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
375
0
        {
376
0
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
377
0
            if (modified == NULL)
378
0
                goto error;
379
0
            Py_SETREF(output, modified);
380
0
            self->pendingcr = 1;
381
0
        }
382
112
    }
383
384
    /* Record which newlines are read and do newline translation if desired,
385
       all in one pass. */
386
61.7k
    {
387
61.7k
        const void *in_str;
388
61.7k
        Py_ssize_t len;
389
61.7k
        int seennl = self->seennl;
390
61.7k
        int only_lf = 0;
391
61.7k
        int kind;
392
393
61.7k
        in_str = PyUnicode_DATA(output);
394
61.7k
        len = PyUnicode_GET_LENGTH(output);
395
61.7k
        kind = PyUnicode_KIND(output);
396
397
61.7k
        if (len == 0)
398
10
            return output;
399
400
        /* If, up to now, newlines are consistently \n, do a quick check
401
           for the \r *byte* with the libc's optimized memchr.
402
           */
403
61.6k
        if (seennl == SEEN_LF || seennl == 0) {
404
31.1k
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
405
31.1k
        }
406
407
61.6k
        if (only_lf) {
408
            /* If not already seen, quick scan for a possible "\n" character.
409
               (there's nothing else to be done, even when in translation mode)
410
            */
411
25.1k
            if (seennl == 0 &&
412
16.8k
                memchr(in_str, '\n', kind * len) != NULL) {
413
1.92k
                if (kind == PyUnicode_1BYTE_KIND)
414
914
                    seennl |= SEEN_LF;
415
1.01k
                else {
416
1.01k
                    Py_ssize_t i = 0;
417
161k
                    for (;;) {
418
161k
                        Py_UCS4 c;
419
                        /* Fast loop for non-control characters */
420
1.07M
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
421
912k
                            i++;
422
161k
                        c = PyUnicode_READ(kind, in_str, i++);
423
161k
                        if (c == '\n') {
424
817
                            seennl |= SEEN_LF;
425
817
                            break;
426
817
                        }
427
160k
                        if (i >= len)
428
195
                            break;
429
160k
                    }
430
1.01k
                }
431
1.92k
            }
432
            /* Finished: we have scanned for newlines, and none of them
433
               need translating */
434
25.1k
        }
435
36.5k
        else if (!self->translate) {
436
36.5k
            Py_ssize_t i = 0;
437
            /* We have already seen all newline types, no need to scan again */
438
36.5k
            if (seennl == SEEN_ALL)
439
13.1k
                goto endscan;
440
25.9M
            for (;;) {
441
25.9M
                Py_UCS4 c;
442
                /* Fast loop for non-control characters */
443
73.2M
                while (PyUnicode_READ(kind, in_str, i) > '\r')
444
47.3M
                    i++;
445
25.9M
                c = PyUnicode_READ(kind, in_str, i++);
446
25.9M
                if (c == '\n')
447
1.13M
                    seennl |= SEEN_LF;
448
24.7M
                else if (c == '\r') {
449
7.34M
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
450
3.42k
                        seennl |= SEEN_CRLF;
451
3.42k
                        i++;
452
3.42k
                    }
453
7.33M
                    else
454
7.33M
                        seennl |= SEEN_CR;
455
7.34M
                }
456
25.9M
                if (i >= len)
457
22.1k
                    break;
458
25.8M
                if (seennl == SEEN_ALL)
459
1.20k
                    break;
460
25.8M
            }
461
36.5k
        endscan:
462
36.5k
            ;
463
36.5k
        }
464
0
        else {
465
0
            void *translated;
466
0
            int kind = PyUnicode_KIND(output);
467
0
            const void *in_str = PyUnicode_DATA(output);
468
0
            Py_ssize_t in, out;
469
            /* XXX: Previous in-place translation here is disabled as
470
               resizing is not possible anymore */
471
            /* We could try to optimize this so that we only do a copy
472
               when there is something to translate. On the other hand,
473
               we already know there is a \r byte, so chances are high
474
               that something needs to be done. */
475
0
            translated = PyMem_Malloc(kind * len);
476
0
            if (translated == NULL) {
477
0
                PyErr_NoMemory();
478
0
                goto error;
479
0
            }
480
0
            in = out = 0;
481
0
            for (;;) {
482
0
                Py_UCS4 c;
483
                /* Fast loop for non-control characters */
484
0
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
485
0
                    PyUnicode_WRITE(kind, translated, out++, c);
486
0
                if (c == '\n') {
487
0
                    PyUnicode_WRITE(kind, translated, out++, c);
488
0
                    seennl |= SEEN_LF;
489
0
                    continue;
490
0
                }
491
0
                if (c == '\r') {
492
0
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
493
0
                        in++;
494
0
                        seennl |= SEEN_CRLF;
495
0
                    }
496
0
                    else
497
0
                        seennl |= SEEN_CR;
498
0
                    PyUnicode_WRITE(kind, translated, out++, '\n');
499
0
                    continue;
500
0
                }
501
0
                if (in > len)
502
0
                    break;
503
0
                PyUnicode_WRITE(kind, translated, out++, c);
504
0
            }
505
0
            Py_DECREF(output);
506
0
            output = PyUnicode_FromKindAndData(kind, translated, out);
507
0
            PyMem_Free(translated);
508
0
            if (!output)
509
0
                return NULL;
510
0
        }
511
61.6k
        self->seennl |= seennl;
512
61.6k
    }
513
514
0
    return output;
515
516
0
  error:
517
0
    Py_DECREF(output);
518
0
    return NULL;
519
61.6k
}
520
521
/*[clinic input]
522
@critical_section
523
_io.IncrementalNewlineDecoder.decode
524
    input: object
525
    final: bool = False
526
[clinic start generated code]*/
527
528
static PyObject *
529
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
530
                                          PyObject *input, int final)
531
/*[clinic end generated code: output=0d486755bb37a66e input=9475d16a73168504]*/
532
0
{
533
0
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
534
0
}
535
536
/*[clinic input]
537
@critical_section
538
_io.IncrementalNewlineDecoder.getstate
539
[clinic start generated code]*/
540
541
static PyObject *
542
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
543
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=dc3e1f27aa850f12]*/
544
0
{
545
0
    PyObject *buffer;
546
0
    unsigned long long flag;
547
548
0
    CHECK_INITIALIZED_DECODER(self);
549
550
0
    if (self->decoder != Py_None) {
551
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
552
0
           &_Py_ID(getstate));
553
0
        if (state == NULL)
554
0
            return NULL;
555
0
        if (!PyTuple_Check(state)) {
556
0
            PyErr_SetString(PyExc_TypeError,
557
0
                            "illegal decoder state");
558
0
            Py_DECREF(state);
559
0
            return NULL;
560
0
        }
561
0
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
562
0
                              &buffer, &flag))
563
0
        {
564
0
            Py_DECREF(state);
565
0
            return NULL;
566
0
        }
567
0
        Py_INCREF(buffer);
568
0
        Py_DECREF(state);
569
0
    }
570
0
    else {
571
0
        buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
572
0
        flag = 0;
573
0
    }
574
0
    flag <<= 1;
575
0
    if (self->pendingcr)
576
0
        flag |= 1;
577
0
    return Py_BuildValue("NK", buffer, flag);
578
0
}
579
580
/*[clinic input]
581
@critical_section
582
_io.IncrementalNewlineDecoder.setstate
583
    state: object
584
    /
585
[clinic start generated code]*/
586
587
static PyObject *
588
_io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
589
                                            PyObject *state)
590
/*[clinic end generated code: output=09135cb6e78a1dc8 input=275fd3982d2b08cb]*/
591
0
{
592
0
    PyObject *buffer;
593
0
    unsigned long long flag;
594
595
0
    CHECK_INITIALIZED_DECODER(self);
596
597
0
    if (!PyTuple_Check(state)) {
598
0
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
599
0
        return NULL;
600
0
    }
601
0
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
602
0
                          &buffer, &flag))
603
0
    {
604
0
        return NULL;
605
0
    }
606
607
0
    self->pendingcr = (int) (flag & 1);
608
0
    flag >>= 1;
609
610
0
    if (self->decoder != Py_None) {
611
0
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
612
0
                                    "((OK))", buffer, flag);
613
0
    }
614
0
    else {
615
0
        Py_RETURN_NONE;
616
0
    }
617
0
}
618
619
/*[clinic input]
620
@critical_section
621
_io.IncrementalNewlineDecoder.reset
622
[clinic start generated code]*/
623
624
static PyObject *
625
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
626
/*[clinic end generated code: output=32fa40c7462aa8ff input=31bd8ae4e36cec83]*/
627
0
{
628
0
    CHECK_INITIALIZED_DECODER(self);
629
630
0
    self->seennl = 0;
631
0
    self->pendingcr = 0;
632
0
    if (self->decoder != Py_None)
633
0
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
634
0
    else
635
0
        Py_RETURN_NONE;
636
0
}
637
638
static PyObject *
639
incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context))
640
0
{
641
0
    nldecoder_object *self = nldecoder_object_CAST(op);
642
0
    CHECK_INITIALIZED_DECODER(self);
643
644
0
    switch (self->seennl) {
645
0
    case SEEN_CR:
646
0
        return PyUnicode_FromString("\r");
647
0
    case SEEN_LF:
648
0
        return PyUnicode_FromString("\n");
649
0
    case SEEN_CRLF:
650
0
        return PyUnicode_FromString("\r\n");
651
0
    case SEEN_CR | SEEN_LF:
652
0
        return Py_BuildValue("ss", "\r", "\n");
653
0
    case SEEN_CR | SEEN_CRLF:
654
0
        return Py_BuildValue("ss", "\r", "\r\n");
655
0
    case SEEN_LF | SEEN_CRLF:
656
0
        return Py_BuildValue("ss", "\n", "\r\n");
657
0
    case SEEN_CR | SEEN_LF | SEEN_CRLF:
658
0
        return Py_BuildValue("sss", "\r", "\n", "\r\n");
659
0
    default:
660
0
        Py_RETURN_NONE;
661
0
   }
662
663
0
}
664
665
/* TextIOWrapper */
666
667
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
668
669
struct textio
670
{
671
    PyObject_HEAD
672
    int ok; /* initialized? */
673
    int detached;
674
    Py_ssize_t chunk_size;
675
    /* Use the buffer_*() helper functions to access buffer; many operations can set it to
676
       NULL (see gh-143008, gh-142594). */
677
    PyObject *buffer;
678
    PyObject *encoding;
679
    PyObject *encoder;
680
    PyObject *decoder;
681
    PyObject *readnl;
682
    PyObject *errors;
683
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
684
    char line_buffering;
685
    char write_through;
686
    char readuniversal;
687
    char readtranslate;
688
    char writetranslate;
689
    char seekable;
690
    char has_read1;
691
    char telling;
692
    char finalizing;
693
    /* Specialized encoding func (see below) */
694
    encodefunc_t encodefunc;
695
    /* Whether or not it's the start of the stream */
696
    char encoding_start_of_stream;
697
698
    /* Reads and writes are internally buffered in order to speed things up.
699
       However, any read will first flush the write buffer if it isn't empty.
700
701
       Please also note that text to be written is first encoded before being
702
       buffered. This is necessary so that encoding errors are immediately
703
       reported to the caller, but it unfortunately means that the
704
       IncrementalEncoder (whose encode() method is always written in Python)
705
       becomes a bottleneck for small writes.
706
    */
707
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
708
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
709
    PyObject *pending_bytes;       // data waiting to be written.
710
                                   // ascii unicode, bytes, or list of them.
711
    Py_ssize_t pending_bytes_count;
712
713
    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
714
     * dec_flags is the second (integer) item of the decoder state and
715
     * next_input is the chunk of input bytes that comes next after the
716
     * snapshot point.  We use this to reconstruct decoder states in tell().
717
     */
718
    PyObject *snapshot;
719
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
720
       the heuristic in tell(). */
721
    double b2cratio;
722
723
    /* Cache raw object if it's a FileIO object */
724
    PyObject *raw;
725
726
    PyObject *weakreflist;
727
    PyObject *dict;
728
729
    _PyIO_State *state;
730
};
731
732
29.2k
#define textio_CAST(op) ((textio *)(op))
733
734
/* Helpers to safely operate on self->buffer.
735
736
   self->buffer can be detached (set to NULL) by any user code that is called,
737
   leading to NULL pointer dereferences (see gh-143008, gh-142594). Protect
738
   against that by using helpers to check self->buffer validity at callsites. */
739
static PyObject *
740
buffer_access_safe(textio *self)
741
521k
{
742
    /* Check self->buffer directly but match errors of CHECK_ATTACHED since this
743
       is called during construction and finalization where self->ok == 0. */
744
521k
    if (self->buffer == NULL) {
745
0
        if (self->ok <= 0) {
746
0
            PyErr_SetString(PyExc_ValueError,
747
0
                            "I/O operation on uninitialized object");
748
0
        }
749
0
        else {
750
0
            PyErr_SetString(PyExc_ValueError,
751
0
                            "underlying buffer has been detached");
752
0
        }
753
0
        return NULL;
754
0
    }
755
756
    /* Returning a borrowed reference is safe since TextIOWrapper methods are
757
       protected by critical sections. */
758
521k
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
759
521k
    return self->buffer;
760
521k
}
761
762
static PyObject *
763
buffer_getattr(textio *self, PyObject *attr_name)
764
67
{
765
67
    PyObject *buffer = buffer_access_safe(self);
766
67
    if (buffer == NULL) {
767
0
        return NULL;
768
0
    }
769
770
67
    return PyObject_GetAttr(buffer, attr_name);
771
67
}
772
773
static PyObject *
774
buffer_callmethod_noargs(textio *self, PyObject *name)
775
373
{
776
373
    PyObject *buffer = buffer_access_safe(self);
777
373
    if (buffer == NULL) {
778
0
        return NULL;
779
0
    }
780
781
373
    return PyObject_CallMethodNoArgs(buffer, name);
782
373
}
783
784
static PyObject *
785
buffer_callmethod_onearg(textio *self, PyObject *name, PyObject *arg)
786
260k
{
787
260k
    PyObject *buffer = buffer_access_safe(self);
788
260k
    if (buffer == NULL) {
789
0
        return NULL;
790
0
    }
791
792
260k
    return PyObject_CallMethodOneArg(buffer, name, arg);
793
260k
}
794
795
static void
796
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
797
798
/* A couple of specialized cases in order to bypass the slow incremental
799
   encoding methods for the most popular encodings. */
800
801
static PyObject *
802
ascii_encode(PyObject *op, PyObject *text)
803
0
{
804
0
    textio *self = textio_CAST(op);
805
0
    return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
806
0
}
807
808
static PyObject *
809
utf16be_encode(PyObject *op, PyObject *text)
810
0
{
811
0
    textio *self = textio_CAST(op);
812
0
    return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), 1);
813
0
}
814
815
static PyObject *
816
utf16le_encode(PyObject *op, PyObject *text)
817
0
{
818
0
    textio *self = textio_CAST(op);
819
0
    return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), -1);
820
0
}
821
822
static PyObject *
823
utf16_encode(PyObject *op, PyObject *text)
824
0
{
825
0
    textio *self = textio_CAST(op);
826
0
    if (!self->encoding_start_of_stream) {
827
        /* Skip the BOM and use native byte ordering */
828
#if PY_BIG_ENDIAN
829
        return utf16be_encode(op, text);
830
#else
831
0
        return utf16le_encode(op, text);
832
0
#endif
833
0
    }
834
0
    return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), 0);
835
0
}
836
837
static PyObject *
838
utf32be_encode(PyObject *op, PyObject *text)
839
0
{
840
0
    textio *self = textio_CAST(op);
841
0
    return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), 1);
842
0
}
843
844
static PyObject *
845
utf32le_encode(PyObject *op, PyObject *text)
846
0
{
847
0
    textio *self = textio_CAST(op);
848
0
    return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), -1);
849
0
}
850
851
static PyObject *
852
utf32_encode(PyObject *op, PyObject *text)
853
0
{
854
0
    textio *self = textio_CAST(op);
855
0
    if (!self->encoding_start_of_stream) {
856
        /* Skip the BOM and use native byte ordering */
857
#if PY_BIG_ENDIAN
858
        return utf32be_encode(op, text);
859
#else
860
0
        return utf32le_encode(op, text);
861
0
#endif
862
0
    }
863
0
    return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), 0);
864
0
}
865
866
static PyObject *
867
utf8_encode(PyObject *op, PyObject *text)
868
3.04k
{
869
3.04k
    textio *self = textio_CAST(op);
870
3.04k
    return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
871
3.04k
}
872
873
static PyObject *
874
latin1_encode(PyObject *op, PyObject *text)
875
0
{
876
0
    textio *self = textio_CAST(op);
877
0
    return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
878
0
}
879
880
// Return true when encoding can be skipped when text is ascii.
881
static inline int
882
is_asciicompat_encoding(encodefunc_t f)
883
257k
{
884
257k
    return f == ascii_encode || f == latin1_encode || f == utf8_encode;
885
257k
}
886
887
/* Map normalized encoding names onto the specialized encoding funcs */
888
889
typedef struct {
890
    const char *name;
891
    encodefunc_t encodefunc;
892
} encodefuncentry;
893
894
static const encodefuncentry encodefuncs[] = {
895
    {"ascii",       ascii_encode},
896
    {"iso8859-1",   latin1_encode},
897
    {"utf-8",       utf8_encode},
898
    {"utf-16-be",   utf16be_encode},
899
    {"utf-16-le",   utf16le_encode},
900
    {"utf-16",      utf16_encode},
901
    {"utf-32-be",   utf32be_encode},
902
    {"utf-32-le",   utf32le_encode},
903
    {"utf-32",      utf32_encode},
904
    {NULL, NULL}
905
};
906
907
static int
908
validate_newline(const char *newline)
909
127
{
910
127
    if (newline && newline[0] != '\0'
911
108
        && !(newline[0] == '\n' && newline[1] == '\0')
912
0
        && !(newline[0] == '\r' && newline[1] == '\0')
913
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
914
0
        PyErr_Format(PyExc_ValueError,
915
0
                     "illegal newline value: %s", newline);
916
0
        return -1;
917
0
    }
918
127
    return 0;
919
127
}
920
921
/* Configure the newline handling of self from newline (NULL or a C string).
 * Sets readnl, readuniversal, readtranslate, writetranslate and writenl,
 * and releases the previous readnl on success.
 * Return 0 on success.  If the new readnl string cannot be created, put the
 * previous readnl back and return -1 without touching the other fields.
 */
static int
922
set_newline(textio *self, const char *newline)
923
127
{
924
127
    PyObject *old = self->readnl;
925
127
    if (newline == NULL) {
926
19
        self->readnl = NULL;
927
19
    }
928
108
    else {
929
108
        self->readnl = PyUnicode_FromString(newline);
930
108
        if (self->readnl == NULL) {
931
0
            self->readnl = old;
932
0
            return -1;
933
0
        }
934
108
    }
935
127
    self->readuniversal = (newline == NULL || newline[0] == '\0');
936
127
    self->readtranslate = (newline == NULL);
937
127
    self->writetranslate = (newline == NULL || newline[0] != '\0');
938
127
    if (!self->readuniversal && self->readnl != NULL) {
939
        // validate_newline() accepts only ASCII newlines.
        // writenl points at readnl's own character data (no copy is made);
        // a plain "\n" is stored as NULL instead.
940
108
        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
941
108
        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
942
108
        if (strcmp(self->writenl, "\n") == 0) {
943
108
            self->writenl = NULL;
944
108
        }
945
108
    }
946
19
    else {
        // newline is NULL or "": fall back to the build-time default.
947
#ifdef MS_WINDOWS
948
        self->writenl = "\r\n";
949
#else
950
19
        self->writenl = NULL;
951
19
#endif
952
19
    }
953
127
    Py_XDECREF(old);
954
127
    return 0;
955
127
}
956
957
static int
958
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
959
                           const char *errors)
960
127
{
961
127
    PyObject *res;
962
127
    int r;
963
964
127
    res = buffer_callmethod_noargs(self, &_Py_ID(readable));
965
127
    if (res == NULL)
966
0
        return -1;
967
968
127
    r = PyObject_IsTrue(res);
969
127
    Py_DECREF(res);
970
127
    if (r == -1)
971
0
        return -1;
972
973
127
    if (r != 1)
974
73
        return 0;
975
976
54
    Py_CLEAR(self->decoder);
977
54
    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
978
54
    if (self->decoder == NULL)
979
0
        return -1;
980
981
54
    if (self->readuniversal) {
982
18
        _PyIO_State *state = self->state;
983
18
        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
984
18
            (PyObject *)state->PyIncrementalNewlineDecoder_Type,
985
18
            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
986
18
        if (incrementalDecoder == NULL)
987
0
            return -1;
988
18
        Py_XSETREF(self->decoder, incrementalDecoder);
989
18
    }
990
991
54
    return 0;
992
54
}
993
994
/* Decode bytes with decoder, passing eof as the "final" flag.
 * When decoder is exactly an IncrementalNewlineDecoder its C implementation
 * is called directly; otherwise decoder.decode(bytes, eof) is called.
 * Return the decoder's result, or NULL when check_decoded() reports failure.
 */
static PyObject*
995
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
996
                      int eof)
997
122
{
998
122
    PyObject *chars;
999
1000
122
    if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
1001
122
        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
1002
0
    else
1003
0
        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
1004
0
                                           eof ? Py_True : Py_False, NULL);
1005
1006
122
    if (check_decoded(chars) < 0)
1007
        // check_decoded already decreases refcount
1008
0
        return NULL;
1009
1010
122
    return chars;
1011
122
}
1012
1013
static int
1014
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
1015
                           const char *errors)
1016
127
{
1017
127
    PyObject *res;
1018
127
    int r;
1019
1020
127
    res = buffer_callmethod_noargs(self, &_Py_ID(writable));
1021
127
    if (res == NULL)
1022
0
        return -1;
1023
1024
127
    r = PyObject_IsTrue(res);
1025
127
    Py_DECREF(res);
1026
127
    if (r == -1)
1027
0
        return -1;
1028
1029
127
    if (r != 1)
1030
54
        return 0;
1031
1032
73
    Py_CLEAR(self->encoder);
1033
73
    self->encodefunc = NULL;
1034
73
    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
1035
73
    if (self->encoder == NULL)
1036
0
        return -1;
1037
1038
    /* Get the normalized name of the codec */
1039
73
    if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
1040
0
        return -1;
1041
0
    }
1042
73
    if (res != NULL && PyUnicode_Check(res)) {
1043
73
        const encodefuncentry *e = encodefuncs;
1044
219
        while (e->name != NULL) {
1045
219
            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
1046
73
                self->encodefunc = e->encodefunc;
1047
73
                break;
1048
73
            }
1049
146
            e++;
1050
146
        }
1051
73
    }
1052
73
    Py_XDECREF(res);
1053
1054
73
    return 0;
1055
73
}
1056
1057
/* Bring encoding_start_of_stream (and the encoder) in line with the current
 * position of the underlying buffer.
 * Does nothing and returns 0 when the stream is not seekable or has no
 * encoder.  Otherwise the flag is set to 1; if buffer.tell() is not 0 the
 * flag is cleared again and encoder.setstate(0) is called.
 * Return 0 on success, -1 if any of the calls fails.
 */
static int
1058
_textiowrapper_fix_encoder_state(textio *self)
1059
127
{
1060
127
    if (!self->seekable || !self->encoder) {
1061
54
        return 0;
1062
54
    }
1063
1064
73
    self->encoding_start_of_stream = 1;
1065
1066
73
    PyObject *cookieObj = buffer_callmethod_noargs(self, &_Py_ID(tell));
1067
73
    if (cookieObj == NULL) {
1068
0
        return -1;
1069
0
    }
1070
1071
73
    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
1072
73
    Py_DECREF(cookieObj);
1073
73
    if (cmp < 0) {
1074
0
        return -1;
1075
0
    }
1076
1077
73
    if (cmp == 0) {
        // tell() != 0: the buffer is not positioned at the start.
1078
34
        self->encoding_start_of_stream = 0;
1079
34
        PyObject *res = PyObject_CallMethodOneArg(
1080
34
            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1081
34
        if (res == NULL) {
1082
0
            return -1;
1083
0
        }
1084
34
        Py_DECREF(res);
1085
34
    }
1086
1087
73
    return 0;
1088
73
}
1089
1090
static int
1091
io_check_errors(PyObject *errors)
1092
108
{
1093
108
    assert(errors != NULL && errors != Py_None);
1094
1095
108
    PyInterpreterState *interp = _PyInterpreterState_GET();
1096
108
#ifndef Py_DEBUG
1097
    /* In release mode, only check in development mode (-X dev) */
1098
108
    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1099
108
        return 0;
1100
108
    }
1101
#else
1102
    /* Always check in debug mode */
1103
#endif
1104
1105
    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1106
       before _PyUnicode_InitEncodings() is called. */
1107
0
    if (!interp->unicode.fs_codec.encoding) {
1108
0
        return 0;
1109
0
    }
1110
1111
0
    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1112
0
    if (name == NULL) {
1113
0
        return -1;
1114
0
    }
1115
0
    PyObject *handler = PyCodec_LookupError(name);
1116
0
    if (handler != NULL) {
1117
0
        Py_DECREF(handler);
1118
0
        return 0;
1119
0
    }
1120
0
    return -1;
1121
0
}
1122
1123
1124
1125
/*[clinic input]
1126
@critical_section
1127
_io.TextIOWrapper.__init__
1128
    buffer: object
1129
    encoding: str(accept={str, NoneType}) = None
1130
    errors: object = None
1131
    newline: str(accept={str, NoneType}) = None
1132
    line_buffering: bool = False
1133
    write_through: bool = False
1134
1135
Character and line based layer over a BufferedIOBase object, buffer.
1136
1137
encoding gives the name of the encoding that the stream will be
1138
decoded or encoded with. It defaults to locale.getencoding().
1139
1140
errors determines the strictness of encoding and decoding (see
1141
help(codecs.Codec) or the documentation for codecs.register) and
1142
defaults to "strict".
1143
1144
newline controls how line endings are handled. It can be None, '',
1145
'\n', '\r', and '\r\n'.  It works as follows:
1146
1147
* On input, if newline is None, universal newlines mode is
1148
  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1149
  these are translated into '\n' before being returned to the
1150
  caller. If it is '', universal newline mode is enabled, but line
1151
  endings are returned to the caller untranslated. If it has any of
1152
  the other legal values, input lines are only terminated by the given
1153
  string, and the line ending is returned to the caller untranslated.
1154
1155
* On output, if newline is None, any '\n' characters written are
1156
  translated to the system default line separator, os.linesep. If
1157
  newline is '' or '\n', no translation takes place. If newline is any
1158
  of the other legal values, any '\n' characters written are translated
1159
  to the given string.
1160
1161
If line_buffering is True, a call to flush is implied when a call to
1162
write contains a newline character.
1163
[clinic start generated code]*/
1164
1165
static int
1166
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1167
                                const char *encoding, PyObject *errors,
1168
                                const char *newline, int line_buffering,
1169
                                int write_through)
1170
/*[clinic end generated code: output=72267c0c01032ed2 input=0f077220214c40a4]*/
1171
127
{
1172
127
    PyObject *raw, *codec_info = NULL;
1173
127
    PyObject *res;
1174
127
    int r;
1175
1176
127
    self->ok = 0;
1177
127
    self->detached = 0;
1178
1179
127
    if (encoding == NULL) {
1180
0
        PyInterpreterState *interp = _PyInterpreterState_GET();
1181
0
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1182
0
            if (PyErr_WarnEx(PyExc_EncodingWarning,
1183
0
                             "'encoding' argument not specified", 1)) {
1184
0
                return -1;
1185
0
            }
1186
0
        }
1187
0
    }
1188
1189
127
    if (errors == Py_None) {
1190
19
        errors = &_Py_ID(strict);
1191
19
    }
1192
108
    else if (!PyUnicode_Check(errors)) {
1193
        // Check 'errors' argument here because Argument Clinic doesn't support
1194
        // 'str(accept={str, NoneType})' converter.
1195
0
        PyErr_Format(
1196
0
            PyExc_TypeError,
1197
0
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1198
0
            Py_TYPE(errors)->tp_name);
1199
0
        return -1;
1200
0
    }
1201
108
    else if (io_check_errors(errors)) {
1202
0
        return -1;
1203
0
    }
1204
127
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1205
127
    if (errors_str == NULL) {
1206
0
        return -1;
1207
0
    }
1208
1209
127
    if (validate_newline(newline) < 0) {
1210
0
        return -1;
1211
0
    }
1212
1213
127
    Py_CLEAR(self->buffer);
1214
127
    Py_CLEAR(self->encoding);
1215
127
    Py_CLEAR(self->encoder);
1216
127
    Py_CLEAR(self->decoder);
1217
127
    Py_CLEAR(self->readnl);
1218
127
    Py_CLEAR(self->decoded_chars);
1219
127
    Py_CLEAR(self->pending_bytes);
1220
127
    Py_CLEAR(self->snapshot);
1221
127
    Py_CLEAR(self->errors);
1222
127
    Py_CLEAR(self->raw);
1223
127
    self->decoded_chars_used = 0;
1224
127
    self->pending_bytes_count = 0;
1225
127
    self->encodefunc = NULL;
1226
127
    self->b2cratio = 0.0;
1227
1228
127
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1229
0
        _Py_DECLARE_STR(utf_8, "utf-8");
1230
0
        self->encoding = &_Py_STR(utf_8);
1231
0
    }
1232
127
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1233
0
        self->encoding = _Py_GetLocaleEncodingObject();
1234
0
        if (self->encoding == NULL) {
1235
0
            goto error;
1236
0
        }
1237
0
        assert(PyUnicode_Check(self->encoding));
1238
0
    }
1239
1240
127
    if (self->encoding != NULL) {
1241
0
        encoding = PyUnicode_AsUTF8(self->encoding);
1242
0
        if (encoding == NULL)
1243
0
            goto error;
1244
0
    }
1245
127
    else if (encoding != NULL) {
1246
127
        self->encoding = PyUnicode_FromString(encoding);
1247
127
        if (self->encoding == NULL)
1248
0
            goto error;
1249
127
    }
1250
0
    else {
1251
0
        PyErr_SetString(PyExc_OSError,
1252
0
                        "could not determine default encoding");
1253
0
        goto error;
1254
0
    }
1255
1256
    /* Check we have been asked for a real text encoding */
1257
127
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
1258
127
    if (codec_info == NULL) {
1259
0
        Py_CLEAR(self->encoding);
1260
0
        goto error;
1261
0
    }
1262
1263
    /* XXX: Failures beyond this point have the potential to leak elements
1264
     * of the partially constructed object (like self->encoding)
1265
     */
1266
1267
127
    self->errors = Py_NewRef(errors);
1268
127
    self->chunk_size = 8192;
1269
127
    self->line_buffering = line_buffering;
1270
127
    self->write_through = write_through;
1271
127
    if (set_newline(self, newline) < 0) {
1272
0
        goto error;
1273
0
    }
1274
1275
127
    self->buffer = Py_NewRef(buffer);
1276
1277
    /* Build the decoder object */
1278
127
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1279
127
    self->state = state;
1280
127
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1281
0
        goto error;
1282
1283
    /* Build the encoder object */
1284
127
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1285
0
        goto error;
1286
1287
    /* Finished sorting out the codec details */
1288
127
    Py_CLEAR(codec_info);
1289
1290
127
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1291
73
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1292
0
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1293
127
    {
1294
127
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1295
0
            goto error;
1296
        /* Cache the raw FileIO object to speed up 'closed' checks */
1297
127
        if (raw != NULL) {
1298
127
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1299
127
                self->raw = raw;
1300
0
            else
1301
0
                Py_DECREF(raw);
1302
127
        }
1303
127
    }
1304
1305
127
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1306
127
    if (res == NULL)
1307
0
        goto error;
1308
127
    r = PyObject_IsTrue(res);
1309
127
    Py_DECREF(res);
1310
127
    if (r < 0)
1311
0
        goto error;
1312
127
    self->seekable = self->telling = r;
1313
1314
127
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1315
127
    if (r < 0) {
1316
0
        goto error;
1317
0
    }
1318
127
    self->has_read1 = r;
1319
1320
127
    self->encoding_start_of_stream = 0;
1321
127
    if (_textiowrapper_fix_encoder_state(self) < 0) {
1322
0
        goto error;
1323
0
    }
1324
1325
127
    self->ok = 1;
1326
127
    return 0;
1327
1328
0
  error:
1329
0
    Py_XDECREF(codec_info);
1330
0
    return -1;
1331
127
}
1332
1333
/* Return *default_value* (normalized to 0 or 1) if obj is None, 0 if obj is
 * false, 1 if obj is true,
1334
 * -1 on error.
1335
 */
1336
static int
1337
convert_optional_bool(PyObject *obj, int default_value)
1338
0
{
1339
0
    long v;
1340
0
    if (obj == Py_None) {
1341
0
        v = default_value;
1342
0
    }
1343
0
    else {
1344
0
        v = PyLong_AsLong(obj);
1345
0
        if (v == -1 && PyErr_Occurred())
1346
0
            return -1;
1347
0
    }
1348
0
    return v != 0;
1349
0
}
1350
1351
static int
1352
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1353
                              PyObject *errors, int newline_changed)
1354
0
{
1355
    /* Use existing settings where new settings are not specified */
1356
0
    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1357
0
        return 0;  // no change
1358
0
    }
1359
1360
0
    if (encoding == Py_None) {
1361
0
        encoding = self->encoding;
1362
0
        if (errors == Py_None) {
1363
0
            errors = self->errors;
1364
0
        }
1365
0
        Py_INCREF(encoding);
1366
0
    }
1367
0
    else {
1368
0
        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1369
0
            encoding = _Py_GetLocaleEncodingObject();
1370
0
            if (encoding == NULL) {
1371
0
                return -1;
1372
0
            }
1373
0
        } else {
1374
0
            Py_INCREF(encoding);
1375
0
        }
1376
0
        if (errors == Py_None) {
1377
0
            errors = &_Py_ID(strict);
1378
0
        }
1379
0
    }
1380
0
    Py_INCREF(errors);
1381
1382
0
    const char *c_encoding = PyUnicode_AsUTF8(encoding);
1383
0
    if (c_encoding == NULL) {
1384
0
        Py_DECREF(encoding);
1385
0
        Py_DECREF(errors);
1386
0
        return -1;
1387
0
    }
1388
0
    const char *c_errors = PyUnicode_AsUTF8(errors);
1389
0
    if (c_errors == NULL) {
1390
0
        Py_DECREF(encoding);
1391
0
        Py_DECREF(errors);
1392
0
        return -1;
1393
0
    }
1394
1395
    // Create new encoder & decoder
1396
0
    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
1397
0
    if (codec_info == NULL) {
1398
0
        Py_DECREF(encoding);
1399
0
        Py_DECREF(errors);
1400
0
        return -1;
1401
0
    }
1402
0
    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1403
0
            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1404
0
        Py_DECREF(codec_info);
1405
0
        Py_DECREF(encoding);
1406
0
        Py_DECREF(errors);
1407
0
        return -1;
1408
0
    }
1409
0
    Py_DECREF(codec_info);
1410
1411
0
    Py_SETREF(self->encoding, encoding);
1412
0
    Py_SETREF(self->errors, errors);
1413
1414
0
    return _textiowrapper_fix_encoder_state(self);
1415
0
}
1416
1417
/*[clinic input]
1418
@critical_section
1419
_io.TextIOWrapper.reconfigure
1420
    *
1421
    encoding: object = None
1422
    errors: object = None
1423
    newline as newline_obj: object(c_default="NULL") = None
1424
    line_buffering as line_buffering_obj: object = None
1425
    write_through as write_through_obj: object = None
1426
1427
Reconfigure the text stream with new parameters.
1428
1429
This also does an implicit stream flush.
1430
1431
[clinic start generated code]*/
1432
1433
static PyObject *
1434
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1435
                                   PyObject *errors, PyObject *newline_obj,
1436
                                   PyObject *line_buffering_obj,
1437
                                   PyObject *write_through_obj)
1438
/*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1439
0
{
1440
0
    int line_buffering;
1441
0
    int write_through;
1442
0
    const char *newline = NULL;
1443
1444
0
    if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1445
0
        PyErr_Format(PyExc_TypeError,
1446
0
                "reconfigure() argument 'encoding' must be str or None, not %s",
1447
0
                Py_TYPE(encoding)->tp_name);
1448
0
        return NULL;
1449
0
    }
1450
0
    if (errors != Py_None && !PyUnicode_Check(errors)) {
1451
0
        PyErr_Format(PyExc_TypeError,
1452
0
                "reconfigure() argument 'errors' must be str or None, not %s",
1453
0
                Py_TYPE(errors)->tp_name);
1454
0
        return NULL;
1455
0
    }
1456
0
    if (newline_obj != NULL && newline_obj != Py_None &&
1457
0
        !PyUnicode_Check(newline_obj))
1458
0
    {
1459
0
        PyErr_Format(PyExc_TypeError,
1460
0
                "reconfigure() argument 'newline' must be str or None, not %s",
1461
0
                Py_TYPE(newline_obj)->tp_name);
1462
0
        return NULL;
1463
0
    }
1464
    /* Check if something is in the read buffer */
1465
0
    if (self->decoded_chars != NULL) {
1466
0
        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1467
0
            _unsupported(self->state,
1468
0
                         "It is not possible to set the encoding or newline "
1469
0
                         "of stream after the first read");
1470
0
            return NULL;
1471
0
        }
1472
0
    }
1473
1474
0
    if (newline_obj != NULL && newline_obj != Py_None) {
1475
0
        newline = PyUnicode_AsUTF8(newline_obj);
1476
0
        if (newline == NULL || validate_newline(newline) < 0) {
1477
0
            return NULL;
1478
0
        }
1479
0
    }
1480
1481
0
    line_buffering = convert_optional_bool(line_buffering_obj,
1482
0
                                           self->line_buffering);
1483
0
    if (line_buffering < 0) {
1484
0
        return NULL;
1485
0
    }
1486
0
    write_through = convert_optional_bool(write_through_obj,
1487
0
                                          self->write_through);
1488
0
    if (write_through < 0) {
1489
0
        return NULL;
1490
0
    }
1491
1492
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1493
0
        return NULL;
1494
0
    }
1495
0
    self->b2cratio = 0;
1496
1497
0
    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1498
0
        return NULL;
1499
0
    }
1500
1501
0
    if (textiowrapper_change_encoding(
1502
0
            self, encoding, errors, newline_obj != NULL) < 0) {
1503
0
        return NULL;
1504
0
    }
1505
1506
0
    self->line_buffering = line_buffering;
1507
0
    self->write_through = write_through;
1508
0
    Py_RETURN_NONE;
1509
0
}
1510
1511
static int
1512
textiowrapper_clear(PyObject *op)
1513
19
{
1514
19
    textio *self = textio_CAST(op);
1515
19
    self->ok = 0;
1516
19
    Py_CLEAR(self->buffer);
1517
19
    Py_CLEAR(self->encoding);
1518
19
    Py_CLEAR(self->encoder);
1519
19
    Py_CLEAR(self->decoder);
1520
19
    Py_CLEAR(self->readnl);
1521
19
    Py_CLEAR(self->decoded_chars);
1522
19
    Py_CLEAR(self->pending_bytes);
1523
19
    Py_CLEAR(self->snapshot);
1524
19
    Py_CLEAR(self->errors);
1525
19
    Py_CLEAR(self->raw);
1526
1527
19
    Py_CLEAR(self->dict);
1528
19
    return 0;
1529
19
}
1530
1531
static void
1532
textiowrapper_dealloc(PyObject *op)
1533
19
{
1534
19
    textio *self = textio_CAST(op);
1535
19
    PyTypeObject *tp = Py_TYPE(self);
1536
19
    self->finalizing = 1;
1537
19
    if (_PyIOBase_finalize(op) < 0)
1538
0
        return;
1539
19
    self->ok = 0;
1540
19
    _PyObject_GC_UNTRACK(self);
1541
19
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
1542
19
    (void)textiowrapper_clear(op);
1543
19
    tp->tp_free(self);
1544
19
    Py_DECREF(tp);
1545
19
}
1546
1547
static int
1548
textiowrapper_traverse(PyObject *op, visitproc visit, void *arg)
1549
3.90k
{
1550
3.90k
    textio *self = textio_CAST(op);
1551
3.90k
    Py_VISIT(Py_TYPE(self));
1552
3.90k
    Py_VISIT(self->buffer);
1553
3.90k
    Py_VISIT(self->encoding);
1554
3.90k
    Py_VISIT(self->encoder);
1555
3.90k
    Py_VISIT(self->decoder);
1556
3.90k
    Py_VISIT(self->readnl);
1557
3.90k
    Py_VISIT(self->decoded_chars);
1558
3.90k
    Py_VISIT(self->pending_bytes);
1559
3.90k
    Py_VISIT(self->snapshot);
1560
3.90k
    Py_VISIT(self->errors);
1561
3.90k
    Py_VISIT(self->raw);
1562
1563
3.90k
    Py_VISIT(self->dict);
1564
3.90k
    return 0;
1565
3.90k
}
1566
1567
static PyObject *
1568
_io_TextIOWrapper_closed_get_impl(textio *self);
1569
1570
/* This macro takes some shortcuts to make the common case faster. */
1571
#define CHECK_CLOSED(self) \
1572
282k
    do { \
1573
282k
        int r; \
1574
282k
        PyObject *_res; \
1575
282k
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
1576
282k
            if (self->raw != NULL) \
1577
282k
                r = _PyFileIO_closed(self->raw); \
1578
282k
            else { \
1579
0
                _res = _io_TextIOWrapper_closed_get_impl(self); \
1580
0
                if (_res == NULL) \
1581
0
                    return NULL; \
1582
0
                r = PyObject_IsTrue(_res); \
1583
0
                Py_DECREF(_res); \
1584
0
                if (r < 0) \
1585
0
                    return NULL; \
1586
0
            } \
1587
282k
            if (r > 0) { \
1588
0
                PyErr_SetString(PyExc_ValueError, \
1589
0
                                "I/O operation on closed file."); \
1590
0
                return NULL; \
1591
0
            } \
1592
282k
        } \
1593
282k
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1594
0
            return NULL; \
1595
282k
    } while (0)
1596
1597
#define CHECK_INITIALIZED(self) \
1598
282k
    if (self->ok <= 0) { \
1599
0
        PyErr_SetString(PyExc_ValueError, \
1600
0
            "I/O operation on uninitialized object"); \
1601
0
        return NULL; \
1602
0
    }
1603
1604
#define CHECK_ATTACHED(self) \
1605
282k
    CHECK_INITIALIZED(self); \
1606
282k
    if (self->detached) { \
1607
0
        PyErr_SetString(PyExc_ValueError, \
1608
0
             "underlying buffer has been detached"); \
1609
0
        return NULL; \
1610
0
    }
1611
1612
#define CHECK_ATTACHED_INT(self) \
1613
0
    if (self->ok <= 0) { \
1614
0
        PyErr_SetString(PyExc_ValueError, \
1615
0
            "I/O operation on uninitialized object"); \
1616
0
        return -1; \
1617
0
    } else if (self->detached) { \
1618
0
        PyErr_SetString(PyExc_ValueError, \
1619
0
             "underlying buffer has been detached"); \
1620
0
        return -1; \
1621
0
    }
1622
1623
1624
/*[clinic input]
1625
@critical_section
1626
_io.TextIOWrapper.detach
1627
[clinic start generated code]*/
1628
1629
static PyObject *
1630
_io_TextIOWrapper_detach_impl(textio *self)
1631
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1632
0
{
1633
0
    PyObject *buffer;
1634
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1635
0
        return NULL;
1636
0
    }
1637
    /* _PyFile_Flush could detach before returning; raise an exception. */
1638
0
    buffer = buffer_access_safe(self);
1639
0
    if (buffer == NULL) {
1640
0
        return NULL;
1641
0
    }
1642
0
    self->buffer = NULL;
1643
0
    self->detached = 1;
1644
0
    return buffer;
1645
0
}
1646
1647
/* Flush the internal write buffer. This doesn't explicitly flush the
1648
   underlying buffered object, though. */
1649
static int
1650
_textiowrapper_writeflush(textio *self)
1651
282k
{
1652
282k
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
1653
1654
282k
    if (self->pending_bytes == NULL)
1655
22.2k
        return 0;
1656
1657
260k
    PyObject *pending = self->pending_bytes;
1658
260k
    PyObject *b;
1659
1660
260k
    if (PyBytes_Check(pending)) {
1661
3.04k
        b = Py_NewRef(pending);
1662
3.04k
    }
1663
257k
    else if (PyUnicode_Check(pending)) {
1664
257k
        assert(PyUnicode_IS_ASCII(pending));
1665
257k
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1666
257k
        b = PyBytes_FromStringAndSize(
1667
257k
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1668
257k
        if (b == NULL) {
1669
0
            return -1;
1670
0
        }
1671
257k
    }
1672
0
    else {
1673
0
        assert(PyList_Check(pending));
1674
0
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1675
0
        if (b == NULL) {
1676
0
            return -1;
1677
0
        }
1678
1679
0
        char *buf = PyBytes_AsString(b);
1680
0
        Py_ssize_t pos = 0;
1681
1682
0
        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1683
0
            PyObject *obj = PyList_GET_ITEM(pending, i);
1684
0
            char *src;
1685
0
            Py_ssize_t len;
1686
0
            if (PyUnicode_Check(obj)) {
1687
0
                assert(PyUnicode_IS_ASCII(obj));
1688
0
                src = PyUnicode_DATA(obj);
1689
0
                len = PyUnicode_GET_LENGTH(obj);
1690
0
            }
1691
0
            else {
1692
0
                assert(PyBytes_Check(obj));
1693
0
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1694
0
                    Py_DECREF(b);
1695
0
                    return -1;
1696
0
                }
1697
0
            }
1698
0
            memcpy(buf + pos, src, len);
1699
0
            pos += len;
1700
0
        }
1701
0
        assert(pos == self->pending_bytes_count);
1702
0
    }
1703
1704
260k
    self->pending_bytes_count = 0;
1705
260k
    self->pending_bytes = NULL;
1706
260k
    Py_DECREF(pending);
1707
1708
260k
    PyObject *ret;
1709
260k
    do {
1710
260k
        ret = buffer_callmethod_onearg(self, &_Py_ID(write), b);
1711
260k
    } while (ret == NULL && _PyIO_trap_eintr());
1712
260k
    Py_DECREF(b);
1713
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
1714
    // when an error occurred.
1715
260k
    if (ret == NULL)
1716
0
        return -1;
1717
260k
    Py_DECREF(ret);
1718
260k
    return 0;
1719
260k
}
1720
1721
/*[clinic input]
1722
@critical_section
1723
_io.TextIOWrapper.write
1724
    text: unicode
1725
    /
1726
[clinic start generated code]*/
1727
1728
static PyObject *
1729
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1730
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1731
260k
{
1732
260k
    PyObject *ret;
1733
260k
    PyObject *b;
1734
260k
    Py_ssize_t textlen;
1735
260k
    int haslf = 0;
1736
260k
    int needflush = 0, text_needflush = 0;
1737
1738
260k
    CHECK_ATTACHED(self);
1739
260k
    CHECK_CLOSED(self);
1740
1741
260k
    if (self->encoder == NULL) {
1742
0
        return _unsupported(self->state, "not writable");
1743
0
    }
1744
1745
260k
    Py_INCREF(text);
1746
1747
260k
    textlen = PyUnicode_GET_LENGTH(text);
1748
1749
260k
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1750
260k
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1751
260k
            haslf = 1;
1752
1753
260k
    if (haslf && self->writetranslate && self->writenl != NULL) {
1754
0
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1755
0
                                                 "ss", "\n", self->writenl);
1756
0
        Py_DECREF(text);
1757
0
        if (newtext == NULL)
1758
0
            return NULL;
1759
0
        text = newtext;
1760
0
    }
1761
1762
260k
    if (self->write_through)
1763
0
        text_needflush = 1;
1764
260k
    if (self->line_buffering &&
1765
260k
        (haslf ||
1766
0
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1767
260k
        needflush = 1;
1768
1769
    /* XXX What if we were just reading? */
1770
260k
    if (self->encodefunc != NULL) {
1771
260k
        if (PyUnicode_IS_ASCII(text) &&
1772
                // See bpo-43260
1773
257k
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1774
257k
                is_asciicompat_encoding(self->encodefunc)) {
1775
257k
            b = Py_NewRef(text);
1776
257k
        }
1777
3.04k
        else {
1778
3.04k
            b = (*self->encodefunc)((PyObject *) self, text);
1779
3.04k
        }
1780
260k
        self->encoding_start_of_stream = 0;
1781
260k
    }
1782
0
    else {
1783
0
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1784
0
    }
1785
1786
260k
    Py_DECREF(text);
1787
260k
    if (b == NULL)
1788
0
        return NULL;
1789
260k
    if (b != text && !PyBytes_Check(b)) {
1790
0
        PyErr_Format(PyExc_TypeError,
1791
0
                     "encoder should return a bytes object, not '%.200s'",
1792
0
                     Py_TYPE(b)->tp_name);
1793
0
        Py_DECREF(b);
1794
0
        return NULL;
1795
0
    }
1796
1797
260k
    Py_ssize_t bytes_len;
1798
260k
    if (b == text) {
1799
257k
        bytes_len = PyUnicode_GET_LENGTH(b);
1800
257k
    }
1801
3.04k
    else {
1802
3.04k
        bytes_len = PyBytes_GET_SIZE(b);
1803
3.04k
    }
1804
1805
    // We should avoid concatenating huge data.
1806
    // Flush the buffer before adding b to the buffer if b is not small.
1807
    // https://github.com/python/cpython/issues/87426
1808
260k
    if (bytes_len >= self->chunk_size) {
1809
        // _textiowrapper_writeflush() calls buffer.write().
1810
        // self->pending_bytes can be appended during buffer->write()
1811
        // or other thread.
1812
        // We need to loop until buffer becomes empty.
1813
        // https://github.com/python/cpython/issues/118138
1814
        // https://github.com/python/cpython/issues/119506
1815
0
        while (self->pending_bytes != NULL) {
1816
0
            if (_textiowrapper_writeflush(self) < 0) {
1817
0
                Py_DECREF(b);
1818
0
                return NULL;
1819
0
            }
1820
0
        }
1821
0
    }
1822
1823
260k
    if (self->pending_bytes == NULL) {
1824
260k
        assert(self->pending_bytes_count == 0);
1825
260k
        self->pending_bytes = b;
1826
260k
    }
1827
0
    else if (!PyList_CheckExact(self->pending_bytes)) {
1828
0
        PyObject *list = PyList_New(2);
1829
0
        if (list == NULL) {
1830
0
            Py_DECREF(b);
1831
0
            return NULL;
1832
0
        }
1833
        // Since Python 3.12, allocating GC object won't trigger GC and release
1834
        // GIL. See https://github.com/python/cpython/issues/97922
1835
0
        assert(!PyList_CheckExact(self->pending_bytes));
1836
0
        PyList_SET_ITEM(list, 0, self->pending_bytes);
1837
0
        PyList_SET_ITEM(list, 1, b);
1838
0
        self->pending_bytes = list;
1839
0
    }
1840
0
    else {
1841
0
        if (PyList_Append(self->pending_bytes, b) < 0) {
1842
0
            Py_DECREF(b);
1843
0
            return NULL;
1844
0
        }
1845
0
        Py_DECREF(b);
1846
0
    }
1847
1848
260k
    self->pending_bytes_count += bytes_len;
1849
260k
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1850
260k
        text_needflush) {
1851
260k
        if (_textiowrapper_writeflush(self) < 0)
1852
0
            return NULL;
1853
260k
    }
1854
1855
260k
    if (needflush) {
1856
260k
        PyObject *buffer = buffer_access_safe(self);
1857
260k
        if (buffer == NULL || _PyFile_Flush(buffer) < 0) {
1858
0
            return NULL;
1859
0
        }
1860
260k
    }
1861
1862
260k
    if (self->snapshot != NULL) {
1863
0
        textiowrapper_set_decoded_chars(self, NULL);
1864
0
        Py_CLEAR(self->snapshot);
1865
0
    }
1866
1867
260k
    if (self->decoder) {
1868
0
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1869
0
        if (ret == NULL)
1870
0
            return NULL;
1871
0
        Py_DECREF(ret);
1872
0
    }
1873
1874
260k
    return PyLong_FromSsize_t(textlen);
1875
260k
}
1876
1877
/* Steal a reference to chars (which may be NULL) and store it in the
 * decoded_chars buffer, resetting decoded_chars_used to 0.
1878
 */
1879
static void
1880
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1881
244
{
1882
244
    Py_XSETREF(self->decoded_chars, chars);
1883
244
    self->decoded_chars_used = 0;
1884
244
}
1885
1886
static PyObject *
1887
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1888
8
{
1889
8
    PyObject *chars;
1890
8
    Py_ssize_t avail;
1891
1892
8
    if (self->decoded_chars == NULL)
1893
8
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1894
1895
0
    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1896
0
             - self->decoded_chars_used);
1897
1898
0
    assert(avail >= 0);
1899
1900
0
    if (n < 0 || n > avail)
1901
0
        n = avail;
1902
1903
0
    if (self->decoded_chars_used > 0 || n < avail) {
1904
0
        chars = PyUnicode_Substring(self->decoded_chars,
1905
0
                                    self->decoded_chars_used,
1906
0
                                    self->decoded_chars_used + n);
1907
0
        if (chars == NULL)
1908
0
            return NULL;
1909
0
    }
1910
0
    else {
1911
0
        chars = Py_NewRef(self->decoded_chars);
1912
0
    }
1913
1914
0
    self->decoded_chars_used += n;
1915
0
    return chars;
1916
0
}
1917
1918
/* Read and decode the next chunk of data from the BufferedReader.
1919
 */
1920
static int
1921
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1922
122
{
1923
122
    PyObject *dec_buffer = NULL;
1924
122
    PyObject *dec_flags = NULL;
1925
122
    PyObject *input_chunk = NULL;
1926
122
    Py_buffer input_chunk_buf;
1927
122
    PyObject *decoded_chars, *chunk_size;
1928
122
    Py_ssize_t nbytes, nchars;
1929
122
    int eof;
1930
1931
    /* Returns 1, or 0 if EOF was reached, or -1 on error.  The decoded string is
1932
     * placed in self->decoded_chars (replacing its previous value).  The
1933
     * entire input chunk is sent to the decoder, though some of it may remain
1934
     * buffered in the decoder, yet to be converted.
1935
     */
1936
1937
122
    if (self->decoder == NULL) {
1938
0
        _unsupported(self->state, "not readable");
1939
0
        return -1;
1940
0
    }
1941
1942
122
    if (self->telling) {
1943
        /* To prepare for tell(), we need to snapshot a point in the file
1944
         * where the decoder's input buffer is empty.
1945
         */
1946
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1947
0
                                                     &_Py_ID(getstate));
1948
0
        if (state == NULL)
1949
0
            return -1;
1950
        /* Given this, we know there was a valid snapshot point
1951
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1952
         */
1953
0
        if (!PyTuple_Check(state)) {
1954
0
            PyErr_SetString(PyExc_TypeError,
1955
0
                            "illegal decoder state");
1956
0
            Py_DECREF(state);
1957
0
            return -1;
1958
0
        }
1959
0
        if (!PyArg_ParseTuple(state,
1960
0
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1961
0
        {
1962
0
            Py_DECREF(state);
1963
0
            return -1;
1964
0
        }
1965
1966
0
        if (!PyBytes_Check(dec_buffer)) {
1967
0
            PyErr_Format(PyExc_TypeError,
1968
0
                         "illegal decoder state: the first item should be a "
1969
0
                         "bytes object, not '%.200s'",
1970
0
                         Py_TYPE(dec_buffer)->tp_name);
1971
0
            Py_DECREF(state);
1972
0
            return -1;
1973
0
        }
1974
0
        Py_INCREF(dec_buffer);
1975
0
        Py_INCREF(dec_flags);
1976
0
        Py_DECREF(state);
1977
0
    }
1978
1979
    /* Read a chunk, decode it, and put the result in self->decoded_chars. */
1980
122
    if (size_hint > 0) {
1981
0
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1982
0
    }
1983
122
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1984
122
    if (chunk_size == NULL)
1985
0
        goto fail;
1986
1987
122
    input_chunk = buffer_callmethod_onearg(self,
1988
122
                                           (self->has_read1 ? &_Py_ID(read1) :
1989
122
                                                              &_Py_ID(read)),
1990
122
                                           chunk_size);
1991
122
    Py_DECREF(chunk_size);
1992
122
    if (input_chunk == NULL)
1993
0
        goto fail;
1994
1995
122
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1996
0
        PyErr_Format(PyExc_TypeError,
1997
0
                     "underlying %s() should have returned a bytes-like object, "
1998
0
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1999
0
                     Py_TYPE(input_chunk)->tp_name);
2000
0
        goto fail;
2001
0
    }
2002
2003
122
    nbytes = input_chunk_buf.len;
2004
122
    eof = (nbytes == 0);
2005
2006
122
    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
2007
122
                                          input_chunk, eof);
2008
122
    PyBuffer_Release(&input_chunk_buf);
2009
122
    if (decoded_chars == NULL)
2010
0
        goto fail;
2011
2012
122
    textiowrapper_set_decoded_chars(self, decoded_chars);
2013
122
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
2014
122
    if (nchars > 0)
2015
112
        self->b2cratio = (double) nbytes / nchars;
2016
10
    else
2017
10
        self->b2cratio = 0.0;
2018
122
    if (nchars > 0)
2019
112
        eof = 0;
2020
2021
122
    if (self->telling) {
2022
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
2023
         * next input to be decoded is dec_buffer + input_chunk.
2024
         */
2025
0
        PyObject *next_input = dec_buffer;
2026
0
        PyBytes_Concat(&next_input, input_chunk);
2027
0
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
2028
0
        if (next_input == NULL) {
2029
0
            goto fail;
2030
0
        }
2031
0
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
2032
0
        if (snapshot == NULL) {
2033
0
            dec_flags = NULL;
2034
0
            goto fail;
2035
0
        }
2036
0
        Py_XSETREF(self->snapshot, snapshot);
2037
0
    }
2038
122
    Py_DECREF(input_chunk);
2039
2040
122
    return (eof == 0);
2041
2042
0
  fail:
2043
0
    Py_XDECREF(dec_buffer);
2044
0
    Py_XDECREF(dec_flags);
2045
0
    Py_XDECREF(input_chunk);
2046
0
    return -1;
2047
122
}
2048
2049
/*[clinic input]
2050
@critical_section
2051
_io.TextIOWrapper.read
2052
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
2053
    /
2054
[clinic start generated code]*/
2055
2056
static PyObject *
2057
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
2058
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
2059
8
{
2060
8
    PyObject *result = NULL, *chunks = NULL;
2061
2062
8
    CHECK_ATTACHED(self);
2063
8
    CHECK_CLOSED(self);
2064
2065
8
    if (self->decoder == NULL) {
2066
0
        return _unsupported(self->state, "not readable");
2067
0
    }
2068
2069
8
    if (_textiowrapper_writeflush(self) < 0)
2070
0
        return NULL;
2071
2072
8
    if (n < 0) {
2073
        /* Read everything */
2074
8
        PyObject *bytes = buffer_callmethod_noargs(self, &_Py_ID(read));
2075
8
        PyObject *decoded;
2076
8
        if (bytes == NULL)
2077
0
            goto fail;
2078
2079
8
        if (bytes == Py_None){
2080
0
            Py_DECREF(bytes);
2081
0
            PyErr_SetString(PyExc_BlockingIOError, "Read returned None.");
2082
0
            return NULL;
2083
0
        }
2084
2085
8
        _PyIO_State *state = self->state;
2086
8
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
2087
8
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
2088
8
                                                          bytes, 1);
2089
0
        else
2090
0
            decoded = PyObject_CallMethodObjArgs(
2091
0
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2092
8
        Py_DECREF(bytes);
2093
8
        if (check_decoded(decoded) < 0)
2094
0
            goto fail;
2095
2096
8
        result = textiowrapper_get_decoded_chars(self, -1);
2097
2098
8
        if (result == NULL) {
2099
0
            Py_DECREF(decoded);
2100
0
            return NULL;
2101
0
        }
2102
2103
8
        PyUnicode_AppendAndDel(&result, decoded);
2104
8
        if (result == NULL)
2105
0
            goto fail;
2106
2107
8
        if (self->snapshot != NULL) {
2108
0
            textiowrapper_set_decoded_chars(self, NULL);
2109
0
            Py_CLEAR(self->snapshot);
2110
0
        }
2111
8
        return result;
2112
8
    }
2113
0
    else {
2114
0
        int res = 1;
2115
0
        Py_ssize_t remaining = n;
2116
2117
0
        result = textiowrapper_get_decoded_chars(self, n);
2118
0
        if (result == NULL)
2119
0
            goto fail;
2120
0
        remaining -= PyUnicode_GET_LENGTH(result);
2121
2122
        /* Keep reading chunks until we have n characters to return */
2123
0
        while (remaining > 0) {
2124
0
            res = textiowrapper_read_chunk(self, remaining);
2125
0
            if (res < 0) {
2126
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2127
                   when EINTR occurs so we needn't do it ourselves. */
2128
0
                if (_PyIO_trap_eintr()) {
2129
0
                    continue;
2130
0
                }
2131
0
                goto fail;
2132
0
            }
2133
0
            if (res == 0)  /* EOF */
2134
0
                break;
2135
0
            if (chunks == NULL) {
2136
0
                chunks = PyList_New(0);
2137
0
                if (chunks == NULL)
2138
0
                    goto fail;
2139
0
            }
2140
0
            if (PyUnicode_GET_LENGTH(result) > 0 &&
2141
0
                PyList_Append(chunks, result) < 0)
2142
0
                goto fail;
2143
0
            Py_DECREF(result);
2144
0
            result = textiowrapper_get_decoded_chars(self, remaining);
2145
0
            if (result == NULL)
2146
0
                goto fail;
2147
0
            remaining -= PyUnicode_GET_LENGTH(result);
2148
0
        }
2149
0
        if (chunks != NULL) {
2150
0
            if (result != NULL && PyList_Append(chunks, result) < 0)
2151
0
                goto fail;
2152
0
            _Py_DECLARE_STR(empty, "");
2153
0
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2154
0
            if (result == NULL)
2155
0
                goto fail;
2156
0
            Py_CLEAR(chunks);
2157
0
        }
2158
0
        return result;
2159
0
    }
2160
0
  fail:
2161
0
    Py_XDECREF(result);
2162
0
    Py_XDECREF(chunks);
2163
0
    return NULL;
2164
8
}
2165
2166
2167
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2168
   that is to the NUL character. Otherwise the function will produce
2169
   incorrect results. */
2170
static const char *
2171
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2172
22.3k
{
2173
22.3k
    if (kind == PyUnicode_1BYTE_KIND) {
2174
21.6k
        assert(ch < 256);
2175
21.6k
        return (char *) memchr((const void *) s, (char) ch, end - s);
2176
21.6k
    }
2177
776
    for (;;) {
2178
35.4k
        while (PyUnicode_READ(kind, s, 0) > ch)
2179
34.7k
            s += kind;
2180
776
        if (PyUnicode_READ(kind, s, 0) == ch)
2181
772
            return s;
2182
4
        if (s == end)
2183
4
            return NULL;
2184
0
        s += kind;
2185
0
    }
2186
776
}
2187
2188
Py_ssize_t
2189
_PyIO_find_line_ending(
2190
    int translated, int universal, PyObject *readnl,
2191
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2192
19.4M
{
2193
19.4M
    Py_ssize_t len = (end - start)/kind;
2194
2195
19.4M
    if (translated) {
2196
        /* Newlines are already translated, only search for \n */
2197
22.3k
        const char *pos = find_control_char(kind, start, end, '\n');
2198
22.3k
        if (pos != NULL)
2199
22.2k
            return (pos - start)/kind + 1;
2200
112
        else {
2201
112
            *consumed = len;
2202
112
            return -1;
2203
112
        }
2204
22.3k
    }
2205
19.4M
    else if (universal) {
2206
        /* Universal newline search. Find any of \r, \r\n, \n
2207
         * The decoder ensures that \r\n are not split in two pieces
2208
         */
2209
19.4M
        const char *s = start;
2210
87.0M
        for (;;) {
2211
87.0M
            Py_UCS4 ch;
2212
            /* Fast path for non-control chars. The loop always ends
2213
               since the Unicode string is NUL-terminated. */
2214
240M
            while (PyUnicode_READ(kind, s, 0) > '\r')
2215
153M
                s += kind;
2216
87.0M
            if (s >= end) {
2217
30.7k
                *consumed = len;
2218
30.7k
                return -1;
2219
30.7k
            }
2220
86.9M
            ch = PyUnicode_READ(kind, s, 0);
2221
86.9M
            s += kind;
2222
86.9M
            if (ch == '\n')
2223
4.68M
                return (s - start)/kind;
2224
82.2M
            if (ch == '\r') {
2225
14.7M
                if (PyUnicode_READ(kind, s, 0) == '\n')
2226
440k
                    return (s - start)/kind + 1;
2227
14.3M
                else
2228
14.3M
                    return (s - start)/kind;
2229
14.7M
            }
2230
82.2M
        }
2231
19.4M
    }
2232
64
    else {
2233
        /* Non-universal mode. */
2234
64
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2235
64
        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2236
        /* Assume that readnl is an ASCII character. */
2237
64
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2238
64
        if (readnl_len == 1) {
2239
64
            const char *pos = find_control_char(kind, start, end, nl[0]);
2240
64
            if (pos != NULL)
2241
64
                return (pos - start)/kind + 1;
2242
0
            *consumed = len;
2243
0
            return -1;
2244
64
        }
2245
0
        else {
2246
0
            const char *s = start;
2247
0
            const char *e = end - (readnl_len - 1)*kind;
2248
0
            const char *pos;
2249
0
            if (e < s)
2250
0
                e = s;
2251
0
            while (s < e) {
2252
0
                Py_ssize_t i;
2253
0
                const char *pos = find_control_char(kind, s, end, nl[0]);
2254
0
                if (pos == NULL || pos >= e)
2255
0
                    break;
2256
0
                for (i = 1; i < readnl_len; i++) {
2257
0
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2258
0
                        break;
2259
0
                }
2260
0
                if (i == readnl_len)
2261
0
                    return (pos - start)/kind + readnl_len;
2262
0
                s = pos + kind;
2263
0
            }
2264
0
            pos = find_control_char(kind, e, end, nl[0]);
2265
0
            if (pos == NULL)
2266
0
                *consumed = len;
2267
0
            else
2268
0
                *consumed = (pos - start)/kind;
2269
0
            return -1;
2270
0
        }
2271
64
    }
2272
19.4M
}
2273
2274
static PyObject *
2275
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2276
22.2k
{
2277
22.2k
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2278
22.2k
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
2279
22.2k
    int res;
2280
2281
22.2k
    CHECK_CLOSED(self);
2282
2283
22.2k
    if (_textiowrapper_writeflush(self) < 0)
2284
0
        return NULL;
2285
2286
22.2k
    chunked = 0;
2287
2288
22.3k
    while (1) {
2289
22.3k
        const char *ptr;
2290
22.3k
        Py_ssize_t line_len;
2291
22.3k
        int kind;
2292
22.3k
        Py_ssize_t consumed = 0;
2293
2294
        /* First, get some data if necessary */
2295
22.3k
        res = 1;
2296
22.4k
        while (!self->decoded_chars ||
2297
22.3k
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2298
122
            res = textiowrapper_read_chunk(self, 0);
2299
122
            if (res < 0) {
2300
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2301
                   when EINTR occurs so we needn't do it ourselves. */
2302
0
                if (_PyIO_trap_eintr()) {
2303
0
                    continue;
2304
0
                }
2305
0
                goto error;
2306
0
            }
2307
122
            if (res == 0)
2308
10
                break;
2309
122
        }
2310
22.3k
        if (res == 0) {
2311
            /* end of file */
2312
10
            textiowrapper_set_decoded_chars(self, NULL);
2313
10
            Py_CLEAR(self->snapshot);
2314
10
            start = endpos = offset_to_buffer = 0;
2315
10
            break;
2316
10
        }
2317
2318
22.3k
        if (remaining == NULL) {
2319
22.3k
            line = Py_NewRef(self->decoded_chars);
2320
22.3k
            start = self->decoded_chars_used;
2321
22.3k
            offset_to_buffer = 0;
2322
22.3k
        }
2323
0
        else {
2324
0
            assert(self->decoded_chars_used == 0);
2325
0
            line = PyUnicode_Concat(remaining, self->decoded_chars);
2326
0
            start = 0;
2327
0
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2328
0
            Py_CLEAR(remaining);
2329
0
            if (line == NULL)
2330
0
                goto error;
2331
0
        }
2332
2333
22.3k
        ptr = PyUnicode_DATA(line);
2334
22.3k
        line_len = PyUnicode_GET_LENGTH(line);
2335
22.3k
        kind = PyUnicode_KIND(line);
2336
2337
22.3k
        endpos = _PyIO_find_line_ending(
2338
22.3k
            self->readtranslate, self->readuniversal, self->readnl,
2339
22.3k
            kind,
2340
22.3k
            ptr + kind * start,
2341
22.3k
            ptr + kind * line_len,
2342
22.3k
            &consumed);
2343
22.3k
        if (endpos >= 0) {
2344
22.2k
            endpos += start;
2345
22.2k
            if (limit >= 0 && (endpos - start) + chunked >= limit)
2346
0
                endpos = start + limit - chunked;
2347
22.2k
            break;
2348
22.2k
        }
2349
2350
        /* We can put aside up to `endpos` */
2351
112
        endpos = consumed + start;
2352
112
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
2353
            /* Didn't find line ending, but reached length limit */
2354
0
            endpos = start + limit - chunked;
2355
0
            break;
2356
0
        }
2357
2358
112
        if (endpos > start) {
2359
            /* No line ending seen yet - put aside current data */
2360
102
            PyObject *s;
2361
102
            if (chunks == NULL) {
2362
102
                chunks = PyList_New(0);
2363
102
                if (chunks == NULL)
2364
0
                    goto error;
2365
102
            }
2366
102
            s = PyUnicode_Substring(line, start, endpos);
2367
102
            if (s == NULL)
2368
0
                goto error;
2369
102
            if (PyList_Append(chunks, s) < 0) {
2370
0
                Py_DECREF(s);
2371
0
                goto error;
2372
0
            }
2373
102
            chunked += PyUnicode_GET_LENGTH(s);
2374
102
            Py_DECREF(s);
2375
102
        }
2376
        /* There may be some remaining characters we'll have to prepend to the
2377
           next chunk of data */
2378
112
        if (endpos < line_len) {
2379
0
            remaining = PyUnicode_Substring(line, endpos, line_len);
2380
0
            if (remaining == NULL)
2381
0
                goto error;
2382
0
        }
2383
112
        Py_CLEAR(line);
2384
        /* We have consumed the buffer */
2385
112
        textiowrapper_set_decoded_chars(self, NULL);
2386
112
    }
2387
2388
22.2k
    if (line != NULL) {
2389
        /* Our line ends in the current buffer */
2390
22.2k
        self->decoded_chars_used = endpos - offset_to_buffer;
2391
22.2k
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2392
22.2k
            PyObject *s = PyUnicode_Substring(line, start, endpos);
2393
22.2k
            Py_CLEAR(line);
2394
22.2k
            if (s == NULL)
2395
0
                goto error;
2396
22.2k
            line = s;
2397
22.2k
        }
2398
22.2k
    }
2399
22.2k
    if (remaining != NULL) {
2400
0
        if (chunks == NULL) {
2401
0
            chunks = PyList_New(0);
2402
0
            if (chunks == NULL)
2403
0
                goto error;
2404
0
        }
2405
0
        if (PyList_Append(chunks, remaining) < 0)
2406
0
            goto error;
2407
0
        Py_CLEAR(remaining);
2408
0
    }
2409
22.2k
    if (chunks != NULL) {
2410
102
        if (line != NULL) {
2411
102
            if (PyList_Append(chunks, line) < 0)
2412
0
                goto error;
2413
102
            Py_DECREF(line);
2414
102
        }
2415
102
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
2416
102
        if (line == NULL)
2417
0
            goto error;
2418
102
        Py_CLEAR(chunks);
2419
102
    }
2420
22.2k
    if (line == NULL) {
2421
10
        line = &_Py_STR(empty);
2422
10
    }
2423
2424
22.2k
    return line;
2425
2426
0
  error:
2427
0
    Py_XDECREF(chunks);
2428
0
    Py_XDECREF(remaining);
2429
0
    Py_XDECREF(line);
2430
0
    return NULL;
2431
22.2k
}
2432
2433
/*[clinic input]
2434
@critical_section
2435
_io.TextIOWrapper.readline
2436
    size: Py_ssize_t = -1
2437
    /
2438
[clinic start generated code]*/
2439
2440
static PyObject *
2441
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2442
/*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2443
0
{
2444
0
    CHECK_ATTACHED(self);
2445
0
    return _textiowrapper_readline(self, size);
2446
0
}
2447
2448
/* Seek and Tell */
2449
2450
typedef struct {
2451
    Py_off_t start_pos;
2452
    int dec_flags;
2453
    int bytes_to_feed;
2454
    int chars_to_skip;
2455
    char need_eof;
2456
} cookie_type;
2457
2458
/*
2459
   To speed up cookie packing/unpacking, we store the fields in a temporary
2460
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
2461
   The following macros define at which offsets in the intermediary byte
2462
   string the various cookie_type fields will be stored.
2463
 */
2464
2465
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2466
2467
#if PY_BIG_ENDIAN
2468
/* We want the least significant byte of start_pos to also be the least
2469
   significant byte of the cookie, which means that in big-endian mode we
2470
   must copy the fields in reverse order. */
2471
2472
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2473
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2474
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2475
# define OFF_CHARS_TO_SKIP  (sizeof(char))
2476
# define OFF_NEED_EOF       0
2477
2478
#else
2479
/* Little-endian mode: the least significant byte of start_pos will
2480
   naturally end up the least significant byte of the cookie. */
2481
2482
0
# define OFF_START_POS      0
2483
0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2484
0
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2485
0
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2486
0
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2487
2488
#endif
2489
2490
static int
2491
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2492
0
{
2493
0
    unsigned char buffer[COOKIE_BUF_LEN];
2494
0
    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2495
0
    if (cookieLong == NULL)
2496
0
        return -1;
2497
2498
0
    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2499
0
                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
2500
0
        Py_DECREF(cookieLong);
2501
0
        return -1;
2502
0
    }
2503
0
    Py_DECREF(cookieLong);
2504
2505
0
    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2506
0
    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2507
0
    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2508
0
    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2509
0
    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2510
2511
0
    return 0;
2512
0
}
2513
2514
static PyObject *
2515
textiowrapper_build_cookie(cookie_type *cookie)
2516
0
{
2517
0
    unsigned char buffer[COOKIE_BUF_LEN];
2518
2519
0
    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2520
0
    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2521
0
    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2522
0
    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2523
0
    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2524
2525
0
    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2526
0
                                 PY_LITTLE_ENDIAN, 0);
2527
0
}
2528
2529
static int
2530
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2531
0
{
2532
0
    PyObject *res;
2533
    /* When seeking to the start of the stream, we call decoder.reset()
2534
       rather than decoder.setstate().
2535
       This is for a few decoders such as utf-16 for which the state value
2536
       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2537
       utf-16, that we are expecting a BOM).
2538
    */
2539
0
    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2540
0
        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2541
0
    }
2542
0
    else {
2543
0
        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2544
0
                                   "((yi))", "", cookie->dec_flags);
2545
0
    }
2546
0
    if (res == NULL) {
2547
0
        return -1;
2548
0
    }
2549
0
    Py_DECREF(res);
2550
0
    return 0;
2551
0
}
2552
2553
static int
2554
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2555
0
{
2556
0
    PyObject *res;
2557
0
    if (start_of_stream) {
2558
0
        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2559
0
        self->encoding_start_of_stream = 1;
2560
0
    }
2561
0
    else {
2562
0
        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2563
0
                                        _PyLong_GetZero());
2564
0
        self->encoding_start_of_stream = 0;
2565
0
    }
2566
0
    if (res == NULL)
2567
0
        return -1;
2568
0
    Py_DECREF(res);
2569
0
    return 0;
2570
0
}
2571
2572
static int
2573
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2574
0
{
2575
    /* Same as _textiowrapper_decoder_setstate() above. */
2576
0
    return _textiowrapper_encoder_reset(
2577
0
        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2578
0
}
2579
2580
/*[clinic input]
2581
@critical_section
2582
_io.TextIOWrapper.seek
2583
    cookie as cookieObj: object
2584
      Zero or an opaque number returned by tell().
2585
    whence: int(c_default='0') = os.SEEK_SET
2586
      The relative position to seek from.
2587
    /
2588
2589
Set the stream position, and return the new stream position.
2590
2591
Four operations are supported, given by the following argument
2592
combinations:
2593
2594
- seek(0, SEEK_SET): Rewind to the start of the stream.
2595
- seek(cookie, SEEK_SET): Restore a previous position;
2596
  'cookie' must be a number returned by tell().
2597
- seek(0, SEEK_END): Fast-forward to the end of the stream.
2598
- seek(0, SEEK_CUR): Leave the current stream position unchanged.
2599
2600
Any other argument combinations are invalid,
2601
and may raise exceptions.
2602
[clinic start generated code]*/
2603
2604
static PyObject *
2605
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2606
/*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2607
0
{
2608
0
    PyObject *posobj;
2609
0
    cookie_type cookie;
2610
0
    PyObject *res;
2611
0
    int cmp;
2612
0
    PyObject *snapshot;
2613
2614
0
    CHECK_ATTACHED(self);
2615
0
    CHECK_CLOSED(self);
2616
2617
0
    Py_INCREF(cookieObj);
2618
2619
0
    if (!self->seekable) {
2620
0
        _unsupported(self->state, "underlying stream is not seekable");
2621
0
        goto fail;
2622
0
    }
2623
2624
0
    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2625
2626
0
    switch (whence) {
2627
0
    case SEEK_CUR:
2628
        /* seek relative to current position */
2629
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2630
0
        if (cmp < 0)
2631
0
            goto fail;
2632
2633
0
        if (cmp == 0) {
2634
0
            _unsupported(self->state, "can't do nonzero cur-relative seeks");
2635
0
            goto fail;
2636
0
        }
2637
2638
        /* Seeking to the current position should attempt to
2639
         * sync the underlying buffer with the current position.
2640
         */
2641
0
        Py_DECREF(cookieObj);
2642
0
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2643
0
        if (cookieObj == NULL)
2644
0
            goto fail;
2645
0
        break;
2646
2647
0
    case SEEK_END:
2648
        /* seek relative to end of file */
2649
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2650
0
        if (cmp < 0)
2651
0
            goto fail;
2652
2653
0
        if (cmp == 0) {
2654
0
            _unsupported(self->state, "can't do nonzero end-relative seeks");
2655
0
            goto fail;
2656
0
        }
2657
2658
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
2659
0
            goto fail;
2660
0
        }
2661
2662
0
        textiowrapper_set_decoded_chars(self, NULL);
2663
0
        Py_CLEAR(self->snapshot);
2664
0
        if (self->decoder) {
2665
0
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2666
0
            if (res == NULL)
2667
0
                goto fail;
2668
0
            Py_DECREF(res);
2669
0
        }
2670
2671
0
        PyObject *buf = buffer_access_safe(self);
2672
0
        if (buf == NULL) {
2673
0
            goto fail;
2674
0
        }
2675
0
        res = _PyObject_CallMethod(buf, &_Py_ID(seek), "ii", 0, 2);
2676
0
        Py_CLEAR(cookieObj);
2677
0
        if (res == NULL)
2678
0
            goto fail;
2679
0
        if (self->encoder) {
2680
            /* If seek() == 0, we are at the start of stream, otherwise not */
2681
0
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2682
0
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2683
0
                Py_DECREF(res);
2684
0
                goto fail;
2685
0
            }
2686
0
        }
2687
0
        return res;
2688
2689
0
    case SEEK_SET:
2690
0
        break;
2691
2692
0
    default:
2693
0
        PyErr_Format(PyExc_ValueError,
2694
0
                     "invalid whence (%d, should be %d, %d or %d)", whence,
2695
0
                     SEEK_SET, SEEK_CUR, SEEK_END);
2696
0
        goto fail;
2697
0
    }
2698
2699
0
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2700
0
    if (cmp < 0)
2701
0
        goto fail;
2702
2703
0
    if (cmp == 1) {
2704
0
        PyErr_Format(PyExc_ValueError,
2705
0
                     "negative seek position %R", cookieObj);
2706
0
        goto fail;
2707
0
    }
2708
2709
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2710
0
        goto fail;
2711
0
    }
2712
2713
    /* The strategy of seek() is to go back to the safe start point
2714
     * and replay the effect of read(chars_to_skip) from there.
2715
     */
2716
0
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2717
0
        goto fail;
2718
2719
    /* Seek back to the safe start point. */
2720
0
    posobj = PyLong_FromOff_t(cookie.start_pos);
2721
0
    if (posobj == NULL)
2722
0
        goto fail;
2723
0
    res = buffer_callmethod_onearg(self, &_Py_ID(seek), posobj);
2724
0
    Py_DECREF(posobj);
2725
0
    if (res == NULL)
2726
0
        goto fail;
2727
0
    Py_DECREF(res);
2728
2729
0
    textiowrapper_set_decoded_chars(self, NULL);
2730
0
    Py_CLEAR(self->snapshot);
2731
2732
    /* Restore the decoder to its state from the safe start point. */
2733
0
    if (self->decoder) {
2734
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2735
0
            goto fail;
2736
0
    }
2737
2738
0
    if (cookie.chars_to_skip) {
2739
        /* Just like textiowrapper_read_chunk(), feed the decoder and save a snapshot. */
2740
0
        PyObject *bytes_to_feed = PyLong_FromLong(cookie.bytes_to_feed);
2741
0
        if (bytes_to_feed == NULL) {
2742
0
            goto fail;
2743
0
        }
2744
0
        PyObject *input_chunk = buffer_callmethod_onearg(self,
2745
0
                                                         &_Py_ID(read),
2746
0
                                                         bytes_to_feed);
2747
0
        Py_DECREF(bytes_to_feed);
2748
2749
0
        PyObject *decoded;
2750
2751
0
        if (input_chunk == NULL)
2752
0
            goto fail;
2753
2754
0
        if (!PyBytes_Check(input_chunk)) {
2755
0
            PyErr_Format(PyExc_TypeError,
2756
0
                         "underlying read() should have returned a bytes "
2757
0
                         "object, not '%.200s'",
2758
0
                         Py_TYPE(input_chunk)->tp_name);
2759
0
            Py_DECREF(input_chunk);
2760
0
            goto fail;
2761
0
        }
2762
2763
0
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2764
0
        if (snapshot == NULL) {
2765
0
            goto fail;
2766
0
        }
2767
0
        Py_XSETREF(self->snapshot, snapshot);
2768
2769
0
        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2770
0
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2771
2772
0
        if (check_decoded(decoded) < 0)
2773
0
            goto fail;
2774
2775
0
        textiowrapper_set_decoded_chars(self, decoded);
2776
2777
        /* Skip chars_to_skip of the decoded characters. */
2778
0
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2779
0
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2780
0
            goto fail;
2781
0
        }
2782
0
        self->decoded_chars_used = cookie.chars_to_skip;
2783
0
    }
2784
0
    else {
2785
0
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2786
0
        if (snapshot == NULL)
2787
0
            goto fail;
2788
0
        Py_XSETREF(self->snapshot, snapshot);
2789
0
    }
2790
2791
    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2792
0
    if (self->encoder) {
2793
0
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2794
0
            goto fail;
2795
0
    }
2796
0
    return cookieObj;
2797
0
  fail:
2798
0
    Py_XDECREF(cookieObj);
2799
0
    return NULL;
2800
2801
0
}
2802
2803
/*[clinic input]
2804
@critical_section
2805
_io.TextIOWrapper.tell
2806
2807
Return the stream position as an opaque number.
2808
2809
The return value of tell() can be given as input to seek(), to
2810
restore a previous stream position.
2811
[clinic start generated code]*/
2812
2813
static PyObject *
2814
_io_TextIOWrapper_tell_impl(textio *self)
2815
/*[clinic end generated code: output=4f168c08bf34ad5f input=aeece020f747fd92]*/
2816
0
{
2817
0
    PyObject *res;
2818
0
    PyObject *posobj = NULL;
2819
0
    cookie_type cookie = {0,0,0,0,0};
2820
0
    PyObject *next_input;
2821
0
    Py_ssize_t chars_to_skip, chars_decoded;
2822
0
    Py_ssize_t skip_bytes, skip_back;
2823
0
    PyObject *saved_state = NULL;
2824
0
    const char *input, *input_end;
2825
0
    Py_ssize_t dec_buffer_len;
2826
0
    int dec_flags;
2827
2828
0
    CHECK_ATTACHED(self);
2829
0
    CHECK_CLOSED(self);
2830
2831
0
    if (!self->seekable) {
2832
0
        _unsupported(self->state, "underlying stream is not seekable");
2833
0
        goto fail;
2834
0
    }
2835
0
    if (!self->telling) {
2836
0
        PyErr_SetString(PyExc_OSError,
2837
0
                        "telling position disabled by next() call");
2838
0
        goto fail;
2839
0
    }
2840
2841
0
    if (_textiowrapper_writeflush(self) < 0)
2842
0
        return NULL;
2843
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2844
0
        goto fail;
2845
0
    }
2846
2847
0
    posobj = buffer_callmethod_noargs(self, &_Py_ID(tell));
2848
0
    if (posobj == NULL)
2849
0
        goto fail;
2850
2851
0
    if (self->decoder == NULL || self->snapshot == NULL) {
2852
0
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2853
0
        return posobj;
2854
0
    }
2855
2856
#if defined(HAVE_LARGEFILE_SUPPORT)
2857
    cookie.start_pos = PyLong_AsLongLong(posobj);
2858
#else
2859
0
    cookie.start_pos = PyLong_AsLong(posobj);
2860
0
#endif
2861
0
    Py_DECREF(posobj);
2862
0
    if (PyErr_Occurred())
2863
0
        goto fail;
2864
2865
    /* Skip backward to the snapshot point (see _read_chunk). */
2866
0
    assert(PyTuple_Check(self->snapshot));
2867
0
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2868
0
        goto fail;
2869
2870
0
    assert (PyBytes_Check(next_input));
2871
2872
0
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2873
2874
    /* How many decoded characters have been used up since the snapshot? */
2875
0
    if (self->decoded_chars_used == 0)  {
2876
        /* We haven't moved from the snapshot point. */
2877
0
        return textiowrapper_build_cookie(&cookie);
2878
0
    }
2879
2880
0
    chars_to_skip = self->decoded_chars_used;
2881
2882
    /* Decoder state will be restored at the end */
2883
0
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2884
0
                                             &_Py_ID(getstate));
2885
0
    if (saved_state == NULL)
2886
0
        goto fail;
2887
2888
0
#define DECODER_GETSTATE() do { \
2889
0
        PyObject *dec_buffer; \
2890
0
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2891
0
            &_Py_ID(getstate)); \
2892
0
        if (_state == NULL) \
2893
0
            goto fail; \
2894
0
        if (!PyTuple_Check(_state)) { \
2895
0
            PyErr_SetString(PyExc_TypeError, \
2896
0
                            "illegal decoder state"); \
2897
0
            Py_DECREF(_state); \
2898
0
            goto fail; \
2899
0
        } \
2900
0
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2901
0
                              &dec_buffer, &dec_flags)) \
2902
0
        { \
2903
0
            Py_DECREF(_state); \
2904
0
            goto fail; \
2905
0
        } \
2906
0
        if (!PyBytes_Check(dec_buffer)) { \
2907
0
            PyErr_Format(PyExc_TypeError, \
2908
0
                         "illegal decoder state: the first item should be a " \
2909
0
                         "bytes object, not '%.200s'", \
2910
0
                         Py_TYPE(dec_buffer)->tp_name); \
2911
0
            Py_DECREF(_state); \
2912
0
            goto fail; \
2913
0
        } \
2914
0
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2915
0
        Py_DECREF(_state); \
2916
0
    } while (0)
2917
2918
0
#define DECODER_DECODE(start, len, res) do { \
2919
0
        PyObject *_decoded = _PyObject_CallMethod( \
2920
0
            self->decoder, &_Py_ID(decode), "y#", start, len); \
2921
0
        if (check_decoded(_decoded) < 0) \
2922
0
            goto fail; \
2923
0
        res = PyUnicode_GET_LENGTH(_decoded); \
2924
0
        Py_DECREF(_decoded); \
2925
0
    } while (0)
2926
2927
    /* Fast search for an acceptable start point, close to our
2928
       current pos */
2929
0
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2930
0
    skip_back = 1;
2931
0
    assert(skip_bytes <= PyBytes_GET_SIZE(next_input));
2932
0
    input = PyBytes_AS_STRING(next_input);
2933
0
    while (skip_bytes > 0) {
2934
        /* Decode up to temptative start point */
2935
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2936
0
            goto fail;
2937
0
        DECODER_DECODE(input, skip_bytes, chars_decoded);
2938
0
        if (chars_decoded <= chars_to_skip) {
2939
0
            DECODER_GETSTATE();
2940
0
            if (dec_buffer_len == 0) {
2941
                /* Before pos and no bytes buffered in decoder => OK */
2942
0
                cookie.dec_flags = dec_flags;
2943
0
                chars_to_skip -= chars_decoded;
2944
0
                break;
2945
0
            }
2946
            /* Skip back by buffered amount and reset heuristic */
2947
0
            skip_bytes -= dec_buffer_len;
2948
0
            skip_back = 1;
2949
0
        }
2950
0
        else {
2951
            /* We're too far ahead, skip back a bit */
2952
0
            skip_bytes -= skip_back;
2953
0
            skip_back *= 2;
2954
0
        }
2955
0
    }
2956
0
    if (skip_bytes <= 0) {
2957
0
        skip_bytes = 0;
2958
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2959
0
            goto fail;
2960
0
    }
2961
2962
    /* Note our initial start point. */
2963
0
    cookie.start_pos += skip_bytes;
2964
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2965
0
    if (chars_to_skip == 0)
2966
0
        goto finally;
2967
2968
    /* We should be close to the desired position.  Now feed the decoder one
2969
     * byte at a time until we reach the `chars_to_skip` target.
2970
     * As we go, note the nearest "safe start point" before the current
2971
     * location (a point where the decoder has nothing buffered, so seek()
2972
     * can safely start from there and advance to this location).
2973
     */
2974
0
    chars_decoded = 0;
2975
0
    input = PyBytes_AS_STRING(next_input);
2976
0
    input_end = input + PyBytes_GET_SIZE(next_input);
2977
0
    input += skip_bytes;
2978
0
    while (input < input_end) {
2979
0
        Py_ssize_t n;
2980
2981
0
        DECODER_DECODE(input, (Py_ssize_t)1, n);
2982
        /* We got n chars for 1 byte */
2983
0
        chars_decoded += n;
2984
0
        cookie.bytes_to_feed += 1;
2985
0
        DECODER_GETSTATE();
2986
2987
0
        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2988
            /* Decoder buffer is empty, so this is a safe start point. */
2989
0
            cookie.start_pos += cookie.bytes_to_feed;
2990
0
            chars_to_skip -= chars_decoded;
2991
0
            cookie.dec_flags = dec_flags;
2992
0
            cookie.bytes_to_feed = 0;
2993
0
            chars_decoded = 0;
2994
0
        }
2995
0
        if (chars_decoded >= chars_to_skip)
2996
0
            break;
2997
0
        input++;
2998
0
    }
2999
0
    if (input == input_end) {
3000
        /* We didn't get enough decoded data; signal EOF to get more. */
3001
0
        PyObject *decoded = _PyObject_CallMethod(
3002
0
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
3003
0
        if (check_decoded(decoded) < 0)
3004
0
            goto fail;
3005
0
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
3006
0
        Py_DECREF(decoded);
3007
0
        cookie.need_eof = 1;
3008
3009
0
        if (chars_decoded < chars_to_skip) {
3010
0
            PyErr_SetString(PyExc_OSError,
3011
0
                            "can't reconstruct logical file position");
3012
0
            goto fail;
3013
0
        }
3014
0
    }
3015
3016
0
finally:
3017
0
    res = PyObject_CallMethodOneArg(
3018
0
            self->decoder, &_Py_ID(setstate), saved_state);
3019
0
    Py_DECREF(saved_state);
3020
0
    if (res == NULL)
3021
0
        return NULL;
3022
0
    Py_DECREF(res);
3023
3024
    /* The returned cookie corresponds to the last safe start point. */
3025
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
3026
0
    return textiowrapper_build_cookie(&cookie);
3027
3028
0
fail:
3029
0
    if (saved_state) {
3030
0
        PyObject *exc = PyErr_GetRaisedException();
3031
0
        res = PyObject_CallMethodOneArg(
3032
0
                self->decoder, &_Py_ID(setstate), saved_state);
3033
0
        _PyErr_ChainExceptions1(exc);
3034
0
        Py_DECREF(saved_state);
3035
0
        Py_XDECREF(res);
3036
0
    }
3037
0
    return NULL;
3038
0
}
3039
3040
/*[clinic input]
3041
@critical_section
3042
_io.TextIOWrapper.truncate
3043
    pos: object = None
3044
    /
3045
[clinic start generated code]*/
3046
3047
static PyObject *
3048
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
3049
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
3050
0
{
3051
0
    CHECK_ATTACHED(self)
3052
3053
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
3054
0
        return NULL;
3055
0
    }
3056
3057
0
    return buffer_callmethod_onearg(self, &_Py_ID(truncate), pos);
3058
0
}
3059
3060
/* tp_repr slot: build "<TYPE[ name=...][ mode=...] encoding=...>".
 *
 * The `name` and `mode` parts are included only when the corresponding
 * attribute lookup yields a value.  A ValueError raised while looking up
 * `name` is swallowed; any other lookup error aborts the repr.
 * Py_ReprEnter()/Py_ReprLeave() guard against recursive invocation.
 * Returns the new string, or NULL with an exception set.
 */
static PyObject *
textiowrapper_repr(PyObject *op)
{
    textio *self = textio_CAST(op);
    const char *type_name = Py_TYPE(self)->tp_name;
    PyObject *name_attr;
    PyObject *mode_attr;
    PyObject *piece;
    PyObject *repr;
    int enter_rc;

    CHECK_INITIALIZED(self);

    repr = PyUnicode_FromFormat("<%.100s", type_name);
    if (repr == NULL) {
        return NULL;
    }

    enter_rc = Py_ReprEnter(op);
    if (enter_rc > 0) {
        /* Already inside this object's repr. */
        PyErr_Format(PyExc_RuntimeError,
                     "reentrant call inside %.100s.__repr__",
                     type_name);
        goto error;
    }
    if (enter_rc < 0) {
        goto error;
    }

    /* Optional " name=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &name_attr) < 0) {
        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
            goto error;
        }
        /* Ignore ValueError raised if the underlying stream was detached */
        PyErr_Clear();
    }
    if (name_attr != NULL) {
        piece = PyUnicode_FromFormat(" name=%R", name_attr);
        Py_DECREF(name_attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&repr, piece);
        if (repr == NULL) {
            goto error;
        }
    }

    /* Optional " mode=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &mode_attr) < 0) {
        goto error;
    }
    if (mode_attr != NULL) {
        piece = PyUnicode_FromFormat(" mode=%R", mode_attr);
        Py_DECREF(mode_attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&repr, piece);
        if (repr == NULL) {
            goto error;
        }
    }

    piece = PyUnicode_FromFormat("%U encoding=%R>",
                                 repr, self->encoding);
    Py_DECREF(repr);
    /* Py_ReprEnter() returned 0 on every path that reaches this point. */
    Py_ReprLeave(op);
    return piece;

  error:
    Py_XDECREF(repr);
    /* Only leave the repr guard if it was actually entered. */
    if (enter_rc == 0) {
        Py_ReprLeave(op);
    }
    return NULL;
}
3126
3127
3128
/* Inquiries */
3129
3130
/*[clinic input]
3131
@critical_section
3132
_io.TextIOWrapper.fileno
3133
[clinic start generated code]*/
3134
3135
static PyObject *
3136
_io_TextIOWrapper_fileno_impl(textio *self)
3137
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3138
0
{
3139
0
    return buffer_callmethod_noargs(self, &_Py_ID(fileno));
3140
0
}
3141
3142
/*[clinic input]
3143
@critical_section
3144
_io.TextIOWrapper.seekable
3145
[clinic start generated code]*/
3146
3147
static PyObject *
3148
_io_TextIOWrapper_seekable_impl(textio *self)
3149
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3150
0
{
3151
0
    return buffer_callmethod_noargs(self, &_Py_ID(seekable));
3152
0
}
3153
3154
/*[clinic input]
3155
@critical_section
3156
_io.TextIOWrapper.readable
3157
[clinic start generated code]*/
3158
3159
static PyObject *
3160
_io_TextIOWrapper_readable_impl(textio *self)
3161
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3162
0
{
3163
0
    return buffer_callmethod_noargs(self, &_Py_ID(readable));
3164
0
}
3165
3166
/*[clinic input]
3167
@critical_section
3168
_io.TextIOWrapper.writable
3169
[clinic start generated code]*/
3170
3171
static PyObject *
3172
_io_TextIOWrapper_writable_impl(textio *self)
3173
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3174
0
{
3175
0
    return buffer_callmethod_noargs(self, &_Py_ID(writable));
3176
0
}
3177
3178
/*[clinic input]
3179
@critical_section
3180
_io.TextIOWrapper.isatty
3181
[clinic start generated code]*/
3182
3183
static PyObject *
3184
_io_TextIOWrapper_isatty_impl(textio *self)
3185
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3186
0
{
3187
0
    return buffer_callmethod_noargs(self, &_Py_ID(isatty));
3188
0
}
3189
3190
/*[clinic input]
3191
@critical_section
3192
_io.TextIOWrapper.flush
3193
[clinic start generated code]*/
3194
3195
static PyObject *
3196
_io_TextIOWrapper_flush_impl(textio *self)
3197
/*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3198
19
{
3199
19
    CHECK_ATTACHED(self);
3200
19
    CHECK_CLOSED(self);
3201
19
    self->telling = self->seekable;
3202
19
    if (_textiowrapper_writeflush(self) < 0)
3203
0
        return NULL;
3204
19
    return buffer_callmethod_noargs(self, &_Py_ID(flush));
3205
19
}
3206
3207
/*[clinic input]
3208
@critical_section
3209
_io.TextIOWrapper.close
3210
[clinic start generated code]*/
3211
3212
static PyObject *
3213
_io_TextIOWrapper_close_impl(textio *self)
3214
/*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3215
19
{
3216
19
    PyObject *res;
3217
19
    int r;
3218
19
    CHECK_ATTACHED(self);
3219
3220
19
    res = _io_TextIOWrapper_closed_get_impl(self);
3221
19
    if (res == NULL)
3222
0
        return NULL;
3223
19
    r = PyObject_IsTrue(res);
3224
19
    Py_DECREF(res);
3225
19
    if (r < 0)
3226
0
        return NULL;
3227
3228
19
    if (r > 0) {
3229
0
        Py_RETURN_NONE; /* stream already closed */
3230
0
    }
3231
19
    if (self->detached) {
3232
0
        Py_RETURN_NONE; /* gh-142594 null pointer issue */
3233
0
    }
3234
19
    else {
3235
19
        PyObject *exc = NULL;
3236
19
        if (self->finalizing) {
3237
0
            res = buffer_callmethod_onearg(self,
3238
0
                                           &_Py_ID(_dealloc_warn),
3239
0
                                           (PyObject *)self);
3240
0
            if (res) {
3241
0
                Py_DECREF(res);
3242
0
            }
3243
0
            else {
3244
0
                PyErr_Clear();
3245
0
            }
3246
0
        }
3247
19
        if (_PyFile_Flush((PyObject *)self) < 0) {
3248
0
            exc = PyErr_GetRaisedException();
3249
0
        }
3250
3251
19
        res = buffer_callmethod_noargs(self, &_Py_ID(close));
3252
19
        if (exc != NULL) {
3253
0
            _PyErr_ChainExceptions1(exc);
3254
0
            Py_CLEAR(res);
3255
0
        }
3256
19
        return res;
3257
19
    }
3258
19
}
3259
3260
/* Produce the next line for iteration; the caller must hold the object's
 * critical section.
 *
 * `telling` is switched off for the duration of the iteration.  An empty
 * line ends the iteration: the snapshot is dropped, `telling` is reset from
 * `seekable`, and NULL is returned.  NULL is also returned, with an
 * exception set, when reading the line fails.
 */
static PyObject *
textiowrapper_iternext_lock_held(PyObject *op)
{
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
    textio *self = textio_CAST(op);
    PyObject *line;

    CHECK_ATTACHED(self);

    self->telling = 0;
    if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
        /* Not the exact type: go through the regular readline() method
           call and validate what it returned. */
        line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        line = _textiowrapper_readline(self, -1);
    }

    if (line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(line) != 0) {
        return line;
    }

    /* Reached EOF or would have blocked */
    Py_DECREF(line);
    Py_CLEAR(self->snapshot);
    self->telling = self->seekable;
    return NULL;
}
3298
3299
/* tp_iternext slot: run textiowrapper_iternext_lock_held() inside the
 * object's critical section and return whatever it produced.
 */
static PyObject *
textiowrapper_iternext(PyObject *op)
{
    PyObject *next_line;

    Py_BEGIN_CRITICAL_SECTION(op);
    next_line = textiowrapper_iternext_lock_held(op);
    Py_END_CRITICAL_SECTION();

    return next_line;
}
3308
3309
/*[clinic input]
3310
@critical_section
3311
@getter
3312
_io.TextIOWrapper.name
3313
[clinic start generated code]*/
3314
3315
static PyObject *
3316
_io_TextIOWrapper_name_get_impl(textio *self)
3317
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3318
0
{
3319
0
    return buffer_getattr(self, &_Py_ID(name));
3320
0
}
3321
3322
/*[clinic input]
3323
@critical_section
3324
@getter
3325
_io.TextIOWrapper.closed
3326
[clinic start generated code]*/
3327
3328
static PyObject *
3329
_io_TextIOWrapper_closed_get_impl(textio *self)
3330
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3331
67
{
3332
    /* If partially constructed or deconstructed, return that the underlying
3333
       buffer is closed.
3334
3335
       The code managing the transition is responsible for closing. The closed
3336
       attribute is often called in re-initalization, as part of repr in error
3337
       cases, and when the I/O stack is garbage collected. */
3338
67
    if (self->ok <= 0) {
3339
0
        Py_RETURN_TRUE;
3340
0
    }
3341
3342
67
    return buffer_getattr(self, &_Py_ID(closed));
3343
67
}
3344
3345
/*[clinic input]
3346
@critical_section
3347
@getter
3348
_io.TextIOWrapper.newlines
3349
[clinic start generated code]*/
3350
3351
static PyObject *
3352
_io_TextIOWrapper_newlines_get_impl(textio *self)
3353
/*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3354
0
{
3355
0
    PyObject *res;
3356
0
    CHECK_ATTACHED(self);
3357
0
    if (self->decoder == NULL ||
3358
0
        PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3359
0
    {
3360
0
        Py_RETURN_NONE;
3361
0
    }
3362
0
    return res;
3363
0
}
3364
3365
/*[clinic input]
3366
@critical_section
3367
@getter
3368
_io.TextIOWrapper.errors
3369
[clinic start generated code]*/
3370
3371
static PyObject *
3372
_io_TextIOWrapper_errors_get_impl(textio *self)
3373
/*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3374
0
{
3375
0
    CHECK_INITIALIZED(self);
3376
0
    return Py_NewRef(self->errors);
3377
0
}
3378
3379
/*[clinic input]
3380
@critical_section
3381
@getter
3382
_io.TextIOWrapper._CHUNK_SIZE
3383
[clinic start generated code]*/
3384
3385
static PyObject *
3386
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3387
/*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3388
0
{
3389
0
    CHECK_ATTACHED(self);
3390
0
    return PyLong_FromSsize_t(self->chunk_size);
3391
0
}
3392
3393
/*[clinic input]
3394
@critical_section
3395
@setter
3396
_io.TextIOWrapper._CHUNK_SIZE
3397
[clinic start generated code]*/
3398
3399
static int
3400
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3401
/*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3402
0
{
3403
0
    Py_ssize_t n;
3404
0
    CHECK_ATTACHED_INT(self);
3405
0
    if (value == NULL) {
3406
0
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3407
0
        return -1;
3408
0
    }
3409
0
    n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3410
0
    if (n == -1 && PyErr_Occurred())
3411
0
        return -1;
3412
0
    if (n <= 0) {
3413
0
        PyErr_SetString(PyExc_ValueError,
3414
0
                        "a strictly positive integer is required");
3415
0
        return -1;
3416
0
    }
3417
0
    self->chunk_size = n;
3418
0
    return 0;
3419
0
}
3420
3421
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3422
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3423
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3424
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3425
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3426
    {NULL}
3427
};
3428
3429
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3430
    {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL},
3431
    {NULL}
3432
};
3433
3434
static PyType_Slot nldecoder_slots[] = {
3435
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3436
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3437
    {Py_tp_methods, incrementalnewlinedecoder_methods},
3438
    {Py_tp_getset, incrementalnewlinedecoder_getset},
3439
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3440
    {Py_tp_clear, incrementalnewlinedecoder_clear},
3441
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3442
    {0, NULL},
3443
};
3444
3445
PyType_Spec _Py_nldecoder_spec = {
3446
    .name = "_io.IncrementalNewlineDecoder",
3447
    .basicsize = sizeof(nldecoder_object),
3448
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3449
              Py_TPFLAGS_IMMUTABLETYPE),
3450
    .slots = nldecoder_slots,
3451
};
3452
3453
3454
static PyMethodDef textiowrapper_methods[] = {
3455
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3456
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3457
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3458
    _IO_TEXTIOWRAPPER_READ_METHODDEF
3459
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3460
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3461
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3462
3463
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3464
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3465
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3466
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3467
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3468
3469
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3470
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3471
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3472
3473
    {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS},
3474
    {NULL, NULL}
3475
};
3476
3477
static PyMemberDef textiowrapper_members[] = {
3478
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3479
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3480
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3481
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3482
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3483
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3484
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3485
    {NULL}
3486
};
3487
3488
static PyGetSetDef textiowrapper_getset[] = {
3489
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3490
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3491
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3492
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3493
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3494
    {NULL}
3495
};
3496
3497
static PyType_Slot textiowrapper_slots[] = {
3498
    {Py_tp_dealloc, textiowrapper_dealloc},
3499
    {Py_tp_repr, textiowrapper_repr},
3500
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3501
    {Py_tp_traverse, textiowrapper_traverse},
3502
    {Py_tp_clear, textiowrapper_clear},
3503
    {Py_tp_iternext, textiowrapper_iternext},
3504
    {Py_tp_methods, textiowrapper_methods},
3505
    {Py_tp_members, textiowrapper_members},
3506
    {Py_tp_getset, textiowrapper_getset},
3507
    {Py_tp_init, _io_TextIOWrapper___init__},
3508
    {0, NULL},
3509
};
3510
3511
PyType_Spec _Py_textiowrapper_spec = {
3512
    .name = "_io.TextIOWrapper",
3513
    .basicsize = sizeof(textio),
3514
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3515
              Py_TPFLAGS_IMMUTABLETYPE),
3516
    .slots = textiowrapper_slots,
3517
};