Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_io/textio.c
Line
Count
Source
1
/*
2
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h"          // _PyObject_CallMethod()
11
#include "pycore_codecs.h"        // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
13
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
14
#include "pycore_long.h"          // _PyLong_GetZero()
15
#include "pycore_object.h"        // _PyObject_GC_UNTRACK()
16
#include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
17
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
18
#include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString()
19
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
20
21
#include "_iomodule.h"
22
23
/*[clinic input]
24
module _io
25
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
26
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
27
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
30
31
typedef struct nldecoder_object nldecoder_object;
32
typedef struct textio textio;
33
34
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
35
#include "clinic/textio.c.h"
36
#undef clinic_state
37
38
/* TextIOBase */
39
40
PyDoc_STRVAR(textiobase_doc,
41
    "Base class for text I/O.\n"
42
    "\n"
43
    "This class provides a character and line based interface to stream\n"
44
    "I/O. There is no readinto method because Python's character strings\n"
45
    "are immutable.\n"
46
    );
47
48
static PyObject *
49
_unsupported(_PyIO_State *state, const char *message)
50
0
{
51
0
    PyErr_SetString(state->unsupported_operation, message);
52
0
    return NULL;
53
0
}
54
55
/*[clinic input]
56
@permit_long_docstring_body
57
_io._TextIOBase.detach
58
    cls: defining_class
59
    /
60
61
Separate the underlying buffer from the TextIOBase and return it.
62
63
After the underlying buffer has been detached, the TextIO is in an unusable state.
64
[clinic start generated code]*/
65
66
static PyObject *
67
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
68
/*[clinic end generated code: output=50915f40c609eaa4 input=8cd0652c17d7f015]*/
69
0
{
70
0
    _PyIO_State *state = get_io_state_by_cls(cls);
71
0
    return _unsupported(state, "detach");
72
0
}
73
74
/*[clinic input]
75
_io._TextIOBase.read
76
    cls: defining_class
77
    size: int(unused=True) = -1
78
    /
79
80
Read at most size characters from stream.
81
82
Read from underlying buffer until we have size characters or we hit EOF.
83
If size is negative or omitted, read until EOF.
84
[clinic start generated code]*/
85
86
static PyObject *
87
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
88
                          int Py_UNUSED(size))
89
/*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
90
0
{
91
0
    _PyIO_State *state = get_io_state_by_cls(cls);
92
0
    return _unsupported(state, "read");
93
0
}
94
95
/*[clinic input]
96
_io._TextIOBase.readline
97
    cls: defining_class
98
    size: int(unused=True) = -1
99
    /
100
101
Read until newline or EOF.
102
103
Return an empty string if EOF is hit immediately.
104
If size is specified, at most size characters will be read.
105
[clinic start generated code]*/
106
107
static PyObject *
108
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
109
                              int Py_UNUSED(size))
110
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
111
0
{
112
0
    _PyIO_State *state = get_io_state_by_cls(cls);
113
0
    return _unsupported(state, "readline");
114
0
}
115
116
/*[clinic input]
117
_io._TextIOBase.write
118
    cls: defining_class
119
    s: str(unused=True)
120
    /
121
122
Write string s to stream.
123
124
Return the number of characters written
125
(which is always equal to the length of the string).
126
[clinic start generated code]*/
127
128
static PyObject *
129
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
130
                           const char *Py_UNUSED(s))
131
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
132
0
{
133
0
    _PyIO_State *state = get_io_state_by_cls(cls);
134
0
    return _unsupported(state, "write");
135
0
}
136
137
/*[clinic input]
138
@getter
139
_io._TextIOBase.encoding
140
141
Encoding of the text stream.
142
143
Subclasses should override.
144
[clinic start generated code]*/
145
146
static PyObject *
147
_io__TextIOBase_encoding_get_impl(PyObject *self)
148
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
149
0
{
150
0
    Py_RETURN_NONE;
151
0
}
152
153
/*[clinic input]
154
@getter
155
_io._TextIOBase.newlines
156
157
Line endings translated so far.
158
159
Only line endings translated during reading are considered.
160
161
Subclasses should override.
162
[clinic start generated code]*/
163
164
static PyObject *
165
_io__TextIOBase_newlines_get_impl(PyObject *self)
166
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
167
0
{
168
0
    Py_RETURN_NONE;
169
0
}
170
171
/*[clinic input]
172
@getter
173
_io._TextIOBase.errors
174
175
The error setting of the decoder or encoder.
176
177
Subclasses should override.
178
[clinic start generated code]*/
179
180
static PyObject *
181
_io__TextIOBase_errors_get_impl(PyObject *self)
182
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
183
0
{
184
0
    Py_RETURN_NONE;
185
0
}
186
187
188
static PyMethodDef textiobase_methods[] = {
189
    _IO__TEXTIOBASE_DETACH_METHODDEF
190
    _IO__TEXTIOBASE_READ_METHODDEF
191
    _IO__TEXTIOBASE_READLINE_METHODDEF
192
    _IO__TEXTIOBASE_WRITE_METHODDEF
193
    {NULL, NULL}
194
};
195
196
static PyGetSetDef textiobase_getset[] = {
197
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
198
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
199
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
200
    {NULL}
201
};
202
203
static PyType_Slot textiobase_slots[] = {
204
    {Py_tp_doc, (void *)textiobase_doc},
205
    {Py_tp_methods, textiobase_methods},
206
    {Py_tp_getset, textiobase_getset},
207
    {0, NULL},
208
};
209
210
/* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
211
PyType_Spec _Py_textiobase_spec = {
212
    .name = "_io._TextIOBase",
213
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
214
              Py_TPFLAGS_IMMUTABLETYPE),
215
    .slots = textiobase_slots,
216
};
217
218
/* IncrementalNewlineDecoder */
219
220
struct nldecoder_object {
221
    PyObject_HEAD
222
    PyObject *decoder;
223
    PyObject *errors;
224
    unsigned int pendingcr: 1;
225
    unsigned int translate: 1;
226
    unsigned int seennl: 3;
227
};
228
229
95.4k
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
230
231
/*[clinic input]
232
_io.IncrementalNewlineDecoder.__init__
233
    decoder: object
234
    translate: bool
235
    errors: object(c_default="NULL") = "strict"
236
237
Codec used when reading a file in universal newlines mode.
238
239
It wraps another incremental decoder, translating \r\n and \r into \n.
240
It also records the types of newlines encountered.  When used with
241
translate=False, it ensures that the newline sequence is returned in
242
one piece. When used with decoder=None, it expects unicode strings as
243
decode input and translates newlines without first invoking an external
244
decoder.
245
[clinic start generated code]*/
246
247
static int
248
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249
                                            PyObject *decoder, int translate,
250
                                            PyObject *errors)
251
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
252
16.1k
{
253
254
16.1k
    if (errors == NULL) {
255
16.1k
        errors = &_Py_ID(strict);
256
16.1k
    }
257
0
    else {
258
0
        errors = Py_NewRef(errors);
259
0
    }
260
261
16.1k
    Py_XSETREF(self->errors, errors);
262
16.1k
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
263
16.1k
    self->translate = translate ? 1 : 0;
264
16.1k
    self->seennl = 0;
265
16.1k
    self->pendingcr = 0;
266
267
16.1k
    return 0;
268
16.1k
}
269
270
static int
271
incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg)
272
1.92k
{
273
1.92k
    nldecoder_object *self = nldecoder_object_CAST(op);
274
1.92k
    Py_VISIT(Py_TYPE(self));
275
1.92k
    Py_VISIT(self->decoder);
276
1.92k
    Py_VISIT(self->errors);
277
1.92k
    return 0;
278
1.92k
}
279
280
static int
281
incrementalnewlinedecoder_clear(PyObject *op)
282
16.1k
{
283
16.1k
    nldecoder_object *self = nldecoder_object_CAST(op);
284
16.1k
    Py_CLEAR(self->decoder);
285
16.1k
    Py_CLEAR(self->errors);
286
16.1k
    return 0;
287
16.1k
}
288
289
static void
290
incrementalnewlinedecoder_dealloc(PyObject *op)
291
16.1k
{
292
16.1k
    nldecoder_object *self = nldecoder_object_CAST(op);
293
16.1k
    PyTypeObject *tp = Py_TYPE(self);
294
16.1k
    _PyObject_GC_UNTRACK(self);
295
16.1k
    (void)incrementalnewlinedecoder_clear(op);
296
16.1k
    tp->tp_free(self);
297
16.1k
    Py_DECREF(tp);
298
16.1k
}
299
300
static int
301
check_decoded(PyObject *decoded)
302
61.2k
{
303
61.2k
    if (decoded == NULL)
304
0
        return -1;
305
61.2k
    if (!PyUnicode_Check(decoded)) {
306
0
        PyErr_Format(PyExc_TypeError,
307
0
                     "decoder should return a string result, not '%.200s'",
308
0
                     Py_TYPE(decoded)->tp_name);
309
0
        Py_DECREF(decoded);
310
0
        return -1;
311
0
    }
312
61.2k
    return 0;
313
61.2k
}
314
315
#define CHECK_INITIALIZED_DECODER(self) \
316
61.2k
    if (self->errors == NULL) { \
317
0
        PyErr_SetString(PyExc_ValueError, \
318
0
                        "IncrementalNewlineDecoder.__init__() not called"); \
319
0
        return NULL; \
320
0
    }
321
322
31.0M
#define SEEN_CR   1
323
26.0M
#define SEEN_LF   2
324
24.8M
#define SEEN_CRLF 4
325
24.7M
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
326
327
PyObject *
328
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
329
                                    PyObject *input, int final)
330
61.2k
{
331
61.2k
    PyObject *output;
332
61.2k
    Py_ssize_t output_len;
333
61.2k
    nldecoder_object *self = nldecoder_object_CAST(myself);
334
335
61.2k
    CHECK_INITIALIZED_DECODER(self);
336
337
    /* decode input (with the eventual \r from a previous pass) */
338
61.2k
    if (self->decoder != Py_None) {
339
50
        output = PyObject_CallMethodObjArgs(self->decoder,
340
50
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
341
50
    }
342
61.1k
    else {
343
61.1k
        output = Py_NewRef(input);
344
61.1k
    }
345
346
61.2k
    if (check_decoded(output) < 0)
347
0
        return NULL;
348
349
61.2k
    output_len = PyUnicode_GET_LENGTH(output);
350
61.2k
    if (self->pendingcr && (final || output_len > 0)) {
351
        /* Prefix output with CR */
352
0
        int kind;
353
0
        PyObject *modified;
354
0
        char *out;
355
356
0
        modified = PyUnicode_New(output_len + 1,
357
0
                                 PyUnicode_MAX_CHAR_VALUE(output));
358
0
        if (modified == NULL)
359
0
            goto error;
360
0
        kind = PyUnicode_KIND(modified);
361
0
        out = PyUnicode_DATA(modified);
362
0
        PyUnicode_WRITE(kind, out, 0, '\r');
363
0
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
364
0
        Py_SETREF(output, modified);
365
0
        self->pendingcr = 0;
366
0
        output_len++;
367
0
    }
368
369
    /* retain last \r even when not translating data:
370
     * then readline() is sure to get \r\n in one pass
371
     */
372
61.2k
    if (!final) {
373
42
        if (output_len > 0
374
42
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
375
0
        {
376
0
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
377
0
            if (modified == NULL)
378
0
                goto error;
379
0
            Py_SETREF(output, modified);
380
0
            self->pendingcr = 1;
381
0
        }
382
42
    }
383
384
    /* Record which newlines are read and do newline translation if desired,
385
       all in one pass. */
386
61.2k
    {
387
61.2k
        const void *in_str;
388
61.2k
        Py_ssize_t len;
389
61.2k
        int seennl = self->seennl;
390
61.2k
        int only_lf = 0;
391
61.2k
        int kind;
392
393
61.2k
        in_str = PyUnicode_DATA(output);
394
61.2k
        len = PyUnicode_GET_LENGTH(output);
395
61.2k
        kind = PyUnicode_KIND(output);
396
397
61.2k
        if (len == 0)
398
4
            return output;
399
400
        /* If, up to now, newlines are consistently \n, do a quick check
401
           for the \r *byte* with the libc's optimized memchr.
402
           */
403
61.2k
        if (seennl == SEEN_LF || seennl == 0) {
404
30.1k
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
405
30.1k
        }
406
407
61.2k
        if (only_lf) {
408
            /* If not already seen, quick scan for a possible "\n" character.
409
               (there's nothing else to be done, even when in translation mode)
410
            */
411
24.0k
            if (seennl == 0 &&
412
17.0k
                memchr(in_str, '\n', kind * len) != NULL) {
413
1.95k
                if (kind == PyUnicode_1BYTE_KIND)
414
937
                    seennl |= SEEN_LF;
415
1.02k
                else {
416
1.02k
                    Py_ssize_t i = 0;
417
150k
                    for (;;) {
418
150k
                        Py_UCS4 c;
419
                        /* Fast loop for non-control characters */
420
1.07M
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
421
927k
                            i++;
422
150k
                        c = PyUnicode_READ(kind, in_str, i++);
423
150k
                        if (c == '\n') {
424
829
                            seennl |= SEEN_LF;
425
829
                            break;
426
829
                        }
427
149k
                        if (i >= len)
428
191
                            break;
429
149k
                    }
430
1.02k
                }
431
1.95k
            }
432
            /* Finished: we have scanned for newlines, and none of them
433
               need translating */
434
24.0k
        }
435
37.1k
        else if (!self->translate) {
436
37.1k
            Py_ssize_t i = 0;
437
            /* We have already seen all newline types, no need to scan again */
438
37.1k
            if (seennl == SEEN_ALL)
439
13.7k
                goto endscan;
440
24.7M
            for (;;) {
441
24.7M
                Py_UCS4 c;
442
                /* Fast loop for non-control characters */
443
74.6M
                while (PyUnicode_READ(kind, in_str, i) > '\r')
444
49.8M
                    i++;
445
24.7M
                c = PyUnicode_READ(kind, in_str, i++);
446
24.7M
                if (c == '\n')
447
1.13M
                    seennl |= SEEN_LF;
448
23.6M
                else if (c == '\r') {
449
6.24M
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
450
3.50k
                        seennl |= SEEN_CRLF;
451
3.50k
                        i++;
452
3.50k
                    }
453
6.24M
                    else
454
6.24M
                        seennl |= SEEN_CR;
455
6.24M
                }
456
24.7M
                if (i >= len)
457
22.1k
                    break;
458
24.7M
                if (seennl == SEEN_ALL)
459
1.23k
                    break;
460
24.7M
            }
461
37.1k
        endscan:
462
37.1k
            ;
463
37.1k
        }
464
0
        else {
465
0
            void *translated;
466
0
            int kind = PyUnicode_KIND(output);
467
0
            const void *in_str = PyUnicode_DATA(output);
468
0
            Py_ssize_t in, out;
469
            /* XXX: Previous in-place translation here is disabled as
470
               resizing is not possible anymore */
471
            /* We could try to optimize this so that we only do a copy
472
               when there is something to translate. On the other hand,
473
               we already know there is a \r byte, so chances are high
474
               that something needs to be done. */
475
0
            translated = PyMem_Malloc(kind * len);
476
0
            if (translated == NULL) {
477
0
                PyErr_NoMemory();
478
0
                goto error;
479
0
            }
480
0
            in = out = 0;
481
0
            for (;;) {
482
0
                Py_UCS4 c;
483
                /* Fast loop for non-control characters */
484
0
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
485
0
                    PyUnicode_WRITE(kind, translated, out++, c);
486
0
                if (c == '\n') {
487
0
                    PyUnicode_WRITE(kind, translated, out++, c);
488
0
                    seennl |= SEEN_LF;
489
0
                    continue;
490
0
                }
491
0
                if (c == '\r') {
492
0
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
493
0
                        in++;
494
0
                        seennl |= SEEN_CRLF;
495
0
                    }
496
0
                    else
497
0
                        seennl |= SEEN_CR;
498
0
                    PyUnicode_WRITE(kind, translated, out++, '\n');
499
0
                    continue;
500
0
                }
501
0
                if (in > len)
502
0
                    break;
503
0
                PyUnicode_WRITE(kind, translated, out++, c);
504
0
            }
505
0
            Py_DECREF(output);
506
0
            output = PyUnicode_FromKindAndData(kind, translated, out);
507
0
            PyMem_Free(translated);
508
0
            if (!output)
509
0
                return NULL;
510
0
        }
511
61.2k
        self->seennl |= seennl;
512
61.2k
    }
513
514
0
    return output;
515
516
0
  error:
517
0
    Py_DECREF(output);
518
0
    return NULL;
519
61.2k
}
520
521
/*[clinic input]
522
@critical_section
523
_io.IncrementalNewlineDecoder.decode
524
    input: object
525
    final: bool = False
526
[clinic start generated code]*/
527
528
static PyObject *
529
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
530
                                          PyObject *input, int final)
531
/*[clinic end generated code: output=0d486755bb37a66e input=9475d16a73168504]*/
532
0
{
533
0
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
534
0
}
535
536
/*[clinic input]
537
@critical_section
538
_io.IncrementalNewlineDecoder.getstate
539
[clinic start generated code]*/
540
541
static PyObject *
542
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
543
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=dc3e1f27aa850f12]*/
544
0
{
545
0
    PyObject *buffer;
546
0
    unsigned long long flag;
547
548
0
    CHECK_INITIALIZED_DECODER(self);
549
550
0
    if (self->decoder != Py_None) {
551
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
552
0
           &_Py_ID(getstate));
553
0
        if (state == NULL)
554
0
            return NULL;
555
0
        if (!PyTuple_Check(state)) {
556
0
            PyErr_SetString(PyExc_TypeError,
557
0
                            "illegal decoder state");
558
0
            Py_DECREF(state);
559
0
            return NULL;
560
0
        }
561
0
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
562
0
                              &buffer, &flag))
563
0
        {
564
0
            Py_DECREF(state);
565
0
            return NULL;
566
0
        }
567
0
        Py_INCREF(buffer);
568
0
        Py_DECREF(state);
569
0
    }
570
0
    else {
571
0
        buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
572
0
        flag = 0;
573
0
    }
574
0
    flag <<= 1;
575
0
    if (self->pendingcr)
576
0
        flag |= 1;
577
0
    return Py_BuildValue("NK", buffer, flag);
578
0
}
579
580
/*[clinic input]
581
@critical_section
582
_io.IncrementalNewlineDecoder.setstate
583
    state: object
584
    /
585
[clinic start generated code]*/
586
587
static PyObject *
588
_io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
589
                                            PyObject *state)
590
/*[clinic end generated code: output=09135cb6e78a1dc8 input=275fd3982d2b08cb]*/
591
0
{
592
0
    PyObject *buffer;
593
0
    unsigned long long flag;
594
595
0
    CHECK_INITIALIZED_DECODER(self);
596
597
0
    if (!PyTuple_Check(state)) {
598
0
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
599
0
        return NULL;
600
0
    }
601
0
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
602
0
                          &buffer, &flag))
603
0
    {
604
0
        return NULL;
605
0
    }
606
607
0
    self->pendingcr = (int) (flag & 1);
608
0
    flag >>= 1;
609
610
0
    if (self->decoder != Py_None) {
611
0
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
612
0
                                    "((OK))", buffer, flag);
613
0
    }
614
0
    else {
615
0
        Py_RETURN_NONE;
616
0
    }
617
0
}
618
619
/*[clinic input]
620
@critical_section
621
_io.IncrementalNewlineDecoder.reset
622
[clinic start generated code]*/
623
624
static PyObject *
625
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
626
/*[clinic end generated code: output=32fa40c7462aa8ff input=31bd8ae4e36cec83]*/
627
0
{
628
0
    CHECK_INITIALIZED_DECODER(self);
629
630
0
    self->seennl = 0;
631
0
    self->pendingcr = 0;
632
0
    if (self->decoder != Py_None)
633
0
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
634
0
    else
635
0
        Py_RETURN_NONE;
636
0
}
637
638
static PyObject *
639
incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context))
640
0
{
641
0
    nldecoder_object *self = nldecoder_object_CAST(op);
642
0
    CHECK_INITIALIZED_DECODER(self);
643
644
0
    switch (self->seennl) {
645
0
    case SEEN_CR:
646
0
        return PyUnicode_FromString("\r");
647
0
    case SEEN_LF:
648
0
        return PyUnicode_FromString("\n");
649
0
    case SEEN_CRLF:
650
0
        return PyUnicode_FromString("\r\n");
651
0
    case SEEN_CR | SEEN_LF:
652
0
        return Py_BuildValue("ss", "\r", "\n");
653
0
    case SEEN_CR | SEEN_CRLF:
654
0
        return Py_BuildValue("ss", "\r", "\r\n");
655
0
    case SEEN_LF | SEEN_CRLF:
656
0
        return Py_BuildValue("ss", "\n", "\r\n");
657
0
    case SEEN_CR | SEEN_LF | SEEN_CRLF:
658
0
        return Py_BuildValue("sss", "\r", "\n", "\r\n");
659
0
    default:
660
0
        Py_RETURN_NONE;
661
0
   }
662
663
0
}
664
665
/* TextIOWrapper */
666
667
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
668
669
struct textio
670
{
671
    PyObject_HEAD
672
    int ok; /* initialized? */
673
    int detached;
674
    Py_ssize_t chunk_size;
675
    PyObject *buffer;
676
    PyObject *encoding;
677
    PyObject *encoder;
678
    PyObject *decoder;
679
    PyObject *readnl;
680
    PyObject *errors;
681
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
682
    char line_buffering;
683
    char write_through;
684
    char readuniversal;
685
    char readtranslate;
686
    char writetranslate;
687
    char seekable;
688
    char has_read1;
689
    char telling;
690
    char finalizing;
691
    /* Specialized encoding func (see below) */
692
    encodefunc_t encodefunc;
693
    /* Whether or not it's the start of the stream */
694
    char encoding_start_of_stream;
695
696
    /* Reads and writes are internally buffered in order to speed things up.
697
       However, any read will first flush the write buffer if itsn't empty.
698
699
       Please also note that text to be written is first encoded before being
700
       buffered. This is necessary so that encoding errors are immediately
701
       reported to the caller, but it unfortunately means that the
702
       IncrementalEncoder (whose encode() method is always written in Python)
703
       becomes a bottleneck for small writes.
704
    */
705
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
706
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
707
    PyObject *pending_bytes;       // data waiting to be written.
708
                                   // ascii unicode, bytes, or list of them.
709
    Py_ssize_t pending_bytes_count;
710
711
    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
712
     * dec_flags is the second (integer) item of the decoder state and
713
     * next_input is the chunk of input bytes that comes next after the
714
     * snapshot point.  We use this to reconstruct decoder states in tell().
715
     */
716
    PyObject *snapshot;
717
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
718
       the heuristic in tell(). */
719
    double b2cratio;
720
721
    /* Cache raw object if it's a FileIO object */
722
    PyObject *raw;
723
724
    PyObject *weakreflist;
725
    PyObject *dict;
726
727
    _PyIO_State *state;
728
};
729
730
12.5k
#define textio_CAST(op) ((textio *)(op))
731
732
static void
733
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
734
735
/* A couple of specialized cases in order to bypass the slow incremental
736
   encoding methods for the most popular encodings. */
737
738
static PyObject *
739
ascii_encode(PyObject *op, PyObject *text)
740
0
{
741
0
    textio *self = textio_CAST(op);
742
0
    return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
743
0
}
744
745
static PyObject *
746
utf16be_encode(PyObject *op, PyObject *text)
747
0
{
748
0
    textio *self = textio_CAST(op);
749
0
    return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), 1);
750
0
}
751
752
static PyObject *
753
utf16le_encode(PyObject *op, PyObject *text)
754
0
{
755
0
    textio *self = textio_CAST(op);
756
0
    return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), -1);
757
0
}
758
759
static PyObject *
760
utf16_encode(PyObject *op, PyObject *text)
761
0
{
762
0
    textio *self = textio_CAST(op);
763
0
    if (!self->encoding_start_of_stream) {
764
        /* Skip the BOM and use native byte ordering */
765
#if PY_BIG_ENDIAN
766
        return utf16be_encode(op, text);
767
#else
768
0
        return utf16le_encode(op, text);
769
0
#endif
770
0
    }
771
0
    return _PyUnicode_EncodeUTF16(text, PyUnicode_AsUTF8(self->errors), 0);
772
0
}
773
774
static PyObject *
775
utf32be_encode(PyObject *op, PyObject *text)
776
0
{
777
0
    textio *self = textio_CAST(op);
778
0
    return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), 1);
779
0
}
780
781
static PyObject *
782
utf32le_encode(PyObject *op, PyObject *text)
783
0
{
784
0
    textio *self = textio_CAST(op);
785
0
    return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), -1);
786
0
}
787
788
static PyObject *
789
utf32_encode(PyObject *op, PyObject *text)
790
0
{
791
0
    textio *self = textio_CAST(op);
792
0
    if (!self->encoding_start_of_stream) {
793
        /* Skip the BOM and use native byte ordering */
794
#if PY_BIG_ENDIAN
795
        return utf32be_encode(op, text);
796
#else
797
0
        return utf32le_encode(op, text);
798
0
#endif
799
0
    }
800
0
    return _PyUnicode_EncodeUTF32(text, PyUnicode_AsUTF8(self->errors), 0);
801
0
}
802
803
static PyObject *
804
utf8_encode(PyObject *op, PyObject *text)
805
0
{
806
0
    textio *self = textio_CAST(op);
807
0
    return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
808
0
}
809
810
static PyObject *
811
latin1_encode(PyObject *op, PyObject *text)
812
0
{
813
0
    textio *self = textio_CAST(op);
814
0
    return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
815
0
}
816
817
// Return true when encoding can be skipped when text is ascii.
818
static inline int
819
is_asciicompat_encoding(encodefunc_t f)
820
254k
{
821
254k
    return f == ascii_encode || f == latin1_encode || f == utf8_encode;
822
254k
}
823
824
/* Map normalized encoding names onto the specialized encoding funcs */
825
826
typedef struct {
827
    const char *name;
828
    encodefunc_t encodefunc;
829
} encodefuncentry;
830
831
static const encodefuncentry encodefuncs[] = {
832
    {"ascii",       ascii_encode},
833
    {"iso8859-1",   latin1_encode},
834
    {"utf-8",       utf8_encode},
835
    {"utf-16-be",   utf16be_encode},
836
    {"utf-16-le",   utf16le_encode},
837
    {"utf-16",      utf16_encode},
838
    {"utf-32-be",   utf32be_encode},
839
    {"utf-32-le",   utf32le_encode},
840
    {"utf-32",      utf32_encode},
841
    {NULL, NULL}
842
};
843
844
static int
845
validate_newline(const char *newline)
846
104
{
847
104
    if (newline && newline[0] != '\0'
848
96
        && !(newline[0] == '\n' && newline[1] == '\0')
849
0
        && !(newline[0] == '\r' && newline[1] == '\0')
850
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
851
0
        PyErr_Format(PyExc_ValueError,
852
0
                     "illegal newline value: %s", newline);
853
0
        return -1;
854
0
    }
855
104
    return 0;
856
104
}
857
858
static int
859
set_newline(textio *self, const char *newline)
860
104
{
861
104
    PyObject *old = self->readnl;
862
104
    if (newline == NULL) {
863
8
        self->readnl = NULL;
864
8
    }
865
96
    else {
866
96
        self->readnl = PyUnicode_FromString(newline);
867
96
        if (self->readnl == NULL) {
868
0
            self->readnl = old;
869
0
            return -1;
870
0
        }
871
96
    }
872
104
    self->readuniversal = (newline == NULL || newline[0] == '\0');
873
104
    self->readtranslate = (newline == NULL);
874
104
    self->writetranslate = (newline == NULL || newline[0] != '\0');
875
104
    if (!self->readuniversal && self->readnl != NULL) {
876
        // validate_newline() accepts only ASCII newlines.
877
96
        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
878
96
        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
879
96
        if (strcmp(self->writenl, "\n") == 0) {
880
96
            self->writenl = NULL;
881
96
        }
882
96
    }
883
8
    else {
884
#ifdef MS_WINDOWS
885
        self->writenl = "\r\n";
886
#else
887
8
        self->writenl = NULL;
888
8
#endif
889
8
    }
890
104
    Py_XDECREF(old);
891
104
    return 0;
892
104
}
893
894
static int
895
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
896
                           const char *errors)
897
104
{
898
104
    PyObject *res;
899
104
    int r;
900
901
104
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
902
104
    if (res == NULL)
903
0
        return -1;
904
905
104
    r = PyObject_IsTrue(res);
906
104
    Py_DECREF(res);
907
104
    if (r == -1)
908
0
        return -1;
909
910
104
    if (r != 1)
911
64
        return 0;
912
913
40
    Py_CLEAR(self->decoder);
914
40
    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
915
40
    if (self->decoder == NULL)
916
0
        return -1;
917
918
40
    if (self->readuniversal) {
919
8
        _PyIO_State *state = self->state;
920
8
        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
921
8
            (PyObject *)state->PyIncrementalNewlineDecoder_Type,
922
8
            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
923
8
        if (incrementalDecoder == NULL)
924
0
            return -1;
925
8
        Py_XSETREF(self->decoder, incrementalDecoder);
926
8
    }
927
928
40
    return 0;
929
40
}
930
931
static PyObject*
932
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
933
                      int eof)
934
46
{
935
46
    PyObject *chars;
936
937
46
    if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
938
46
        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
939
0
    else
940
0
        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
941
0
                                           eof ? Py_True : Py_False, NULL);
942
943
46
    if (check_decoded(chars) < 0)
944
        // check_decoded already decreases refcount
945
0
        return NULL;
946
947
46
    return chars;
948
46
}
949
950
static int
951
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
952
                           const char *errors)
953
104
{
954
104
    PyObject *res;
955
104
    int r;
956
957
104
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
958
104
    if (res == NULL)
959
0
        return -1;
960
961
104
    r = PyObject_IsTrue(res);
962
104
    Py_DECREF(res);
963
104
    if (r == -1)
964
0
        return -1;
965
966
104
    if (r != 1)
967
40
        return 0;
968
969
64
    Py_CLEAR(self->encoder);
970
64
    self->encodefunc = NULL;
971
64
    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
972
64
    if (self->encoder == NULL)
973
0
        return -1;
974
975
    /* Get the normalized named of the codec */
976
64
    if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
977
0
        return -1;
978
0
    }
979
64
    if (res != NULL && PyUnicode_Check(res)) {
980
64
        const encodefuncentry *e = encodefuncs;
981
192
        while (e->name != NULL) {
982
192
            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
983
64
                self->encodefunc = e->encodefunc;
984
64
                break;
985
64
            }
986
128
            e++;
987
128
        }
988
64
    }
989
64
    Py_XDECREF(res);
990
991
64
    return 0;
992
64
}
993
994
static int
995
_textiowrapper_fix_encoder_state(textio *self)
996
104
{
997
104
    if (!self->seekable || !self->encoder) {
998
40
        return 0;
999
40
    }
1000
1001
64
    self->encoding_start_of_stream = 1;
1002
1003
64
    PyObject *cookieObj = PyObject_CallMethodNoArgs(
1004
64
        self->buffer, &_Py_ID(tell));
1005
64
    if (cookieObj == NULL) {
1006
0
        return -1;
1007
0
    }
1008
1009
64
    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
1010
64
    Py_DECREF(cookieObj);
1011
64
    if (cmp < 0) {
1012
0
        return -1;
1013
0
    }
1014
1015
64
    if (cmp == 0) {
1016
30
        self->encoding_start_of_stream = 0;
1017
30
        PyObject *res = PyObject_CallMethodOneArg(
1018
30
            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1019
30
        if (res == NULL) {
1020
0
            return -1;
1021
0
        }
1022
30
        Py_DECREF(res);
1023
30
    }
1024
1025
64
    return 0;
1026
64
}
1027
1028
static int
1029
io_check_errors(PyObject *errors)
1030
96
{
1031
96
    assert(errors != NULL && errors != Py_None);
1032
1033
96
    PyInterpreterState *interp = _PyInterpreterState_GET();
1034
96
#ifndef Py_DEBUG
1035
    /* In release mode, only check in development mode (-X dev) */
1036
96
    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1037
96
        return 0;
1038
96
    }
1039
#else
1040
    /* Always check in debug mode */
1041
#endif
1042
1043
    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1044
       before_PyUnicode_InitEncodings() is called. */
1045
0
    if (!interp->unicode.fs_codec.encoding) {
1046
0
        return 0;
1047
0
    }
1048
1049
0
    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1050
0
    if (name == NULL) {
1051
0
        return -1;
1052
0
    }
1053
0
    PyObject *handler = PyCodec_LookupError(name);
1054
0
    if (handler != NULL) {
1055
0
        Py_DECREF(handler);
1056
0
        return 0;
1057
0
    }
1058
0
    return -1;
1059
0
}
1060
1061
1062
1063
/*[clinic input]
1064
_io.TextIOWrapper.__init__
1065
    buffer: object
1066
    encoding: str(accept={str, NoneType}) = None
1067
    errors: object = None
1068
    newline: str(accept={str, NoneType}) = None
1069
    line_buffering: bool = False
1070
    write_through: bool = False
1071
1072
Character and line based layer over a BufferedIOBase object, buffer.
1073
1074
encoding gives the name of the encoding that the stream will be
1075
decoded or encoded with. It defaults to locale.getencoding().
1076
1077
errors determines the strictness of encoding and decoding (see
1078
help(codecs.Codec) or the documentation for codecs.register) and
1079
defaults to "strict".
1080
1081
newline controls how line endings are handled. It can be None, '',
1082
'\n', '\r', and '\r\n'.  It works as follows:
1083
1084
* On input, if newline is None, universal newlines mode is
1085
  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1086
  these are translated into '\n' before being returned to the
1087
  caller. If it is '', universal newline mode is enabled, but line
1088
  endings are returned to the caller untranslated. If it has any of
1089
  the other legal values, input lines are only terminated by the given
1090
  string, and the line ending is returned to the caller untranslated.
1091
1092
* On output, if newline is None, any '\n' characters written are
1093
  translated to the system default line separator, os.linesep. If
1094
  newline is '' or '\n', no translation takes place. If newline is any
1095
  of the other legal values, any '\n' characters written are translated
1096
  to the given string.
1097
1098
If line_buffering is True, a call to flush is implied when a call to
1099
write contains a newline character.
1100
[clinic start generated code]*/
1101
1102
static int
1103
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1104
                                const char *encoding, PyObject *errors,
1105
                                const char *newline, int line_buffering,
1106
                                int write_through)
1107
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1108
104
{
1109
104
    PyObject *raw, *codec_info = NULL;
1110
104
    PyObject *res;
1111
104
    int r;
1112
1113
104
    self->ok = 0;
1114
104
    self->detached = 0;
1115
1116
104
    if (encoding == NULL) {
1117
0
        PyInterpreterState *interp = _PyInterpreterState_GET();
1118
0
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1119
0
            if (PyErr_WarnEx(PyExc_EncodingWarning,
1120
0
                             "'encoding' argument not specified", 1)) {
1121
0
                return -1;
1122
0
            }
1123
0
        }
1124
0
    }
1125
1126
104
    if (errors == Py_None) {
1127
8
        errors = &_Py_ID(strict);
1128
8
    }
1129
96
    else if (!PyUnicode_Check(errors)) {
1130
        // Check 'errors' argument here because Argument Clinic doesn't support
1131
        // 'str(accept={str, NoneType})' converter.
1132
0
        PyErr_Format(
1133
0
            PyExc_TypeError,
1134
0
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1135
0
            Py_TYPE(errors)->tp_name);
1136
0
        return -1;
1137
0
    }
1138
96
    else if (io_check_errors(errors)) {
1139
0
        return -1;
1140
0
    }
1141
104
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1142
104
    if (errors_str == NULL) {
1143
0
        return -1;
1144
0
    }
1145
1146
104
    if (validate_newline(newline) < 0) {
1147
0
        return -1;
1148
0
    }
1149
1150
104
    Py_CLEAR(self->buffer);
1151
104
    Py_CLEAR(self->encoding);
1152
104
    Py_CLEAR(self->encoder);
1153
104
    Py_CLEAR(self->decoder);
1154
104
    Py_CLEAR(self->readnl);
1155
104
    Py_CLEAR(self->decoded_chars);
1156
104
    Py_CLEAR(self->pending_bytes);
1157
104
    Py_CLEAR(self->snapshot);
1158
104
    Py_CLEAR(self->errors);
1159
104
    Py_CLEAR(self->raw);
1160
104
    self->decoded_chars_used = 0;
1161
104
    self->pending_bytes_count = 0;
1162
104
    self->encodefunc = NULL;
1163
104
    self->b2cratio = 0.0;
1164
1165
104
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1166
0
        _Py_DECLARE_STR(utf_8, "utf-8");
1167
0
        self->encoding = &_Py_STR(utf_8);
1168
0
    }
1169
104
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1170
0
        self->encoding = _Py_GetLocaleEncodingObject();
1171
0
        if (self->encoding == NULL) {
1172
0
            goto error;
1173
0
        }
1174
0
        assert(PyUnicode_Check(self->encoding));
1175
0
    }
1176
1177
104
    if (self->encoding != NULL) {
1178
0
        encoding = PyUnicode_AsUTF8(self->encoding);
1179
0
        if (encoding == NULL)
1180
0
            goto error;
1181
0
    }
1182
104
    else if (encoding != NULL) {
1183
104
        self->encoding = PyUnicode_FromString(encoding);
1184
104
        if (self->encoding == NULL)
1185
0
            goto error;
1186
104
    }
1187
0
    else {
1188
0
        PyErr_SetString(PyExc_OSError,
1189
0
                        "could not determine default encoding");
1190
0
        goto error;
1191
0
    }
1192
1193
    /* Check we have been asked for a real text encoding */
1194
104
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
1195
104
    if (codec_info == NULL) {
1196
0
        Py_CLEAR(self->encoding);
1197
0
        goto error;
1198
0
    }
1199
1200
    /* XXX: Failures beyond this point have the potential to leak elements
1201
     * of the partially constructed object (like self->encoding)
1202
     */
1203
1204
104
    self->errors = Py_NewRef(errors);
1205
104
    self->chunk_size = 8192;
1206
104
    self->line_buffering = line_buffering;
1207
104
    self->write_through = write_through;
1208
104
    if (set_newline(self, newline) < 0) {
1209
0
        goto error;
1210
0
    }
1211
1212
104
    self->buffer = Py_NewRef(buffer);
1213
1214
    /* Build the decoder object */
1215
104
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1216
104
    self->state = state;
1217
104
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1218
0
        goto error;
1219
1220
    /* Build the encoder object */
1221
104
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1222
0
        goto error;
1223
1224
    /* Finished sorting out the codec details */
1225
104
    Py_CLEAR(codec_info);
1226
1227
104
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1228
64
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1229
0
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1230
104
    {
1231
104
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1232
0
            goto error;
1233
        /* Cache the raw FileIO object to speed up 'closed' checks */
1234
104
        if (raw != NULL) {
1235
104
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1236
104
                self->raw = raw;
1237
0
            else
1238
0
                Py_DECREF(raw);
1239
104
        }
1240
104
    }
1241
1242
104
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1243
104
    if (res == NULL)
1244
0
        goto error;
1245
104
    r = PyObject_IsTrue(res);
1246
104
    Py_DECREF(res);
1247
104
    if (r < 0)
1248
0
        goto error;
1249
104
    self->seekable = self->telling = r;
1250
1251
104
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1252
104
    if (r < 0) {
1253
0
        goto error;
1254
0
    }
1255
104
    self->has_read1 = r;
1256
1257
104
    self->encoding_start_of_stream = 0;
1258
104
    if (_textiowrapper_fix_encoder_state(self) < 0) {
1259
0
        goto error;
1260
0
    }
1261
1262
104
    self->ok = 1;
1263
104
    return 0;
1264
1265
0
  error:
1266
0
    Py_XDECREF(codec_info);
1267
0
    return -1;
1268
104
}
1269
1270
/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1271
 * -1 on error.
1272
 */
1273
static int
1274
convert_optional_bool(PyObject *obj, int default_value)
1275
0
{
1276
0
    long v;
1277
0
    if (obj == Py_None) {
1278
0
        v = default_value;
1279
0
    }
1280
0
    else {
1281
0
        v = PyLong_AsLong(obj);
1282
0
        if (v == -1 && PyErr_Occurred())
1283
0
            return -1;
1284
0
    }
1285
0
    return v != 0;
1286
0
}
1287
1288
static int
1289
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1290
                              PyObject *errors, int newline_changed)
1291
0
{
1292
    /* Use existing settings where new settings are not specified */
1293
0
    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1294
0
        return 0;  // no change
1295
0
    }
1296
1297
0
    if (encoding == Py_None) {
1298
0
        encoding = self->encoding;
1299
0
        if (errors == Py_None) {
1300
0
            errors = self->errors;
1301
0
        }
1302
0
        Py_INCREF(encoding);
1303
0
    }
1304
0
    else {
1305
0
        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1306
0
            encoding = _Py_GetLocaleEncodingObject();
1307
0
            if (encoding == NULL) {
1308
0
                return -1;
1309
0
            }
1310
0
        } else {
1311
0
            Py_INCREF(encoding);
1312
0
        }
1313
0
        if (errors == Py_None) {
1314
0
            errors = &_Py_ID(strict);
1315
0
        }
1316
0
    }
1317
0
    Py_INCREF(errors);
1318
1319
0
    const char *c_encoding = PyUnicode_AsUTF8(encoding);
1320
0
    if (c_encoding == NULL) {
1321
0
        Py_DECREF(encoding);
1322
0
        Py_DECREF(errors);
1323
0
        return -1;
1324
0
    }
1325
0
    const char *c_errors = PyUnicode_AsUTF8(errors);
1326
0
    if (c_errors == NULL) {
1327
0
        Py_DECREF(encoding);
1328
0
        Py_DECREF(errors);
1329
0
        return -1;
1330
0
    }
1331
1332
    // Create new encoder & decoder
1333
0
    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
1334
0
    if (codec_info == NULL) {
1335
0
        Py_DECREF(encoding);
1336
0
        Py_DECREF(errors);
1337
0
        return -1;
1338
0
    }
1339
0
    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1340
0
            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1341
0
        Py_DECREF(codec_info);
1342
0
        Py_DECREF(encoding);
1343
0
        Py_DECREF(errors);
1344
0
        return -1;
1345
0
    }
1346
0
    Py_DECREF(codec_info);
1347
1348
0
    Py_SETREF(self->encoding, encoding);
1349
0
    Py_SETREF(self->errors, errors);
1350
1351
0
    return _textiowrapper_fix_encoder_state(self);
1352
0
}
1353
1354
/*[clinic input]
1355
@critical_section
1356
_io.TextIOWrapper.reconfigure
1357
    *
1358
    encoding: object = None
1359
    errors: object = None
1360
    newline as newline_obj: object(c_default="NULL") = None
1361
    line_buffering as line_buffering_obj: object = None
1362
    write_through as write_through_obj: object = None
1363
1364
Reconfigure the text stream with new parameters.
1365
1366
This also does an implicit stream flush.
1367
1368
[clinic start generated code]*/
1369
1370
static PyObject *
1371
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1372
                                   PyObject *errors, PyObject *newline_obj,
1373
                                   PyObject *line_buffering_obj,
1374
                                   PyObject *write_through_obj)
1375
/*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1376
0
{
1377
0
    int line_buffering;
1378
0
    int write_through;
1379
0
    const char *newline = NULL;
1380
1381
0
    if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1382
0
        PyErr_Format(PyExc_TypeError,
1383
0
                "reconfigure() argument 'encoding' must be str or None, not %s",
1384
0
                Py_TYPE(encoding)->tp_name);
1385
0
        return NULL;
1386
0
    }
1387
0
    if (errors != Py_None && !PyUnicode_Check(errors)) {
1388
0
        PyErr_Format(PyExc_TypeError,
1389
0
                "reconfigure() argument 'errors' must be str or None, not %s",
1390
0
                Py_TYPE(errors)->tp_name);
1391
0
        return NULL;
1392
0
    }
1393
0
    if (newline_obj != NULL && newline_obj != Py_None &&
1394
0
        !PyUnicode_Check(newline_obj))
1395
0
    {
1396
0
        PyErr_Format(PyExc_TypeError,
1397
0
                "reconfigure() argument 'newline' must be str or None, not %s",
1398
0
                Py_TYPE(newline_obj)->tp_name);
1399
0
        return NULL;
1400
0
    }
1401
    /* Check if something is in the read buffer */
1402
0
    if (self->decoded_chars != NULL) {
1403
0
        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1404
0
            _unsupported(self->state,
1405
0
                         "It is not possible to set the encoding or newline "
1406
0
                         "of stream after the first read");
1407
0
            return NULL;
1408
0
        }
1409
0
    }
1410
1411
0
    if (newline_obj != NULL && newline_obj != Py_None) {
1412
0
        newline = PyUnicode_AsUTF8(newline_obj);
1413
0
        if (newline == NULL || validate_newline(newline) < 0) {
1414
0
            return NULL;
1415
0
        }
1416
0
    }
1417
1418
0
    line_buffering = convert_optional_bool(line_buffering_obj,
1419
0
                                           self->line_buffering);
1420
0
    if (line_buffering < 0) {
1421
0
        return NULL;
1422
0
    }
1423
0
    write_through = convert_optional_bool(write_through_obj,
1424
0
                                          self->write_through);
1425
0
    if (write_through < 0) {
1426
0
        return NULL;
1427
0
    }
1428
1429
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1430
0
        return NULL;
1431
0
    }
1432
0
    self->b2cratio = 0;
1433
1434
0
    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1435
0
        return NULL;
1436
0
    }
1437
1438
0
    if (textiowrapper_change_encoding(
1439
0
            self, encoding, errors, newline_obj != NULL) < 0) {
1440
0
        return NULL;
1441
0
    }
1442
1443
0
    self->line_buffering = line_buffering;
1444
0
    self->write_through = write_through;
1445
0
    Py_RETURN_NONE;
1446
0
}
1447
1448
static int
1449
textiowrapper_clear(PyObject *op)
1450
8
{
1451
8
    textio *self = textio_CAST(op);
1452
8
    self->ok = 0;
1453
8
    Py_CLEAR(self->buffer);
1454
8
    Py_CLEAR(self->encoding);
1455
8
    Py_CLEAR(self->encoder);
1456
8
    Py_CLEAR(self->decoder);
1457
8
    Py_CLEAR(self->readnl);
1458
8
    Py_CLEAR(self->decoded_chars);
1459
8
    Py_CLEAR(self->pending_bytes);
1460
8
    Py_CLEAR(self->snapshot);
1461
8
    Py_CLEAR(self->errors);
1462
8
    Py_CLEAR(self->raw);
1463
1464
8
    Py_CLEAR(self->dict);
1465
8
    return 0;
1466
8
}
1467
1468
static void
1469
textiowrapper_dealloc(PyObject *op)
1470
8
{
1471
8
    textio *self = textio_CAST(op);
1472
8
    PyTypeObject *tp = Py_TYPE(self);
1473
8
    self->finalizing = 1;
1474
8
    if (_PyIOBase_finalize(op) < 0)
1475
0
        return;
1476
8
    self->ok = 0;
1477
8
    _PyObject_GC_UNTRACK(self);
1478
8
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
1479
8
    (void)textiowrapper_clear(op);
1480
8
    tp->tp_free(self);
1481
8
    Py_DECREF(tp);
1482
8
}
1483
1484
static int
1485
textiowrapper_traverse(PyObject *op, visitproc visit, void *arg)
1486
4.61k
{
1487
4.61k
    textio *self = textio_CAST(op);
1488
4.61k
    Py_VISIT(Py_TYPE(self));
1489
4.61k
    Py_VISIT(self->buffer);
1490
4.61k
    Py_VISIT(self->encoding);
1491
4.61k
    Py_VISIT(self->encoder);
1492
4.61k
    Py_VISIT(self->decoder);
1493
4.61k
    Py_VISIT(self->readnl);
1494
4.61k
    Py_VISIT(self->decoded_chars);
1495
4.61k
    Py_VISIT(self->pending_bytes);
1496
4.61k
    Py_VISIT(self->snapshot);
1497
4.61k
    Py_VISIT(self->errors);
1498
4.61k
    Py_VISIT(self->raw);
1499
1500
4.61k
    Py_VISIT(self->dict);
1501
4.61k
    return 0;
1502
4.61k
}
1503
1504
static PyObject *
1505
_io_TextIOWrapper_closed_get_impl(textio *self);
1506
1507
/* This macro takes some shortcuts to make the common case faster. */
1508
#define CHECK_CLOSED(self) \
1509
262k
    do { \
1510
262k
        int r; \
1511
262k
        PyObject *_res; \
1512
262k
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
1513
262k
            if (self->raw != NULL) \
1514
262k
                r = _PyFileIO_closed(self->raw); \
1515
262k
            else { \
1516
0
                _res = _io_TextIOWrapper_closed_get_impl(self); \
1517
0
                if (_res == NULL) \
1518
0
                    return NULL; \
1519
0
                r = PyObject_IsTrue(_res); \
1520
0
                Py_DECREF(_res); \
1521
0
                if (r < 0) \
1522
0
                    return NULL; \
1523
0
            } \
1524
262k
            if (r > 0) { \
1525
0
                PyErr_SetString(PyExc_ValueError, \
1526
0
                                "I/O operation on closed file."); \
1527
0
                return NULL; \
1528
0
            } \
1529
262k
        } \
1530
262k
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1531
0
            return NULL; \
1532
262k
    } while (0)
1533
1534
#define CHECK_INITIALIZED(self) \
1535
262k
    if (self->ok <= 0) { \
1536
0
        PyErr_SetString(PyExc_ValueError, \
1537
0
            "I/O operation on uninitialized object"); \
1538
0
        return NULL; \
1539
0
    }
1540
1541
#define CHECK_ATTACHED(self) \
1542
262k
    CHECK_INITIALIZED(self); \
1543
262k
    if (self->detached) { \
1544
0
        PyErr_SetString(PyExc_ValueError, \
1545
0
             "underlying buffer has been detached"); \
1546
0
        return NULL; \
1547
0
    }
1548
1549
#define CHECK_ATTACHED_INT(self) \
1550
0
    if (self->ok <= 0) { \
1551
0
        PyErr_SetString(PyExc_ValueError, \
1552
0
            "I/O operation on uninitialized object"); \
1553
0
        return -1; \
1554
0
    } else if (self->detached) { \
1555
0
        PyErr_SetString(PyExc_ValueError, \
1556
0
             "underlying buffer has been detached"); \
1557
0
        return -1; \
1558
0
    }
1559
1560
1561
/*[clinic input]
1562
@critical_section
1563
_io.TextIOWrapper.detach
1564
[clinic start generated code]*/
1565
1566
static PyObject *
1567
_io_TextIOWrapper_detach_impl(textio *self)
1568
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1569
0
{
1570
0
    PyObject *buffer;
1571
0
    CHECK_ATTACHED(self);
1572
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1573
0
        return NULL;
1574
0
    }
1575
0
    buffer = self->buffer;
1576
0
    self->buffer = NULL;
1577
0
    self->detached = 1;
1578
0
    return buffer;
1579
0
}
1580
1581
/* Flush the internal write buffer. This doesn't explicitly flush the
1582
   underlying buffered object, though. */
1583
static int
1584
_textiowrapper_writeflush(textio *self)
1585
262k
{
1586
262k
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
1587
1588
262k
    if (self->pending_bytes == NULL)
1589
7.95k
        return 0;
1590
1591
254k
    PyObject *pending = self->pending_bytes;
1592
254k
    PyObject *b;
1593
1594
254k
    if (PyBytes_Check(pending)) {
1595
0
        b = Py_NewRef(pending);
1596
0
    }
1597
254k
    else if (PyUnicode_Check(pending)) {
1598
254k
        assert(PyUnicode_IS_ASCII(pending));
1599
254k
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1600
254k
        b = PyBytes_FromStringAndSize(
1601
254k
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1602
254k
        if (b == NULL) {
1603
0
            return -1;
1604
0
        }
1605
254k
    }
1606
0
    else {
1607
0
        assert(PyList_Check(pending));
1608
0
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1609
0
        if (b == NULL) {
1610
0
            return -1;
1611
0
        }
1612
1613
0
        char *buf = PyBytes_AsString(b);
1614
0
        Py_ssize_t pos = 0;
1615
1616
0
        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1617
0
            PyObject *obj = PyList_GET_ITEM(pending, i);
1618
0
            char *src;
1619
0
            Py_ssize_t len;
1620
0
            if (PyUnicode_Check(obj)) {
1621
0
                assert(PyUnicode_IS_ASCII(obj));
1622
0
                src = PyUnicode_DATA(obj);
1623
0
                len = PyUnicode_GET_LENGTH(obj);
1624
0
            }
1625
0
            else {
1626
0
                assert(PyBytes_Check(obj));
1627
0
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1628
0
                    Py_DECREF(b);
1629
0
                    return -1;
1630
0
                }
1631
0
            }
1632
0
            memcpy(buf + pos, src, len);
1633
0
            pos += len;
1634
0
        }
1635
0
        assert(pos == self->pending_bytes_count);
1636
0
    }
1637
1638
254k
    self->pending_bytes_count = 0;
1639
254k
    self->pending_bytes = NULL;
1640
254k
    Py_DECREF(pending);
1641
1642
254k
    PyObject *ret;
1643
254k
    do {
1644
254k
        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1645
254k
    } while (ret == NULL && _PyIO_trap_eintr());
1646
254k
    Py_DECREF(b);
1647
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
1648
    // when an error occurred.
1649
254k
    if (ret == NULL)
1650
0
        return -1;
1651
254k
    Py_DECREF(ret);
1652
254k
    return 0;
1653
254k
}
1654
1655
/*[clinic input]
1656
@critical_section
1657
_io.TextIOWrapper.write
1658
    text: unicode
1659
    /
1660
[clinic start generated code]*/
1661
1662
static PyObject *
1663
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1664
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1665
254k
{
1666
254k
    PyObject *ret;
1667
254k
    PyObject *b;
1668
254k
    Py_ssize_t textlen;
1669
254k
    int haslf = 0;
1670
254k
    int needflush = 0, text_needflush = 0;
1671
1672
254k
    CHECK_ATTACHED(self);
1673
254k
    CHECK_CLOSED(self);
1674
1675
254k
    if (self->encoder == NULL) {
1676
0
        return _unsupported(self->state, "not writable");
1677
0
    }
1678
1679
254k
    Py_INCREF(text);
1680
1681
254k
    textlen = PyUnicode_GET_LENGTH(text);
1682
1683
254k
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1684
254k
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1685
254k
            haslf = 1;
1686
1687
254k
    if (haslf && self->writetranslate && self->writenl != NULL) {
1688
0
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1689
0
                                                 "ss", "\n", self->writenl);
1690
0
        Py_DECREF(text);
1691
0
        if (newtext == NULL)
1692
0
            return NULL;
1693
0
        text = newtext;
1694
0
    }
1695
1696
254k
    if (self->write_through)
1697
0
        text_needflush = 1;
1698
254k
    if (self->line_buffering &&
1699
254k
        (haslf ||
1700
0
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1701
254k
        needflush = 1;
1702
1703
    /* XXX What if we were just reading? */
1704
254k
    if (self->encodefunc != NULL) {
1705
254k
        if (PyUnicode_IS_ASCII(text) &&
1706
                // See bpo-43260
1707
254k
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1708
254k
                is_asciicompat_encoding(self->encodefunc)) {
1709
254k
            b = Py_NewRef(text);
1710
254k
        }
1711
0
        else {
1712
0
            b = (*self->encodefunc)((PyObject *) self, text);
1713
0
        }
1714
254k
        self->encoding_start_of_stream = 0;
1715
254k
    }
1716
0
    else {
1717
0
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1718
0
    }
1719
1720
254k
    Py_DECREF(text);
1721
254k
    if (b == NULL)
1722
0
        return NULL;
1723
254k
    if (b != text && !PyBytes_Check(b)) {
1724
0
        PyErr_Format(PyExc_TypeError,
1725
0
                     "encoder should return a bytes object, not '%.200s'",
1726
0
                     Py_TYPE(b)->tp_name);
1727
0
        Py_DECREF(b);
1728
0
        return NULL;
1729
0
    }
1730
1731
254k
    Py_ssize_t bytes_len;
1732
254k
    if (b == text) {
1733
254k
        bytes_len = PyUnicode_GET_LENGTH(b);
1734
254k
    }
1735
0
    else {
1736
0
        bytes_len = PyBytes_GET_SIZE(b);
1737
0
    }
1738
1739
    // We should avoid concatenating huge data.
1740
    // Flush the buffer before adding b to the buffer if b is not small.
1741
    // https://github.com/python/cpython/issues/87426
1742
254k
    if (bytes_len >= self->chunk_size) {
1743
        // _textiowrapper_writeflush() calls buffer.write().
1744
        // self->pending_bytes can be appended during buffer->write()
1745
        // or other thread.
1746
        // We need to loop until buffer becomes empty.
1747
        // https://github.com/python/cpython/issues/118138
1748
        // https://github.com/python/cpython/issues/119506
1749
0
        while (self->pending_bytes != NULL) {
1750
0
            if (_textiowrapper_writeflush(self) < 0) {
1751
0
                Py_DECREF(b);
1752
0
                return NULL;
1753
0
            }
1754
0
        }
1755
0
    }
1756
1757
254k
    if (self->pending_bytes == NULL) {
1758
254k
        assert(self->pending_bytes_count == 0);
1759
254k
        self->pending_bytes = b;
1760
254k
    }
1761
0
    else if (!PyList_CheckExact(self->pending_bytes)) {
1762
0
        PyObject *list = PyList_New(2);
1763
0
        if (list == NULL) {
1764
0
            Py_DECREF(b);
1765
0
            return NULL;
1766
0
        }
1767
        // Since Python 3.12, allocating GC object won't trigger GC and release
1768
        // GIL. See https://github.com/python/cpython/issues/97922
1769
0
        assert(!PyList_CheckExact(self->pending_bytes));
1770
0
        PyList_SET_ITEM(list, 0, self->pending_bytes);
1771
0
        PyList_SET_ITEM(list, 1, b);
1772
0
        self->pending_bytes = list;
1773
0
    }
1774
0
    else {
1775
0
        if (PyList_Append(self->pending_bytes, b) < 0) {
1776
0
            Py_DECREF(b);
1777
0
            return NULL;
1778
0
        }
1779
0
        Py_DECREF(b);
1780
0
    }
1781
1782
254k
    self->pending_bytes_count += bytes_len;
1783
254k
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1784
254k
        text_needflush) {
1785
254k
        if (_textiowrapper_writeflush(self) < 0)
1786
0
            return NULL;
1787
254k
    }
1788
1789
254k
    if (needflush) {
1790
254k
        if (_PyFile_Flush(self->buffer) < 0) {
1791
0
            return NULL;
1792
0
        }
1793
254k
    }
1794
1795
254k
    if (self->snapshot != NULL) {
1796
0
        textiowrapper_set_decoded_chars(self, NULL);
1797
0
        Py_CLEAR(self->snapshot);
1798
0
    }
1799
1800
254k
    if (self->decoder) {
1801
0
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1802
0
        if (ret == NULL)
1803
0
            return NULL;
1804
0
        Py_DECREF(ret);
1805
0
    }
1806
1807
254k
    return PyLong_FromSsize_t(textlen);
1808
254k
}
1809
1810
/* Steal a reference to chars and store it in the decoded_char buffer;
1811
 */
1812
static void
1813
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1814
92
{
1815
92
    Py_XSETREF(self->decoded_chars, chars);
1816
92
    self->decoded_chars_used = 0;
1817
92
}
1818
1819
static PyObject *
1820
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1821
4
{
1822
4
    PyObject *chars;
1823
4
    Py_ssize_t avail;
1824
1825
4
    if (self->decoded_chars == NULL)
1826
4
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1827
1828
0
    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1829
0
             - self->decoded_chars_used);
1830
1831
0
    assert(avail >= 0);
1832
1833
0
    if (n < 0 || n > avail)
1834
0
        n = avail;
1835
1836
0
    if (self->decoded_chars_used > 0 || n < avail) {
1837
0
        chars = PyUnicode_Substring(self->decoded_chars,
1838
0
                                    self->decoded_chars_used,
1839
0
                                    self->decoded_chars_used + n);
1840
0
        if (chars == NULL)
1841
0
            return NULL;
1842
0
    }
1843
0
    else {
1844
0
        chars = Py_NewRef(self->decoded_chars);
1845
0
    }
1846
1847
0
    self->decoded_chars_used += n;
1848
0
    return chars;
1849
0
}
1850
1851
/* Read and decode the next chunk of data from the BufferedReader.
1852
 */
1853
static int
1854
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1855
46
{
1856
46
    PyObject *dec_buffer = NULL;
1857
46
    PyObject *dec_flags = NULL;
1858
46
    PyObject *input_chunk = NULL;
1859
46
    Py_buffer input_chunk_buf;
1860
46
    PyObject *decoded_chars, *chunk_size;
1861
46
    Py_ssize_t nbytes, nchars;
1862
46
    int eof;
1863
1864
    /* The return value is True unless EOF was reached.  The decoded string is
1865
     * placed in self._decoded_chars (replacing its previous value).  The
1866
     * entire input chunk is sent to the decoder, though some of it may remain
1867
     * buffered in the decoder, yet to be converted.
1868
     */
1869
1870
46
    if (self->decoder == NULL) {
1871
0
        _unsupported(self->state, "not readable");
1872
0
        return -1;
1873
0
    }
1874
1875
46
    if (self->telling) {
1876
        /* To prepare for tell(), we need to snapshot a point in the file
1877
         * where the decoder's input buffer is empty.
1878
         */
1879
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1880
0
                                                     &_Py_ID(getstate));
1881
0
        if (state == NULL)
1882
0
            return -1;
1883
        /* Given this, we know there was a valid snapshot point
1884
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1885
         */
1886
0
        if (!PyTuple_Check(state)) {
1887
0
            PyErr_SetString(PyExc_TypeError,
1888
0
                            "illegal decoder state");
1889
0
            Py_DECREF(state);
1890
0
            return -1;
1891
0
        }
1892
0
        if (!PyArg_ParseTuple(state,
1893
0
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1894
0
        {
1895
0
            Py_DECREF(state);
1896
0
            return -1;
1897
0
        }
1898
1899
0
        if (!PyBytes_Check(dec_buffer)) {
1900
0
            PyErr_Format(PyExc_TypeError,
1901
0
                         "illegal decoder state: the first item should be a "
1902
0
                         "bytes object, not '%.200s'",
1903
0
                         Py_TYPE(dec_buffer)->tp_name);
1904
0
            Py_DECREF(state);
1905
0
            return -1;
1906
0
        }
1907
0
        Py_INCREF(dec_buffer);
1908
0
        Py_INCREF(dec_flags);
1909
0
        Py_DECREF(state);
1910
0
    }
1911
1912
    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1913
46
    if (size_hint > 0) {
1914
0
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1915
0
    }
1916
46
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1917
46
    if (chunk_size == NULL)
1918
0
        goto fail;
1919
1920
46
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
1921
46
        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1922
46
        chunk_size);
1923
46
    Py_DECREF(chunk_size);
1924
46
    if (input_chunk == NULL)
1925
0
        goto fail;
1926
1927
46
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1928
0
        PyErr_Format(PyExc_TypeError,
1929
0
                     "underlying %s() should have returned a bytes-like object, "
1930
0
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1931
0
                     Py_TYPE(input_chunk)->tp_name);
1932
0
        goto fail;
1933
0
    }
1934
1935
46
    nbytes = input_chunk_buf.len;
1936
46
    eof = (nbytes == 0);
1937
1938
46
    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1939
46
                                          input_chunk, eof);
1940
46
    PyBuffer_Release(&input_chunk_buf);
1941
46
    if (decoded_chars == NULL)
1942
0
        goto fail;
1943
1944
46
    textiowrapper_set_decoded_chars(self, decoded_chars);
1945
46
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
1946
46
    if (nchars > 0)
1947
42
        self->b2cratio = (double) nbytes / nchars;
1948
4
    else
1949
4
        self->b2cratio = 0.0;
1950
46
    if (nchars > 0)
1951
42
        eof = 0;
1952
1953
46
    if (self->telling) {
1954
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1955
         * next input to be decoded is dec_buffer + input_chunk.
1956
         */
1957
0
        PyObject *next_input = dec_buffer;
1958
0
        PyBytes_Concat(&next_input, input_chunk);
1959
0
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1960
0
        if (next_input == NULL) {
1961
0
            goto fail;
1962
0
        }
1963
0
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1964
0
        if (snapshot == NULL) {
1965
0
            dec_flags = NULL;
1966
0
            goto fail;
1967
0
        }
1968
0
        Py_XSETREF(self->snapshot, snapshot);
1969
0
    }
1970
46
    Py_DECREF(input_chunk);
1971
1972
46
    return (eof == 0);
1973
1974
0
  fail:
1975
0
    Py_XDECREF(dec_buffer);
1976
0
    Py_XDECREF(dec_flags);
1977
0
    Py_XDECREF(input_chunk);
1978
0
    return -1;
1979
46
}
1980
1981
/*[clinic input]
1982
@critical_section
1983
_io.TextIOWrapper.read
1984
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
1985
    /
1986
[clinic start generated code]*/
1987
1988
static PyObject *
1989
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1990
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
1991
4
{
1992
4
    PyObject *result = NULL, *chunks = NULL;
1993
1994
4
    CHECK_ATTACHED(self);
1995
4
    CHECK_CLOSED(self);
1996
1997
4
    if (self->decoder == NULL) {
1998
0
        return _unsupported(self->state, "not readable");
1999
0
    }
2000
2001
4
    if (_textiowrapper_writeflush(self) < 0)
2002
0
        return NULL;
2003
2004
4
    if (n < 0) {
2005
        /* Read everything */
2006
4
        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
2007
4
        PyObject *decoded;
2008
4
        if (bytes == NULL)
2009
0
            goto fail;
2010
2011
4
        if (bytes == Py_None){
2012
0
            Py_DECREF(bytes);
2013
0
            PyErr_SetString(PyExc_BlockingIOError, "Read returned None.");
2014
0
            return NULL;
2015
0
        }
2016
2017
4
        _PyIO_State *state = self->state;
2018
4
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
2019
4
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
2020
4
                                                          bytes, 1);
2021
0
        else
2022
0
            decoded = PyObject_CallMethodObjArgs(
2023
0
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2024
4
        Py_DECREF(bytes);
2025
4
        if (check_decoded(decoded) < 0)
2026
0
            goto fail;
2027
2028
4
        result = textiowrapper_get_decoded_chars(self, -1);
2029
2030
4
        if (result == NULL) {
2031
0
            Py_DECREF(decoded);
2032
0
            return NULL;
2033
0
        }
2034
2035
4
        PyUnicode_AppendAndDel(&result, decoded);
2036
4
        if (result == NULL)
2037
0
            goto fail;
2038
2039
4
        if (self->snapshot != NULL) {
2040
0
            textiowrapper_set_decoded_chars(self, NULL);
2041
0
            Py_CLEAR(self->snapshot);
2042
0
        }
2043
4
        return result;
2044
4
    }
2045
0
    else {
2046
0
        int res = 1;
2047
0
        Py_ssize_t remaining = n;
2048
2049
0
        result = textiowrapper_get_decoded_chars(self, n);
2050
0
        if (result == NULL)
2051
0
            goto fail;
2052
0
        remaining -= PyUnicode_GET_LENGTH(result);
2053
2054
        /* Keep reading chunks until we have n characters to return */
2055
0
        while (remaining > 0) {
2056
0
            res = textiowrapper_read_chunk(self, remaining);
2057
0
            if (res < 0) {
2058
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2059
                   when EINTR occurs so we needn't do it ourselves. */
2060
0
                if (_PyIO_trap_eintr()) {
2061
0
                    continue;
2062
0
                }
2063
0
                goto fail;
2064
0
            }
2065
0
            if (res == 0)  /* EOF */
2066
0
                break;
2067
0
            if (chunks == NULL) {
2068
0
                chunks = PyList_New(0);
2069
0
                if (chunks == NULL)
2070
0
                    goto fail;
2071
0
            }
2072
0
            if (PyUnicode_GET_LENGTH(result) > 0 &&
2073
0
                PyList_Append(chunks, result) < 0)
2074
0
                goto fail;
2075
0
            Py_DECREF(result);
2076
0
            result = textiowrapper_get_decoded_chars(self, remaining);
2077
0
            if (result == NULL)
2078
0
                goto fail;
2079
0
            remaining -= PyUnicode_GET_LENGTH(result);
2080
0
        }
2081
0
        if (chunks != NULL) {
2082
0
            if (result != NULL && PyList_Append(chunks, result) < 0)
2083
0
                goto fail;
2084
0
            _Py_DECLARE_STR(empty, "");
2085
0
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2086
0
            if (result == NULL)
2087
0
                goto fail;
2088
0
            Py_CLEAR(chunks);
2089
0
        }
2090
0
        return result;
2091
0
    }
2092
0
  fail:
2093
0
    Py_XDECREF(result);
2094
0
    Py_XDECREF(chunks);
2095
0
    return NULL;
2096
4
}
2097
2098
2099
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2100
   that is to the NUL character. Otherwise the function will produce
2101
   incorrect results. */
2102
static const char *
2103
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2104
5.97M
{
2105
5.97M
    if (kind == PyUnicode_1BYTE_KIND) {
2106
7.61k
        assert(ch < 256);
2107
7.61k
        return (char *) memchr((const void *) s, (char) ch, end - s);
2108
7.61k
    }
2109
98.7M
    for (;;) {
2110
166M
        while (PyUnicode_READ(kind, s, 0) > ch)
2111
67.2M
            s += kind;
2112
98.7M
        if (PyUnicode_READ(kind, s, 0) == ch)
2113
5.96M
            return s;
2114
92.7M
        if (s == end)
2115
4.85k
            return NULL;
2116
92.7M
        s += kind;
2117
92.7M
    }
2118
5.97M
}
2119
2120
Py_ssize_t
2121
_PyIO_find_line_ending(
2122
    int translated, int universal, PyObject *readnl,
2123
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2124
22.2M
{
2125
22.2M
    Py_ssize_t len = (end - start)/kind;
2126
2127
22.2M
    if (translated) {
2128
        /* Newlines are already translated, only search for \n */
2129
7.97k
        const char *pos = find_control_char(kind, start, end, '\n');
2130
7.97k
        if (pos != NULL)
2131
7.93k
            return (pos - start)/kind + 1;
2132
42
        else {
2133
42
            *consumed = len;
2134
42
            return -1;
2135
42
        }
2136
7.97k
    }
2137
22.2M
    else if (universal) {
2138
        /* Universal newline search. Find any of \r, \r\n, \n
2139
         * The decoder ensures that \r\n are not split in two pieces
2140
         */
2141
16.2M
        const char *s = start;
2142
78.9M
        for (;;) {
2143
78.9M
            Py_UCS4 ch;
2144
            /* Fast path for non-control chars. The loop always ends
2145
               since the Unicode string is NUL-terminated. */
2146
238M
            while (PyUnicode_READ(kind, s, 0) > '\r')
2147
159M
                s += kind;
2148
78.9M
            if (s >= end) {
2149
31.4k
                *consumed = len;
2150
31.4k
                return -1;
2151
31.4k
            }
2152
78.8M
            ch = PyUnicode_READ(kind, s, 0);
2153
78.8M
            s += kind;
2154
78.8M
            if (ch == '\n')
2155
4.12M
                return (s - start)/kind;
2156
74.7M
            if (ch == '\r') {
2157
12.1M
                if (PyUnicode_READ(kind, s, 0) == '\n')
2158
441k
                    return (s - start)/kind + 1;
2159
11.6M
                else
2160
11.6M
                    return (s - start)/kind;
2161
12.1M
            }
2162
74.7M
        }
2163
16.2M
    }
2164
5.97M
    else {
2165
        /* Non-universal mode. */
2166
5.97M
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2167
5.97M
        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2168
        /* Assume that readnl is an ASCII character. */
2169
5.97M
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2170
5.97M
        if (readnl_len == 1) {
2171
5.97M
            const char *pos = find_control_char(kind, start, end, nl[0]);
2172
5.97M
            if (pos != NULL)
2173
5.96M
                return (pos - start)/kind + 1;
2174
4.85k
            *consumed = len;
2175
4.85k
            return -1;
2176
5.97M
        }
2177
0
        else {
2178
0
            const char *s = start;
2179
0
            const char *e = end - (readnl_len - 1)*kind;
2180
0
            const char *pos;
2181
0
            if (e < s)
2182
0
                e = s;
2183
0
            while (s < e) {
2184
0
                Py_ssize_t i;
2185
0
                const char *pos = find_control_char(kind, s, end, nl[0]);
2186
0
                if (pos == NULL || pos >= e)
2187
0
                    break;
2188
0
                for (i = 1; i < readnl_len; i++) {
2189
0
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2190
0
                        break;
2191
0
                }
2192
0
                if (i == readnl_len)
2193
0
                    return (pos - start)/kind + readnl_len;
2194
0
                s = pos + kind;
2195
0
            }
2196
0
            pos = find_control_char(kind, e, end, nl[0]);
2197
0
            if (pos == NULL)
2198
0
                *consumed = len;
2199
0
            else
2200
0
                *consumed = (pos - start)/kind;
2201
0
            return -1;
2202
0
        }
2203
5.97M
    }
2204
22.2M
}
2205
2206
static PyObject *
2207
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2208
7.94k
{
2209
7.94k
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2210
7.94k
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
2211
7.94k
    int res;
2212
2213
7.94k
    CHECK_CLOSED(self);
2214
2215
7.94k
    if (_textiowrapper_writeflush(self) < 0)
2216
0
        return NULL;
2217
2218
7.94k
    chunked = 0;
2219
2220
7.98k
    while (1) {
2221
7.98k
        const char *ptr;
2222
7.98k
        Py_ssize_t line_len;
2223
7.98k
        int kind;
2224
7.98k
        Py_ssize_t consumed = 0;
2225
2226
        /* First, get some data if necessary */
2227
7.98k
        res = 1;
2228
8.02k
        while (!self->decoded_chars ||
2229
7.97k
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2230
46
            res = textiowrapper_read_chunk(self, 0);
2231
46
            if (res < 0) {
2232
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2233
                   when EINTR occurs so we needn't do it ourselves. */
2234
0
                if (_PyIO_trap_eintr()) {
2235
0
                    continue;
2236
0
                }
2237
0
                goto error;
2238
0
            }
2239
46
            if (res == 0)
2240
4
                break;
2241
46
        }
2242
7.98k
        if (res == 0) {
2243
            /* end of file */
2244
4
            textiowrapper_set_decoded_chars(self, NULL);
2245
4
            Py_CLEAR(self->snapshot);
2246
4
            start = endpos = offset_to_buffer = 0;
2247
4
            break;
2248
4
        }
2249
2250
7.97k
        if (remaining == NULL) {
2251
7.97k
            line = Py_NewRef(self->decoded_chars);
2252
7.97k
            start = self->decoded_chars_used;
2253
7.97k
            offset_to_buffer = 0;
2254
7.97k
        }
2255
0
        else {
2256
0
            assert(self->decoded_chars_used == 0);
2257
0
            line = PyUnicode_Concat(remaining, self->decoded_chars);
2258
0
            start = 0;
2259
0
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2260
0
            Py_CLEAR(remaining);
2261
0
            if (line == NULL)
2262
0
                goto error;
2263
0
        }
2264
2265
7.97k
        ptr = PyUnicode_DATA(line);
2266
7.97k
        line_len = PyUnicode_GET_LENGTH(line);
2267
7.97k
        kind = PyUnicode_KIND(line);
2268
2269
7.97k
        endpos = _PyIO_find_line_ending(
2270
7.97k
            self->readtranslate, self->readuniversal, self->readnl,
2271
7.97k
            kind,
2272
7.97k
            ptr + kind * start,
2273
7.97k
            ptr + kind * line_len,
2274
7.97k
            &consumed);
2275
7.97k
        if (endpos >= 0) {
2276
7.93k
            endpos += start;
2277
7.93k
            if (limit >= 0 && (endpos - start) + chunked >= limit)
2278
0
                endpos = start + limit - chunked;
2279
7.93k
            break;
2280
7.93k
        }
2281
2282
        /* We can put aside up to `endpos` */
2283
42
        endpos = consumed + start;
2284
42
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
2285
            /* Didn't find line ending, but reached length limit */
2286
0
            endpos = start + limit - chunked;
2287
0
            break;
2288
0
        }
2289
2290
42
        if (endpos > start) {
2291
            /* No line ending seen yet - put aside current data */
2292
36
            PyObject *s;
2293
36
            if (chunks == NULL) {
2294
36
                chunks = PyList_New(0);
2295
36
                if (chunks == NULL)
2296
0
                    goto error;
2297
36
            }
2298
36
            s = PyUnicode_Substring(line, start, endpos);
2299
36
            if (s == NULL)
2300
0
                goto error;
2301
36
            if (PyList_Append(chunks, s) < 0) {
2302
0
                Py_DECREF(s);
2303
0
                goto error;
2304
0
            }
2305
36
            chunked += PyUnicode_GET_LENGTH(s);
2306
36
            Py_DECREF(s);
2307
36
        }
2308
        /* There may be some remaining bytes we'll have to prepend to the
2309
           next chunk of data */
2310
42
        if (endpos < line_len) {
2311
0
            remaining = PyUnicode_Substring(line, endpos, line_len);
2312
0
            if (remaining == NULL)
2313
0
                goto error;
2314
0
        }
2315
42
        Py_CLEAR(line);
2316
        /* We have consumed the buffer */
2317
42
        textiowrapper_set_decoded_chars(self, NULL);
2318
42
    }
2319
2320
7.94k
    if (line != NULL) {
2321
        /* Our line ends in the current buffer */
2322
7.93k
        self->decoded_chars_used = endpos - offset_to_buffer;
2323
7.93k
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2324
7.93k
            PyObject *s = PyUnicode_Substring(line, start, endpos);
2325
7.93k
            Py_CLEAR(line);
2326
7.93k
            if (s == NULL)
2327
0
                goto error;
2328
7.93k
            line = s;
2329
7.93k
        }
2330
7.93k
    }
2331
7.94k
    if (remaining != NULL) {
2332
0
        if (chunks == NULL) {
2333
0
            chunks = PyList_New(0);
2334
0
            if (chunks == NULL)
2335
0
                goto error;
2336
0
        }
2337
0
        if (PyList_Append(chunks, remaining) < 0)
2338
0
            goto error;
2339
0
        Py_CLEAR(remaining);
2340
0
    }
2341
7.94k
    if (chunks != NULL) {
2342
36
        if (line != NULL) {
2343
36
            if (PyList_Append(chunks, line) < 0)
2344
0
                goto error;
2345
36
            Py_DECREF(line);
2346
36
        }
2347
36
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
2348
36
        if (line == NULL)
2349
0
            goto error;
2350
36
        Py_CLEAR(chunks);
2351
36
    }
2352
7.94k
    if (line == NULL) {
2353
4
        line = &_Py_STR(empty);
2354
4
    }
2355
2356
7.94k
    return line;
2357
2358
0
  error:
2359
0
    Py_XDECREF(chunks);
2360
0
    Py_XDECREF(remaining);
2361
0
    Py_XDECREF(line);
2362
0
    return NULL;
2363
7.94k
}
2364
2365
/*[clinic input]
2366
@critical_section
2367
_io.TextIOWrapper.readline
2368
    size: Py_ssize_t = -1
2369
    /
2370
[clinic start generated code]*/
2371
2372
static PyObject *
2373
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2374
/*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2375
0
{
2376
0
    CHECK_ATTACHED(self);
2377
0
    return _textiowrapper_readline(self, size);
2378
0
}
2379
2380
/* Seek and Tell */
2381
2382
/* Unpacked form of the opaque position number handled by seek(): the fields
   are filled in by textiowrapper_parse_cookie() and serialised again by
   textiowrapper_build_cookie(). */
typedef struct {
2383
    Py_off_t start_pos;     /* offset in the underlying buffer that seek() goes back to first */
2384
    int dec_flags;          /* flags value handed to decoder.setstate() */
2385
    int bytes_to_feed;      /* byte count seek() reads from the buffer to feed the decoder */
2386
    int chars_to_skip;      /* decoded characters seek() marks as already used */
2387
    char need_eof;          /* non-zero: seek() passes True as decode()'s second argument */
2388
} cookie_type;
2389
2390
/*
2391
   To speed up cookie packing/unpacking, we store the fields in a temporary
2392
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
2393
   (respectively).  The following macros define at which offsets in the
2394
   intermediary byte string the various cookie_type fields will be stored.
2395
 */
2396
2397
/* Size of the intermediary byte string: one Py_off_t, three ints, one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2398
2399
#if PY_BIG_ENDIAN
2400
/* We want the least significant byte of start_pos to also be the least
2401
   significant byte of the cookie, which means that in big-endian mode we
2402
   must copy the fields in reverse order. */
2403
2404
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2405
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2406
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2407
# define OFF_CHARS_TO_SKIP  (sizeof(char))
2408
# define OFF_NEED_EOF       0
2409
2410
#else
2411
/* Little-endian mode: the least significant byte of start_pos will
2412
   naturally end up the least significant byte of the cookie. */
2413
2414
0
# define OFF_START_POS      0
2415
0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2416
0
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2417
0
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2418
0
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2419
2420
#endif
2421
2422
static int
2423
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2424
0
{
2425
0
    unsigned char buffer[COOKIE_BUF_LEN];
2426
0
    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2427
0
    if (cookieLong == NULL)
2428
0
        return -1;
2429
2430
0
    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2431
0
                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
2432
0
        Py_DECREF(cookieLong);
2433
0
        return -1;
2434
0
    }
2435
0
    Py_DECREF(cookieLong);
2436
2437
0
    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2438
0
    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2439
0
    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2440
0
    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2441
0
    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2442
2443
0
    return 0;
2444
0
}
2445
2446
static PyObject *
2447
textiowrapper_build_cookie(cookie_type *cookie)
2448
0
{
2449
0
    unsigned char buffer[COOKIE_BUF_LEN];
2450
2451
0
    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2452
0
    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2453
0
    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2454
0
    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2455
0
    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2456
2457
0
    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2458
0
                                 PY_LITTLE_ENDIAN, 0);
2459
0
}
2460
2461
static int
2462
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2463
0
{
2464
0
    PyObject *res;
2465
    /* When seeking to the start of the stream, we call decoder.reset()
2466
       rather than decoder.getstate().
2467
       This is for a few decoders such as utf-16 for which the state value
2468
       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2469
       utf-16, that we are expecting a BOM).
2470
    */
2471
0
    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2472
0
        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2473
0
    }
2474
0
    else {
2475
0
        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2476
0
                                   "((yi))", "", cookie->dec_flags);
2477
0
    }
2478
0
    if (res == NULL) {
2479
0
        return -1;
2480
0
    }
2481
0
    Py_DECREF(res);
2482
0
    return 0;
2483
0
}
2484
2485
static int
2486
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2487
0
{
2488
0
    PyObject *res;
2489
0
    if (start_of_stream) {
2490
0
        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2491
0
        self->encoding_start_of_stream = 1;
2492
0
    }
2493
0
    else {
2494
0
        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2495
0
                                        _PyLong_GetZero());
2496
0
        self->encoding_start_of_stream = 0;
2497
0
    }
2498
0
    if (res == NULL)
2499
0
        return -1;
2500
0
    Py_DECREF(res);
2501
0
    return 0;
2502
0
}
2503
2504
static int
2505
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2506
0
{
2507
    /* Same as _textiowrapper_decoder_setstate() above. */
2508
0
    return _textiowrapper_encoder_reset(
2509
0
        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2510
0
}
2511
2512
/*[clinic input]
2513
@critical_section
2514
_io.TextIOWrapper.seek
2515
    cookie as cookieObj: object
2516
      Zero or an opaque number returned by tell().
2517
    whence: int(c_default='0') = os.SEEK_SET
2518
      The relative position to seek from.
2519
    /
2520
2521
Set the stream position, and return the new stream position.
2522
2523
Four operations are supported, given by the following argument
2524
combinations:
2525
2526
- seek(0, SEEK_SET): Rewind to the start of the stream.
2527
- seek(cookie, SEEK_SET): Restore a previous position;
2528
  'cookie' must be a number returned by tell().
2529
- seek(0, SEEK_END): Fast-forward to the end of the stream.
2530
- seek(0, SEEK_CUR): Leave the current stream position unchanged.
2531
2532
Any other argument combinations are invalid,
2533
and may raise exceptions.
2534
[clinic start generated code]*/
2535
2536
static PyObject *
2537
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2538
/*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2539
0
{
2540
0
    PyObject *posobj;
2541
0
    cookie_type cookie;
2542
0
    PyObject *res;
2543
0
    int cmp;
2544
0
    PyObject *snapshot;
2545
2546
0
    CHECK_ATTACHED(self);
2547
0
    CHECK_CLOSED(self);
2548
2549
0
    Py_INCREF(cookieObj);
2550
2551
0
    if (!self->seekable) {
2552
0
        _unsupported(self->state, "underlying stream is not seekable");
2553
0
        goto fail;
2554
0
    }
2555
2556
0
    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2557
2558
0
    switch (whence) {
2559
0
    case SEEK_CUR:
2560
        /* seek relative to current position */
2561
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2562
0
        if (cmp < 0)
2563
0
            goto fail;
2564
2565
0
        if (cmp == 0) {
2566
0
            _unsupported(self->state, "can't do nonzero cur-relative seeks");
2567
0
            goto fail;
2568
0
        }
2569
2570
        /* Seeking to the current position should attempt to
2571
         * sync the underlying buffer with the current position.
2572
         */
2573
0
        Py_DECREF(cookieObj);
2574
0
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2575
0
        if (cookieObj == NULL)
2576
0
            goto fail;
2577
0
        break;
2578
2579
0
    case SEEK_END:
2580
        /* seek relative to end of file */
2581
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2582
0
        if (cmp < 0)
2583
0
            goto fail;
2584
2585
0
        if (cmp == 0) {
2586
0
            _unsupported(self->state, "can't do nonzero end-relative seeks");
2587
0
            goto fail;
2588
0
        }
2589
2590
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
2591
0
            goto fail;
2592
0
        }
2593
2594
0
        textiowrapper_set_decoded_chars(self, NULL);
2595
0
        Py_CLEAR(self->snapshot);
2596
0
        if (self->decoder) {
2597
0
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2598
0
            if (res == NULL)
2599
0
                goto fail;
2600
0
            Py_DECREF(res);
2601
0
        }
2602
2603
0
        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2604
0
        Py_CLEAR(cookieObj);
2605
0
        if (res == NULL)
2606
0
            goto fail;
2607
0
        if (self->encoder) {
2608
            /* If seek() == 0, we are at the start of stream, otherwise not */
2609
0
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2610
0
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2611
0
                Py_DECREF(res);
2612
0
                goto fail;
2613
0
            }
2614
0
        }
2615
0
        return res;
2616
2617
0
    case SEEK_SET:
2618
0
        break;
2619
2620
0
    default:
2621
0
        PyErr_Format(PyExc_ValueError,
2622
0
                     "invalid whence (%d, should be %d, %d or %d)", whence,
2623
0
                     SEEK_SET, SEEK_CUR, SEEK_END);
2624
0
        goto fail;
2625
0
    }
2626
2627
0
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2628
0
    if (cmp < 0)
2629
0
        goto fail;
2630
2631
0
    if (cmp == 1) {
2632
0
        PyErr_Format(PyExc_ValueError,
2633
0
                     "negative seek position %R", cookieObj);
2634
0
        goto fail;
2635
0
    }
2636
2637
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2638
0
        goto fail;
2639
0
    }
2640
2641
    /* The strategy of seek() is to go back to the safe start point
2642
     * and replay the effect of read(chars_to_skip) from there.
2643
     */
2644
0
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2645
0
        goto fail;
2646
2647
    /* Seek back to the safe start point. */
2648
0
    posobj = PyLong_FromOff_t(cookie.start_pos);
2649
0
    if (posobj == NULL)
2650
0
        goto fail;
2651
0
    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2652
0
    Py_DECREF(posobj);
2653
0
    if (res == NULL)
2654
0
        goto fail;
2655
0
    Py_DECREF(res);
2656
2657
0
    textiowrapper_set_decoded_chars(self, NULL);
2658
0
    Py_CLEAR(self->snapshot);
2659
2660
    /* Restore the decoder to its state from the safe start point. */
2661
0
    if (self->decoder) {
2662
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2663
0
            goto fail;
2664
0
    }
2665
2666
0
    if (cookie.chars_to_skip) {
2667
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2668
0
        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2669
0
                                                     "i", cookie.bytes_to_feed);
2670
0
        PyObject *decoded;
2671
2672
0
        if (input_chunk == NULL)
2673
0
            goto fail;
2674
2675
0
        if (!PyBytes_Check(input_chunk)) {
2676
0
            PyErr_Format(PyExc_TypeError,
2677
0
                         "underlying read() should have returned a bytes "
2678
0
                         "object, not '%.200s'",
2679
0
                         Py_TYPE(input_chunk)->tp_name);
2680
0
            Py_DECREF(input_chunk);
2681
0
            goto fail;
2682
0
        }
2683
2684
0
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2685
0
        if (snapshot == NULL) {
2686
0
            goto fail;
2687
0
        }
2688
0
        Py_XSETREF(self->snapshot, snapshot);
2689
2690
0
        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2691
0
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2692
2693
0
        if (check_decoded(decoded) < 0)
2694
0
            goto fail;
2695
2696
0
        textiowrapper_set_decoded_chars(self, decoded);
2697
2698
        /* Skip chars_to_skip of the decoded characters. */
2699
0
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2700
0
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2701
0
            goto fail;
2702
0
        }
2703
0
        self->decoded_chars_used = cookie.chars_to_skip;
2704
0
    }
2705
0
    else {
2706
0
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2707
0
        if (snapshot == NULL)
2708
0
            goto fail;
2709
0
        Py_XSETREF(self->snapshot, snapshot);
2710
0
    }
2711
2712
    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2713
0
    if (self->encoder) {
2714
0
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2715
0
            goto fail;
2716
0
    }
2717
0
    return cookieObj;
2718
0
  fail:
2719
0
    Py_XDECREF(cookieObj);
2720
0
    return NULL;
2721
2722
0
}
2723
2724
/*[clinic input]
2725
@critical_section
2726
_io.TextIOWrapper.tell
2727
2728
Return the stream position as an opaque number.
2729
2730
The return value of tell() can be given as input to seek(), to restore a
2731
previous stream position.
2732
[clinic start generated code]*/
2733
2734
static PyObject *
2735
_io_TextIOWrapper_tell_impl(textio *self)
2736
/*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2737
0
{
2738
0
    PyObject *res;
2739
0
    PyObject *posobj = NULL;
2740
0
    cookie_type cookie = {0,0,0,0,0};
2741
0
    PyObject *next_input;
2742
0
    Py_ssize_t chars_to_skip, chars_decoded;
2743
0
    Py_ssize_t skip_bytes, skip_back;
2744
0
    PyObject *saved_state = NULL;
2745
0
    const char *input, *input_end;
2746
0
    Py_ssize_t dec_buffer_len;
2747
0
    int dec_flags;
2748
2749
0
    CHECK_ATTACHED(self);
2750
0
    CHECK_CLOSED(self);
2751
2752
0
    if (!self->seekable) {
2753
0
        _unsupported(self->state, "underlying stream is not seekable");
2754
0
        goto fail;
2755
0
    }
2756
0
    if (!self->telling) {
2757
0
        PyErr_SetString(PyExc_OSError,
2758
0
                        "telling position disabled by next() call");
2759
0
        goto fail;
2760
0
    }
2761
2762
0
    if (_textiowrapper_writeflush(self) < 0)
2763
0
        return NULL;
2764
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2765
0
        goto fail;
2766
0
    }
2767
2768
0
    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2769
0
    if (posobj == NULL)
2770
0
        goto fail;
2771
2772
0
    if (self->decoder == NULL || self->snapshot == NULL) {
2773
0
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2774
0
        return posobj;
2775
0
    }
2776
2777
#if defined(HAVE_LARGEFILE_SUPPORT)
2778
    cookie.start_pos = PyLong_AsLongLong(posobj);
2779
#else
2780
0
    cookie.start_pos = PyLong_AsLong(posobj);
2781
0
#endif
2782
0
    Py_DECREF(posobj);
2783
0
    if (PyErr_Occurred())
2784
0
        goto fail;
2785
2786
    /* Skip backward to the snapshot point (see _read_chunk). */
2787
0
    assert(PyTuple_Check(self->snapshot));
2788
0
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2789
0
        goto fail;
2790
2791
0
    assert (PyBytes_Check(next_input));
2792
2793
0
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2794
2795
    /* How many decoded characters have been used up since the snapshot? */
2796
0
    if (self->decoded_chars_used == 0)  {
2797
        /* We haven't moved from the snapshot point. */
2798
0
        return textiowrapper_build_cookie(&cookie);
2799
0
    }
2800
2801
0
    chars_to_skip = self->decoded_chars_used;
2802
2803
    /* Decoder state will be restored at the end */
2804
0
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2805
0
                                             &_Py_ID(getstate));
2806
0
    if (saved_state == NULL)
2807
0
        goto fail;
2808
2809
0
#define DECODER_GETSTATE() do { \
2810
0
        PyObject *dec_buffer; \
2811
0
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2812
0
            &_Py_ID(getstate)); \
2813
0
        if (_state == NULL) \
2814
0
            goto fail; \
2815
0
        if (!PyTuple_Check(_state)) { \
2816
0
            PyErr_SetString(PyExc_TypeError, \
2817
0
                            "illegal decoder state"); \
2818
0
            Py_DECREF(_state); \
2819
0
            goto fail; \
2820
0
        } \
2821
0
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2822
0
                              &dec_buffer, &dec_flags)) \
2823
0
        { \
2824
0
            Py_DECREF(_state); \
2825
0
            goto fail; \
2826
0
        } \
2827
0
        if (!PyBytes_Check(dec_buffer)) { \
2828
0
            PyErr_Format(PyExc_TypeError, \
2829
0
                         "illegal decoder state: the first item should be a " \
2830
0
                         "bytes object, not '%.200s'", \
2831
0
                         Py_TYPE(dec_buffer)->tp_name); \
2832
0
            Py_DECREF(_state); \
2833
0
            goto fail; \
2834
0
        } \
2835
0
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2836
0
        Py_DECREF(_state); \
2837
0
    } while (0)
2838
2839
0
#define DECODER_DECODE(start, len, res) do { \
2840
0
        PyObject *_decoded = _PyObject_CallMethod( \
2841
0
            self->decoder, &_Py_ID(decode), "y#", start, len); \
2842
0
        if (check_decoded(_decoded) < 0) \
2843
0
            goto fail; \
2844
0
        res = PyUnicode_GET_LENGTH(_decoded); \
2845
0
        Py_DECREF(_decoded); \
2846
0
    } while (0)
2847
2848
    /* Fast search for an acceptable start point, close to our
2849
       current pos */
2850
0
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2851
0
    skip_back = 1;
2852
0
    assert(skip_bytes <= PyBytes_GET_SIZE(next_input));
2853
0
    input = PyBytes_AS_STRING(next_input);
2854
0
    while (skip_bytes > 0) {
2855
        /* Decode up to temptative start point */
2856
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2857
0
            goto fail;
2858
0
        DECODER_DECODE(input, skip_bytes, chars_decoded);
2859
0
        if (chars_decoded <= chars_to_skip) {
2860
0
            DECODER_GETSTATE();
2861
0
            if (dec_buffer_len == 0) {
2862
                /* Before pos and no bytes buffered in decoder => OK */
2863
0
                cookie.dec_flags = dec_flags;
2864
0
                chars_to_skip -= chars_decoded;
2865
0
                break;
2866
0
            }
2867
            /* Skip back by buffered amount and reset heuristic */
2868
0
            skip_bytes -= dec_buffer_len;
2869
0
            skip_back = 1;
2870
0
        }
2871
0
        else {
2872
            /* We're too far ahead, skip back a bit */
2873
0
            skip_bytes -= skip_back;
2874
0
            skip_back *= 2;
2875
0
        }
2876
0
    }
2877
0
    if (skip_bytes <= 0) {
2878
0
        skip_bytes = 0;
2879
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2880
0
            goto fail;
2881
0
    }
2882
2883
    /* Note our initial start point. */
2884
0
    cookie.start_pos += skip_bytes;
2885
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2886
0
    if (chars_to_skip == 0)
2887
0
        goto finally;
2888
2889
    /* We should be close to the desired position.  Now feed the decoder one
2890
     * byte at a time until we reach the `chars_to_skip` target.
2891
     * As we go, note the nearest "safe start point" before the current
2892
     * location (a point where the decoder has nothing buffered, so seek()
2893
     * can safely start from there and advance to this location).
2894
     */
2895
0
    chars_decoded = 0;
2896
0
    input = PyBytes_AS_STRING(next_input);
2897
0
    input_end = input + PyBytes_GET_SIZE(next_input);
2898
0
    input += skip_bytes;
2899
0
    while (input < input_end) {
2900
0
        Py_ssize_t n;
2901
2902
0
        DECODER_DECODE(input, (Py_ssize_t)1, n);
2903
        /* We got n chars for 1 byte */
2904
0
        chars_decoded += n;
2905
0
        cookie.bytes_to_feed += 1;
2906
0
        DECODER_GETSTATE();
2907
2908
0
        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2909
            /* Decoder buffer is empty, so this is a safe start point. */
2910
0
            cookie.start_pos += cookie.bytes_to_feed;
2911
0
            chars_to_skip -= chars_decoded;
2912
0
            cookie.dec_flags = dec_flags;
2913
0
            cookie.bytes_to_feed = 0;
2914
0
            chars_decoded = 0;
2915
0
        }
2916
0
        if (chars_decoded >= chars_to_skip)
2917
0
            break;
2918
0
        input++;
2919
0
    }
2920
0
    if (input == input_end) {
2921
        /* We didn't get enough decoded data; signal EOF to get more. */
2922
0
        PyObject *decoded = _PyObject_CallMethod(
2923
0
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2924
0
        if (check_decoded(decoded) < 0)
2925
0
            goto fail;
2926
0
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
2927
0
        Py_DECREF(decoded);
2928
0
        cookie.need_eof = 1;
2929
2930
0
        if (chars_decoded < chars_to_skip) {
2931
0
            PyErr_SetString(PyExc_OSError,
2932
0
                            "can't reconstruct logical file position");
2933
0
            goto fail;
2934
0
        }
2935
0
    }
2936
2937
0
finally:
2938
0
    res = PyObject_CallMethodOneArg(
2939
0
            self->decoder, &_Py_ID(setstate), saved_state);
2940
0
    Py_DECREF(saved_state);
2941
0
    if (res == NULL)
2942
0
        return NULL;
2943
0
    Py_DECREF(res);
2944
2945
    /* The returned cookie corresponds to the last safe start point. */
2946
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2947
0
    return textiowrapper_build_cookie(&cookie);
2948
2949
0
fail:
2950
0
    if (saved_state) {
2951
0
        PyObject *exc = PyErr_GetRaisedException();
2952
0
        res = PyObject_CallMethodOneArg(
2953
0
                self->decoder, &_Py_ID(setstate), saved_state);
2954
0
        _PyErr_ChainExceptions1(exc);
2955
0
        Py_DECREF(saved_state);
2956
0
        Py_XDECREF(res);
2957
0
    }
2958
0
    return NULL;
2959
0
}
2960
2961
/*[clinic input]
2962
@critical_section
2963
_io.TextIOWrapper.truncate
2964
    pos: object = None
2965
    /
2966
[clinic start generated code]*/
2967
2968
static PyObject *
2969
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2970
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2971
0
{
2972
0
    CHECK_ATTACHED(self)
2973
2974
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2975
0
        return NULL;
2976
0
    }
2977
2978
0
    return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2979
0
}
2980
2981
/* tp_repr slot: build "<TYPE name=... mode=... encoding=...>".
 *
 * The name and mode parts are included only when the corresponding
 * attribute exists.  Py_ReprEnter()/Py_ReprLeave() guard against a repr
 * that re-enters itself through those attribute lookups.
 * Returns a new str, or NULL with an exception set.
 */
static PyObject *
textiowrapper_repr(PyObject *op)
{
    textio *self = textio_CAST(op);
    const char *type_name = Py_TYPE(self)->tp_name;
    PyObject *text;
    PyObject *piece;
    PyObject *attr = NULL;
    int guard;

    CHECK_INITIALIZED(self);

    text = PyUnicode_FromFormat("<%.100s", type_name);
    if (text == NULL) {
        return NULL;
    }

    guard = Py_ReprEnter(op);
    if (guard != 0) {
        /* A positive value means we are already inside this repr; a
           negative one means Py_ReprEnter() itself failed.  In neither
           case did we enter, so Py_ReprLeave() must not be called. */
        if (guard > 0) {
            PyErr_Format(PyExc_RuntimeError,
                         "reentrant call inside %.100s.__repr__",
                         type_name);
        }
        Py_DECREF(text);
        return NULL;
    }

    /* Optional " name=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &attr) < 0) {
        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
            goto error;
        }
        /* Ignore ValueError raised if the underlying stream was detached */
        PyErr_Clear();
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" name=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto error;
        }
        /* Consumes `piece`; on failure `text` is set to NULL. */
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto error;
        }
    }

    /* Optional " mode=..." part; here every lookup error is fatal. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &attr) < 0) {
        goto error;
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" mode=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto error;
        }
    }

    piece = PyUnicode_FromFormat("%U encoding=%R>",
                                 text, self->encoding);
    Py_DECREF(text);
    Py_ReprLeave(op);
    return piece;

  error:
    Py_XDECREF(text);
    Py_ReprLeave(op);
    return NULL;
}
3047
3048
3049
/* Inquiries */
3050
3051
/*[clinic input]
3052
@critical_section
3053
_io.TextIOWrapper.fileno
3054
[clinic start generated code]*/
3055
3056
static PyObject *
3057
_io_TextIOWrapper_fileno_impl(textio *self)
3058
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3059
0
{
3060
0
    CHECK_ATTACHED(self);
3061
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3062
0
}
3063
3064
/*[clinic input]
3065
@critical_section
3066
_io.TextIOWrapper.seekable
3067
[clinic start generated code]*/
3068
3069
static PyObject *
3070
_io_TextIOWrapper_seekable_impl(textio *self)
3071
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3072
0
{
3073
0
    CHECK_ATTACHED(self);
3074
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3075
0
}
3076
3077
/*[clinic input]
3078
@critical_section
3079
_io.TextIOWrapper.readable
3080
[clinic start generated code]*/
3081
3082
static PyObject *
3083
_io_TextIOWrapper_readable_impl(textio *self)
3084
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3085
0
{
3086
0
    CHECK_ATTACHED(self);
3087
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3088
0
}
3089
3090
/*[clinic input]
3091
@critical_section
3092
_io.TextIOWrapper.writable
3093
[clinic start generated code]*/
3094
3095
static PyObject *
3096
_io_TextIOWrapper_writable_impl(textio *self)
3097
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3098
0
{
3099
0
    CHECK_ATTACHED(self);
3100
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3101
0
}
3102
3103
/*[clinic input]
3104
@critical_section
3105
_io.TextIOWrapper.isatty
3106
[clinic start generated code]*/
3107
3108
static PyObject *
3109
_io_TextIOWrapper_isatty_impl(textio *self)
3110
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3111
0
{
3112
0
    CHECK_ATTACHED(self);
3113
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3114
0
}
3115
3116
/*[clinic input]
3117
@critical_section
3118
_io.TextIOWrapper.flush
3119
[clinic start generated code]*/
3120
3121
static PyObject *
3122
_io_TextIOWrapper_flush_impl(textio *self)
3123
/*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3124
8
{
3125
8
    CHECK_ATTACHED(self);
3126
8
    CHECK_CLOSED(self);
3127
8
    self->telling = self->seekable;
3128
8
    if (_textiowrapper_writeflush(self) < 0)
3129
0
        return NULL;
3130
8
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3131
8
}
3132
3133
/*[clinic input]
3134
@critical_section
3135
_io.TextIOWrapper.close
3136
[clinic start generated code]*/
3137
3138
static PyObject *
3139
_io_TextIOWrapper_close_impl(textio *self)
3140
/*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3141
8
{
3142
8
    PyObject *res;
3143
8
    int r;
3144
8
    CHECK_ATTACHED(self);
3145
3146
8
    res = _io_TextIOWrapper_closed_get_impl(self);
3147
8
    if (res == NULL)
3148
0
        return NULL;
3149
8
    r = PyObject_IsTrue(res);
3150
8
    Py_DECREF(res);
3151
8
    if (r < 0)
3152
0
        return NULL;
3153
3154
8
    if (r > 0) {
3155
0
        Py_RETURN_NONE; /* stream already closed */
3156
0
    }
3157
8
    if (self->detached) {
3158
0
        Py_RETURN_NONE; /* gh-142594 null pointer issue */
3159
0
    }
3160
8
    else {
3161
8
        PyObject *exc = NULL;
3162
8
        if (self->finalizing) {
3163
0
            res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3164
0
                                            (PyObject *)self);
3165
0
            if (res) {
3166
0
                Py_DECREF(res);
3167
0
            }
3168
0
            else {
3169
0
                PyErr_Clear();
3170
0
            }
3171
0
        }
3172
8
        if (_PyFile_Flush((PyObject *)self) < 0) {
3173
0
            exc = PyErr_GetRaisedException();
3174
0
        }
3175
3176
8
        res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3177
8
        if (exc != NULL) {
3178
0
            _PyErr_ChainExceptions1(exc);
3179
0
            Py_CLEAR(res);
3180
0
        }
3181
8
        return res;
3182
8
    }
3183
8
}
3184
3185
/* Produce the next line for iteration; the caller must hold the object's
 * critical section.
 *
 * Clears `telling` while iterating.  An empty line ends the iteration:
 * the snapshot is dropped, `telling` is reset from `seekable`, and NULL is
 * returned without setting an exception here.
 */
static PyObject *
textiowrapper_iternext_lock_held(PyObject *op)
{
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
    textio *self = textio_CAST(op);
    PyObject *next_line;

    CHECK_ATTACHED(self);

    self->telling = 0;
    if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
        /* Not the exact type: go through the readline() method and check
           what it hands back. */
        next_line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (next_line != NULL && !PyUnicode_Check(next_line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(next_line)->tp_name);
            Py_DECREF(next_line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        next_line = _textiowrapper_readline(self, -1);
    }

    if (next_line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(next_line) != 0) {
        return next_line;
    }

    /* Reached EOF or would have blocked */
    Py_DECREF(next_line);
    Py_CLEAR(self->snapshot);
    self->telling = self->seekable;
    return NULL;
}
3223
3224
/* tp_iternext slot: run the locked implementation inside the object's
   critical section and pass its result through. */
static PyObject *
textiowrapper_iternext(PyObject *op)
{
    PyObject *next_line;

    Py_BEGIN_CRITICAL_SECTION(op);
    next_line = textiowrapper_iternext_lock_held(op);
    Py_END_CRITICAL_SECTION();

    return next_line;
}
3233
3234
/*[clinic input]
3235
@critical_section
3236
@getter
3237
_io.TextIOWrapper.name
3238
[clinic start generated code]*/
3239
3240
static PyObject *
3241
_io_TextIOWrapper_name_get_impl(textio *self)
3242
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3243
0
{
3244
0
    CHECK_ATTACHED(self);
3245
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3246
0
}
3247
3248
/*[clinic input]
3249
@critical_section
3250
@getter
3251
_io.TextIOWrapper.closed
3252
[clinic start generated code]*/
3253
3254
static PyObject *
3255
_io_TextIOWrapper_closed_get_impl(textio *self)
3256
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3257
28
{
3258
28
    CHECK_ATTACHED(self);
3259
28
    return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3260
28
}
3261
3262
/*[clinic input]
3263
@critical_section
3264
@getter
3265
_io.TextIOWrapper.newlines
3266
[clinic start generated code]*/
3267
3268
static PyObject *
3269
_io_TextIOWrapper_newlines_get_impl(textio *self)
3270
/*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3271
0
{
3272
0
    PyObject *res;
3273
0
    CHECK_ATTACHED(self);
3274
0
    if (self->decoder == NULL ||
3275
0
        PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3276
0
    {
3277
0
        Py_RETURN_NONE;
3278
0
    }
3279
0
    return res;
3280
0
}
3281
3282
/*[clinic input]
3283
@critical_section
3284
@getter
3285
_io.TextIOWrapper.errors
3286
[clinic start generated code]*/
3287
3288
static PyObject *
3289
_io_TextIOWrapper_errors_get_impl(textio *self)
3290
/*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3291
0
{
3292
0
    CHECK_INITIALIZED(self);
3293
0
    return Py_NewRef(self->errors);
3294
0
}
3295
3296
/*[clinic input]
3297
@critical_section
3298
@getter
3299
_io.TextIOWrapper._CHUNK_SIZE
3300
[clinic start generated code]*/
3301
3302
static PyObject *
3303
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3304
/*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3305
0
{
3306
0
    CHECK_ATTACHED(self);
3307
0
    return PyLong_FromSsize_t(self->chunk_size);
3308
0
}
3309
3310
/*[clinic input]
3311
@critical_section
3312
@setter
3313
_io.TextIOWrapper._CHUNK_SIZE
3314
[clinic start generated code]*/
3315
3316
static int
3317
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3318
/*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3319
0
{
3320
0
    Py_ssize_t n;
3321
0
    CHECK_ATTACHED_INT(self);
3322
0
    if (value == NULL) {
3323
0
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3324
0
        return -1;
3325
0
    }
3326
0
    n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3327
0
    if (n == -1 && PyErr_Occurred())
3328
0
        return -1;
3329
0
    if (n <= 0) {
3330
0
        PyErr_SetString(PyExc_ValueError,
3331
0
                        "a strictly positive integer is required");
3332
0
        return -1;
3333
0
    }
3334
0
    self->chunk_size = n;
3335
0
    return 0;
3336
0
}
3337
3338
/* Method table for IncrementalNewlineDecoder (decode, getstate, setstate,
   reset).  The *_METHODDEF entries are macros; presumably they come from
   the Argument Clinic header clinic/textio.c.h included at the top of this
   file -- confirm there. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3339
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3340
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3341
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3342
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3343
    {NULL}
3344
};
3345
3346
/* Computed attributes of IncrementalNewlineDecoder: `newlines` has a getter
   only (the setter slot is NULL). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3347
    {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL},
3348
    {NULL}
3349
};
3350
3351
/* Type slots for IncrementalNewlineDecoder, referenced by
   _Py_nldecoder_spec.  The {0, NULL} entry terminates the array. */
static PyType_Slot nldecoder_slots[] = {
3352
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3353
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3354
    {Py_tp_methods, incrementalnewlinedecoder_methods},
3355
    {Py_tp_getset, incrementalnewlinedecoder_getset},
3356
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3357
    {Py_tp_clear, incrementalnewlinedecoder_clear},
3358
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3359
    {0, NULL},
3360
};
3361
3362
/* Type spec for _io.IncrementalNewlineDecoder.  Not static, so it is
   visible to other translation units.  The flags make the type
   subclassable, GC-enabled and immutable. */
PyType_Spec _Py_nldecoder_spec = {
3363
    .name = "_io.IncrementalNewlineDecoder",
3364
    .basicsize = sizeof(nldecoder_object),
3365
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3366
              Py_TPFLAGS_IMMUTABLETYPE),
3367
    .slots = nldecoder_slots,
3368
};
3369
3370
3371
/* Method table for TextIOWrapper.  The *_METHODDEF entries are macros;
   presumably they come from the Argument Clinic header clinic/textio.c.h
   included at the top of this file -- confirm there.  __getstate__ is
   routed to _PyIOBase_cannot_pickle. */
static PyMethodDef textiowrapper_methods[] = {
3372
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3373
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3374
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3375
    _IO_TEXTIOWRAPPER_READ_METHODDEF
3376
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3377
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3378
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3379
3380
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3381
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3382
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3383
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3384
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3385
3386
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3387
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3388
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3389
3390
    {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS},
3391
    {NULL, NULL}
3392
};
3393
3394
/* Struct fields of `textio` exposed directly as attributes.  All are
   read-only except `_finalizing`, whose flags field is 0.  The
   __weaklistoffset__ and __dictoffset__ entries give the offsets of the
   weakreflist and dict fields within the struct. */
static PyMemberDef textiowrapper_members[] = {
3395
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3396
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3397
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3398
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3399
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3400
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3401
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3402
    {NULL}
3403
};
3404
3405
/* Computed attributes of TextIOWrapper: name, closed, newlines, errors and
   _CHUNK_SIZE.  The *_GETSETDEF entries are macros; presumably they come
   from the Argument Clinic header clinic/textio.c.h -- confirm there. */
static PyGetSetDef textiowrapper_getset[] = {
3406
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3407
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3408
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3409
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3410
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3411
    {NULL}
3412
};
3413
3414
/* Type slots for TextIOWrapper, referenced by _Py_textiowrapper_spec.
   They wire in the repr and iternext implementations together with the
   method, member and getset tables.  The {0, NULL} entry terminates the
   array. */
static PyType_Slot textiowrapper_slots[] = {
3415
    {Py_tp_dealloc, textiowrapper_dealloc},
3416
    {Py_tp_repr, textiowrapper_repr},
3417
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3418
    {Py_tp_traverse, textiowrapper_traverse},
3419
    {Py_tp_clear, textiowrapper_clear},
3420
    {Py_tp_iternext, textiowrapper_iternext},
3421
    {Py_tp_methods, textiowrapper_methods},
3422
    {Py_tp_members, textiowrapper_members},
3423
    {Py_tp_getset, textiowrapper_getset},
3424
    {Py_tp_init, _io_TextIOWrapper___init__},
3425
    {0, NULL},
3426
};
3427
3428
/* Type spec for _io.TextIOWrapper.  Not static, so it is visible to other
   translation units.  The flags make the type subclassable, GC-enabled
   and immutable. */
PyType_Spec _Py_textiowrapper_spec = {
3429
    .name = "_io.TextIOWrapper",
3430
    .basicsize = sizeof(textio),
3431
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3432
              Py_TPFLAGS_IMMUTABLETYPE),
3433
    .slots = textiowrapper_slots,
3434
};