Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Modules/_io/textio.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h"          // _PyObject_CallMethod()
11
#include "pycore_codecs.h"        // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
13
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
14
#include "pycore_long.h"          // _PyLong_GetZero()
15
#include "pycore_object.h"        // _PyObject_GC_UNTRACK()
16
#include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
17
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
18
#include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString()
19
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
20
21
#include "_iomodule.h"
22
23
/*[clinic input]
24
module _io
25
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
26
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
27
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
30
31
typedef struct nldecoder_object nldecoder_object;
32
typedef struct textio textio;
33
34
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
35
#include "clinic/textio.c.h"
36
#undef clinic_state
37
38
/* TextIOBase */
39
40
PyDoc_STRVAR(textiobase_doc,
41
    "Base class for text I/O.\n"
42
    "\n"
43
    "This class provides a character and line based interface to stream\n"
44
    "I/O. There is no readinto method because Python's character strings\n"
45
    "are immutable.\n"
46
    );
47
48
static PyObject *
49
_unsupported(_PyIO_State *state, const char *message)
50
0
{
51
0
    PyErr_SetString(state->unsupported_operation, message);
52
0
    return NULL;
53
0
}
54
55
/*[clinic input]
56
@permit_long_docstring_body
57
_io._TextIOBase.detach
58
    cls: defining_class
59
    /
60
61
Separate the underlying buffer from the TextIOBase and return it.
62
63
After the underlying buffer has been detached, the TextIO is in an unusable state.
64
[clinic start generated code]*/
65
66
static PyObject *
67
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
68
/*[clinic end generated code: output=50915f40c609eaa4 input=8cd0652c17d7f015]*/
69
0
{
70
0
    _PyIO_State *state = get_io_state_by_cls(cls);
71
0
    return _unsupported(state, "detach");
72
0
}
73
74
/*[clinic input]
75
_io._TextIOBase.read
76
    cls: defining_class
77
    size: int(unused=True) = -1
78
    /
79
80
Read at most size characters from stream.
81
82
Read from underlying buffer until we have size characters or we hit EOF.
83
If size is negative or omitted, read until EOF.
84
[clinic start generated code]*/
85
86
static PyObject *
87
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
88
                          int Py_UNUSED(size))
89
/*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
90
0
{
91
0
    _PyIO_State *state = get_io_state_by_cls(cls);
92
0
    return _unsupported(state, "read");
93
0
}
94
95
/*[clinic input]
96
_io._TextIOBase.readline
97
    cls: defining_class
98
    size: int(unused=True) = -1
99
    /
100
101
Read until newline or EOF.
102
103
Return an empty string if EOF is hit immediately.
104
If size is specified, at most size characters will be read.
105
[clinic start generated code]*/
106
107
static PyObject *
108
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
109
                              int Py_UNUSED(size))
110
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
111
0
{
112
0
    _PyIO_State *state = get_io_state_by_cls(cls);
113
0
    return _unsupported(state, "readline");
114
0
}
115
116
/*[clinic input]
117
_io._TextIOBase.write
118
    cls: defining_class
119
    s: str(unused=True)
120
    /
121
122
Write string s to stream.
123
124
Return the number of characters written
125
(which is always equal to the length of the string).
126
[clinic start generated code]*/
127
128
static PyObject *
129
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
130
                           const char *Py_UNUSED(s))
131
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
132
0
{
133
0
    _PyIO_State *state = get_io_state_by_cls(cls);
134
0
    return _unsupported(state, "write");
135
0
}
136
137
/*[clinic input]
138
@getter
139
_io._TextIOBase.encoding
140
141
Encoding of the text stream.
142
143
Subclasses should override.
144
[clinic start generated code]*/
145
146
static PyObject *
_io__TextIOBase_encoding_get_impl(PyObject *self)
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
{
    /* The base class reports no encoding: return a new reference to None. */
    return Py_NewRef(Py_None);
}
152
153
/*[clinic input]
154
@getter
155
_io._TextIOBase.newlines
156
157
Line endings translated so far.
158
159
Only line endings translated during reading are considered.
160
161
Subclasses should override.
162
[clinic start generated code]*/
163
164
static PyObject *
_io__TextIOBase_newlines_get_impl(PyObject *self)
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
{
    /* The base class tracks no newlines: return a new reference to None. */
    return Py_NewRef(Py_None);
}
170
171
/*[clinic input]
172
@getter
173
_io._TextIOBase.errors
174
175
The error setting of the decoder or encoder.
176
177
Subclasses should override.
178
[clinic start generated code]*/
179
180
static PyObject *
_io__TextIOBase_errors_get_impl(PyObject *self)
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
{
    /* The base class has no error setting: return a new reference to None. */
    return Py_NewRef(Py_None);
}
186
187
188
static PyMethodDef textiobase_methods[] = {
189
    _IO__TEXTIOBASE_DETACH_METHODDEF
190
    _IO__TEXTIOBASE_READ_METHODDEF
191
    _IO__TEXTIOBASE_READLINE_METHODDEF
192
    _IO__TEXTIOBASE_WRITE_METHODDEF
193
    {NULL, NULL}
194
};
195
196
static PyGetSetDef textiobase_getset[] = {
197
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
198
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
199
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
200
    {NULL}
201
};
202
203
static PyType_Slot textiobase_slots[] = {
204
    {Py_tp_doc, (void *)textiobase_doc},
205
    {Py_tp_methods, textiobase_methods},
206
    {Py_tp_getset, textiobase_getset},
207
    {0, NULL},
208
};
209
210
/* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
211
PyType_Spec textiobase_spec = {
212
    .name = "_io._TextIOBase",
213
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
214
              Py_TPFLAGS_IMMUTABLETYPE),
215
    .slots = textiobase_slots,
216
};
217
218
/* IncrementalNewlineDecoder */
219
220
struct nldecoder_object {
221
    PyObject_HEAD
222
    PyObject *decoder;
223
    PyObject *errors;
224
    unsigned int pendingcr: 1;
225
    unsigned int translate: 1;
226
    unsigned int seennl: 3;
227
};
228
229
101k
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
230
231
/*[clinic input]
232
_io.IncrementalNewlineDecoder.__init__
233
    decoder: object
234
    translate: bool
235
    errors: object(c_default="NULL") = "strict"
236
237
Codec used when reading a file in universal newlines mode.
238
239
It wraps another incremental decoder, translating \r\n and \r into \n.
240
It also records the types of newlines encountered.  When used with
241
translate=False, it ensures that the newline sequence is returned in
242
one piece. When used with decoder=None, it expects unicode strings as
243
decode input and translates newlines without first invoking an external
244
decoder.
245
[clinic start generated code]*/
246
247
static int
248
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249
                                            PyObject *decoder, int translate,
250
                                            PyObject *errors)
251
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
252
16.2k
{
253
254
16.2k
    if (errors == NULL) {
255
16.2k
        errors = &_Py_ID(strict);
256
16.2k
    }
257
0
    else {
258
0
        errors = Py_NewRef(errors);
259
0
    }
260
261
16.2k
    Py_XSETREF(self->errors, errors);
262
16.2k
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
263
16.2k
    self->translate = translate ? 1 : 0;
264
16.2k
    self->seennl = 0;
265
16.2k
    self->pendingcr = 0;
266
267
16.2k
    return 0;
268
16.2k
}
269
270
static int
271
incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg)
272
3.16k
{
273
3.16k
    nldecoder_object *self = nldecoder_object_CAST(op);
274
3.16k
    Py_VISIT(Py_TYPE(self));
275
3.16k
    Py_VISIT(self->decoder);
276
3.16k
    Py_VISIT(self->errors);
277
3.16k
    return 0;
278
3.16k
}
279
280
static int
281
incrementalnewlinedecoder_clear(PyObject *op)
282
16.2k
{
283
16.2k
    nldecoder_object *self = nldecoder_object_CAST(op);
284
16.2k
    Py_CLEAR(self->decoder);
285
16.2k
    Py_CLEAR(self->errors);
286
16.2k
    return 0;
287
16.2k
}
288
289
static void
290
incrementalnewlinedecoder_dealloc(PyObject *op)
291
16.2k
{
292
16.2k
    nldecoder_object *self = nldecoder_object_CAST(op);
293
16.2k
    PyTypeObject *tp = Py_TYPE(self);
294
16.2k
    _PyObject_GC_UNTRACK(self);
295
16.2k
    (void)incrementalnewlinedecoder_clear(op);
296
16.2k
    tp->tp_free(self);
297
16.2k
    Py_DECREF(tp);
298
16.2k
}
299
300
static int
301
check_decoded(PyObject *decoded)
302
66.0k
{
303
66.0k
    if (decoded == NULL)
304
0
        return -1;
305
66.0k
    if (!PyUnicode_Check(decoded)) {
306
0
        PyErr_Format(PyExc_TypeError,
307
0
                     "decoder should return a string result, not '%.200s'",
308
0
                     Py_TYPE(decoded)->tp_name);
309
0
        Py_DECREF(decoded);
310
0
        return -1;
311
0
    }
312
66.0k
    return 0;
313
66.0k
}
314
315
/* Guard for IncrementalNewlineDecoder entry points: if __init__() has not
   run (the `errors` member is still NULL), set ValueError and return NULL
   from the *enclosing* function.

   Wrapped in do { } while (0) so the macro expands to a single statement
   and cannot capture a following `else`; use it as
   `CHECK_INITIALIZED_DECODER(self);`. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
321
322
34.1M
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR   (1 << 0)   /* a lone "\r"  */
#define SEEN_LF   (1 << 1)   /* a lone "\n"  */
#define SEEN_CRLF (1 << 2)   /* a "\r\n" pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
326
327
/* Decode `input` through the wrapped decoder (or pass it through unchanged
   when the decoder is None), then record the newline kinds found and, when
   translation is enabled, rewrite "\r" and "\r\n" to "\n".

   A trailing "\r" of a non-final chunk is withheld and re-attached to the
   next chunk so that a "\r\n" pair is never split across two results.

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    nldecoder_object *self = nldecoder_object_CAST(myself);
    PyObject *output;
    Py_ssize_t output_len;

    CHECK_INITIALIZED_DECODER(self);

    /* Step 1: obtain the decoded text. */
    if (self->decoder == Py_None) {
        output = Py_NewRef(input);
    }
    else {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0) {
        return NULL;
    }

    /* Step 2: re-attach the "\r" withheld by the previous call. */
    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        int kind;
        char *dst;
        PyObject *prefixed;

        prefixed = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (prefixed == NULL) {
            goto error;
        }
        kind = PyUnicode_KIND(prefixed);
        dst = PyUnicode_DATA(prefixed);
        PyUnicode_WRITE(kind, dst, 0, '\r');
        memcpy(dst + kind, PyUnicode_DATA(output), kind * output_len);
        Py_SETREF(output, prefixed);
        self->pendingcr = 0;
        output_len++;
    }

    /* Step 3: withhold a trailing "\r" even when not translating, so that
       readline() is sure to get "\r\n" in one pass. */
    if (!final
        && output_len > 0
        && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
    {
        PyObject *trimmed = PyUnicode_Substring(output, 0, output_len - 1);
        if (trimmed == NULL) {
            goto error;
        }
        Py_SETREF(output, trimmed);
        self->pendingcr = 1;
    }

    /* Step 4: record the newline kinds and translate if requested, all in
       a single pass over the text. */
    {
        const void *in_str = PyUnicode_DATA(output);
        Py_ssize_t len = PyUnicode_GET_LENGTH(output);
        int kind = PyUnicode_KIND(output);
        int seennl = self->seennl;
        int only_lf = 0;

        if (len == 0) {
            return output;
        }

        /* While only "\n" (or nothing) has been seen so far, a raw memchr
           for the "\r" *byte* is enough to rule out any "\r" character. */
        if (seennl == 0 || seennl == SEEN_LF) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* Nothing needs translating. If no newline has been recorded
               yet, look for a "\n" character to record. */
            if (seennl == 0
                && memchr(in_str, '\n', kind * len) != NULL)
            {
                if (kind == PyUnicode_1BYTE_KIND) {
                    seennl |= SEEN_LF;
                }
                else {
                    /* With wider characters the byte hit may belong to
                       another code point, so confirm character-wise. */
                    Py_ssize_t pos = 0;
                    do {
                        Py_UCS4 ch;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, pos) > '\n') {
                            pos++;
                        }
                        ch = PyUnicode_READ(kind, in_str, pos++);
                        if (ch == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                    } while (pos < len);
                }
            }
        }
        else if (!self->translate) {
            /* Only record what is seen; skip the scan entirely once every
               newline kind has already been recorded. */
            if (seennl != SEEN_ALL) {
                Py_ssize_t pos = 0;
                do {
                    Py_UCS4 ch;
                    /* Fast loop for non-control characters */
                    while (PyUnicode_READ(kind, in_str, pos) > '\r') {
                        pos++;
                    }
                    ch = PyUnicode_READ(kind, in_str, pos++);
                    if (ch == '\n') {
                        seennl |= SEEN_LF;
                    }
                    else if (ch == '\r') {
                        if (PyUnicode_READ(kind, in_str, pos) == '\n') {
                            seennl |= SEEN_CRLF;
                            pos++;
                        }
                        else {
                            seennl |= SEEN_CR;
                        }
                    }
                } while (pos < len && seennl != SEEN_ALL);
            }
        }
        else {
            void *translated;
            Py_ssize_t in, out;

            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* A copy is always made: a "\r" byte is known to be present,
               so chances are high that something needs translating. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }

            in = out = 0;
            for (;;) {
                Py_UCS4 ch;
                /* Fast loop for non-control characters */
                while ((ch = PyUnicode_READ(kind, in_str, in++)) > '\r') {
                    PyUnicode_WRITE(kind, translated, out++, ch);
                }
                if (ch == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, ch);
                    seennl |= SEEN_LF;
                }
                else if (ch == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else {
                        seennl |= SEEN_CR;
                    }
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                }
                else if (in > len) {
                    /* The character just read was the one past the end. */
                    break;
                }
                else {
                    PyUnicode_WRITE(kind, translated, out++, ch);
                }
            }

            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (output == NULL) {
                return NULL;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
520
521
/*[clinic input]
522
_io.IncrementalNewlineDecoder.decode
523
    input: object
524
    final: bool = False
525
[clinic start generated code]*/
526
527
static PyObject *
528
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
529
                                          PyObject *input, int final)
530
/*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
531
0
{
532
0
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
533
0
}
534
535
/*[clinic input]
536
_io.IncrementalNewlineDecoder.getstate
537
[clinic start generated code]*/
538
539
static PyObject *
540
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
541
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
542
0
{
543
0
    PyObject *buffer;
544
0
    unsigned long long flag;
545
546
0
    CHECK_INITIALIZED_DECODER(self);
547
548
0
    if (self->decoder != Py_None) {
549
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
550
0
           &_Py_ID(getstate));
551
0
        if (state == NULL)
552
0
            return NULL;
553
0
        if (!PyTuple_Check(state)) {
554
0
            PyErr_SetString(PyExc_TypeError,
555
0
                            "illegal decoder state");
556
0
            Py_DECREF(state);
557
0
            return NULL;
558
0
        }
559
0
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
560
0
                              &buffer, &flag))
561
0
        {
562
0
            Py_DECREF(state);
563
0
            return NULL;
564
0
        }
565
0
        Py_INCREF(buffer);
566
0
        Py_DECREF(state);
567
0
    }
568
0
    else {
569
0
        buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
570
0
        flag = 0;
571
0
    }
572
0
    flag <<= 1;
573
0
    if (self->pendingcr)
574
0
        flag |= 1;
575
0
    return Py_BuildValue("NK", buffer, flag);
576
0
}
577
578
/*[clinic input]
579
_io.IncrementalNewlineDecoder.setstate
580
    state: object
581
    /
582
[clinic start generated code]*/
583
584
static PyObject *
585
_io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
586
                                            PyObject *state)
587
/*[clinic end generated code: output=09135cb6e78a1dc8 input=c53fb505a76dbbe2]*/
588
0
{
589
0
    PyObject *buffer;
590
0
    unsigned long long flag;
591
592
0
    CHECK_INITIALIZED_DECODER(self);
593
594
0
    if (!PyTuple_Check(state)) {
595
0
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
596
0
        return NULL;
597
0
    }
598
0
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
599
0
                          &buffer, &flag))
600
0
    {
601
0
        return NULL;
602
0
    }
603
604
0
    self->pendingcr = (int) (flag & 1);
605
0
    flag >>= 1;
606
607
0
    if (self->decoder != Py_None) {
608
0
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
609
0
                                    "((OK))", buffer, flag);
610
0
    }
611
0
    else {
612
0
        Py_RETURN_NONE;
613
0
    }
614
0
}
615
616
/*[clinic input]
617
_io.IncrementalNewlineDecoder.reset
618
[clinic start generated code]*/
619
620
static PyObject *
621
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
622
/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
623
0
{
624
0
    CHECK_INITIALIZED_DECODER(self);
625
626
0
    self->seennl = 0;
627
0
    self->pendingcr = 0;
628
0
    if (self->decoder != Py_None)
629
0
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
630
0
    else
631
0
        Py_RETURN_NONE;
632
0
}
633
634
/* Getter for `newlines`: None when no newline has been recorded, a str
   when exactly one kind was recorded, otherwise a tuple of the recorded
   kinds in the order "\r", "\n", "\r\n". */
static PyObject *
incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context))
{
    nldecoder_object *self = nldecoder_object_CAST(op);
    CHECK_INITIALIZED_DECODER(self);

    switch (self->seennl) {
    /* exactly one kind */
    case SEEN_CR:
        return PyUnicode_FromString("\r");
    case SEEN_LF:
        return PyUnicode_FromString("\n");
    case SEEN_CRLF:
        return PyUnicode_FromString("\r\n");

    /* two kinds */
    case SEEN_CR | SEEN_LF:
        return Py_BuildValue("ss", "\r", "\n");
    case SEEN_CR | SEEN_CRLF:
        return Py_BuildValue("ss", "\r", "\r\n");
    case SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("ss", "\n", "\r\n");

    /* all three kinds */
    case SEEN_CR | SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("sss", "\r", "\n", "\r\n");

    default:
        Py_RETURN_NONE;
    }
}
660
661
/* TextIOWrapper */
662
663
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
664
665
struct textio
666
{
667
    PyObject_HEAD
668
    int ok; /* initialized? */
669
    int detached;
670
    Py_ssize_t chunk_size;
671
    PyObject *buffer;
672
    PyObject *encoding;
673
    PyObject *encoder;
674
    PyObject *decoder;
675
    PyObject *readnl;
676
    PyObject *errors;
677
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
678
    char line_buffering;
679
    char write_through;
680
    char readuniversal;
681
    char readtranslate;
682
    char writetranslate;
683
    char seekable;
684
    char has_read1;
685
    char telling;
686
    char finalizing;
687
    /* Specialized encoding func (see below) */
688
    encodefunc_t encodefunc;
689
    /* Whether or not it's the start of the stream */
690
    char encoding_start_of_stream;
691
692
    /* Reads and writes are internally buffered in order to speed things up.
693
       However, any read will first flush the write buffer if itsn't empty.
694
695
       Please also note that text to be written is first encoded before being
696
       buffered. This is necessary so that encoding errors are immediately
697
       reported to the caller, but it unfortunately means that the
698
       IncrementalEncoder (whose encode() method is always written in Python)
699
       becomes a bottleneck for small writes.
700
    */
701
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
702
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
703
    PyObject *pending_bytes;       // data waiting to be written.
704
                                   // ascii unicode, bytes, or list of them.
705
    Py_ssize_t pending_bytes_count;
706
707
    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
708
     * dec_flags is the second (integer) item of the decoder state and
709
     * next_input is the chunk of input bytes that comes next after the
710
     * snapshot point.  We use this to reconstruct decoder states in tell().
711
     */
712
    PyObject *snapshot;
713
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
714
       the heuristic in tell(). */
715
    double b2cratio;
716
717
    /* Cache raw object if it's a FileIO object */
718
    PyObject *raw;
719
720
    PyObject *weakreflist;
721
    PyObject *dict;
722
723
    _PyIO_State *state;
724
};
725
726
13.2k
#define textio_CAST(op) ((textio *)(op))
727
728
static void
729
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
730
731
/* A couple of specialized cases in order to bypass the slow incremental
732
   encoding methods for the most popular encodings. */
733
734
/* Specialized encoder: ASCII, using the wrapper's error handler. */
static PyObject *
ascii_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsASCIIString(text, errors);
}
740
741
/* Specialized encoder: UTF-16 with byteorder argument 1. */
static PyObject *
utf16be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, 1);
}
747
748
/* Specialized encoder: UTF-16 with byteorder argument -1. */
static PyObject *
utf16le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, -1);
}
754
755
/* Specialized encoder: UTF-16. At the start of the stream the byteorder
   argument is 0; afterwards the BOM is skipped and the native-order
   encoder is used. */
static PyObject *
utf16_encode(PyObject *op, PyObject *text)
{
    textio *wrapper = textio_CAST(op);

    if (wrapper->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF16(text,
                                      PyUnicode_AsUTF8(wrapper->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf16be_encode(op, text);
#else
    return utf16le_encode(op, text);
#endif
}
769
770
/* Specialized encoder: UTF-32 with byteorder argument 1. */
static PyObject *
utf32be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, 1);
}
776
777
/* Specialized encoder: UTF-32 with byteorder argument -1. */
static PyObject *
utf32le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, -1);
}
783
784
/* Specialized encoder: UTF-32. At the start of the stream the byteorder
   argument is 0; afterwards the BOM is skipped and the native-order
   encoder is used. */
static PyObject *
utf32_encode(PyObject *op, PyObject *text)
{
    textio *wrapper = textio_CAST(op);

    if (wrapper->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF32(text,
                                      PyUnicode_AsUTF8(wrapper->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf32be_encode(op, text);
#else
    return utf32le_encode(op, text);
#endif
}
798
799
/* Specialized encoder: UTF-8, using the wrapper's error handler. */
static PyObject *
utf8_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsUTF8String(text, errors);
}
805
806
/* Specialized encoder: Latin-1, using the wrapper's error handler. */
static PyObject *
latin1_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsLatin1String(text, errors);
}
812
813
// Return true when encoding can be skipped when text is ascii.
814
static inline int
815
is_asciicompat_encoding(encodefunc_t f)
816
0
{
817
0
    return f == ascii_encode || f == latin1_encode || f == utf8_encode;
818
0
}
819
820
/* Map normalized encoding names onto the specialized encoding funcs */
821
822
typedef struct {
823
    const char *name;
824
    encodefunc_t encodefunc;
825
} encodefuncentry;
826
827
static const encodefuncentry encodefuncs[] = {
828
    {"ascii",       ascii_encode},
829
    {"iso8859-1",   latin1_encode},
830
    {"utf-8",       utf8_encode},
831
    {"utf-16-be",   utf16be_encode},
832
    {"utf-16-le",   utf16le_encode},
833
    {"utf-16",      utf16_encode},
834
    {"utf-32-be",   utf32be_encode},
835
    {"utf-32-le",   utf32le_encode},
836
    {"utf-32",      utf32_encode},
837
    {NULL, NULL}
838
};
839
840
static int
841
validate_newline(const char *newline)
842
48
{
843
48
    if (newline && newline[0] != '\0'
844
48
        && !(newline[0] == '\n' && newline[1] == '\0')
845
48
        && !(newline[0] == '\r' && newline[1] == '\0')
846
48
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847
0
        PyErr_Format(PyExc_ValueError,
848
0
                     "illegal newline value: %s", newline);
849
0
        return -1;
850
0
    }
851
48
    return 0;
852
48
}
853
854
static int
855
set_newline(textio *self, const char *newline)
856
48
{
857
48
    PyObject *old = self->readnl;
858
48
    if (newline == NULL) {
859
0
        self->readnl = NULL;
860
0
    }
861
48
    else {
862
48
        self->readnl = PyUnicode_FromString(newline);
863
48
        if (self->readnl == NULL) {
864
0
            self->readnl = old;
865
0
            return -1;
866
0
        }
867
48
    }
868
48
    self->readuniversal = (newline == NULL || newline[0] == '\0');
869
48
    self->readtranslate = (newline == NULL);
870
48
    self->writetranslate = (newline == NULL || newline[0] != '\0');
871
48
    if (!self->readuniversal && self->readnl != NULL) {
872
        // validate_newline() accepts only ASCII newlines.
873
48
        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
874
48
        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
875
48
        if (strcmp(self->writenl, "\n") == 0) {
876
48
            self->writenl = NULL;
877
48
        }
878
48
    }
879
0
    else {
880
#ifdef MS_WINDOWS
881
        self->writenl = "\r\n";
882
#else
883
0
        self->writenl = NULL;
884
0
#endif
885
0
    }
886
48
    Py_XDECREF(old);
887
48
    return 0;
888
48
}
889
890
static int
891
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
892
                           const char *errors)
893
48
{
894
48
    PyObject *res;
895
48
    int r;
896
897
48
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
898
48
    if (res == NULL)
899
0
        return -1;
900
901
48
    r = PyObject_IsTrue(res);
902
48
    Py_DECREF(res);
903
48
    if (r == -1)
904
0
        return -1;
905
906
48
    if (r != 1)
907
32
        return 0;
908
909
16
    Py_CLEAR(self->decoder);
910
16
    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
911
16
    if (self->decoder == NULL)
912
0
        return -1;
913
914
16
    if (self->readuniversal) {
915
0
        _PyIO_State *state = self->state;
916
0
        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
917
0
            (PyObject *)state->PyIncrementalNewlineDecoder_Type,
918
0
            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
919
0
        if (incrementalDecoder == NULL)
920
0
            return -1;
921
0
        Py_XSETREF(self->decoder, incrementalDecoder);
922
0
    }
923
924
16
    return 0;
925
16
}
926
927
static PyObject*
928
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
929
                      int eof)
930
0
{
931
0
    PyObject *chars;
932
933
0
    if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
934
0
        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
935
0
    else
936
0
        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
937
0
                                           eof ? Py_True : Py_False, NULL);
938
939
0
    if (check_decoded(chars) < 0)
940
        // check_decoded already decreases refcount
941
0
        return NULL;
942
943
0
    return chars;
944
0
}
945
946
static int
947
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
948
                           const char *errors)
949
48
{
950
48
    PyObject *res;
951
48
    int r;
952
953
48
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
954
48
    if (res == NULL)
955
0
        return -1;
956
957
48
    r = PyObject_IsTrue(res);
958
48
    Py_DECREF(res);
959
48
    if (r == -1)
960
0
        return -1;
961
962
48
    if (r != 1)
963
16
        return 0;
964
965
32
    Py_CLEAR(self->encoder);
966
32
    self->encodefunc = NULL;
967
32
    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
968
32
    if (self->encoder == NULL)
969
0
        return -1;
970
971
    /* Get the normalized named of the codec */
972
32
    if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
973
0
        return -1;
974
0
    }
975
32
    if (res != NULL && PyUnicode_Check(res)) {
976
32
        const encodefuncentry *e = encodefuncs;
977
96
        while (e->name != NULL) {
978
96
            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
979
32
                self->encodefunc = e->encodefunc;
980
32
                break;
981
32
            }
982
64
            e++;
983
64
        }
984
32
    }
985
32
    Py_XDECREF(res);
986
987
32
    return 0;
988
32
}
989
990
static int
991
_textiowrapper_fix_encoder_state(textio *self)
992
48
{
993
48
    if (!self->seekable || !self->encoder) {
994
16
        return 0;
995
16
    }
996
997
32
    self->encoding_start_of_stream = 1;
998
999
32
    PyObject *cookieObj = PyObject_CallMethodNoArgs(
1000
32
        self->buffer, &_Py_ID(tell));
1001
32
    if (cookieObj == NULL) {
1002
0
        return -1;
1003
0
    }
1004
1005
32
    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
1006
32
    Py_DECREF(cookieObj);
1007
32
    if (cmp < 0) {
1008
0
        return -1;
1009
0
    }
1010
1011
32
    if (cmp == 0) {
1012
16
        self->encoding_start_of_stream = 0;
1013
16
        PyObject *res = PyObject_CallMethodOneArg(
1014
16
            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1015
16
        if (res == NULL) {
1016
0
            return -1;
1017
0
        }
1018
16
        Py_DECREF(res);
1019
16
    }
1020
1021
32
    return 0;
1022
32
}
1023
1024
static int
1025
io_check_errors(PyObject *errors)
1026
48
{
1027
48
    assert(errors != NULL && errors != Py_None);
1028
1029
48
    PyInterpreterState *interp = _PyInterpreterState_GET();
1030
48
#ifndef Py_DEBUG
1031
    /* In release mode, only check in development mode (-X dev) */
1032
48
    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1033
48
        return 0;
1034
48
    }
1035
#else
1036
    /* Always check in debug mode */
1037
#endif
1038
1039
    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1040
       before_PyUnicode_InitEncodings() is called. */
1041
0
    if (!interp->unicode.fs_codec.encoding) {
1042
0
        return 0;
1043
0
    }
1044
1045
0
    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1046
0
    if (name == NULL) {
1047
0
        return -1;
1048
0
    }
1049
0
    PyObject *handler = PyCodec_LookupError(name);
1050
0
    if (handler != NULL) {
1051
0
        Py_DECREF(handler);
1052
0
        return 0;
1053
0
    }
1054
0
    return -1;
1055
0
}
1056
1057
1058
1059
/*[clinic input]
1060
_io.TextIOWrapper.__init__
1061
    buffer: object
1062
    encoding: str(accept={str, NoneType}) = None
1063
    errors: object = None
1064
    newline: str(accept={str, NoneType}) = None
1065
    line_buffering: bool = False
1066
    write_through: bool = False
1067
1068
Character and line based layer over a BufferedIOBase object, buffer.
1069
1070
encoding gives the name of the encoding that the stream will be
1071
decoded or encoded with. It defaults to locale.getencoding().
1072
1073
errors determines the strictness of encoding and decoding (see
1074
help(codecs.Codec) or the documentation for codecs.register) and
1075
defaults to "strict".
1076
1077
newline controls how line endings are handled. It can be None, '',
1078
'\n', '\r', and '\r\n'.  It works as follows:
1079
1080
* On input, if newline is None, universal newlines mode is
1081
  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1082
  these are translated into '\n' before being returned to the
1083
  caller. If it is '', universal newline mode is enabled, but line
1084
  endings are returned to the caller untranslated. If it has any of
1085
  the other legal values, input lines are only terminated by the given
1086
  string, and the line ending is returned to the caller untranslated.
1087
1088
* On output, if newline is None, any '\n' characters written are
1089
  translated to the system default line separator, os.linesep. If
1090
  newline is '' or '\n', no translation takes place. If newline is any
1091
  of the other legal values, any '\n' characters written are translated
1092
  to the given string.
1093
1094
If line_buffering is True, a call to flush is implied when a call to
1095
write contains a newline character.
1096
[clinic start generated code]*/
1097
1098
static int
1099
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1100
                                const char *encoding, PyObject *errors,
1101
                                const char *newline, int line_buffering,
1102
                                int write_through)
1103
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1104
48
{
1105
48
    PyObject *raw, *codec_info = NULL;
1106
48
    PyObject *res;
1107
48
    int r;
1108
1109
48
    self->ok = 0;
1110
48
    self->detached = 0;
1111
1112
48
    if (encoding == NULL) {
1113
0
        PyInterpreterState *interp = _PyInterpreterState_GET();
1114
0
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1115
0
            if (PyErr_WarnEx(PyExc_EncodingWarning,
1116
0
                             "'encoding' argument not specified", 1)) {
1117
0
                return -1;
1118
0
            }
1119
0
        }
1120
0
    }
1121
1122
48
    if (errors == Py_None) {
1123
0
        errors = &_Py_ID(strict);
1124
0
    }
1125
48
    else if (!PyUnicode_Check(errors)) {
1126
        // Check 'errors' argument here because Argument Clinic doesn't support
1127
        // 'str(accept={str, NoneType})' converter.
1128
0
        PyErr_Format(
1129
0
            PyExc_TypeError,
1130
0
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1131
0
            Py_TYPE(errors)->tp_name);
1132
0
        return -1;
1133
0
    }
1134
48
    else if (io_check_errors(errors)) {
1135
0
        return -1;
1136
0
    }
1137
48
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1138
48
    if (errors_str == NULL) {
1139
0
        return -1;
1140
0
    }
1141
1142
48
    if (validate_newline(newline) < 0) {
1143
0
        return -1;
1144
0
    }
1145
1146
48
    Py_CLEAR(self->buffer);
1147
48
    Py_CLEAR(self->encoding);
1148
48
    Py_CLEAR(self->encoder);
1149
48
    Py_CLEAR(self->decoder);
1150
48
    Py_CLEAR(self->readnl);
1151
48
    Py_CLEAR(self->decoded_chars);
1152
48
    Py_CLEAR(self->pending_bytes);
1153
48
    Py_CLEAR(self->snapshot);
1154
48
    Py_CLEAR(self->errors);
1155
48
    Py_CLEAR(self->raw);
1156
48
    self->decoded_chars_used = 0;
1157
48
    self->pending_bytes_count = 0;
1158
48
    self->encodefunc = NULL;
1159
48
    self->b2cratio = 0.0;
1160
1161
48
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1162
0
        _Py_DECLARE_STR(utf_8, "utf-8");
1163
0
        self->encoding = &_Py_STR(utf_8);
1164
0
    }
1165
48
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1166
0
        self->encoding = _Py_GetLocaleEncodingObject();
1167
0
        if (self->encoding == NULL) {
1168
0
            goto error;
1169
0
        }
1170
0
        assert(PyUnicode_Check(self->encoding));
1171
0
    }
1172
1173
48
    if (self->encoding != NULL) {
1174
0
        encoding = PyUnicode_AsUTF8(self->encoding);
1175
0
        if (encoding == NULL)
1176
0
            goto error;
1177
0
    }
1178
48
    else if (encoding != NULL) {
1179
48
        self->encoding = PyUnicode_FromString(encoding);
1180
48
        if (self->encoding == NULL)
1181
0
            goto error;
1182
48
    }
1183
0
    else {
1184
0
        PyErr_SetString(PyExc_OSError,
1185
0
                        "could not determine default encoding");
1186
0
        goto error;
1187
0
    }
1188
1189
    /* Check we have been asked for a real text encoding */
1190
48
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
1191
48
    if (codec_info == NULL) {
1192
0
        Py_CLEAR(self->encoding);
1193
0
        goto error;
1194
0
    }
1195
1196
    /* XXX: Failures beyond this point have the potential to leak elements
1197
     * of the partially constructed object (like self->encoding)
1198
     */
1199
1200
48
    self->errors = Py_NewRef(errors);
1201
48
    self->chunk_size = 8192;
1202
48
    self->line_buffering = line_buffering;
1203
48
    self->write_through = write_through;
1204
48
    if (set_newline(self, newline) < 0) {
1205
0
        goto error;
1206
0
    }
1207
1208
48
    self->buffer = Py_NewRef(buffer);
1209
1210
    /* Build the decoder object */
1211
48
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1212
48
    self->state = state;
1213
48
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1214
0
        goto error;
1215
1216
    /* Build the encoder object */
1217
48
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1218
0
        goto error;
1219
1220
    /* Finished sorting out the codec details */
1221
48
    Py_CLEAR(codec_info);
1222
1223
48
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1224
48
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1225
48
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1226
48
    {
1227
48
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1228
0
            goto error;
1229
        /* Cache the raw FileIO object to speed up 'closed' checks */
1230
48
        if (raw != NULL) {
1231
48
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1232
48
                self->raw = raw;
1233
0
            else
1234
0
                Py_DECREF(raw);
1235
48
        }
1236
48
    }
1237
1238
48
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1239
48
    if (res == NULL)
1240
0
        goto error;
1241
48
    r = PyObject_IsTrue(res);
1242
48
    Py_DECREF(res);
1243
48
    if (r < 0)
1244
0
        goto error;
1245
48
    self->seekable = self->telling = r;
1246
1247
48
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1248
48
    if (r < 0) {
1249
0
        goto error;
1250
0
    }
1251
48
    self->has_read1 = r;
1252
1253
48
    self->encoding_start_of_stream = 0;
1254
48
    if (_textiowrapper_fix_encoder_state(self) < 0) {
1255
0
        goto error;
1256
0
    }
1257
1258
48
    self->ok = 1;
1259
48
    return 0;
1260
1261
0
  error:
1262
0
    Py_XDECREF(codec_info);
1263
0
    return -1;
1264
48
}
1265
1266
/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1267
 * -1 on error.
1268
 */
1269
static int
1270
convert_optional_bool(PyObject *obj, int default_value)
1271
0
{
1272
0
    long v;
1273
0
    if (obj == Py_None) {
1274
0
        v = default_value;
1275
0
    }
1276
0
    else {
1277
0
        v = PyLong_AsLong(obj);
1278
0
        if (v == -1 && PyErr_Occurred())
1279
0
            return -1;
1280
0
    }
1281
0
    return v != 0;
1282
0
}
1283
1284
static int
1285
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1286
                              PyObject *errors, int newline_changed)
1287
0
{
1288
    /* Use existing settings where new settings are not specified */
1289
0
    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1290
0
        return 0;  // no change
1291
0
    }
1292
1293
0
    if (encoding == Py_None) {
1294
0
        encoding = self->encoding;
1295
0
        if (errors == Py_None) {
1296
0
            errors = self->errors;
1297
0
        }
1298
0
        Py_INCREF(encoding);
1299
0
    }
1300
0
    else {
1301
0
        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1302
0
            encoding = _Py_GetLocaleEncodingObject();
1303
0
            if (encoding == NULL) {
1304
0
                return -1;
1305
0
            }
1306
0
        } else {
1307
0
            Py_INCREF(encoding);
1308
0
        }
1309
0
        if (errors == Py_None) {
1310
0
            errors = &_Py_ID(strict);
1311
0
        }
1312
0
    }
1313
0
    Py_INCREF(errors);
1314
1315
0
    const char *c_encoding = PyUnicode_AsUTF8(encoding);
1316
0
    if (c_encoding == NULL) {
1317
0
        Py_DECREF(encoding);
1318
0
        Py_DECREF(errors);
1319
0
        return -1;
1320
0
    }
1321
0
    const char *c_errors = PyUnicode_AsUTF8(errors);
1322
0
    if (c_errors == NULL) {
1323
0
        Py_DECREF(encoding);
1324
0
        Py_DECREF(errors);
1325
0
        return -1;
1326
0
    }
1327
1328
    // Create new encoder & decoder
1329
0
    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
1330
0
    if (codec_info == NULL) {
1331
0
        Py_DECREF(encoding);
1332
0
        Py_DECREF(errors);
1333
0
        return -1;
1334
0
    }
1335
0
    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1336
0
            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1337
0
        Py_DECREF(codec_info);
1338
0
        Py_DECREF(encoding);
1339
0
        Py_DECREF(errors);
1340
0
        return -1;
1341
0
    }
1342
0
    Py_DECREF(codec_info);
1343
1344
0
    Py_SETREF(self->encoding, encoding);
1345
0
    Py_SETREF(self->errors, errors);
1346
1347
0
    return _textiowrapper_fix_encoder_state(self);
1348
0
}
1349
1350
/*[clinic input]
1351
@critical_section
1352
_io.TextIOWrapper.reconfigure
1353
    *
1354
    encoding: object = None
1355
    errors: object = None
1356
    newline as newline_obj: object(c_default="NULL") = None
1357
    line_buffering as line_buffering_obj: object = None
1358
    write_through as write_through_obj: object = None
1359
1360
Reconfigure the text stream with new parameters.
1361
1362
This also does an implicit stream flush.
1363
1364
[clinic start generated code]*/
1365
1366
static PyObject *
1367
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1368
                                   PyObject *errors, PyObject *newline_obj,
1369
                                   PyObject *line_buffering_obj,
1370
                                   PyObject *write_through_obj)
1371
/*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1372
0
{
1373
0
    int line_buffering;
1374
0
    int write_through;
1375
0
    const char *newline = NULL;
1376
1377
0
    if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1378
0
        PyErr_Format(PyExc_TypeError,
1379
0
                "reconfigure() argument 'encoding' must be str or None, not %s",
1380
0
                Py_TYPE(encoding)->tp_name);
1381
0
        return NULL;
1382
0
    }
1383
0
    if (errors != Py_None && !PyUnicode_Check(errors)) {
1384
0
        PyErr_Format(PyExc_TypeError,
1385
0
                "reconfigure() argument 'errors' must be str or None, not %s",
1386
0
                Py_TYPE(errors)->tp_name);
1387
0
        return NULL;
1388
0
    }
1389
0
    if (newline_obj != NULL && newline_obj != Py_None &&
1390
0
        !PyUnicode_Check(newline_obj))
1391
0
    {
1392
0
        PyErr_Format(PyExc_TypeError,
1393
0
                "reconfigure() argument 'newline' must be str or None, not %s",
1394
0
                Py_TYPE(newline_obj)->tp_name);
1395
0
        return NULL;
1396
0
    }
1397
    /* Check if something is in the read buffer */
1398
0
    if (self->decoded_chars != NULL) {
1399
0
        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1400
0
            _unsupported(self->state,
1401
0
                         "It is not possible to set the encoding or newline "
1402
0
                         "of stream after the first read");
1403
0
            return NULL;
1404
0
        }
1405
0
    }
1406
1407
0
    if (newline_obj != NULL && newline_obj != Py_None) {
1408
0
        newline = PyUnicode_AsUTF8(newline_obj);
1409
0
        if (newline == NULL || validate_newline(newline) < 0) {
1410
0
            return NULL;
1411
0
        }
1412
0
    }
1413
1414
0
    line_buffering = convert_optional_bool(line_buffering_obj,
1415
0
                                           self->line_buffering);
1416
0
    if (line_buffering < 0) {
1417
0
        return NULL;
1418
0
    }
1419
0
    write_through = convert_optional_bool(write_through_obj,
1420
0
                                          self->write_through);
1421
0
    if (write_through < 0) {
1422
0
        return NULL;
1423
0
    }
1424
1425
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1426
0
        return NULL;
1427
0
    }
1428
0
    self->b2cratio = 0;
1429
1430
0
    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1431
0
        return NULL;
1432
0
    }
1433
1434
0
    if (textiowrapper_change_encoding(
1435
0
            self, encoding, errors, newline_obj != NULL) < 0) {
1436
0
        return NULL;
1437
0
    }
1438
1439
0
    self->line_buffering = line_buffering;
1440
0
    self->write_through = write_through;
1441
0
    Py_RETURN_NONE;
1442
0
}
1443
1444
static int
1445
textiowrapper_clear(PyObject *op)
1446
0
{
1447
0
    textio *self = textio_CAST(op);
1448
0
    self->ok = 0;
1449
0
    Py_CLEAR(self->buffer);
1450
0
    Py_CLEAR(self->encoding);
1451
0
    Py_CLEAR(self->encoder);
1452
0
    Py_CLEAR(self->decoder);
1453
0
    Py_CLEAR(self->readnl);
1454
0
    Py_CLEAR(self->decoded_chars);
1455
0
    Py_CLEAR(self->pending_bytes);
1456
0
    Py_CLEAR(self->snapshot);
1457
0
    Py_CLEAR(self->errors);
1458
0
    Py_CLEAR(self->raw);
1459
1460
0
    Py_CLEAR(self->dict);
1461
0
    return 0;
1462
0
}
1463
1464
static void
1465
textiowrapper_dealloc(PyObject *op)
1466
0
{
1467
0
    textio *self = textio_CAST(op);
1468
0
    PyTypeObject *tp = Py_TYPE(self);
1469
0
    self->finalizing = 1;
1470
0
    if (_PyIOBase_finalize(op) < 0)
1471
0
        return;
1472
0
    self->ok = 0;
1473
0
    _PyObject_GC_UNTRACK(self);
1474
0
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
1475
0
    (void)textiowrapper_clear(op);
1476
0
    tp->tp_free(self);
1477
0
    Py_DECREF(tp);
1478
0
}
1479
1480
static int
1481
textiowrapper_traverse(PyObject *op, visitproc visit, void *arg)
1482
13.2k
{
1483
13.2k
    textio *self = textio_CAST(op);
1484
13.2k
    Py_VISIT(Py_TYPE(self));
1485
13.2k
    Py_VISIT(self->buffer);
1486
13.2k
    Py_VISIT(self->encoding);
1487
13.2k
    Py_VISIT(self->encoder);
1488
13.2k
    Py_VISIT(self->decoder);
1489
13.2k
    Py_VISIT(self->readnl);
1490
13.2k
    Py_VISIT(self->decoded_chars);
1491
13.2k
    Py_VISIT(self->pending_bytes);
1492
13.2k
    Py_VISIT(self->snapshot);
1493
13.2k
    Py_VISIT(self->errors);
1494
13.2k
    Py_VISIT(self->raw);
1495
1496
13.2k
    Py_VISIT(self->dict);
1497
13.2k
    return 0;
1498
13.2k
}
1499
1500
static PyObject *
1501
_io_TextIOWrapper_closed_get_impl(textio *self);
1502
1503
/* Return NULL from the enclosing function, with ValueError set, when the
 * stream is closed.  This macro takes some shortcuts to make the common case
 * faster: for an exact TextIOWrapper with a cached raw FileIO (self->raw)
 * the closed state is read with _PyFileIO_closed(); without a cached raw
 * object the `closed` getter implementation is called directly.  Any other
 * type goes through _PyIOBase_check_closed().
 *
 * Must be used as a statement, `CHECK_CLOSED(self);`, inside a function that
 * returns a pointer.  The argument is parenthesized at every use so that an
 * arbitrary pointer expression may be passed.
 */
#define CHECK_CLOSED(self) \
    do { \
        int closed_; \
        PyObject *closed_obj_; \
        if (Py_IS_TYPE((self), (self)->state->PyTextIOWrapper_Type)) { \
            if ((self)->raw != NULL) \
                closed_ = _PyFileIO_closed((self)->raw); \
            else { \
                closed_obj_ = _io_TextIOWrapper_closed_get_impl((self)); \
                if (closed_obj_ == NULL) \
                    return NULL; \
                closed_ = PyObject_IsTrue(closed_obj_); \
                Py_DECREF(closed_obj_); \
                if (closed_ < 0) \
                    return NULL; \
            } \
            if (closed_ > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1529
1530
/* Guard macros for TextIOWrapper methods.
 *
 * Each one is a single statement (wrapped in do { } while (0)) and must be
 * written with a trailing semicolon, e.g. `CHECK_ATTACHED(self);`.  Being a
 * single statement makes them safe inside an unbraced if/else.  The argument
 * is parenthesized at every use.
 */

/* Return NULL from the enclosing function, with ValueError set, when the
 * object has not been successfully initialized (ok <= 0).
 */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Like CHECK_INITIALIZED, and additionally return NULL with ValueError set
 * when the underlying buffer has been detached.
 */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Same checks as CHECK_ATTACHED for functions returning int: the enclosing
 * function returns -1 instead of NULL.
 */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1555
1556
1557
/*[clinic input]
1558
@critical_section
1559
_io.TextIOWrapper.detach
1560
[clinic start generated code]*/
1561
1562
static PyObject *
1563
_io_TextIOWrapper_detach_impl(textio *self)
1564
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1565
0
{
1566
0
    PyObject *buffer;
1567
0
    CHECK_ATTACHED(self);
1568
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1569
0
        return NULL;
1570
0
    }
1571
0
    buffer = self->buffer;
1572
0
    self->buffer = NULL;
1573
0
    self->detached = 1;
1574
0
    return buffer;
1575
0
}
1576
1577
/* Flush the internal write buffer. This doesn't explicitly flush the
1578
   underlying buffered object, though. */
1579
static int
1580
_textiowrapper_writeflush(textio *self)
1581
0
{
1582
0
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
1583
1584
0
    if (self->pending_bytes == NULL)
1585
0
        return 0;
1586
1587
0
    PyObject *pending = self->pending_bytes;
1588
0
    PyObject *b;
1589
1590
0
    if (PyBytes_Check(pending)) {
1591
0
        b = Py_NewRef(pending);
1592
0
    }
1593
0
    else if (PyUnicode_Check(pending)) {
1594
0
        assert(PyUnicode_IS_ASCII(pending));
1595
0
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1596
0
        b = PyBytes_FromStringAndSize(
1597
0
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1598
0
        if (b == NULL) {
1599
0
            return -1;
1600
0
        }
1601
0
    }
1602
0
    else {
1603
0
        assert(PyList_Check(pending));
1604
0
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1605
0
        if (b == NULL) {
1606
0
            return -1;
1607
0
        }
1608
1609
0
        char *buf = PyBytes_AsString(b);
1610
0
        Py_ssize_t pos = 0;
1611
1612
0
        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1613
0
            PyObject *obj = PyList_GET_ITEM(pending, i);
1614
0
            char *src;
1615
0
            Py_ssize_t len;
1616
0
            if (PyUnicode_Check(obj)) {
1617
0
                assert(PyUnicode_IS_ASCII(obj));
1618
0
                src = PyUnicode_DATA(obj);
1619
0
                len = PyUnicode_GET_LENGTH(obj);
1620
0
            }
1621
0
            else {
1622
0
                assert(PyBytes_Check(obj));
1623
0
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1624
0
                    Py_DECREF(b);
1625
0
                    return -1;
1626
0
                }
1627
0
            }
1628
0
            memcpy(buf + pos, src, len);
1629
0
            pos += len;
1630
0
        }
1631
0
        assert(pos == self->pending_bytes_count);
1632
0
    }
1633
1634
0
    self->pending_bytes_count = 0;
1635
0
    self->pending_bytes = NULL;
1636
0
    Py_DECREF(pending);
1637
1638
0
    PyObject *ret;
1639
0
    do {
1640
0
        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1641
0
    } while (ret == NULL && _PyIO_trap_eintr());
1642
0
    Py_DECREF(b);
1643
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
1644
    // when an error occurred.
1645
0
    if (ret == NULL)
1646
0
        return -1;
1647
0
    Py_DECREF(ret);
1648
0
    return 0;
1649
0
}
1650
1651
/*[clinic input]
1652
@critical_section
1653
_io.TextIOWrapper.write
1654
    text: unicode
1655
    /
1656
[clinic start generated code]*/
1657
1658
static PyObject *
1659
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1660
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1661
0
{
1662
0
    PyObject *ret;
1663
0
    PyObject *b;
1664
0
    Py_ssize_t textlen;
1665
0
    int haslf = 0;
1666
0
    int needflush = 0, text_needflush = 0;
1667
1668
0
    CHECK_ATTACHED(self);
1669
0
    CHECK_CLOSED(self);
1670
1671
0
    if (self->encoder == NULL) {
1672
0
        return _unsupported(self->state, "not writable");
1673
0
    }
1674
1675
0
    Py_INCREF(text);
1676
1677
0
    textlen = PyUnicode_GET_LENGTH(text);
1678
1679
0
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1680
0
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1681
0
            haslf = 1;
1682
1683
0
    if (haslf && self->writetranslate && self->writenl != NULL) {
1684
0
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1685
0
                                                 "ss", "\n", self->writenl);
1686
0
        Py_DECREF(text);
1687
0
        if (newtext == NULL)
1688
0
            return NULL;
1689
0
        text = newtext;
1690
0
    }
1691
1692
0
    if (self->write_through)
1693
0
        text_needflush = 1;
1694
0
    if (self->line_buffering &&
1695
0
        (haslf ||
1696
0
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1697
0
        needflush = 1;
1698
1699
    /* XXX What if we were just reading? */
1700
0
    if (self->encodefunc != NULL) {
1701
0
        if (PyUnicode_IS_ASCII(text) &&
1702
                // See bpo-43260
1703
0
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1704
0
                is_asciicompat_encoding(self->encodefunc)) {
1705
0
            b = Py_NewRef(text);
1706
0
        }
1707
0
        else {
1708
0
            b = (*self->encodefunc)((PyObject *) self, text);
1709
0
        }
1710
0
        self->encoding_start_of_stream = 0;
1711
0
    }
1712
0
    else {
1713
0
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1714
0
    }
1715
1716
0
    Py_DECREF(text);
1717
0
    if (b == NULL)
1718
0
        return NULL;
1719
0
    if (b != text && !PyBytes_Check(b)) {
1720
0
        PyErr_Format(PyExc_TypeError,
1721
0
                     "encoder should return a bytes object, not '%.200s'",
1722
0
                     Py_TYPE(b)->tp_name);
1723
0
        Py_DECREF(b);
1724
0
        return NULL;
1725
0
    }
1726
1727
0
    Py_ssize_t bytes_len;
1728
0
    if (b == text) {
1729
0
        bytes_len = PyUnicode_GET_LENGTH(b);
1730
0
    }
1731
0
    else {
1732
0
        bytes_len = PyBytes_GET_SIZE(b);
1733
0
    }
1734
1735
    // We should avoid concatenating huge data.
1736
    // Flush the buffer before adding b to the buffer if b is not small.
1737
    // https://github.com/python/cpython/issues/87426
1738
0
    if (bytes_len >= self->chunk_size) {
1739
        // _textiowrapper_writeflush() calls buffer.write().
1740
        // self->pending_bytes can be appended during buffer->write()
1741
        // or other thread.
1742
        // We need to loop until buffer becomes empty.
1743
        // https://github.com/python/cpython/issues/118138
1744
        // https://github.com/python/cpython/issues/119506
1745
0
        while (self->pending_bytes != NULL) {
1746
0
            if (_textiowrapper_writeflush(self) < 0) {
1747
0
                Py_DECREF(b);
1748
0
                return NULL;
1749
0
            }
1750
0
        }
1751
0
    }
1752
1753
0
    if (self->pending_bytes == NULL) {
1754
0
        assert(self->pending_bytes_count == 0);
1755
0
        self->pending_bytes = b;
1756
0
    }
1757
0
    else if (!PyList_CheckExact(self->pending_bytes)) {
1758
0
        PyObject *list = PyList_New(2);
1759
0
        if (list == NULL) {
1760
0
            Py_DECREF(b);
1761
0
            return NULL;
1762
0
        }
1763
        // Since Python 3.12, allocating GC object won't trigger GC and release
1764
        // GIL. See https://github.com/python/cpython/issues/97922
1765
0
        assert(!PyList_CheckExact(self->pending_bytes));
1766
0
        PyList_SET_ITEM(list, 0, self->pending_bytes);
1767
0
        PyList_SET_ITEM(list, 1, b);
1768
0
        self->pending_bytes = list;
1769
0
    }
1770
0
    else {
1771
0
        if (PyList_Append(self->pending_bytes, b) < 0) {
1772
0
            Py_DECREF(b);
1773
0
            return NULL;
1774
0
        }
1775
0
        Py_DECREF(b);
1776
0
    }
1777
1778
0
    self->pending_bytes_count += bytes_len;
1779
0
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1780
0
        text_needflush) {
1781
0
        if (_textiowrapper_writeflush(self) < 0)
1782
0
            return NULL;
1783
0
    }
1784
1785
0
    if (needflush) {
1786
0
        if (_PyFile_Flush(self->buffer) < 0) {
1787
0
            return NULL;
1788
0
        }
1789
0
    }
1790
1791
0
    if (self->snapshot != NULL) {
1792
0
        textiowrapper_set_decoded_chars(self, NULL);
1793
0
        Py_CLEAR(self->snapshot);
1794
0
    }
1795
1796
0
    if (self->decoder) {
1797
0
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1798
0
        if (ret == NULL)
1799
0
            return NULL;
1800
0
        Py_DECREF(ret);
1801
0
    }
1802
1803
0
    return PyLong_FromSsize_t(textlen);
1804
0
}
1805
1806
/* Steal a reference to chars and store it in the decoded_char buffer;
1807
 */
1808
static void
1809
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1810
0
{
1811
0
    Py_XSETREF(self->decoded_chars, chars);
1812
0
    self->decoded_chars_used = 0;
1813
0
}
1814
1815
static PyObject *
1816
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1817
0
{
1818
0
    PyObject *chars;
1819
0
    Py_ssize_t avail;
1820
1821
0
    if (self->decoded_chars == NULL)
1822
0
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1823
1824
0
    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1825
0
             - self->decoded_chars_used);
1826
1827
0
    assert(avail >= 0);
1828
1829
0
    if (n < 0 || n > avail)
1830
0
        n = avail;
1831
1832
0
    if (self->decoded_chars_used > 0 || n < avail) {
1833
0
        chars = PyUnicode_Substring(self->decoded_chars,
1834
0
                                    self->decoded_chars_used,
1835
0
                                    self->decoded_chars_used + n);
1836
0
        if (chars == NULL)
1837
0
            return NULL;
1838
0
    }
1839
0
    else {
1840
0
        chars = Py_NewRef(self->decoded_chars);
1841
0
    }
1842
1843
0
    self->decoded_chars_used += n;
1844
0
    return chars;
1845
0
}
1846
1847
/* Read and decode the next chunk of data from the BufferedReader.
1848
 */
1849
static int
1850
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1851
0
{
1852
0
    PyObject *dec_buffer = NULL;
1853
0
    PyObject *dec_flags = NULL;
1854
0
    PyObject *input_chunk = NULL;
1855
0
    Py_buffer input_chunk_buf;
1856
0
    PyObject *decoded_chars, *chunk_size;
1857
0
    Py_ssize_t nbytes, nchars;
1858
0
    int eof;
1859
1860
    /* The return value is True unless EOF was reached.  The decoded string is
1861
     * placed in self._decoded_chars (replacing its previous value).  The
1862
     * entire input chunk is sent to the decoder, though some of it may remain
1863
     * buffered in the decoder, yet to be converted.
1864
     */
1865
1866
0
    if (self->decoder == NULL) {
1867
0
        _unsupported(self->state, "not readable");
1868
0
        return -1;
1869
0
    }
1870
1871
0
    if (self->telling) {
1872
        /* To prepare for tell(), we need to snapshot a point in the file
1873
         * where the decoder's input buffer is empty.
1874
         */
1875
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1876
0
                                                     &_Py_ID(getstate));
1877
0
        if (state == NULL)
1878
0
            return -1;
1879
        /* Given this, we know there was a valid snapshot point
1880
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1881
         */
1882
0
        if (!PyTuple_Check(state)) {
1883
0
            PyErr_SetString(PyExc_TypeError,
1884
0
                            "illegal decoder state");
1885
0
            Py_DECREF(state);
1886
0
            return -1;
1887
0
        }
1888
0
        if (!PyArg_ParseTuple(state,
1889
0
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1890
0
        {
1891
0
            Py_DECREF(state);
1892
0
            return -1;
1893
0
        }
1894
1895
0
        if (!PyBytes_Check(dec_buffer)) {
1896
0
            PyErr_Format(PyExc_TypeError,
1897
0
                         "illegal decoder state: the first item should be a "
1898
0
                         "bytes object, not '%.200s'",
1899
0
                         Py_TYPE(dec_buffer)->tp_name);
1900
0
            Py_DECREF(state);
1901
0
            return -1;
1902
0
        }
1903
0
        Py_INCREF(dec_buffer);
1904
0
        Py_INCREF(dec_flags);
1905
0
        Py_DECREF(state);
1906
0
    }
1907
1908
    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1909
0
    if (size_hint > 0) {
1910
0
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1911
0
    }
1912
0
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1913
0
    if (chunk_size == NULL)
1914
0
        goto fail;
1915
1916
0
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
1917
0
        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1918
0
        chunk_size);
1919
0
    Py_DECREF(chunk_size);
1920
0
    if (input_chunk == NULL)
1921
0
        goto fail;
1922
1923
0
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1924
0
        PyErr_Format(PyExc_TypeError,
1925
0
                     "underlying %s() should have returned a bytes-like object, "
1926
0
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1927
0
                     Py_TYPE(input_chunk)->tp_name);
1928
0
        goto fail;
1929
0
    }
1930
1931
0
    nbytes = input_chunk_buf.len;
1932
0
    eof = (nbytes == 0);
1933
1934
0
    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1935
0
                                          input_chunk, eof);
1936
0
    PyBuffer_Release(&input_chunk_buf);
1937
0
    if (decoded_chars == NULL)
1938
0
        goto fail;
1939
1940
0
    textiowrapper_set_decoded_chars(self, decoded_chars);
1941
0
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
1942
0
    if (nchars > 0)
1943
0
        self->b2cratio = (double) nbytes / nchars;
1944
0
    else
1945
0
        self->b2cratio = 0.0;
1946
0
    if (nchars > 0)
1947
0
        eof = 0;
1948
1949
0
    if (self->telling) {
1950
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1951
         * next input to be decoded is dec_buffer + input_chunk.
1952
         */
1953
0
        PyObject *next_input = dec_buffer;
1954
0
        PyBytes_Concat(&next_input, input_chunk);
1955
0
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1956
0
        if (next_input == NULL) {
1957
0
            goto fail;
1958
0
        }
1959
0
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1960
0
        if (snapshot == NULL) {
1961
0
            dec_flags = NULL;
1962
0
            goto fail;
1963
0
        }
1964
0
        Py_XSETREF(self->snapshot, snapshot);
1965
0
    }
1966
0
    Py_DECREF(input_chunk);
1967
1968
0
    return (eof == 0);
1969
1970
0
  fail:
1971
0
    Py_XDECREF(dec_buffer);
1972
0
    Py_XDECREF(dec_flags);
1973
0
    Py_XDECREF(input_chunk);
1974
0
    return -1;
1975
0
}
1976
1977
/*[clinic input]
1978
@critical_section
1979
_io.TextIOWrapper.read
1980
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
1981
    /
1982
[clinic start generated code]*/
1983
1984
static PyObject *
1985
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1986
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
1987
0
{
1988
0
    PyObject *result = NULL, *chunks = NULL;
1989
1990
0
    CHECK_ATTACHED(self);
1991
0
    CHECK_CLOSED(self);
1992
1993
0
    if (self->decoder == NULL) {
1994
0
        return _unsupported(self->state, "not readable");
1995
0
    }
1996
1997
0
    if (_textiowrapper_writeflush(self) < 0)
1998
0
        return NULL;
1999
2000
0
    if (n < 0) {
2001
        /* Read everything */
2002
0
        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
2003
0
        PyObject *decoded;
2004
0
        if (bytes == NULL)
2005
0
            goto fail;
2006
2007
0
        if (bytes == Py_None){
2008
0
            Py_DECREF(bytes);
2009
0
            PyErr_SetString(PyExc_BlockingIOError, "Read returned None.");
2010
0
            return NULL;
2011
0
        }
2012
2013
0
        _PyIO_State *state = self->state;
2014
0
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
2015
0
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
2016
0
                                                          bytes, 1);
2017
0
        else
2018
0
            decoded = PyObject_CallMethodObjArgs(
2019
0
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2020
0
        Py_DECREF(bytes);
2021
0
        if (check_decoded(decoded) < 0)
2022
0
            goto fail;
2023
2024
0
        result = textiowrapper_get_decoded_chars(self, -1);
2025
2026
0
        if (result == NULL) {
2027
0
            Py_DECREF(decoded);
2028
0
            return NULL;
2029
0
        }
2030
2031
0
        PyUnicode_AppendAndDel(&result, decoded);
2032
0
        if (result == NULL)
2033
0
            goto fail;
2034
2035
0
        if (self->snapshot != NULL) {
2036
0
            textiowrapper_set_decoded_chars(self, NULL);
2037
0
            Py_CLEAR(self->snapshot);
2038
0
        }
2039
0
        return result;
2040
0
    }
2041
0
    else {
2042
0
        int res = 1;
2043
0
        Py_ssize_t remaining = n;
2044
2045
0
        result = textiowrapper_get_decoded_chars(self, n);
2046
0
        if (result == NULL)
2047
0
            goto fail;
2048
0
        remaining -= PyUnicode_GET_LENGTH(result);
2049
2050
        /* Keep reading chunks until we have n characters to return */
2051
0
        while (remaining > 0) {
2052
0
            res = textiowrapper_read_chunk(self, remaining);
2053
0
            if (res < 0) {
2054
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2055
                   when EINTR occurs so we needn't do it ourselves. */
2056
0
                if (_PyIO_trap_eintr()) {
2057
0
                    continue;
2058
0
                }
2059
0
                goto fail;
2060
0
            }
2061
0
            if (res == 0)  /* EOF */
2062
0
                break;
2063
0
            if (chunks == NULL) {
2064
0
                chunks = PyList_New(0);
2065
0
                if (chunks == NULL)
2066
0
                    goto fail;
2067
0
            }
2068
0
            if (PyUnicode_GET_LENGTH(result) > 0 &&
2069
0
                PyList_Append(chunks, result) < 0)
2070
0
                goto fail;
2071
0
            Py_DECREF(result);
2072
0
            result = textiowrapper_get_decoded_chars(self, remaining);
2073
0
            if (result == NULL)
2074
0
                goto fail;
2075
0
            remaining -= PyUnicode_GET_LENGTH(result);
2076
0
        }
2077
0
        if (chunks != NULL) {
2078
0
            if (result != NULL && PyList_Append(chunks, result) < 0)
2079
0
                goto fail;
2080
0
            _Py_DECLARE_STR(empty, "");
2081
0
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2082
0
            if (result == NULL)
2083
0
                goto fail;
2084
0
            Py_CLEAR(chunks);
2085
0
        }
2086
0
        return result;
2087
0
    }
2088
0
  fail:
2089
0
    Py_XDECREF(result);
2090
0
    Py_XDECREF(chunks);
2091
0
    return NULL;
2092
0
}
2093
2094
2095
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2096
   that is to the NUL character. Otherwise the function will produce
2097
   incorrect results. */
2098
static const char *
2099
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2100
0
{
2101
0
    if (kind == PyUnicode_1BYTE_KIND) {
2102
0
        assert(ch < 256);
2103
0
        return (char *) memchr((const void *) s, (char) ch, end - s);
2104
0
    }
2105
0
    for (;;) {
2106
0
        while (PyUnicode_READ(kind, s, 0) > ch)
2107
0
            s += kind;
2108
0
        if (PyUnicode_READ(kind, s, 0) == ch)
2109
0
            return s;
2110
0
        if (s == end)
2111
0
            return NULL;
2112
0
        s += kind;
2113
0
    }
2114
0
}
2115
2116
Py_ssize_t
2117
_PyIO_find_line_ending(
2118
    int translated, int universal, PyObject *readnl,
2119
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2120
20.3M
{
2121
20.3M
    Py_ssize_t len = (end - start)/kind;
2122
2123
20.3M
    if (translated) {
2124
        /* Newlines are already translated, only search for \n */
2125
0
        const char *pos = find_control_char(kind, start, end, '\n');
2126
0
        if (pos != NULL)
2127
0
            return (pos - start)/kind + 1;
2128
0
        else {
2129
0
            *consumed = len;
2130
0
            return -1;
2131
0
        }
2132
0
    }
2133
20.3M
    else if (universal) {
2134
        /* Universal newline search. Find any of \r, \r\n, \n
2135
         * The decoder ensures that \r\n are not split in two pieces
2136
         */
2137
20.3M
        const char *s = start;
2138
88.5M
        for (;;) {
2139
88.5M
            Py_UCS4 ch;
2140
            /* Fast path for non-control chars. The loop always ends
2141
               since the Unicode string is NUL-terminated. */
2142
260M
            while (PyUnicode_READ(kind, s, 0) > '\r')
2143
171M
                s += kind;
2144
88.5M
            if (s >= end) {
2145
33.3k
                *consumed = len;
2146
33.3k
                return -1;
2147
33.3k
            }
2148
88.5M
            ch = PyUnicode_READ(kind, s, 0);
2149
88.5M
            s += kind;
2150
88.5M
            if (ch == '\n')
2151
6.23M
                return (s - start)/kind;
2152
82.3M
            if (ch == '\r') {
2153
14.0M
                if (PyUnicode_READ(kind, s, 0) == '\n')
2154
439k
                    return (s - start)/kind + 1;
2155
13.6M
                else
2156
13.6M
                    return (s - start)/kind;
2157
14.0M
            }
2158
82.3M
        }
2159
20.3M
    }
2160
0
    else {
2161
        /* Non-universal mode. */
2162
0
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2163
0
        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2164
        /* Assume that readnl is an ASCII character. */
2165
0
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2166
0
        if (readnl_len == 1) {
2167
0
            const char *pos = find_control_char(kind, start, end, nl[0]);
2168
0
            if (pos != NULL)
2169
0
                return (pos - start)/kind + 1;
2170
0
            *consumed = len;
2171
0
            return -1;
2172
0
        }
2173
0
        else {
2174
0
            const char *s = start;
2175
0
            const char *e = end - (readnl_len - 1)*kind;
2176
0
            const char *pos;
2177
0
            if (e < s)
2178
0
                e = s;
2179
0
            while (s < e) {
2180
0
                Py_ssize_t i;
2181
0
                const char *pos = find_control_char(kind, s, end, nl[0]);
2182
0
                if (pos == NULL || pos >= e)
2183
0
                    break;
2184
0
                for (i = 1; i < readnl_len; i++) {
2185
0
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2186
0
                        break;
2187
0
                }
2188
0
                if (i == readnl_len)
2189
0
                    return (pos - start)/kind + readnl_len;
2190
0
                s = pos + kind;
2191
0
            }
2192
0
            pos = find_control_char(kind, e, end, nl[0]);
2193
0
            if (pos == NULL)
2194
0
                *consumed = len;
2195
0
            else
2196
0
                *consumed = (pos - start)/kind;
2197
0
            return -1;
2198
0
        }
2199
0
    }
2200
20.3M
}
2201
2202
static PyObject *
2203
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2204
0
{
2205
0
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2206
0
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
2207
0
    int res;
2208
2209
0
    CHECK_CLOSED(self);
2210
2211
0
    if (_textiowrapper_writeflush(self) < 0)
2212
0
        return NULL;
2213
2214
0
    chunked = 0;
2215
2216
0
    while (1) {
2217
0
        const char *ptr;
2218
0
        Py_ssize_t line_len;
2219
0
        int kind;
2220
0
        Py_ssize_t consumed = 0;
2221
2222
        /* First, get some data if necessary */
2223
0
        res = 1;
2224
0
        while (!self->decoded_chars ||
2225
0
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2226
0
            res = textiowrapper_read_chunk(self, 0);
2227
0
            if (res < 0) {
2228
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2229
                   when EINTR occurs so we needn't do it ourselves. */
2230
0
                if (_PyIO_trap_eintr()) {
2231
0
                    continue;
2232
0
                }
2233
0
                goto error;
2234
0
            }
2235
0
            if (res == 0)
2236
0
                break;
2237
0
        }
2238
0
        if (res == 0) {
2239
            /* end of file */
2240
0
            textiowrapper_set_decoded_chars(self, NULL);
2241
0
            Py_CLEAR(self->snapshot);
2242
0
            start = endpos = offset_to_buffer = 0;
2243
0
            break;
2244
0
        }
2245
2246
0
        if (remaining == NULL) {
2247
0
            line = Py_NewRef(self->decoded_chars);
2248
0
            start = self->decoded_chars_used;
2249
0
            offset_to_buffer = 0;
2250
0
        }
2251
0
        else {
2252
0
            assert(self->decoded_chars_used == 0);
2253
0
            line = PyUnicode_Concat(remaining, self->decoded_chars);
2254
0
            start = 0;
2255
0
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2256
0
            Py_CLEAR(remaining);
2257
0
            if (line == NULL)
2258
0
                goto error;
2259
0
        }
2260
2261
0
        ptr = PyUnicode_DATA(line);
2262
0
        line_len = PyUnicode_GET_LENGTH(line);
2263
0
        kind = PyUnicode_KIND(line);
2264
2265
0
        endpos = _PyIO_find_line_ending(
2266
0
            self->readtranslate, self->readuniversal, self->readnl,
2267
0
            kind,
2268
0
            ptr + kind * start,
2269
0
            ptr + kind * line_len,
2270
0
            &consumed);
2271
0
        if (endpos >= 0) {
2272
0
            endpos += start;
2273
0
            if (limit >= 0 && (endpos - start) + chunked >= limit)
2274
0
                endpos = start + limit - chunked;
2275
0
            break;
2276
0
        }
2277
2278
        /* We can put aside up to `endpos` */
2279
0
        endpos = consumed + start;
2280
0
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
2281
            /* Didn't find line ending, but reached length limit */
2282
0
            endpos = start + limit - chunked;
2283
0
            break;
2284
0
        }
2285
2286
0
        if (endpos > start) {
2287
            /* No line ending seen yet - put aside current data */
2288
0
            PyObject *s;
2289
0
            if (chunks == NULL) {
2290
0
                chunks = PyList_New(0);
2291
0
                if (chunks == NULL)
2292
0
                    goto error;
2293
0
            }
2294
0
            s = PyUnicode_Substring(line, start, endpos);
2295
0
            if (s == NULL)
2296
0
                goto error;
2297
0
            if (PyList_Append(chunks, s) < 0) {
2298
0
                Py_DECREF(s);
2299
0
                goto error;
2300
0
            }
2301
0
            chunked += PyUnicode_GET_LENGTH(s);
2302
0
            Py_DECREF(s);
2303
0
        }
2304
        /* There may be some remaining bytes we'll have to prepend to the
2305
           next chunk of data */
2306
0
        if (endpos < line_len) {
2307
0
            remaining = PyUnicode_Substring(line, endpos, line_len);
2308
0
            if (remaining == NULL)
2309
0
                goto error;
2310
0
        }
2311
0
        Py_CLEAR(line);
2312
        /* We have consumed the buffer */
2313
0
        textiowrapper_set_decoded_chars(self, NULL);
2314
0
    }
2315
2316
0
    if (line != NULL) {
2317
        /* Our line ends in the current buffer */
2318
0
        self->decoded_chars_used = endpos - offset_to_buffer;
2319
0
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2320
0
            PyObject *s = PyUnicode_Substring(line, start, endpos);
2321
0
            Py_CLEAR(line);
2322
0
            if (s == NULL)
2323
0
                goto error;
2324
0
            line = s;
2325
0
        }
2326
0
    }
2327
0
    if (remaining != NULL) {
2328
0
        if (chunks == NULL) {
2329
0
            chunks = PyList_New(0);
2330
0
            if (chunks == NULL)
2331
0
                goto error;
2332
0
        }
2333
0
        if (PyList_Append(chunks, remaining) < 0)
2334
0
            goto error;
2335
0
        Py_CLEAR(remaining);
2336
0
    }
2337
0
    if (chunks != NULL) {
2338
0
        if (line != NULL) {
2339
0
            if (PyList_Append(chunks, line) < 0)
2340
0
                goto error;
2341
0
            Py_DECREF(line);
2342
0
        }
2343
0
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
2344
0
        if (line == NULL)
2345
0
            goto error;
2346
0
        Py_CLEAR(chunks);
2347
0
    }
2348
0
    if (line == NULL) {
2349
0
        line = &_Py_STR(empty);
2350
0
    }
2351
2352
0
    return line;
2353
2354
0
  error:
2355
0
    Py_XDECREF(chunks);
2356
0
    Py_XDECREF(remaining);
2357
0
    Py_XDECREF(line);
2358
0
    return NULL;
2359
0
}
2360
2361
/*[clinic input]
2362
@critical_section
2363
_io.TextIOWrapper.readline
2364
    size: Py_ssize_t = -1
2365
    /
2366
[clinic start generated code]*/
2367
2368
static PyObject *
2369
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2370
/*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2371
0
{
2372
0
    CHECK_ATTACHED(self);
2373
0
    return _textiowrapper_readline(self, size);
2374
0
}
2375
2376
/* Seek and Tell */
2377
2378
/* Unpacked form of the opaque integer exchanged by tell() and seek().
   The field order is relied upon by positional initializers, so keep it. */
typedef struct {
2379
    Py_off_t start_pos;     /* position seek() moves the underlying buffer to */
2380
    int dec_flags;          /* flags handed to decoder.setstate() */
2381
    int bytes_to_feed;      /* size passed to buffer.read() when replaying */
2382
    int chars_to_skip;      /* decoded characters to skip after replaying */
2383
    char need_eof;          /* nonzero: replay decode() is called with final=True */
2384
} cookie_type;
2385
2386
/*
2387
   To speed up cookie packing/unpacking, we store the fields in a temporary
2388
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
2389
   (respectively).  The following macros define at which offsets in that
2390
   intermediary buffer the various cookie_type fields will be stored.
2391
 */
2392
2393
/* Packed size of a cookie: one Py_off_t, three ints and one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2394
2395
#if PY_BIG_ENDIAN
2396
/* We want the least significant byte of start_pos to also be the least
2397
   significant byte of the cookie, which means that in big-endian mode we
2398
   must copy the fields in reverse order. */
2399
2400
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2401
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2402
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2403
# define OFF_CHARS_TO_SKIP  (sizeof(char))
2404
# define OFF_NEED_EOF       0
2405
2406
#else
2407
/* Little-endian mode: the least significant byte of start_pos will
2408
   naturally end up the least significant byte of the cookie. */
2409
2410
0
# define OFF_START_POS      0
2411
0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2412
0
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2413
0
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2414
0
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2415
2416
#endif
2417
2418
static int
2419
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2420
0
{
2421
0
    unsigned char buffer[COOKIE_BUF_LEN];
2422
0
    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2423
0
    if (cookieLong == NULL)
2424
0
        return -1;
2425
2426
0
    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2427
0
                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
2428
0
        Py_DECREF(cookieLong);
2429
0
        return -1;
2430
0
    }
2431
0
    Py_DECREF(cookieLong);
2432
2433
0
    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2434
0
    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2435
0
    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2436
0
    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2437
0
    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2438
2439
0
    return 0;
2440
0
}
2441
2442
static PyObject *
2443
textiowrapper_build_cookie(cookie_type *cookie)
2444
0
{
2445
0
    unsigned char buffer[COOKIE_BUF_LEN];
2446
2447
0
    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2448
0
    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2449
0
    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2450
0
    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2451
0
    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2452
2453
0
    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2454
0
                                 PY_LITTLE_ENDIAN, 0);
2455
0
}
2456
2457
static int
2458
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2459
0
{
2460
0
    PyObject *res;
2461
    /* When seeking to the start of the stream, we call decoder.reset()
2462
       rather than decoder.getstate().
2463
       This is for a few decoders such as utf-16 for which the state value
2464
       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2465
       utf-16, that we are expecting a BOM).
2466
    */
2467
0
    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2468
0
        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2469
0
    }
2470
0
    else {
2471
0
        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2472
0
                                   "((yi))", "", cookie->dec_flags);
2473
0
    }
2474
0
    if (res == NULL) {
2475
0
        return -1;
2476
0
    }
2477
0
    Py_DECREF(res);
2478
0
    return 0;
2479
0
}
2480
2481
static int
2482
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2483
0
{
2484
0
    PyObject *res;
2485
0
    if (start_of_stream) {
2486
0
        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2487
0
        self->encoding_start_of_stream = 1;
2488
0
    }
2489
0
    else {
2490
0
        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2491
0
                                        _PyLong_GetZero());
2492
0
        self->encoding_start_of_stream = 0;
2493
0
    }
2494
0
    if (res == NULL)
2495
0
        return -1;
2496
0
    Py_DECREF(res);
2497
0
    return 0;
2498
0
}
2499
2500
static int
2501
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2502
0
{
2503
    /* Same as _textiowrapper_decoder_setstate() above. */
2504
0
    return _textiowrapper_encoder_reset(
2505
0
        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2506
0
}
2507
2508
/*[clinic input]
2509
@critical_section
2510
_io.TextIOWrapper.seek
2511
    cookie as cookieObj: object
2512
      Zero or an opaque number returned by tell().
2513
    whence: int(c_default='0') = os.SEEK_SET
2514
      The relative position to seek from.
2515
    /
2516
2517
Set the stream position, and return the new stream position.
2518
2519
Four operations are supported, given by the following argument
2520
combinations:
2521
2522
- seek(0, SEEK_SET): Rewind to the start of the stream.
2523
- seek(cookie, SEEK_SET): Restore a previous position;
2524
  'cookie' must be a number returned by tell().
2525
- seek(0, SEEK_END): Fast-forward to the end of the stream.
2526
- seek(0, SEEK_CUR): Leave the current stream position unchanged.
2527
2528
Any other argument combinations are invalid,
2529
and may raise exceptions.
2530
[clinic start generated code]*/
2531
2532
static PyObject *
2533
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2534
/*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2535
0
{
2536
0
    PyObject *posobj;
2537
0
    cookie_type cookie;
2538
0
    PyObject *res;
2539
0
    int cmp;
2540
0
    PyObject *snapshot;
2541
2542
0
    CHECK_ATTACHED(self);
2543
0
    CHECK_CLOSED(self);
2544
2545
0
    Py_INCREF(cookieObj);
2546
2547
0
    if (!self->seekable) {
2548
0
        _unsupported(self->state, "underlying stream is not seekable");
2549
0
        goto fail;
2550
0
    }
2551
2552
0
    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2553
2554
0
    switch (whence) {
2555
0
    case SEEK_CUR:
2556
        /* seek relative to current position */
2557
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2558
0
        if (cmp < 0)
2559
0
            goto fail;
2560
2561
0
        if (cmp == 0) {
2562
0
            _unsupported(self->state, "can't do nonzero cur-relative seeks");
2563
0
            goto fail;
2564
0
        }
2565
2566
        /* Seeking to the current position should attempt to
2567
         * sync the underlying buffer with the current position.
2568
         */
2569
0
        Py_DECREF(cookieObj);
2570
0
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2571
0
        if (cookieObj == NULL)
2572
0
            goto fail;
2573
0
        break;
2574
2575
0
    case SEEK_END:
2576
        /* seek relative to end of file */
2577
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2578
0
        if (cmp < 0)
2579
0
            goto fail;
2580
2581
0
        if (cmp == 0) {
2582
0
            _unsupported(self->state, "can't do nonzero end-relative seeks");
2583
0
            goto fail;
2584
0
        }
2585
2586
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
2587
0
            goto fail;
2588
0
        }
2589
2590
0
        textiowrapper_set_decoded_chars(self, NULL);
2591
0
        Py_CLEAR(self->snapshot);
2592
0
        if (self->decoder) {
2593
0
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2594
0
            if (res == NULL)
2595
0
                goto fail;
2596
0
            Py_DECREF(res);
2597
0
        }
2598
2599
0
        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2600
0
        Py_CLEAR(cookieObj);
2601
0
        if (res == NULL)
2602
0
            goto fail;
2603
0
        if (self->encoder) {
2604
            /* If seek() == 0, we are at the start of stream, otherwise not */
2605
0
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2606
0
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2607
0
                Py_DECREF(res);
2608
0
                goto fail;
2609
0
            }
2610
0
        }
2611
0
        return res;
2612
2613
0
    case SEEK_SET:
2614
0
        break;
2615
2616
0
    default:
2617
0
        PyErr_Format(PyExc_ValueError,
2618
0
                     "invalid whence (%d, should be %d, %d or %d)", whence,
2619
0
                     SEEK_SET, SEEK_CUR, SEEK_END);
2620
0
        goto fail;
2621
0
    }
2622
2623
0
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2624
0
    if (cmp < 0)
2625
0
        goto fail;
2626
2627
0
    if (cmp == 1) {
2628
0
        PyErr_Format(PyExc_ValueError,
2629
0
                     "negative seek position %R", cookieObj);
2630
0
        goto fail;
2631
0
    }
2632
2633
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2634
0
        goto fail;
2635
0
    }
2636
2637
    /* The strategy of seek() is to go back to the safe start point
2638
     * and replay the effect of read(chars_to_skip) from there.
2639
     */
2640
0
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2641
0
        goto fail;
2642
2643
    /* Seek back to the safe start point. */
2644
0
    posobj = PyLong_FromOff_t(cookie.start_pos);
2645
0
    if (posobj == NULL)
2646
0
        goto fail;
2647
0
    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2648
0
    Py_DECREF(posobj);
2649
0
    if (res == NULL)
2650
0
        goto fail;
2651
0
    Py_DECREF(res);
2652
2653
0
    textiowrapper_set_decoded_chars(self, NULL);
2654
0
    Py_CLEAR(self->snapshot);
2655
2656
    /* Restore the decoder to its state from the safe start point. */
2657
0
    if (self->decoder) {
2658
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2659
0
            goto fail;
2660
0
    }
2661
2662
0
    if (cookie.chars_to_skip) {
2663
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2664
0
        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2665
0
                                                     "i", cookie.bytes_to_feed);
2666
0
        PyObject *decoded;
2667
2668
0
        if (input_chunk == NULL)
2669
0
            goto fail;
2670
2671
0
        if (!PyBytes_Check(input_chunk)) {
2672
0
            PyErr_Format(PyExc_TypeError,
2673
0
                         "underlying read() should have returned a bytes "
2674
0
                         "object, not '%.200s'",
2675
0
                         Py_TYPE(input_chunk)->tp_name);
2676
0
            Py_DECREF(input_chunk);
2677
0
            goto fail;
2678
0
        }
2679
2680
0
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2681
0
        if (snapshot == NULL) {
2682
0
            goto fail;
2683
0
        }
2684
0
        Py_XSETREF(self->snapshot, snapshot);
2685
2686
0
        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2687
0
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2688
2689
0
        if (check_decoded(decoded) < 0)
2690
0
            goto fail;
2691
2692
0
        textiowrapper_set_decoded_chars(self, decoded);
2693
2694
        /* Skip chars_to_skip of the decoded characters. */
2695
0
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2696
0
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2697
0
            goto fail;
2698
0
        }
2699
0
        self->decoded_chars_used = cookie.chars_to_skip;
2700
0
    }
2701
0
    else {
2702
0
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2703
0
        if (snapshot == NULL)
2704
0
            goto fail;
2705
0
        Py_XSETREF(self->snapshot, snapshot);
2706
0
    }
2707
2708
    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2709
0
    if (self->encoder) {
2710
0
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2711
0
            goto fail;
2712
0
    }
2713
0
    return cookieObj;
2714
0
  fail:
2715
0
    Py_XDECREF(cookieObj);
2716
0
    return NULL;
2717
2718
0
}
2719
2720
/*[clinic input]
2721
@critical_section
2722
_io.TextIOWrapper.tell
2723
2724
Return the stream position as an opaque number.
2725
2726
The return value of tell() can be given as input to seek(), to restore a
2727
previous stream position.
2728
[clinic start generated code]*/
2729
2730
static PyObject *
2731
_io_TextIOWrapper_tell_impl(textio *self)
2732
/*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2733
0
{
2734
0
    PyObject *res;
2735
0
    PyObject *posobj = NULL;
2736
0
    cookie_type cookie = {0,0,0,0,0};
2737
0
    PyObject *next_input;
2738
0
    Py_ssize_t chars_to_skip, chars_decoded;
2739
0
    Py_ssize_t skip_bytes, skip_back;
2740
0
    PyObject *saved_state = NULL;
2741
0
    const char *input, *input_end;
2742
0
    Py_ssize_t dec_buffer_len;
2743
0
    int dec_flags;
2744
2745
0
    CHECK_ATTACHED(self);
2746
0
    CHECK_CLOSED(self);
2747
2748
0
    if (!self->seekable) {
2749
0
        _unsupported(self->state, "underlying stream is not seekable");
2750
0
        goto fail;
2751
0
    }
2752
0
    if (!self->telling) {
2753
0
        PyErr_SetString(PyExc_OSError,
2754
0
                        "telling position disabled by next() call");
2755
0
        goto fail;
2756
0
    }
2757
2758
0
    if (_textiowrapper_writeflush(self) < 0)
2759
0
        return NULL;
2760
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2761
0
        goto fail;
2762
0
    }
2763
2764
0
    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2765
0
    if (posobj == NULL)
2766
0
        goto fail;
2767
2768
0
    if (self->decoder == NULL || self->snapshot == NULL) {
2769
0
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2770
0
        return posobj;
2771
0
    }
2772
2773
#if defined(HAVE_LARGEFILE_SUPPORT)
2774
    cookie.start_pos = PyLong_AsLongLong(posobj);
2775
#else
2776
0
    cookie.start_pos = PyLong_AsLong(posobj);
2777
0
#endif
2778
0
    Py_DECREF(posobj);
2779
0
    if (PyErr_Occurred())
2780
0
        goto fail;
2781
2782
    /* Skip backward to the snapshot point (see _read_chunk). */
2783
0
    assert(PyTuple_Check(self->snapshot));
2784
0
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2785
0
        goto fail;
2786
2787
0
    assert (PyBytes_Check(next_input));
2788
2789
0
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2790
2791
    /* How many decoded characters have been used up since the snapshot? */
2792
0
    if (self->decoded_chars_used == 0)  {
2793
        /* We haven't moved from the snapshot point. */
2794
0
        return textiowrapper_build_cookie(&cookie);
2795
0
    }
2796
2797
0
    chars_to_skip = self->decoded_chars_used;
2798
2799
    /* Decoder state will be restored at the end */
2800
0
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2801
0
                                             &_Py_ID(getstate));
2802
0
    if (saved_state == NULL)
2803
0
        goto fail;
2804
2805
0
#define DECODER_GETSTATE() do { \
2806
0
        PyObject *dec_buffer; \
2807
0
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2808
0
            &_Py_ID(getstate)); \
2809
0
        if (_state == NULL) \
2810
0
            goto fail; \
2811
0
        if (!PyTuple_Check(_state)) { \
2812
0
            PyErr_SetString(PyExc_TypeError, \
2813
0
                            "illegal decoder state"); \
2814
0
            Py_DECREF(_state); \
2815
0
            goto fail; \
2816
0
        } \
2817
0
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2818
0
                              &dec_buffer, &dec_flags)) \
2819
0
        { \
2820
0
            Py_DECREF(_state); \
2821
0
            goto fail; \
2822
0
        } \
2823
0
        if (!PyBytes_Check(dec_buffer)) { \
2824
0
            PyErr_Format(PyExc_TypeError, \
2825
0
                         "illegal decoder state: the first item should be a " \
2826
0
                         "bytes object, not '%.200s'", \
2827
0
                         Py_TYPE(dec_buffer)->tp_name); \
2828
0
            Py_DECREF(_state); \
2829
0
            goto fail; \
2830
0
        } \
2831
0
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2832
0
        Py_DECREF(_state); \
2833
0
    } while (0)
2834
2835
0
#define DECODER_DECODE(start, len, res) do { \
2836
0
        PyObject *_decoded = _PyObject_CallMethod( \
2837
0
            self->decoder, &_Py_ID(decode), "y#", start, len); \
2838
0
        if (check_decoded(_decoded) < 0) \
2839
0
            goto fail; \
2840
0
        res = PyUnicode_GET_LENGTH(_decoded); \
2841
0
        Py_DECREF(_decoded); \
2842
0
    } while (0)
2843
2844
    /* Fast search for an acceptable start point, close to our
2845
       current pos */
2846
0
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2847
0
    skip_back = 1;
2848
0
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
2849
0
    input = PyBytes_AS_STRING(next_input);
2850
0
    while (skip_bytes > 0) {
2851
        /* Decode up to temptative start point */
2852
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2853
0
            goto fail;
2854
0
        DECODER_DECODE(input, skip_bytes, chars_decoded);
2855
0
        if (chars_decoded <= chars_to_skip) {
2856
0
            DECODER_GETSTATE();
2857
0
            if (dec_buffer_len == 0) {
2858
                /* Before pos and no bytes buffered in decoder => OK */
2859
0
                cookie.dec_flags = dec_flags;
2860
0
                chars_to_skip -= chars_decoded;
2861
0
                break;
2862
0
            }
2863
            /* Skip back by buffered amount and reset heuristic */
2864
0
            skip_bytes -= dec_buffer_len;
2865
0
            skip_back = 1;
2866
0
        }
2867
0
        else {
2868
            /* We're too far ahead, skip back a bit */
2869
0
            skip_bytes -= skip_back;
2870
0
            skip_back *= 2;
2871
0
        }
2872
0
    }
2873
0
    if (skip_bytes <= 0) {
2874
0
        skip_bytes = 0;
2875
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2876
0
            goto fail;
2877
0
    }
2878
2879
    /* Note our initial start point. */
2880
0
    cookie.start_pos += skip_bytes;
2881
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2882
0
    if (chars_to_skip == 0)
2883
0
        goto finally;
2884
2885
    /* We should be close to the desired position.  Now feed the decoder one
2886
     * byte at a time until we reach the `chars_to_skip` target.
2887
     * As we go, note the nearest "safe start point" before the current
2888
     * location (a point where the decoder has nothing buffered, so seek()
2889
     * can safely start from there and advance to this location).
2890
     */
2891
0
    chars_decoded = 0;
2892
0
    input = PyBytes_AS_STRING(next_input);
2893
0
    input_end = input + PyBytes_GET_SIZE(next_input);
2894
0
    input += skip_bytes;
2895
0
    while (input < input_end) {
2896
0
        Py_ssize_t n;
2897
2898
0
        DECODER_DECODE(input, (Py_ssize_t)1, n);
2899
        /* We got n chars for 1 byte */
2900
0
        chars_decoded += n;
2901
0
        cookie.bytes_to_feed += 1;
2902
0
        DECODER_GETSTATE();
2903
2904
0
        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2905
            /* Decoder buffer is empty, so this is a safe start point. */
2906
0
            cookie.start_pos += cookie.bytes_to_feed;
2907
0
            chars_to_skip -= chars_decoded;
2908
0
            cookie.dec_flags = dec_flags;
2909
0
            cookie.bytes_to_feed = 0;
2910
0
            chars_decoded = 0;
2911
0
        }
2912
0
        if (chars_decoded >= chars_to_skip)
2913
0
            break;
2914
0
        input++;
2915
0
    }
2916
0
    if (input == input_end) {
2917
        /* We didn't get enough decoded data; signal EOF to get more. */
2918
0
        PyObject *decoded = _PyObject_CallMethod(
2919
0
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2920
0
        if (check_decoded(decoded) < 0)
2921
0
            goto fail;
2922
0
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
2923
0
        Py_DECREF(decoded);
2924
0
        cookie.need_eof = 1;
2925
2926
0
        if (chars_decoded < chars_to_skip) {
2927
0
            PyErr_SetString(PyExc_OSError,
2928
0
                            "can't reconstruct logical file position");
2929
0
            goto fail;
2930
0
        }
2931
0
    }
2932
2933
0
finally:
2934
0
    res = PyObject_CallMethodOneArg(
2935
0
            self->decoder, &_Py_ID(setstate), saved_state);
2936
0
    Py_DECREF(saved_state);
2937
0
    if (res == NULL)
2938
0
        return NULL;
2939
0
    Py_DECREF(res);
2940
2941
    /* The returned cookie corresponds to the last safe start point. */
2942
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2943
0
    return textiowrapper_build_cookie(&cookie);
2944
2945
0
fail:
2946
0
    if (saved_state) {
2947
0
        PyObject *exc = PyErr_GetRaisedException();
2948
0
        res = PyObject_CallMethodOneArg(
2949
0
                self->decoder, &_Py_ID(setstate), saved_state);
2950
0
        _PyErr_ChainExceptions1(exc);
2951
0
        Py_DECREF(saved_state);
2952
0
        Py_XDECREF(res);
2953
0
    }
2954
0
    return NULL;
2955
0
}
2956
2957
/*[clinic input]
2958
@critical_section
2959
_io.TextIOWrapper.truncate
2960
    pos: object = None
2961
    /
2962
[clinic start generated code]*/
2963
2964
static PyObject *
2965
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2966
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2967
0
{
2968
0
    CHECK_ATTACHED(self)
2969
2970
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2971
0
        return NULL;
2972
0
    }
2973
2974
0
    return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2975
0
}
2976
2977
/* tp_repr slot: build "<TypeName[ name=...][ mode=...] encoding=...>".
 *
 * The "name" and "mode" parts are included only when the corresponding
 * attribute exists.  A ValueError raised while looking up "name" is
 * ignored.  A recursive repr() of the same object raises RuntimeError.
 * Returns a new str, or NULL with an exception set.
 */
static PyObject *
textiowrapper_repr(PyObject *op)
{
    PyObject *attr, *piece, *text;
    PyObject *result = NULL;
    int entered;
    textio *self = textio_CAST(op);
    const char *type_name = Py_TYPE(self)->tp_name;

    CHECK_INITIALIZED(self);

    text = PyUnicode_FromFormat("<%.100s", type_name);
    if (text == NULL) {
        return NULL;
    }

    entered = Py_ReprEnter(op);
    if (entered > 0) {
        PyErr_Format(PyExc_RuntimeError,
                     "reentrant call inside %.100s.__repr__",
                     type_name);
        goto done;
    }
    if (entered < 0) {
        goto done;
    }

    /* Optional " name=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &attr) < 0) {
        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
            goto done;
        }
        /* Ignore ValueError raised if the underlying stream was detached */
        PyErr_Clear();
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" name=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto done;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto done;
        }
    }

    /* Optional " mode=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &attr) < 0) {
        goto done;
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" mode=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto done;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto done;
        }
    }

    result = PyUnicode_FromFormat("%U encoding=%R>",
                                  text, self->encoding);

  done:
    /* Shared exit: `result` is still NULL on every error path. */
    Py_XDECREF(text);
    if (entered == 0) {
        Py_ReprLeave(op);
    }
    return result;
}
3043
3044
3045
/* Inquiries */
3046
3047
/*[clinic input]
3048
@critical_section
3049
_io.TextIOWrapper.fileno
3050
[clinic start generated code]*/
3051
3052
static PyObject *
3053
_io_TextIOWrapper_fileno_impl(textio *self)
3054
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3055
0
{
3056
0
    CHECK_ATTACHED(self);
3057
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3058
0
}
3059
3060
/*[clinic input]
3061
@critical_section
3062
_io.TextIOWrapper.seekable
3063
[clinic start generated code]*/
3064
3065
static PyObject *
3066
_io_TextIOWrapper_seekable_impl(textio *self)
3067
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3068
0
{
3069
0
    CHECK_ATTACHED(self);
3070
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3071
0
}
3072
3073
/*[clinic input]
3074
@critical_section
3075
_io.TextIOWrapper.readable
3076
[clinic start generated code]*/
3077
3078
static PyObject *
3079
_io_TextIOWrapper_readable_impl(textio *self)
3080
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3081
0
{
3082
0
    CHECK_ATTACHED(self);
3083
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3084
0
}
3085
3086
/*[clinic input]
3087
@critical_section
3088
_io.TextIOWrapper.writable
3089
[clinic start generated code]*/
3090
3091
static PyObject *
3092
_io_TextIOWrapper_writable_impl(textio *self)
3093
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3094
0
{
3095
0
    CHECK_ATTACHED(self);
3096
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3097
0
}
3098
3099
/*[clinic input]
3100
@critical_section
3101
_io.TextIOWrapper.isatty
3102
[clinic start generated code]*/
3103
3104
static PyObject *
3105
_io_TextIOWrapper_isatty_impl(textio *self)
3106
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3107
0
{
3108
0
    CHECK_ATTACHED(self);
3109
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3110
0
}
3111
3112
/*[clinic input]
3113
@critical_section
3114
_io.TextIOWrapper.flush
3115
[clinic start generated code]*/
3116
3117
static PyObject *
3118
_io_TextIOWrapper_flush_impl(textio *self)
3119
/*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3120
0
{
3121
0
    CHECK_ATTACHED(self);
3122
0
    CHECK_CLOSED(self);
3123
0
    self->telling = self->seekable;
3124
0
    if (_textiowrapper_writeflush(self) < 0)
3125
0
        return NULL;
3126
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3127
0
}
3128
3129
/*[clinic input]
3130
@critical_section
3131
_io.TextIOWrapper.close
3132
[clinic start generated code]*/
3133
3134
static PyObject *
3135
_io_TextIOWrapper_close_impl(textio *self)
3136
/*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3137
0
{
3138
0
    PyObject *res;
3139
0
    int r;
3140
0
    CHECK_ATTACHED(self);
3141
3142
0
    res = _io_TextIOWrapper_closed_get_impl(self);
3143
0
    if (res == NULL)
3144
0
        return NULL;
3145
0
    r = PyObject_IsTrue(res);
3146
0
    Py_DECREF(res);
3147
0
    if (r < 0)
3148
0
        return NULL;
3149
3150
0
    if (r > 0) {
3151
0
        Py_RETURN_NONE; /* stream already closed */
3152
0
    }
3153
0
    else {
3154
0
        PyObject *exc = NULL;
3155
0
        if (self->finalizing) {
3156
0
            res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3157
0
                                            (PyObject *)self);
3158
0
            if (res) {
3159
0
                Py_DECREF(res);
3160
0
            }
3161
0
            else {
3162
0
                PyErr_Clear();
3163
0
            }
3164
0
        }
3165
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
3166
0
            exc = PyErr_GetRaisedException();
3167
0
        }
3168
3169
0
        res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3170
0
        if (exc != NULL) {
3171
0
            _PyErr_ChainExceptions1(exc);
3172
0
            Py_CLEAR(res);
3173
0
        }
3174
0
        return res;
3175
0
    }
3176
0
}
3177
3178
/* Produce the next line for iteration; the caller must hold the object's
 * critical section.
 *
 * Clears `telling` while iterating.  An empty line ends the iteration: the
 * snapshot is dropped, `telling` is set back to `seekable`, and NULL is
 * returned without setting an exception here.
 */
static PyObject *
textiowrapper_iternext_lock_held(PyObject *op)
{
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
    PyObject *next_line;
    textio *self = textio_CAST(op);

    CHECK_ATTACHED(self);

    self->telling = 0;
    if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
        /* Not the exact type: go through the readline() method and
           validate what it hands back. */
        next_line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (next_line != NULL && !PyUnicode_Check(next_line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(next_line)->tp_name);
            Py_DECREF(next_line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        next_line = _textiowrapper_readline(self, -1);
    }

    if (next_line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(next_line) != 0) {
        return next_line;
    }

    /* Reached EOF or would have blocked */
    Py_DECREF(next_line);
    Py_CLEAR(self->snapshot);
    self->telling = self->seekable;
    return NULL;
}
3216
3217
/* tp_iternext slot: run textiowrapper_iternext_lock_held() inside the
 * object's critical section and return its result. */
static PyObject *
textiowrapper_iternext(PyObject *op)
{
    PyObject *next_line;

    Py_BEGIN_CRITICAL_SECTION(op);
    next_line = textiowrapper_iternext_lock_held(op);
    Py_END_CRITICAL_SECTION();

    return next_line;
}
3226
3227
/*[clinic input]
3228
@critical_section
3229
@getter
3230
_io.TextIOWrapper.name
3231
[clinic start generated code]*/
3232
3233
static PyObject *
3234
_io_TextIOWrapper_name_get_impl(textio *self)
3235
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3236
0
{
3237
0
    CHECK_ATTACHED(self);
3238
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3239
0
}
3240
3241
/*[clinic input]
3242
@critical_section
3243
@getter
3244
_io.TextIOWrapper.closed
3245
[clinic start generated code]*/
3246
3247
static PyObject *
3248
_io_TextIOWrapper_closed_get_impl(textio *self)
3249
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3250
0
{
3251
0
    CHECK_ATTACHED(self);
3252
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3253
0
}
3254
3255
/*[clinic input]
3256
@critical_section
3257
@getter
3258
_io.TextIOWrapper.newlines
3259
[clinic start generated code]*/
3260
3261
static PyObject *
3262
_io_TextIOWrapper_newlines_get_impl(textio *self)
3263
/*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3264
0
{
3265
0
    PyObject *res;
3266
0
    CHECK_ATTACHED(self);
3267
0
    if (self->decoder == NULL ||
3268
0
        PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3269
0
    {
3270
0
        Py_RETURN_NONE;
3271
0
    }
3272
0
    return res;
3273
0
}
3274
3275
/*[clinic input]
3276
@critical_section
3277
@getter
3278
_io.TextIOWrapper.errors
3279
[clinic start generated code]*/
3280
3281
static PyObject *
3282
_io_TextIOWrapper_errors_get_impl(textio *self)
3283
/*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3284
0
{
3285
0
    CHECK_INITIALIZED(self);
3286
0
    return Py_NewRef(self->errors);
3287
0
}
3288
3289
/*[clinic input]
3290
@critical_section
3291
@getter
3292
_io.TextIOWrapper._CHUNK_SIZE
3293
[clinic start generated code]*/
3294
3295
static PyObject *
3296
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3297
/*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3298
0
{
3299
0
    CHECK_ATTACHED(self);
3300
0
    return PyLong_FromSsize_t(self->chunk_size);
3301
0
}
3302
3303
/*[clinic input]
3304
@critical_section
3305
@setter
3306
_io.TextIOWrapper._CHUNK_SIZE
3307
[clinic start generated code]*/
3308
3309
static int
3310
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3311
/*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3312
0
{
3313
0
    Py_ssize_t n;
3314
0
    CHECK_ATTACHED_INT(self);
3315
0
    if (value == NULL) {
3316
0
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3317
0
        return -1;
3318
0
    }
3319
0
    n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3320
0
    if (n == -1 && PyErr_Occurred())
3321
0
        return -1;
3322
0
    if (n <= 0) {
3323
0
        PyErr_SetString(PyExc_ValueError,
3324
0
                        "a strictly positive integer is required");
3325
0
        return -1;
3326
0
    }
3327
0
    self->chunk_size = n;
3328
0
    return 0;
3329
0
}
3330
3331
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3332
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3333
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3334
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3335
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3336
    {NULL}
3337
};
3338
3339
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3340
    {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL},
3341
    {NULL}
3342
};
3343
3344
static PyType_Slot nldecoder_slots[] = {
3345
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3346
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3347
    {Py_tp_methods, incrementalnewlinedecoder_methods},
3348
    {Py_tp_getset, incrementalnewlinedecoder_getset},
3349
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3350
    {Py_tp_clear, incrementalnewlinedecoder_clear},
3351
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3352
    {0, NULL},
3353
};
3354
3355
PyType_Spec nldecoder_spec = {
3356
    .name = "_io.IncrementalNewlineDecoder",
3357
    .basicsize = sizeof(nldecoder_object),
3358
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3359
              Py_TPFLAGS_IMMUTABLETYPE),
3360
    .slots = nldecoder_slots,
3361
};
3362
3363
3364
static PyMethodDef textiowrapper_methods[] = {
3365
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3366
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3367
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3368
    _IO_TEXTIOWRAPPER_READ_METHODDEF
3369
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3370
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3371
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3372
3373
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3374
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3375
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3376
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3377
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3378
3379
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3380
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3381
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3382
3383
    {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS},
3384
    {NULL, NULL}
3385
};
3386
3387
static PyMemberDef textiowrapper_members[] = {
3388
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3389
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3390
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3391
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3392
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3393
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3394
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3395
    {NULL}
3396
};
3397
3398
static PyGetSetDef textiowrapper_getset[] = {
3399
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3400
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3401
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3402
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3403
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3404
    {NULL}
3405
};
3406
3407
PyType_Slot textiowrapper_slots[] = {
3408
    {Py_tp_dealloc, textiowrapper_dealloc},
3409
    {Py_tp_repr, textiowrapper_repr},
3410
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3411
    {Py_tp_traverse, textiowrapper_traverse},
3412
    {Py_tp_clear, textiowrapper_clear},
3413
    {Py_tp_iternext, textiowrapper_iternext},
3414
    {Py_tp_methods, textiowrapper_methods},
3415
    {Py_tp_members, textiowrapper_members},
3416
    {Py_tp_getset, textiowrapper_getset},
3417
    {Py_tp_init, _io_TextIOWrapper___init__},
3418
    {0, NULL},
3419
};
3420
3421
PyType_Spec textiowrapper_spec = {
3422
    .name = "_io.TextIOWrapper",
3423
    .basicsize = sizeof(textio),
3424
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3425
              Py_TPFLAGS_IMMUTABLETYPE),
3426
    .slots = textiowrapper_slots,
3427
};