Coverage Report

Created: 2026-02-26 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython3/Modules/_io/textio.c
Line
Count
Source
1
/*
2
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h"          // _PyObject_CallMethod()
11
#include "pycore_codecs.h"        // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
13
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
14
#include "pycore_long.h"          // _PyLong_GetZero()
15
#include "pycore_object.h"        // _PyObject_GC_UNTRACK()
16
#include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
17
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
18
#include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString()
19
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
20
21
#include "_iomodule.h"
22
23
/*[clinic input]
24
module _io
25
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
26
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
27
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
30
31
typedef struct nldecoder_object nldecoder_object;
32
typedef struct textio textio;
33
34
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
35
#include "clinic/textio.c.h"
36
#undef clinic_state
37
38
/* TextIOBase */
39
40
PyDoc_STRVAR(textiobase_doc,
41
    "Base class for text I/O.\n"
42
    "\n"
43
    "This class provides a character and line based interface to stream\n"
44
    "I/O. There is no readinto method because Python's character strings\n"
45
    "are immutable.\n"
46
    );
47
48
static PyObject *
49
_unsupported(_PyIO_State *state, const char *message)
50
0
{
51
0
    PyErr_SetString(state->unsupported_operation, message);
52
0
    return NULL;
53
0
}
54
55
/*[clinic input]
56
@permit_long_docstring_body
57
_io._TextIOBase.detach
58
    cls: defining_class
59
    /
60
61
Separate the underlying buffer from the TextIOBase and return it.
62
63
After the underlying buffer has been detached, the TextIO is in an unusable state.
64
[clinic start generated code]*/
65
66
static PyObject *
67
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
68
/*[clinic end generated code: output=50915f40c609eaa4 input=8cd0652c17d7f015]*/
69
0
{
70
0
    _PyIO_State *state = get_io_state_by_cls(cls);
71
0
    return _unsupported(state, "detach");
72
0
}
73
74
/*[clinic input]
75
_io._TextIOBase.read
76
    cls: defining_class
77
    size: int(unused=True) = -1
78
    /
79
80
Read at most size characters from stream.
81
82
Read from underlying buffer until we have size characters or we hit EOF.
83
If size is negative or omitted, read until EOF.
84
[clinic start generated code]*/
85
86
static PyObject *
87
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
88
                          int Py_UNUSED(size))
89
/*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
90
0
{
91
0
    _PyIO_State *state = get_io_state_by_cls(cls);
92
0
    return _unsupported(state, "read");
93
0
}
94
95
/*[clinic input]
96
_io._TextIOBase.readline
97
    cls: defining_class
98
    size: int(unused=True) = -1
99
    /
100
101
Read until newline or EOF.
102
103
Return an empty string if EOF is hit immediately.
104
If size is specified, at most size characters will be read.
105
[clinic start generated code]*/
106
107
static PyObject *
108
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
109
                              int Py_UNUSED(size))
110
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
111
0
{
112
0
    _PyIO_State *state = get_io_state_by_cls(cls);
113
0
    return _unsupported(state, "readline");
114
0
}
115
116
/*[clinic input]
117
_io._TextIOBase.write
118
    cls: defining_class
119
    s: str(unused=True)
120
    /
121
122
Write string s to stream.
123
124
Return the number of characters written
125
(which is always equal to the length of the string).
126
[clinic start generated code]*/
127
128
static PyObject *
129
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
130
                           const char *Py_UNUSED(s))
131
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
132
0
{
133
0
    _PyIO_State *state = get_io_state_by_cls(cls);
134
0
    return _unsupported(state, "write");
135
0
}
136
137
/*[clinic input]
138
@getter
139
_io._TextIOBase.encoding
140
141
Encoding of the text stream.
142
143
Subclasses should override.
144
[clinic start generated code]*/
145
146
static PyObject *
_io__TextIOBase_encoding_get_impl(PyObject *self)
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
{
    /* The base class reports no encoding: the getter yields None. */
    Py_RETURN_NONE;
}
152
153
/*[clinic input]
154
@getter
155
_io._TextIOBase.newlines
156
157
Line endings translated so far.
158
159
Only line endings translated during reading are considered.
160
161
Subclasses should override.
162
[clinic start generated code]*/
163
164
static PyObject *
_io__TextIOBase_newlines_get_impl(PyObject *self)
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
{
    /* The base class tracks no line endings: the getter yields None. */
    Py_RETURN_NONE;
}
170
171
/*[clinic input]
172
@getter
173
_io._TextIOBase.errors
174
175
The error setting of the decoder or encoder.
176
177
Subclasses should override.
178
[clinic start generated code]*/
179
180
static PyObject *
_io__TextIOBase_errors_get_impl(PyObject *self)
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
{
    /* The base class has no error-handling setting: the getter yields None. */
    Py_RETURN_NONE;
}
186
187
188
static PyMethodDef textiobase_methods[] = {
189
    _IO__TEXTIOBASE_DETACH_METHODDEF
190
    _IO__TEXTIOBASE_READ_METHODDEF
191
    _IO__TEXTIOBASE_READLINE_METHODDEF
192
    _IO__TEXTIOBASE_WRITE_METHODDEF
193
    {NULL, NULL}
194
};
195
196
static PyGetSetDef textiobase_getset[] = {
197
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
198
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
199
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
200
    {NULL}
201
};
202
203
static PyType_Slot textiobase_slots[] = {
204
    {Py_tp_doc, (void *)textiobase_doc},
205
    {Py_tp_methods, textiobase_methods},
206
    {Py_tp_getset, textiobase_getset},
207
    {0, NULL},
208
};
209
210
/* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
211
PyType_Spec _Py_textiobase_spec = {
212
    .name = "_io._TextIOBase",
213
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
214
              Py_TPFLAGS_IMMUTABLETYPE),
215
    .slots = textiobase_slots,
216
};
217
218
/* IncrementalNewlineDecoder */
219
220
struct nldecoder_object {
221
    PyObject_HEAD
222
    PyObject *decoder;
223
    PyObject *errors;
224
    unsigned int pendingcr: 1;
225
    unsigned int translate: 1;
226
    unsigned int seennl: 3;
227
};
228
229
0
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
230
231
/*[clinic input]
232
_io.IncrementalNewlineDecoder.__init__
233
    decoder: object
234
    translate: bool
235
    errors: object(c_default="NULL") = "strict"
236
237
Codec used when reading a file in universal newlines mode.
238
239
It wraps another incremental decoder, translating \r\n and \r into \n.
240
It also records the types of newlines encountered.  When used with
241
translate=False, it ensures that the newline sequence is returned in
242
one piece. When used with decoder=None, it expects unicode strings as
243
decode input and translates newlines without first invoking an external
244
decoder.
245
[clinic start generated code]*/
246
247
static int
248
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249
                                            PyObject *decoder, int translate,
250
                                            PyObject *errors)
251
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
252
0
{
253
254
0
    if (errors == NULL) {
255
0
        errors = &_Py_ID(strict);
256
0
    }
257
0
    else {
258
0
        errors = Py_NewRef(errors);
259
0
    }
260
261
0
    Py_XSETREF(self->errors, errors);
262
0
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
263
0
    self->translate = translate ? 1 : 0;
264
0
    self->seennl = 0;
265
0
    self->pendingcr = 0;
266
267
0
    return 0;
268
0
}
269
270
static int
271
incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg)
272
0
{
273
0
    nldecoder_object *self = nldecoder_object_CAST(op);
274
0
    Py_VISIT(Py_TYPE(self));
275
0
    Py_VISIT(self->decoder);
276
0
    Py_VISIT(self->errors);
277
0
    return 0;
278
0
}
279
280
static int
281
incrementalnewlinedecoder_clear(PyObject *op)
282
0
{
283
0
    nldecoder_object *self = nldecoder_object_CAST(op);
284
0
    Py_CLEAR(self->decoder);
285
0
    Py_CLEAR(self->errors);
286
0
    return 0;
287
0
}
288
289
static void
290
incrementalnewlinedecoder_dealloc(PyObject *op)
291
0
{
292
0
    nldecoder_object *self = nldecoder_object_CAST(op);
293
0
    PyTypeObject *tp = Py_TYPE(self);
294
0
    _PyObject_GC_UNTRACK(self);
295
0
    (void)incrementalnewlinedecoder_clear(op);
296
0
    tp->tp_free(self);
297
0
    Py_DECREF(tp);
298
0
}
299
300
static int
301
check_decoded(PyObject *decoded)
302
0
{
303
0
    if (decoded == NULL)
304
0
        return -1;
305
0
    if (!PyUnicode_Check(decoded)) {
306
0
        PyErr_Format(PyExc_TypeError,
307
0
                     "decoder should return a string result, not '%.200s'",
308
0
                     Py_TYPE(decoded)->tp_name);
309
0
        Py_DECREF(decoded);
310
0
        return -1;
311
0
    }
312
0
    return 0;
313
0
}
314
315
/* Bail out of the enclosing function (which must return a pointer) with
   a ValueError when the decoder's __init__() has not been run, which is
   detected by `errors` still being NULL.

   Wrapped in do { } while (0) so that the macro behaves as a single
   statement and `CHECK_INITIALIZED_DECODER(x);` is safe inside an
   unbraced if/else; the argument is parenthesized for the same reason. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
321
322
0
/* Bit flags stored in nldecoder_object.seennl, one per newline style
   encountered so far. */
#define SEEN_CR   (1 << 0)  /* lone \r */
#define SEEN_LF   (1 << 1)  /* lone \n */
#define SEEN_CRLF (1 << 2)  /* \r\n pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
326
327
/* Decode `input` through the wrapped decoder (or pass it through when
   the decoder is None), then record which newline styles occur and, in
   translate mode, rewrite \r\n and \r to \n.

   A trailing \r is held back unless `final` is true, so that a \r\n
   pair split across two calls is still seen in one piece.

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    nldecoder_object *self = nldecoder_object_CAST(myself);
    PyObject *output;
    Py_ssize_t output_len;

    CHECK_INITIALIZED_DECODER(self);

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder == Py_None) {
        output = Py_NewRef(input);
    }
    else {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }

    /* check_decoded() already released a non-str result */
    if (check_decoded(output) < 0) {
        return NULL;
    }

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Re-insert the \r that was held back by the previous call */
        PyObject *prefixed = PyUnicode_New(output_len + 1,
                                           PyUnicode_MAX_CHAR_VALUE(output));
        if (prefixed == NULL) {
            goto error;
        }
        int kind = PyUnicode_KIND(prefixed);
        char *dest = PyUnicode_DATA(prefixed);

        PyUnicode_WRITE(kind, dest, 0, '\r');
        memcpy(dest + kind, PyUnicode_DATA(output), kind * output_len);
        Py_SETREF(output, prefixed);
        self->pendingcr = 0;
        output_len++;
    }

    /* Hold back a trailing \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final
        && output_len > 0
        && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
    {
        PyObject *trimmed = PyUnicode_Substring(output, 0, output_len - 1);
        if (trimmed == NULL) {
            goto error;
        }
        Py_SETREF(output, trimmed);
        self->pendingcr = 1;
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *data = PyUnicode_DATA(output);
        Py_ssize_t len = PyUnicode_GET_LENGTH(output);
        int kind = PyUnicode_KIND(output);
        int seennl = self->seennl;
        int only_lf = 0;

        if (len == 0) {
            return output;
        }

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr. */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(data, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* No \r byte anywhere, so nothing needs translating.  If \n
               has not been recorded yet, look for one. */
            if (seennl == 0
                && memchr(data, '\n', kind * len) != NULL)
            {
                if (kind == PyUnicode_1BYTE_KIND) {
                    /* With one byte per character, the byte hit is a
                       real \n character. */
                    seennl |= SEEN_LF;
                }
                else {
                    /* Wider characters: the byte hit may belong to some
                       other code point, so check character by character. */
                    Py_ssize_t pos = 0;
                    for (;;) {
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, data, pos) > '\n') {
                            pos++;
                        }
                        Py_UCS4 ch = PyUnicode_READ(kind, data, pos++);
                        if (ch == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (pos >= len) {
                            break;
                        }
                    }
                }
            }
        }
        else if (!self->translate) {
            /* Only record newline styles; skip the scan entirely once
               all of them have been seen. */
            if (seennl != SEEN_ALL) {
                Py_ssize_t pos = 0;
                do {
                    /* Fast loop for non-control characters */
                    while (PyUnicode_READ(kind, data, pos) > '\r') {
                        pos++;
                    }
                    Py_UCS4 ch = PyUnicode_READ(kind, data, pos++);
                    if (ch == '\n') {
                        seennl |= SEEN_LF;
                    }
                    else if (ch == '\r') {
                        if (PyUnicode_READ(kind, data, pos) == '\n') {
                            seennl |= SEEN_CRLF;
                            pos++;
                        }
                        else {
                            seennl |= SEEN_CR;
                        }
                    }
                } while (pos < len && seennl != SEEN_ALL);
            }
        }
        else {
            /* Translate into a scratch buffer.  The result can never be
               longer than the input, so `len` characters suffice.
               We already know there is a \r byte, so chances are high
               that something needs to be done. */
            Py_ssize_t in = 0;
            Py_ssize_t out = 0;
            void *translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            for (;;) {
                Py_UCS4 ch;
                /* Fast loop for non-control characters */
                while ((ch = PyUnicode_READ(kind, data, in++)) > '\r') {
                    PyUnicode_WRITE(kind, translated, out++, ch);
                }
                if (ch == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, ch);
                    seennl |= SEEN_LF;
                }
                else if (ch == '\r') {
                    if (PyUnicode_READ(kind, data, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else {
                        seennl |= SEEN_CR;
                    }
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                }
                else {
                    /* Some other character <= \r; stop once the read
                       position has moved past the end of the input. */
                    if (in > len) {
                        break;
                    }
                    PyUnicode_WRITE(kind, translated, out++, ch);
                }
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output) {
                return NULL;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
520
521
/*[clinic input]
522
@critical_section
523
_io.IncrementalNewlineDecoder.decode
524
    input: object
525
    final: bool = False
526
[clinic start generated code]*/
527
528
static PyObject *
529
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
530
                                          PyObject *input, int final)
531
/*[clinic end generated code: output=0d486755bb37a66e input=9475d16a73168504]*/
532
0
{
533
0
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
534
0
}
535
536
/*[clinic input]
537
@critical_section
538
_io.IncrementalNewlineDecoder.getstate
539
[clinic start generated code]*/
540
541
static PyObject *
542
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
543
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=dc3e1f27aa850f12]*/
544
0
{
545
0
    PyObject *buffer;
546
0
    unsigned long long flag;
547
548
0
    CHECK_INITIALIZED_DECODER(self);
549
550
0
    if (self->decoder != Py_None) {
551
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
552
0
           &_Py_ID(getstate));
553
0
        if (state == NULL)
554
0
            return NULL;
555
0
        if (!PyTuple_Check(state)) {
556
0
            PyErr_SetString(PyExc_TypeError,
557
0
                            "illegal decoder state");
558
0
            Py_DECREF(state);
559
0
            return NULL;
560
0
        }
561
0
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
562
0
                              &buffer, &flag))
563
0
        {
564
0
            Py_DECREF(state);
565
0
            return NULL;
566
0
        }
567
0
        Py_INCREF(buffer);
568
0
        Py_DECREF(state);
569
0
    }
570
0
    else {
571
0
        buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
572
0
        flag = 0;
573
0
    }
574
0
    flag <<= 1;
575
0
    if (self->pendingcr)
576
0
        flag |= 1;
577
0
    return Py_BuildValue("NK", buffer, flag);
578
0
}
579
580
/*[clinic input]
581
@critical_section
582
_io.IncrementalNewlineDecoder.setstate
583
    state: object
584
    /
585
[clinic start generated code]*/
586
587
static PyObject *
588
_io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
589
                                            PyObject *state)
590
/*[clinic end generated code: output=09135cb6e78a1dc8 input=275fd3982d2b08cb]*/
591
0
{
592
0
    PyObject *buffer;
593
0
    unsigned long long flag;
594
595
0
    CHECK_INITIALIZED_DECODER(self);
596
597
0
    if (!PyTuple_Check(state)) {
598
0
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
599
0
        return NULL;
600
0
    }
601
0
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
602
0
                          &buffer, &flag))
603
0
    {
604
0
        return NULL;
605
0
    }
606
607
0
    self->pendingcr = (int) (flag & 1);
608
0
    flag >>= 1;
609
610
0
    if (self->decoder != Py_None) {
611
0
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
612
0
                                    "((OK))", buffer, flag);
613
0
    }
614
0
    else {
615
0
        Py_RETURN_NONE;
616
0
    }
617
0
}
618
619
/*[clinic input]
620
@critical_section
621
_io.IncrementalNewlineDecoder.reset
622
[clinic start generated code]*/
623
624
static PyObject *
625
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
626
/*[clinic end generated code: output=32fa40c7462aa8ff input=31bd8ae4e36cec83]*/
627
0
{
628
0
    CHECK_INITIALIZED_DECODER(self);
629
630
0
    self->seennl = 0;
631
0
    self->pendingcr = 0;
632
0
    if (self->decoder != Py_None)
633
0
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
634
0
    else
635
0
        Py_RETURN_NONE;
636
0
}
637
638
/* Getter for the `newlines` attribute: None when no newline has been
   recorded, a single str when exactly one style was seen, otherwise a
   tuple of the styles seen, in the fixed order \r, \n, \r\n. */
static PyObject *
incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context))
{
    nldecoder_object *self = nldecoder_object_CAST(op);
    const char *cr = "\r";
    const char *lf = "\n";
    const char *crlf = "\r\n";

    CHECK_INITIALIZED_DECODER(self);

    switch (self->seennl) {
    case SEEN_CR:
        return PyUnicode_FromString(cr);
    case SEEN_LF:
        return PyUnicode_FromString(lf);
    case SEEN_CRLF:
        return PyUnicode_FromString(crlf);
    case SEEN_CR | SEEN_LF:
        return Py_BuildValue("ss", cr, lf);
    case SEEN_CR | SEEN_CRLF:
        return Py_BuildValue("ss", cr, crlf);
    case SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("ss", lf, crlf);
    case SEEN_ALL:
        return Py_BuildValue("sss", cr, lf, crlf);
    default:
        Py_RETURN_NONE;
    }
}
664
665
/* TextIOWrapper */
666
667
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
668
669
struct textio
670
{
671
    PyObject_HEAD
672
    int ok; /* initialized? */
673
    int detached;
674
    Py_ssize_t chunk_size;
675
    PyObject *buffer;
676
    PyObject *encoding;
677
    PyObject *encoder;
678
    PyObject *decoder;
679
    PyObject *readnl;
680
    PyObject *errors;
681
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
682
    char line_buffering;
683
    char write_through;
684
    char readuniversal;
685
    char readtranslate;
686
    char writetranslate;
687
    char seekable;
688
    char has_read1;
689
    char telling;
690
    char finalizing;
691
    /* Specialized encoding func (see below) */
692
    encodefunc_t encodefunc;
693
    /* Whether or not it's the start of the stream */
694
    char encoding_start_of_stream;
695
696
    /* Reads and writes are internally buffered in order to speed things up.
697
       However, any read will first flush the write buffer if itsn't empty.
698
699
       Please also note that text to be written is first encoded before being
700
       buffered. This is necessary so that encoding errors are immediately
701
       reported to the caller, but it unfortunately means that the
702
       IncrementalEncoder (whose encode() method is always written in Python)
703
       becomes a bottleneck for small writes.
704
    */
705
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
706
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
707
    PyObject *pending_bytes;       // data waiting to be written.
708
                                   // ascii unicode, bytes, or list of them.
709
    Py_ssize_t pending_bytes_count;
710
711
    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
712
     * dec_flags is the second (integer) item of the decoder state and
713
     * next_input is the chunk of input bytes that comes next after the
714
     * snapshot point.  We use this to reconstruct decoder states in tell().
715
     */
716
    PyObject *snapshot;
717
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
718
       the heuristic in tell(). */
719
    double b2cratio;
720
721
    /* Cache raw object if it's a FileIO object */
722
    PyObject *raw;
723
724
    PyObject *weakreflist;
725
    PyObject *dict;
726
727
    _PyIO_State *state;
728
};
729
730
9.43k
#define textio_CAST(op) ((textio *)(op))
731
732
static void
733
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
734
735
/* A couple of specialized cases in order to bypass the slow incremental
736
   encoding methods for the most popular encodings. */
737
738
/* Specialized encoder: ASCII, using the wrapper's error handler. */
static PyObject *
ascii_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsASCIIString(text, errors);
}
744
745
/* Specialized encoder: UTF-16 with byteorder argument 1 (the "-be"
   variant), using the wrapper's error handler. */
static PyObject *
utf16be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, 1);
}
751
752
/* Specialized encoder: UTF-16 with byteorder argument -1 (the "-le"
   variant), using the wrapper's error handler. */
static PyObject *
utf16le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, -1);
}
758
759
/* Specialized encoder: plain "utf-16".  At the start of the stream the
   generic encoder is called with byteorder argument 0; afterwards the
   BOM is skipped and the native byte order is used. */
static PyObject *
utf16_encode(PyObject *op, PyObject *text)
{
    textio *self = textio_CAST(op);

    if (self->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF16(text,
                                      PyUnicode_AsUTF8(self->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf16be_encode(op, text);
#else
    return utf16le_encode(op, text);
#endif
}
773
774
/* Specialized encoder: UTF-32 with byteorder argument 1 (the "-be"
   variant), using the wrapper's error handler. */
static PyObject *
utf32be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, 1);
}
780
781
/* Specialized encoder: UTF-32 with byteorder argument -1 (the "-le"
   variant), using the wrapper's error handler. */
static PyObject *
utf32le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, -1);
}
787
788
/* Specialized encoder: plain "utf-32".  At the start of the stream the
   generic encoder is called with byteorder argument 0; afterwards the
   BOM is skipped and the native byte order is used. */
static PyObject *
utf32_encode(PyObject *op, PyObject *text)
{
    textio *self = textio_CAST(op);

    if (self->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF32(text,
                                      PyUnicode_AsUTF8(self->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf32be_encode(op, text);
#else
    return utf32le_encode(op, text);
#endif
}
802
803
/* Specialized encoder: UTF-8, using the wrapper's error handler. */
static PyObject *
utf8_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsUTF8String(text, errors);
}
809
810
/* Specialized encoder: Latin-1, using the wrapper's error handler. */
static PyObject *
latin1_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsLatin1String(text, errors);
}
816
817
// Return true when encoding can be skipped when text is ascii.
818
static inline int
819
is_asciicompat_encoding(encodefunc_t f)
820
170k
{
821
170k
    return f == ascii_encode || f == latin1_encode || f == utf8_encode;
822
170k
}
823
824
/* Map normalized encoding names onto the specialized encoding funcs */
825
826
typedef struct {
827
    const char *name;
828
    encodefunc_t encodefunc;
829
} encodefuncentry;
830
831
static const encodefuncentry encodefuncs[] = {
832
    {"ascii",       ascii_encode},
833
    {"iso8859-1",   latin1_encode},
834
    {"utf-8",       utf8_encode},
835
    {"utf-16-be",   utf16be_encode},
836
    {"utf-16-le",   utf16le_encode},
837
    {"utf-16",      utf16_encode},
838
    {"utf-32-be",   utf32be_encode},
839
    {"utf-32-le",   utf32le_encode},
840
    {"utf-32",      utf32_encode},
841
    {NULL, NULL}
842
};
843
844
static int
845
validate_newline(const char *newline)
846
66
{
847
66
    if (newline && newline[0] != '\0'
848
66
        && !(newline[0] == '\n' && newline[1] == '\0')
849
0
        && !(newline[0] == '\r' && newline[1] == '\0')
850
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
851
0
        PyErr_Format(PyExc_ValueError,
852
0
                     "illegal newline value: %s", newline);
853
0
        return -1;
854
0
    }
855
66
    return 0;
856
66
}
857
858
static int
859
set_newline(textio *self, const char *newline)
860
66
{
861
66
    PyObject *old = self->readnl;
862
66
    if (newline == NULL) {
863
0
        self->readnl = NULL;
864
0
    }
865
66
    else {
866
66
        self->readnl = PyUnicode_FromString(newline);
867
66
        if (self->readnl == NULL) {
868
0
            self->readnl = old;
869
0
            return -1;
870
0
        }
871
66
    }
872
66
    self->readuniversal = (newline == NULL || newline[0] == '\0');
873
66
    self->readtranslate = (newline == NULL);
874
66
    self->writetranslate = (newline == NULL || newline[0] != '\0');
875
66
    if (!self->readuniversal && self->readnl != NULL) {
876
        // validate_newline() accepts only ASCII newlines.
877
66
        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
878
66
        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
879
66
        if (strcmp(self->writenl, "\n") == 0) {
880
66
            self->writenl = NULL;
881
66
        }
882
66
    }
883
0
    else {
884
#ifdef MS_WINDOWS
885
        self->writenl = "\r\n";
886
#else
887
0
        self->writenl = NULL;
888
0
#endif
889
0
    }
890
66
    Py_XDECREF(old);
891
66
    return 0;
892
66
}
893
894
static int
895
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
896
                           const char *errors)
897
66
{
898
66
    PyObject *res;
899
66
    int r;
900
901
66
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
902
66
    if (res == NULL)
903
0
        return -1;
904
905
66
    r = PyObject_IsTrue(res);
906
66
    Py_DECREF(res);
907
66
    if (r == -1)
908
0
        return -1;
909
910
66
    if (r != 1)
911
44
        return 0;
912
913
22
    Py_CLEAR(self->decoder);
914
22
    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
915
22
    if (self->decoder == NULL)
916
0
        return -1;
917
918
22
    if (self->readuniversal) {
919
0
        _PyIO_State *state = self->state;
920
0
        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
921
0
            (PyObject *)state->PyIncrementalNewlineDecoder_Type,
922
0
            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
923
0
        if (incrementalDecoder == NULL)
924
0
            return -1;
925
0
        Py_XSETREF(self->decoder, incrementalDecoder);
926
0
    }
927
928
22
    return 0;
929
22
}
930
931
static PyObject*
932
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
933
                      int eof)
934
0
{
935
0
    PyObject *chars;
936
937
0
    if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
938
0
        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
939
0
    else
940
0
        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
941
0
                                           eof ? Py_True : Py_False, NULL);
942
943
0
    if (check_decoded(chars) < 0)
944
        // check_decoded already decreases refcount
945
0
        return NULL;
946
947
0
    return chars;
948
0
}
949
950
static int
951
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
952
                           const char *errors)
953
66
{
954
66
    PyObject *res;
955
66
    int r;
956
957
66
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
958
66
    if (res == NULL)
959
0
        return -1;
960
961
66
    r = PyObject_IsTrue(res);
962
66
    Py_DECREF(res);
963
66
    if (r == -1)
964
0
        return -1;
965
966
66
    if (r != 1)
967
22
        return 0;
968
969
44
    Py_CLEAR(self->encoder);
970
44
    self->encodefunc = NULL;
971
44
    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
972
44
    if (self->encoder == NULL)
973
0
        return -1;
974
975
    /* Get the normalized name of the codec */
976
44
    if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
977
0
        return -1;
978
0
    }
979
44
    if (res != NULL && PyUnicode_Check(res)) {
980
44
        const encodefuncentry *e = encodefuncs;
981
132
        while (e->name != NULL) {
982
132
            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
983
44
                self->encodefunc = e->encodefunc;
984
44
                break;
985
44
            }
986
88
            e++;
987
88
        }
988
44
    }
989
44
    Py_XDECREF(res);
990
991
44
    return 0;
992
44
}
993
994
static int
995
_textiowrapper_fix_encoder_state(textio *self)
996
66
{
997
66
    if (!self->seekable || !self->encoder) {
998
22
        return 0;
999
22
    }
1000
1001
44
    self->encoding_start_of_stream = 1;
1002
1003
44
    PyObject *cookieObj = PyObject_CallMethodNoArgs(
1004
44
        self->buffer, &_Py_ID(tell));
1005
44
    if (cookieObj == NULL) {
1006
0
        return -1;
1007
0
    }
1008
1009
44
    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
1010
44
    Py_DECREF(cookieObj);
1011
44
    if (cmp < 0) {
1012
0
        return -1;
1013
0
    }
1014
1015
44
    if (cmp == 0) {
1016
22
        self->encoding_start_of_stream = 0;
1017
22
        PyObject *res = PyObject_CallMethodOneArg(
1018
22
            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1019
22
        if (res == NULL) {
1020
0
            return -1;
1021
0
        }
1022
22
        Py_DECREF(res);
1023
22
    }
1024
1025
44
    return 0;
1026
44
}
1027
1028
static int
1029
io_check_errors(PyObject *errors)
1030
66
{
1031
66
    assert(errors != NULL && errors != Py_None);
1032
1033
66
    PyInterpreterState *interp = _PyInterpreterState_GET();
1034
66
#ifndef Py_DEBUG
1035
    /* In release mode, only check in development mode (-X dev) */
1036
66
    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1037
66
        return 0;
1038
66
    }
1039
#else
1040
    /* Always check in debug mode */
1041
#endif
1042
1043
    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1044
       before _PyUnicode_InitEncodings() is called. */
1045
0
    if (!interp->unicode.fs_codec.encoding) {
1046
0
        return 0;
1047
0
    }
1048
1049
0
    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1050
0
    if (name == NULL) {
1051
0
        return -1;
1052
0
    }
1053
0
    PyObject *handler = PyCodec_LookupError(name);
1054
0
    if (handler != NULL) {
1055
0
        Py_DECREF(handler);
1056
0
        return 0;
1057
0
    }
1058
0
    return -1;
1059
0
}
1060
1061
1062
1063
/*[clinic input]
1064
_io.TextIOWrapper.__init__
1065
    buffer: object
1066
    encoding: str(accept={str, NoneType}) = None
1067
    errors: object = None
1068
    newline: str(accept={str, NoneType}) = None
1069
    line_buffering: bool = False
1070
    write_through: bool = False
1071
1072
Character and line based layer over a BufferedIOBase object, buffer.
1073
1074
encoding gives the name of the encoding that the stream will be
1075
decoded or encoded with. It defaults to locale.getencoding().
1076
1077
errors determines the strictness of encoding and decoding (see
1078
help(codecs.Codec) or the documentation for codecs.register) and
1079
defaults to "strict".
1080
1081
newline controls how line endings are handled. It can be None, '',
1082
'\n', '\r', and '\r\n'.  It works as follows:
1083
1084
* On input, if newline is None, universal newlines mode is
1085
  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1086
  these are translated into '\n' before being returned to the
1087
  caller. If it is '', universal newline mode is enabled, but line
1088
  endings are returned to the caller untranslated. If it has any of
1089
  the other legal values, input lines are only terminated by the given
1090
  string, and the line ending is returned to the caller untranslated.
1091
1092
* On output, if newline is None, any '\n' characters written are
1093
  translated to the system default line separator, os.linesep. If
1094
  newline is '' or '\n', no translation takes place. If newline is any
1095
  of the other legal values, any '\n' characters written are translated
1096
  to the given string.
1097
1098
If line_buffering is True, a call to flush is implied when a call to
1099
write contains a newline character.
1100
[clinic start generated code]*/
1101
1102
static int
1103
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1104
                                const char *encoding, PyObject *errors,
1105
                                const char *newline, int line_buffering,
1106
                                int write_through)
1107
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1108
66
{
1109
66
    PyObject *raw, *codec_info = NULL;
1110
66
    PyObject *res;
1111
66
    int r;
1112
1113
66
    self->ok = 0;
1114
66
    self->detached = 0;
1115
1116
66
    if (encoding == NULL) {
1117
0
        PyInterpreterState *interp = _PyInterpreterState_GET();
1118
0
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1119
0
            if (PyErr_WarnEx(PyExc_EncodingWarning,
1120
0
                             "'encoding' argument not specified", 1)) {
1121
0
                return -1;
1122
0
            }
1123
0
        }
1124
0
    }
1125
1126
66
    if (errors == Py_None) {
1127
0
        errors = &_Py_ID(strict);
1128
0
    }
1129
66
    else if (!PyUnicode_Check(errors)) {
1130
        // Check 'errors' argument here because Argument Clinic doesn't support
1131
        // 'str(accept={str, NoneType})' converter.
1132
0
        PyErr_Format(
1133
0
            PyExc_TypeError,
1134
0
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1135
0
            Py_TYPE(errors)->tp_name);
1136
0
        return -1;
1137
0
    }
1138
66
    else if (io_check_errors(errors)) {
1139
0
        return -1;
1140
0
    }
1141
66
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1142
66
    if (errors_str == NULL) {
1143
0
        return -1;
1144
0
    }
1145
1146
66
    if (validate_newline(newline) < 0) {
1147
0
        return -1;
1148
0
    }
1149
1150
66
    Py_CLEAR(self->buffer);
1151
66
    Py_CLEAR(self->encoding);
1152
66
    Py_CLEAR(self->encoder);
1153
66
    Py_CLEAR(self->decoder);
1154
66
    Py_CLEAR(self->readnl);
1155
66
    Py_CLEAR(self->decoded_chars);
1156
66
    Py_CLEAR(self->pending_bytes);
1157
66
    Py_CLEAR(self->snapshot);
1158
66
    Py_CLEAR(self->errors);
1159
66
    Py_CLEAR(self->raw);
1160
66
    self->decoded_chars_used = 0;
1161
66
    self->pending_bytes_count = 0;
1162
66
    self->encodefunc = NULL;
1163
66
    self->b2cratio = 0.0;
1164
1165
66
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1166
0
        _Py_DECLARE_STR(utf_8, "utf-8");
1167
0
        self->encoding = &_Py_STR(utf_8);
1168
0
    }
1169
66
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1170
0
        self->encoding = _Py_GetLocaleEncodingObject();
1171
0
        if (self->encoding == NULL) {
1172
0
            goto error;
1173
0
        }
1174
0
        assert(PyUnicode_Check(self->encoding));
1175
0
    }
1176
1177
66
    if (self->encoding != NULL) {
1178
0
        encoding = PyUnicode_AsUTF8(self->encoding);
1179
0
        if (encoding == NULL)
1180
0
            goto error;
1181
0
    }
1182
66
    else if (encoding != NULL) {
1183
66
        self->encoding = PyUnicode_FromString(encoding);
1184
66
        if (self->encoding == NULL)
1185
0
            goto error;
1186
66
    }
1187
0
    else {
1188
0
        PyErr_SetString(PyExc_OSError,
1189
0
                        "could not determine default encoding");
1190
0
        goto error;
1191
0
    }
1192
1193
    /* Check we have been asked for a real text encoding */
1194
66
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
1195
66
    if (codec_info == NULL) {
1196
0
        Py_CLEAR(self->encoding);
1197
0
        goto error;
1198
0
    }
1199
1200
    /* XXX: Failures beyond this point have the potential to leak elements
1201
     * of the partially constructed object (like self->encoding)
1202
     */
1203
1204
66
    self->errors = Py_NewRef(errors);
1205
66
    self->chunk_size = 8192;
1206
66
    self->line_buffering = line_buffering;
1207
66
    self->write_through = write_through;
1208
66
    if (set_newline(self, newline) < 0) {
1209
0
        goto error;
1210
0
    }
1211
1212
66
    self->buffer = Py_NewRef(buffer);
1213
1214
    /* Build the decoder object */
1215
66
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1216
66
    self->state = state;
1217
66
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1218
0
        goto error;
1219
1220
    /* Build the encoder object */
1221
66
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1222
0
        goto error;
1223
1224
    /* Finished sorting out the codec details */
1225
66
    Py_CLEAR(codec_info);
1226
1227
66
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1228
44
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1229
0
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1230
66
    {
1231
66
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1232
0
            goto error;
1233
        /* Cache the raw FileIO object to speed up 'closed' checks */
1234
66
        if (raw != NULL) {
1235
66
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1236
66
                self->raw = raw;
1237
0
            else
1238
0
                Py_DECREF(raw);
1239
66
        }
1240
66
    }
1241
1242
66
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1243
66
    if (res == NULL)
1244
0
        goto error;
1245
66
    r = PyObject_IsTrue(res);
1246
66
    Py_DECREF(res);
1247
66
    if (r < 0)
1248
0
        goto error;
1249
66
    self->seekable = self->telling = r;
1250
1251
66
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1252
66
    if (r < 0) {
1253
0
        goto error;
1254
0
    }
1255
66
    self->has_read1 = r;
1256
1257
66
    self->encoding_start_of_stream = 0;
1258
66
    if (_textiowrapper_fix_encoder_state(self) < 0) {
1259
0
        goto error;
1260
0
    }
1261
1262
66
    self->ok = 1;
1263
66
    return 0;
1264
1265
0
  error:
1266
0
    Py_XDECREF(codec_info);
1267
0
    return -1;
1268
66
}
1269
1270
/* Return *default_value* if obj is None, 0 if obj is false, 1 if obj is true,
1271
 * -1 on error.
1272
 */
1273
static int
1274
convert_optional_bool(PyObject *obj, int default_value)
1275
0
{
1276
0
    long v;
1277
0
    if (obj == Py_None) {
1278
0
        v = default_value;
1279
0
    }
1280
0
    else {
1281
0
        v = PyLong_AsLong(obj);
1282
0
        if (v == -1 && PyErr_Occurred())
1283
0
            return -1;
1284
0
    }
1285
0
    return v != 0;
1286
0
}
1287
1288
static int
1289
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1290
                              PyObject *errors, int newline_changed)
1291
0
{
1292
    /* Use existing settings where new settings are not specified */
1293
0
    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1294
0
        return 0;  // no change
1295
0
    }
1296
1297
0
    if (encoding == Py_None) {
1298
0
        encoding = self->encoding;
1299
0
        if (errors == Py_None) {
1300
0
            errors = self->errors;
1301
0
        }
1302
0
        Py_INCREF(encoding);
1303
0
    }
1304
0
    else {
1305
0
        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1306
0
            encoding = _Py_GetLocaleEncodingObject();
1307
0
            if (encoding == NULL) {
1308
0
                return -1;
1309
0
            }
1310
0
        } else {
1311
0
            Py_INCREF(encoding);
1312
0
        }
1313
0
        if (errors == Py_None) {
1314
0
            errors = &_Py_ID(strict);
1315
0
        }
1316
0
    }
1317
0
    Py_INCREF(errors);
1318
1319
0
    const char *c_encoding = PyUnicode_AsUTF8(encoding);
1320
0
    if (c_encoding == NULL) {
1321
0
        Py_DECREF(encoding);
1322
0
        Py_DECREF(errors);
1323
0
        return -1;
1324
0
    }
1325
0
    const char *c_errors = PyUnicode_AsUTF8(errors);
1326
0
    if (c_errors == NULL) {
1327
0
        Py_DECREF(encoding);
1328
0
        Py_DECREF(errors);
1329
0
        return -1;
1330
0
    }
1331
1332
    // Create new encoder & decoder
1333
0
    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
1334
0
    if (codec_info == NULL) {
1335
0
        Py_DECREF(encoding);
1336
0
        Py_DECREF(errors);
1337
0
        return -1;
1338
0
    }
1339
0
    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1340
0
            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1341
0
        Py_DECREF(codec_info);
1342
0
        Py_DECREF(encoding);
1343
0
        Py_DECREF(errors);
1344
0
        return -1;
1345
0
    }
1346
0
    Py_DECREF(codec_info);
1347
1348
0
    Py_SETREF(self->encoding, encoding);
1349
0
    Py_SETREF(self->errors, errors);
1350
1351
0
    return _textiowrapper_fix_encoder_state(self);
1352
0
}
1353
1354
/*[clinic input]
1355
@critical_section
1356
_io.TextIOWrapper.reconfigure
1357
    *
1358
    encoding: object = None
1359
    errors: object = None
1360
    newline as newline_obj: object(c_default="NULL") = None
1361
    line_buffering as line_buffering_obj: object = None
1362
    write_through as write_through_obj: object = None
1363
1364
Reconfigure the text stream with new parameters.
1365
1366
This also does an implicit stream flush.
1367
1368
[clinic start generated code]*/
1369
1370
static PyObject *
1371
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1372
                                   PyObject *errors, PyObject *newline_obj,
1373
                                   PyObject *line_buffering_obj,
1374
                                   PyObject *write_through_obj)
1375
/*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1376
0
{
1377
0
    int line_buffering;
1378
0
    int write_through;
1379
0
    const char *newline = NULL;
1380
1381
0
    if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1382
0
        PyErr_Format(PyExc_TypeError,
1383
0
                "reconfigure() argument 'encoding' must be str or None, not %s",
1384
0
                Py_TYPE(encoding)->tp_name);
1385
0
        return NULL;
1386
0
    }
1387
0
    if (errors != Py_None && !PyUnicode_Check(errors)) {
1388
0
        PyErr_Format(PyExc_TypeError,
1389
0
                "reconfigure() argument 'errors' must be str or None, not %s",
1390
0
                Py_TYPE(errors)->tp_name);
1391
0
        return NULL;
1392
0
    }
1393
0
    if (newline_obj != NULL && newline_obj != Py_None &&
1394
0
        !PyUnicode_Check(newline_obj))
1395
0
    {
1396
0
        PyErr_Format(PyExc_TypeError,
1397
0
                "reconfigure() argument 'newline' must be str or None, not %s",
1398
0
                Py_TYPE(newline_obj)->tp_name);
1399
0
        return NULL;
1400
0
    }
1401
    /* Check if something is in the read buffer */
1402
0
    if (self->decoded_chars != NULL) {
1403
0
        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1404
0
            _unsupported(self->state,
1405
0
                         "It is not possible to set the encoding or newline "
1406
0
                         "of stream after the first read");
1407
0
            return NULL;
1408
0
        }
1409
0
    }
1410
1411
0
    if (newline_obj != NULL && newline_obj != Py_None) {
1412
0
        newline = PyUnicode_AsUTF8(newline_obj);
1413
0
        if (newline == NULL || validate_newline(newline) < 0) {
1414
0
            return NULL;
1415
0
        }
1416
0
    }
1417
1418
0
    line_buffering = convert_optional_bool(line_buffering_obj,
1419
0
                                           self->line_buffering);
1420
0
    if (line_buffering < 0) {
1421
0
        return NULL;
1422
0
    }
1423
0
    write_through = convert_optional_bool(write_through_obj,
1424
0
                                          self->write_through);
1425
0
    if (write_through < 0) {
1426
0
        return NULL;
1427
0
    }
1428
1429
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1430
0
        return NULL;
1431
0
    }
1432
0
    self->b2cratio = 0;
1433
1434
0
    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1435
0
        return NULL;
1436
0
    }
1437
1438
0
    if (textiowrapper_change_encoding(
1439
0
            self, encoding, errors, newline_obj != NULL) < 0) {
1440
0
        return NULL;
1441
0
    }
1442
1443
0
    self->line_buffering = line_buffering;
1444
0
    self->write_through = write_through;
1445
0
    Py_RETURN_NONE;
1446
0
}
1447
1448
static int
1449
textiowrapper_clear(PyObject *op)
1450
0
{
1451
0
    textio *self = textio_CAST(op);
1452
0
    self->ok = 0;
1453
0
    Py_CLEAR(self->buffer);
1454
0
    Py_CLEAR(self->encoding);
1455
0
    Py_CLEAR(self->encoder);
1456
0
    Py_CLEAR(self->decoder);
1457
0
    Py_CLEAR(self->readnl);
1458
0
    Py_CLEAR(self->decoded_chars);
1459
0
    Py_CLEAR(self->pending_bytes);
1460
0
    Py_CLEAR(self->snapshot);
1461
0
    Py_CLEAR(self->errors);
1462
0
    Py_CLEAR(self->raw);
1463
1464
0
    Py_CLEAR(self->dict);
1465
0
    return 0;
1466
0
}
1467
1468
static void
1469
textiowrapper_dealloc(PyObject *op)
1470
0
{
1471
0
    textio *self = textio_CAST(op);
1472
0
    PyTypeObject *tp = Py_TYPE(self);
1473
0
    self->finalizing = 1;
1474
0
    if (_PyIOBase_finalize(op) < 0)
1475
0
        return;
1476
0
    self->ok = 0;
1477
0
    _PyObject_GC_UNTRACK(self);
1478
0
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
1479
0
    (void)textiowrapper_clear(op);
1480
0
    tp->tp_free(self);
1481
0
    Py_DECREF(tp);
1482
0
}
1483
1484
static int
1485
textiowrapper_traverse(PyObject *op, visitproc visit, void *arg)
1486
9.43k
{
1487
9.43k
    textio *self = textio_CAST(op);
1488
9.43k
    Py_VISIT(Py_TYPE(self));
1489
9.43k
    Py_VISIT(self->buffer);
1490
9.43k
    Py_VISIT(self->encoding);
1491
9.43k
    Py_VISIT(self->encoder);
1492
9.43k
    Py_VISIT(self->decoder);
1493
9.43k
    Py_VISIT(self->readnl);
1494
9.43k
    Py_VISIT(self->decoded_chars);
1495
9.43k
    Py_VISIT(self->pending_bytes);
1496
9.43k
    Py_VISIT(self->snapshot);
1497
9.43k
    Py_VISIT(self->errors);
1498
9.43k
    Py_VISIT(self->raw);
1499
1500
9.43k
    Py_VISIT(self->dict);
1501
9.43k
    return 0;
1502
9.43k
}
1503
1504
static PyObject *
1505
_io_TextIOWrapper_closed_get_impl(textio *self);
1506
1507
/* This macro takes some shortcuts to make the common case faster. */
1508
#define CHECK_CLOSED(self) \
1509
170k
    do { \
1510
170k
        int r; \
1511
170k
        PyObject *_res; \
1512
170k
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
1513
170k
            if (self->raw != NULL) \
1514
170k
                r = _PyFileIO_closed(self->raw); \
1515
170k
            else { \
1516
0
                _res = _io_TextIOWrapper_closed_get_impl(self); \
1517
0
                if (_res == NULL) \
1518
0
                    return NULL; \
1519
0
                r = PyObject_IsTrue(_res); \
1520
0
                Py_DECREF(_res); \
1521
0
                if (r < 0) \
1522
0
                    return NULL; \
1523
0
            } \
1524
170k
            if (r > 0) { \
1525
0
                PyErr_SetString(PyExc_ValueError, \
1526
0
                                "I/O operation on closed file."); \
1527
0
                return NULL; \
1528
0
            } \
1529
170k
        } \
1530
170k
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1531
0
            return NULL; \
1532
170k
    } while (0)
1533
1534
#define CHECK_INITIALIZED(self) \
1535
170k
    if (self->ok <= 0) { \
1536
0
        PyErr_SetString(PyExc_ValueError, \
1537
0
            "I/O operation on uninitialized object"); \
1538
0
        return NULL; \
1539
0
    }
1540
1541
#define CHECK_ATTACHED(self) \
1542
170k
    CHECK_INITIALIZED(self); \
1543
170k
    if (self->detached) { \
1544
0
        PyErr_SetString(PyExc_ValueError, \
1545
0
             "underlying buffer has been detached"); \
1546
0
        return NULL; \
1547
0
    }
1548
1549
#define CHECK_ATTACHED_INT(self) \
1550
0
    if (self->ok <= 0) { \
1551
0
        PyErr_SetString(PyExc_ValueError, \
1552
0
            "I/O operation on uninitialized object"); \
1553
0
        return -1; \
1554
0
    } else if (self->detached) { \
1555
0
        PyErr_SetString(PyExc_ValueError, \
1556
0
             "underlying buffer has been detached"); \
1557
0
        return -1; \
1558
0
    }
1559
1560
1561
/*[clinic input]
1562
@critical_section
1563
_io.TextIOWrapper.detach
1564
[clinic start generated code]*/
1565
1566
static PyObject *
1567
_io_TextIOWrapper_detach_impl(textio *self)
1568
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1569
0
{
1570
0
    PyObject *buffer;
1571
0
    CHECK_ATTACHED(self);
1572
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1573
0
        return NULL;
1574
0
    }
1575
0
    buffer = self->buffer;
1576
0
    self->buffer = NULL;
1577
0
    self->detached = 1;
1578
0
    return buffer;
1579
0
}
1580
1581
/* Flush the internal write buffer. This doesn't explicitly flush the
1582
   underlying buffered object, though. */
1583
static int
1584
_textiowrapper_writeflush(textio *self)
1585
170k
{
1586
170k
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
1587
1588
170k
    if (self->pending_bytes == NULL)
1589
0
        return 0;
1590
1591
170k
    PyObject *pending = self->pending_bytes;
1592
170k
    PyObject *b;
1593
1594
170k
    if (PyBytes_Check(pending)) {
1595
0
        b = Py_NewRef(pending);
1596
0
    }
1597
170k
    else if (PyUnicode_Check(pending)) {
1598
170k
        assert(PyUnicode_IS_ASCII(pending));
1599
170k
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1600
170k
        b = PyBytes_FromStringAndSize(
1601
170k
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1602
170k
        if (b == NULL) {
1603
0
            return -1;
1604
0
        }
1605
170k
    }
1606
0
    else {
1607
0
        assert(PyList_Check(pending));
1608
0
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1609
0
        if (b == NULL) {
1610
0
            return -1;
1611
0
        }
1612
1613
0
        char *buf = PyBytes_AsString(b);
1614
0
        Py_ssize_t pos = 0;
1615
1616
0
        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1617
0
            PyObject *obj = PyList_GET_ITEM(pending, i);
1618
0
            char *src;
1619
0
            Py_ssize_t len;
1620
0
            if (PyUnicode_Check(obj)) {
1621
0
                assert(PyUnicode_IS_ASCII(obj));
1622
0
                src = PyUnicode_DATA(obj);
1623
0
                len = PyUnicode_GET_LENGTH(obj);
1624
0
            }
1625
0
            else {
1626
0
                assert(PyBytes_Check(obj));
1627
0
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1628
0
                    Py_DECREF(b);
1629
0
                    return -1;
1630
0
                }
1631
0
            }
1632
0
            memcpy(buf + pos, src, len);
1633
0
            pos += len;
1634
0
        }
1635
0
        assert(pos == self->pending_bytes_count);
1636
0
    }
1637
1638
170k
    self->pending_bytes_count = 0;
1639
170k
    self->pending_bytes = NULL;
1640
170k
    Py_DECREF(pending);
1641
1642
170k
    PyObject *ret;
1643
170k
    do {
1644
170k
        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1645
170k
    } while (ret == NULL && _PyIO_trap_eintr());
1646
170k
    Py_DECREF(b);
1647
    // NOTE: We cleared the buffer, but we don't know how many bytes were
1648
    // actually written when an error occurred.
1649
170k
    if (ret == NULL)
1650
0
        return -1;
1651
170k
    Py_DECREF(ret);
1652
170k
    return 0;
1653
170k
}
1654
1655
/*[clinic input]
1656
@critical_section
1657
_io.TextIOWrapper.write
1658
    text: unicode
1659
    /
1660
[clinic start generated code]*/
1661
1662
static PyObject *
1663
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1664
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1665
170k
{
1666
170k
    PyObject *ret;
1667
170k
    PyObject *b;
1668
170k
    Py_ssize_t textlen;
1669
170k
    int haslf = 0;
1670
170k
    int needflush = 0, text_needflush = 0;
1671
1672
170k
    CHECK_ATTACHED(self);
1673
170k
    CHECK_CLOSED(self);
1674
1675
170k
    if (self->encoder == NULL) {
1676
0
        return _unsupported(self->state, "not writable");
1677
0
    }
1678
1679
170k
    Py_INCREF(text);
1680
1681
170k
    textlen = PyUnicode_GET_LENGTH(text);
1682
1683
170k
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1684
170k
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1685
170k
            haslf = 1;
1686
1687
170k
    if (haslf && self->writetranslate && self->writenl != NULL) {
1688
0
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1689
0
                                                 "ss", "\n", self->writenl);
1690
0
        Py_DECREF(text);
1691
0
        if (newtext == NULL)
1692
0
            return NULL;
1693
0
        text = newtext;
1694
0
    }
1695
1696
170k
    if (self->write_through)
1697
0
        text_needflush = 1;
1698
170k
    if (self->line_buffering &&
1699
170k
        (haslf ||
1700
0
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1701
170k
        needflush = 1;
1702
1703
    /* XXX What if we were just reading? */
1704
170k
    if (self->encodefunc != NULL) {
1705
170k
        if (PyUnicode_IS_ASCII(text) &&
1706
                // See bpo-43260
1707
170k
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1708
170k
                is_asciicompat_encoding(self->encodefunc)) {
1709
170k
            b = Py_NewRef(text);
1710
170k
        }
1711
0
        else {
1712
0
            b = (*self->encodefunc)((PyObject *) self, text);
1713
0
        }
1714
170k
        self->encoding_start_of_stream = 0;
1715
170k
    }
1716
0
    else {
1717
0
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1718
0
    }
1719
1720
170k
    Py_DECREF(text);
1721
170k
    if (b == NULL)
1722
0
        return NULL;
1723
170k
    if (b != text && !PyBytes_Check(b)) {
1724
0
        PyErr_Format(PyExc_TypeError,
1725
0
                     "encoder should return a bytes object, not '%.200s'",
1726
0
                     Py_TYPE(b)->tp_name);
1727
0
        Py_DECREF(b);
1728
0
        return NULL;
1729
0
    }
1730
1731
170k
    Py_ssize_t bytes_len;
1732
170k
    if (b == text) {
1733
170k
        bytes_len = PyUnicode_GET_LENGTH(b);
1734
170k
    }
1735
0
    else {
1736
0
        bytes_len = PyBytes_GET_SIZE(b);
1737
0
    }
1738
1739
    // We should avoid concatenating huge data.
1740
    // Flush the buffer before adding b to the buffer if b is not small.
1741
    // https://github.com/python/cpython/issues/87426
1742
170k
    if (bytes_len >= self->chunk_size) {
1743
        // _textiowrapper_writeflush() calls buffer.write().
1744
        // self->pending_bytes can be appended to during buffer.write()
1745
        // or by another thread.
1746
        // We need to loop until buffer becomes empty.
1747
        // https://github.com/python/cpython/issues/118138
1748
        // https://github.com/python/cpython/issues/119506
1749
0
        while (self->pending_bytes != NULL) {
1750
0
            if (_textiowrapper_writeflush(self) < 0) {
1751
0
                Py_DECREF(b);
1752
0
                return NULL;
1753
0
            }
1754
0
        }
1755
0
    }
1756
1757
170k
    if (self->pending_bytes == NULL) {
1758
170k
        assert(self->pending_bytes_count == 0);
1759
170k
        self->pending_bytes = b;
1760
170k
    }
1761
0
    else if (!PyList_CheckExact(self->pending_bytes)) {
1762
0
        PyObject *list = PyList_New(2);
1763
0
        if (list == NULL) {
1764
0
            Py_DECREF(b);
1765
0
            return NULL;
1766
0
        }
1767
        // Since Python 3.12, allocating a GC object won't trigger GC and release
1768
        // the GIL. See https://github.com/python/cpython/issues/97922
1769
0
        assert(!PyList_CheckExact(self->pending_bytes));
1770
0
        PyList_SET_ITEM(list, 0, self->pending_bytes);
1771
0
        PyList_SET_ITEM(list, 1, b);
1772
0
        self->pending_bytes = list;
1773
0
    }
1774
0
    else {
1775
0
        if (PyList_Append(self->pending_bytes, b) < 0) {
1776
0
            Py_DECREF(b);
1777
0
            return NULL;
1778
0
        }
1779
0
        Py_DECREF(b);
1780
0
    }
1781
1782
170k
    self->pending_bytes_count += bytes_len;
1783
170k
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1784
170k
        text_needflush) {
1785
170k
        if (_textiowrapper_writeflush(self) < 0)
1786
0
            return NULL;
1787
170k
    }
1788
1789
170k
    if (needflush) {
1790
170k
        if (_PyFile_Flush(self->buffer) < 0) {
1791
0
            return NULL;
1792
0
        }
1793
170k
    }
1794
1795
170k
    if (self->snapshot != NULL) {
1796
0
        textiowrapper_set_decoded_chars(self, NULL);
1797
0
        Py_CLEAR(self->snapshot);
1798
0
    }
1799
1800
170k
    if (self->decoder) {
1801
0
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1802
0
        if (ret == NULL)
1803
0
            return NULL;
1804
0
        Py_DECREF(ret);
1805
0
    }
1806
1807
170k
    return PyLong_FromSsize_t(textlen);
1808
170k
}
1809
1810
/* Steal a reference to chars and store it in the decoded_chars buffer.
1811
 */
1812
static void
1813
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1814
0
{
1815
0
    Py_XSETREF(self->decoded_chars, chars);
1816
0
    self->decoded_chars_used = 0;
1817
0
}
1818
1819
static PyObject *
1820
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1821
0
{
1822
0
    PyObject *chars;
1823
0
    Py_ssize_t avail;
1824
1825
0
    if (self->decoded_chars == NULL)
1826
0
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1827
1828
0
    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1829
0
             - self->decoded_chars_used);
1830
1831
0
    assert(avail >= 0);
1832
1833
0
    if (n < 0 || n > avail)
1834
0
        n = avail;
1835
1836
0
    if (self->decoded_chars_used > 0 || n < avail) {
1837
0
        chars = PyUnicode_Substring(self->decoded_chars,
1838
0
                                    self->decoded_chars_used,
1839
0
                                    self->decoded_chars_used + n);
1840
0
        if (chars == NULL)
1841
0
            return NULL;
1842
0
    }
1843
0
    else {
1844
0
        chars = Py_NewRef(self->decoded_chars);
1845
0
    }
1846
1847
0
    self->decoded_chars_used += n;
1848
0
    return chars;
1849
0
}
1850
1851
/* Read and decode the next chunk of data from the BufferedReader.
1852
 */
1853
static int
1854
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1855
0
{
1856
0
    PyObject *dec_buffer = NULL;
1857
0
    PyObject *dec_flags = NULL;
1858
0
    PyObject *input_chunk = NULL;
1859
0
    Py_buffer input_chunk_buf;
1860
0
    PyObject *decoded_chars, *chunk_size;
1861
0
    Py_ssize_t nbytes, nchars;
1862
0
    int eof;
1863
1864
    /* The return value is True unless EOF was reached.  The decoded string is
1865
     * placed in self._decoded_chars (replacing its previous value).  The
1866
     * entire input chunk is sent to the decoder, though some of it may remain
1867
     * buffered in the decoder, yet to be converted.
1868
     */
1869
1870
0
    if (self->decoder == NULL) {
1871
0
        _unsupported(self->state, "not readable");
1872
0
        return -1;
1873
0
    }
1874
1875
0
    if (self->telling) {
1876
        /* To prepare for tell(), we need to snapshot a point in the file
1877
         * where the decoder's input buffer is empty.
1878
         */
1879
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1880
0
                                                     &_Py_ID(getstate));
1881
0
        if (state == NULL)
1882
0
            return -1;
1883
        /* Given this, we know there was a valid snapshot point
1884
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1885
         */
1886
0
        if (!PyTuple_Check(state)) {
1887
0
            PyErr_SetString(PyExc_TypeError,
1888
0
                            "illegal decoder state");
1889
0
            Py_DECREF(state);
1890
0
            return -1;
1891
0
        }
1892
0
        if (!PyArg_ParseTuple(state,
1893
0
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1894
0
        {
1895
0
            Py_DECREF(state);
1896
0
            return -1;
1897
0
        }
1898
1899
0
        if (!PyBytes_Check(dec_buffer)) {
1900
0
            PyErr_Format(PyExc_TypeError,
1901
0
                         "illegal decoder state: the first item should be a "
1902
0
                         "bytes object, not '%.200s'",
1903
0
                         Py_TYPE(dec_buffer)->tp_name);
1904
0
            Py_DECREF(state);
1905
0
            return -1;
1906
0
        }
1907
0
        Py_INCREF(dec_buffer);
1908
0
        Py_INCREF(dec_flags);
1909
0
        Py_DECREF(state);
1910
0
    }
1911
1912
    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1913
0
    if (size_hint > 0) {
1914
0
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1915
0
    }
1916
0
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1917
0
    if (chunk_size == NULL)
1918
0
        goto fail;
1919
1920
0
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
1921
0
        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1922
0
        chunk_size);
1923
0
    Py_DECREF(chunk_size);
1924
0
    if (input_chunk == NULL)
1925
0
        goto fail;
1926
1927
0
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1928
0
        PyErr_Format(PyExc_TypeError,
1929
0
                     "underlying %s() should have returned a bytes-like object, "
1930
0
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1931
0
                     Py_TYPE(input_chunk)->tp_name);
1932
0
        goto fail;
1933
0
    }
1934
1935
0
    nbytes = input_chunk_buf.len;
1936
0
    eof = (nbytes == 0);
1937
1938
0
    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1939
0
                                          input_chunk, eof);
1940
0
    PyBuffer_Release(&input_chunk_buf);
1941
0
    if (decoded_chars == NULL)
1942
0
        goto fail;
1943
1944
0
    textiowrapper_set_decoded_chars(self, decoded_chars);
1945
0
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
1946
0
    if (nchars > 0)
1947
0
        self->b2cratio = (double) nbytes / nchars;
1948
0
    else
1949
0
        self->b2cratio = 0.0;
1950
0
    if (nchars > 0)
1951
0
        eof = 0;
1952
1953
0
    if (self->telling) {
1954
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1955
         * next input to be decoded is dec_buffer + input_chunk.
1956
         */
1957
0
        PyObject *next_input = dec_buffer;
1958
0
        PyBytes_Concat(&next_input, input_chunk);
1959
0
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1960
0
        if (next_input == NULL) {
1961
0
            goto fail;
1962
0
        }
1963
0
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1964
0
        if (snapshot == NULL) {
1965
0
            dec_flags = NULL;
1966
0
            goto fail;
1967
0
        }
1968
0
        Py_XSETREF(self->snapshot, snapshot);
1969
0
    }
1970
0
    Py_DECREF(input_chunk);
1971
1972
0
    return (eof == 0);
1973
1974
0
  fail:
1975
0
    Py_XDECREF(dec_buffer);
1976
0
    Py_XDECREF(dec_flags);
1977
0
    Py_XDECREF(input_chunk);
1978
0
    return -1;
1979
0
}
1980
1981
/*[clinic input]
1982
@critical_section
1983
_io.TextIOWrapper.read
1984
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
1985
    /
1986
[clinic start generated code]*/
1987
1988
static PyObject *
1989
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1990
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
1991
0
{
1992
0
    PyObject *result = NULL, *chunks = NULL;
1993
1994
0
    CHECK_ATTACHED(self);
1995
0
    CHECK_CLOSED(self);
1996
1997
0
    if (self->decoder == NULL) {
1998
0
        return _unsupported(self->state, "not readable");
1999
0
    }
2000
2001
0
    if (_textiowrapper_writeflush(self) < 0)
2002
0
        return NULL;
2003
2004
0
    if (n < 0) {
2005
        /* Read everything */
2006
0
        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
2007
0
        PyObject *decoded;
2008
0
        if (bytes == NULL)
2009
0
            goto fail;
2010
2011
0
        if (bytes == Py_None){
2012
0
            Py_DECREF(bytes);
2013
0
            PyErr_SetString(PyExc_BlockingIOError, "Read returned None.");
2014
0
            return NULL;
2015
0
        }
2016
2017
0
        _PyIO_State *state = self->state;
2018
0
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
2019
0
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
2020
0
                                                          bytes, 1);
2021
0
        else
2022
0
            decoded = PyObject_CallMethodObjArgs(
2023
0
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2024
0
        Py_DECREF(bytes);
2025
0
        if (check_decoded(decoded) < 0)
2026
0
            goto fail;
2027
2028
0
        result = textiowrapper_get_decoded_chars(self, -1);
2029
2030
0
        if (result == NULL) {
2031
0
            Py_DECREF(decoded);
2032
0
            return NULL;
2033
0
        }
2034
2035
0
        PyUnicode_AppendAndDel(&result, decoded);
2036
0
        if (result == NULL)
2037
0
            goto fail;
2038
2039
0
        if (self->snapshot != NULL) {
2040
0
            textiowrapper_set_decoded_chars(self, NULL);
2041
0
            Py_CLEAR(self->snapshot);
2042
0
        }
2043
0
        return result;
2044
0
    }
2045
0
    else {
2046
0
        int res = 1;
2047
0
        Py_ssize_t remaining = n;
2048
2049
0
        result = textiowrapper_get_decoded_chars(self, n);
2050
0
        if (result == NULL)
2051
0
            goto fail;
2052
0
        remaining -= PyUnicode_GET_LENGTH(result);
2053
2054
        /* Keep reading chunks until we have n characters to return */
2055
0
        while (remaining > 0) {
2056
0
            res = textiowrapper_read_chunk(self, remaining);
2057
0
            if (res < 0) {
2058
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2059
                   when EINTR occurs so we needn't do it ourselves. */
2060
0
                if (_PyIO_trap_eintr()) {
2061
0
                    continue;
2062
0
                }
2063
0
                goto fail;
2064
0
            }
2065
0
            if (res == 0)  /* EOF */
2066
0
                break;
2067
0
            if (chunks == NULL) {
2068
0
                chunks = PyList_New(0);
2069
0
                if (chunks == NULL)
2070
0
                    goto fail;
2071
0
            }
2072
0
            if (PyUnicode_GET_LENGTH(result) > 0 &&
2073
0
                PyList_Append(chunks, result) < 0)
2074
0
                goto fail;
2075
0
            Py_DECREF(result);
2076
0
            result = textiowrapper_get_decoded_chars(self, remaining);
2077
0
            if (result == NULL)
2078
0
                goto fail;
2079
0
            remaining -= PyUnicode_GET_LENGTH(result);
2080
0
        }
2081
0
        if (chunks != NULL) {
2082
0
            if (result != NULL && PyList_Append(chunks, result) < 0)
2083
0
                goto fail;
2084
0
            _Py_DECLARE_STR(empty, "");
2085
0
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2086
0
            if (result == NULL)
2087
0
                goto fail;
2088
0
            Py_CLEAR(chunks);
2089
0
        }
2090
0
        return result;
2091
0
    }
2092
0
  fail:
2093
0
    Py_XDECREF(result);
2094
0
    Py_XDECREF(chunks);
2095
0
    return NULL;
2096
0
}
2097
2098
2099
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2100
   that is to the NUL character. Otherwise the function will produce
2101
   incorrect results. */
2102
static const char *
2103
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2104
0
{
2105
0
    if (kind == PyUnicode_1BYTE_KIND) {
2106
0
        assert(ch < 256);
2107
0
        return (char *) memchr((const void *) s, (char) ch, end - s);
2108
0
    }
2109
0
    for (;;) {
2110
0
        while (PyUnicode_READ(kind, s, 0) > ch)
2111
0
            s += kind;
2112
0
        if (PyUnicode_READ(kind, s, 0) == ch)
2113
0
            return s;
2114
0
        if (s == end)
2115
0
            return NULL;
2116
0
        s += kind;
2117
0
    }
2118
0
}
2119
2120
Py_ssize_t
2121
_PyIO_find_line_ending(
2122
    int translated, int universal, PyObject *readnl,
2123
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2124
0
{
2125
0
    Py_ssize_t len = (end - start)/kind;
2126
2127
0
    if (translated) {
2128
        /* Newlines are already translated, only search for \n */
2129
0
        const char *pos = find_control_char(kind, start, end, '\n');
2130
0
        if (pos != NULL)
2131
0
            return (pos - start)/kind + 1;
2132
0
        else {
2133
0
            *consumed = len;
2134
0
            return -1;
2135
0
        }
2136
0
    }
2137
0
    else if (universal) {
2138
        /* Universal newline search. Find any of \r, \r\n, \n
2139
         * The decoder ensures that \r\n are not split in two pieces
2140
         */
2141
0
        const char *s = start;
2142
0
        for (;;) {
2143
0
            Py_UCS4 ch;
2144
            /* Fast path for non-control chars. The loop always ends
2145
               since the Unicode string is NUL-terminated. */
2146
0
            while (PyUnicode_READ(kind, s, 0) > '\r')
2147
0
                s += kind;
2148
0
            if (s >= end) {
2149
0
                *consumed = len;
2150
0
                return -1;
2151
0
            }
2152
0
            ch = PyUnicode_READ(kind, s, 0);
2153
0
            s += kind;
2154
0
            if (ch == '\n')
2155
0
                return (s - start)/kind;
2156
0
            if (ch == '\r') {
2157
0
                if (PyUnicode_READ(kind, s, 0) == '\n')
2158
0
                    return (s - start)/kind + 1;
2159
0
                else
2160
0
                    return (s - start)/kind;
2161
0
            }
2162
0
        }
2163
0
    }
2164
0
    else {
2165
        /* Non-universal mode. */
2166
0
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2167
0
        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2168
        /* Assume that readnl is an ASCII character. */
2169
0
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2170
0
        if (readnl_len == 1) {
2171
0
            const char *pos = find_control_char(kind, start, end, nl[0]);
2172
0
            if (pos != NULL)
2173
0
                return (pos - start)/kind + 1;
2174
0
            *consumed = len;
2175
0
            return -1;
2176
0
        }
2177
0
        else {
2178
0
            const char *s = start;
2179
0
            const char *e = end - (readnl_len - 1)*kind;
2180
0
            const char *pos;
2181
0
            if (e < s)
2182
0
                e = s;
2183
0
            while (s < e) {
2184
0
                Py_ssize_t i;
2185
0
                const char *pos = find_control_char(kind, s, end, nl[0]);
2186
0
                if (pos == NULL || pos >= e)
2187
0
                    break;
2188
0
                for (i = 1; i < readnl_len; i++) {
2189
0
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2190
0
                        break;
2191
0
                }
2192
0
                if (i == readnl_len)
2193
0
                    return (pos - start)/kind + readnl_len;
2194
0
                s = pos + kind;
2195
0
            }
2196
0
            pos = find_control_char(kind, e, end, nl[0]);
2197
0
            if (pos == NULL)
2198
0
                *consumed = len;
2199
0
            else
2200
0
                *consumed = (pos - start)/kind;
2201
0
            return -1;
2202
0
        }
2203
0
    }
2204
0
}
2205
2206
static PyObject *
2207
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2208
0
{
2209
0
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2210
0
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
2211
0
    int res;
2212
2213
0
    CHECK_CLOSED(self);
2214
2215
0
    if (_textiowrapper_writeflush(self) < 0)
2216
0
        return NULL;
2217
2218
0
    chunked = 0;
2219
2220
0
    while (1) {
2221
0
        const char *ptr;
2222
0
        Py_ssize_t line_len;
2223
0
        int kind;
2224
0
        Py_ssize_t consumed = 0;
2225
2226
        /* First, get some data if necessary */
2227
0
        res = 1;
2228
0
        while (!self->decoded_chars ||
2229
0
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2230
0
            res = textiowrapper_read_chunk(self, 0);
2231
0
            if (res < 0) {
2232
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2233
                   when EINTR occurs so we needn't do it ourselves. */
2234
0
                if (_PyIO_trap_eintr()) {
2235
0
                    continue;
2236
0
                }
2237
0
                goto error;
2238
0
            }
2239
0
            if (res == 0)
2240
0
                break;
2241
0
        }
2242
0
        if (res == 0) {
2243
            /* end of file */
2244
0
            textiowrapper_set_decoded_chars(self, NULL);
2245
0
            Py_CLEAR(self->snapshot);
2246
0
            start = endpos = offset_to_buffer = 0;
2247
0
            break;
2248
0
        }
2249
2250
0
        if (remaining == NULL) {
2251
0
            line = Py_NewRef(self->decoded_chars);
2252
0
            start = self->decoded_chars_used;
2253
0
            offset_to_buffer = 0;
2254
0
        }
2255
0
        else {
2256
0
            assert(self->decoded_chars_used == 0);
2257
0
            line = PyUnicode_Concat(remaining, self->decoded_chars);
2258
0
            start = 0;
2259
0
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2260
0
            Py_CLEAR(remaining);
2261
0
            if (line == NULL)
2262
0
                goto error;
2263
0
        }
2264
2265
0
        ptr = PyUnicode_DATA(line);
2266
0
        line_len = PyUnicode_GET_LENGTH(line);
2267
0
        kind = PyUnicode_KIND(line);
2268
2269
0
        endpos = _PyIO_find_line_ending(
2270
0
            self->readtranslate, self->readuniversal, self->readnl,
2271
0
            kind,
2272
0
            ptr + kind * start,
2273
0
            ptr + kind * line_len,
2274
0
            &consumed);
2275
0
        if (endpos >= 0) {
2276
0
            endpos += start;
2277
0
            if (limit >= 0 && (endpos - start) + chunked >= limit)
2278
0
                endpos = start + limit - chunked;
2279
0
            break;
2280
0
        }
2281
2282
        /* We can put aside up to `endpos` */
2283
0
        endpos = consumed + start;
2284
0
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
2285
            /* Didn't find line ending, but reached length limit */
2286
0
            endpos = start + limit - chunked;
2287
0
            break;
2288
0
        }
2289
2290
0
        if (endpos > start) {
2291
            /* No line ending seen yet - put aside current data */
2292
0
            PyObject *s;
2293
0
            if (chunks == NULL) {
2294
0
                chunks = PyList_New(0);
2295
0
                if (chunks == NULL)
2296
0
                    goto error;
2297
0
            }
2298
0
            s = PyUnicode_Substring(line, start, endpos);
2299
0
            if (s == NULL)
2300
0
                goto error;
2301
0
            if (PyList_Append(chunks, s) < 0) {
2302
0
                Py_DECREF(s);
2303
0
                goto error;
2304
0
            }
2305
0
            chunked += PyUnicode_GET_LENGTH(s);
2306
0
            Py_DECREF(s);
2307
0
        }
2308
        /* There may be some remaining bytes we'll have to prepend to the
2309
           next chunk of data */
2310
0
        if (endpos < line_len) {
2311
0
            remaining = PyUnicode_Substring(line, endpos, line_len);
2312
0
            if (remaining == NULL)
2313
0
                goto error;
2314
0
        }
2315
0
        Py_CLEAR(line);
2316
        /* We have consumed the buffer */
2317
0
        textiowrapper_set_decoded_chars(self, NULL);
2318
0
    }
2319
2320
0
    if (line != NULL) {
2321
        /* Our line ends in the current buffer */
2322
0
        self->decoded_chars_used = endpos - offset_to_buffer;
2323
0
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2324
0
            PyObject *s = PyUnicode_Substring(line, start, endpos);
2325
0
            Py_CLEAR(line);
2326
0
            if (s == NULL)
2327
0
                goto error;
2328
0
            line = s;
2329
0
        }
2330
0
    }
2331
0
    if (remaining != NULL) {
2332
0
        if (chunks == NULL) {
2333
0
            chunks = PyList_New(0);
2334
0
            if (chunks == NULL)
2335
0
                goto error;
2336
0
        }
2337
0
        if (PyList_Append(chunks, remaining) < 0)
2338
0
            goto error;
2339
0
        Py_CLEAR(remaining);
2340
0
    }
2341
0
    if (chunks != NULL) {
2342
0
        if (line != NULL) {
2343
0
            if (PyList_Append(chunks, line) < 0)
2344
0
                goto error;
2345
0
            Py_DECREF(line);
2346
0
        }
2347
0
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
2348
0
        if (line == NULL)
2349
0
            goto error;
2350
0
        Py_CLEAR(chunks);
2351
0
    }
2352
0
    if (line == NULL) {
2353
0
        line = &_Py_STR(empty);
2354
0
    }
2355
2356
0
    return line;
2357
2358
0
  error:
2359
0
    Py_XDECREF(chunks);
2360
0
    Py_XDECREF(remaining);
2361
0
    Py_XDECREF(line);
2362
0
    return NULL;
2363
0
}
2364
2365
/*[clinic input]
2366
@critical_section
2367
_io.TextIOWrapper.readline
2368
    size: Py_ssize_t = -1
2369
    /
2370
[clinic start generated code]*/
2371
2372
static PyObject *
2373
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2374
/*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2375
0
{
2376
0
    CHECK_ATTACHED(self);
2377
0
    return _textiowrapper_readline(self, size);
2378
0
}
2379
2380
/* Seek and Tell */
2381
2382
/* Unpacked form of the opaque position number accepted by seek(). */
typedef struct {
2383
    Py_off_t start_pos;     /* buffer offset seek() first goes back to */
2384
    int dec_flags;          /* flags given to decoder.setstate() with an empty buffer */
2385
    int bytes_to_feed;      /* byte count seek() reads from the buffer and decodes */
2386
    int chars_to_skip;      /* decoded characters seek() marks as already used */
2387
    char need_eof;          /* nonzero: seek() passes True as decode()'s 2nd argument */
2388
} cookie_type;
2389
2390
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() (to pack) or
   _PyLong_AsByteArray() (to unpack).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */
2396
2397
/* Size in bytes of the packed cookie: one Py_off_t, three ints, one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2398
2399
#if PY_BIG_ENDIAN
2400
/* We want the least significant byte of start_pos to also be the least
2401
   significant byte of the cookie, which means that in big-endian mode we
2402
   must copy the fields in reverse order. */
2403
2404
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2405
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2406
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2407
# define OFF_CHARS_TO_SKIP  (sizeof(char))
2408
# define OFF_NEED_EOF       0
2409
2410
#else
2411
/* Little-endian mode: the least significant byte of start_pos will
2412
   naturally end up the least significant byte of the cookie. */
2413
2414
0
# define OFF_START_POS      0
2415
0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2416
0
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2417
0
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2418
0
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2419
2420
#endif
2421
2422
static int
2423
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2424
0
{
2425
0
    unsigned char buffer[COOKIE_BUF_LEN];
2426
0
    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2427
0
    if (cookieLong == NULL)
2428
0
        return -1;
2429
2430
0
    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2431
0
                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
2432
0
        Py_DECREF(cookieLong);
2433
0
        return -1;
2434
0
    }
2435
0
    Py_DECREF(cookieLong);
2436
2437
0
    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2438
0
    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2439
0
    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2440
0
    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2441
0
    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2442
2443
0
    return 0;
2444
0
}
2445
2446
static PyObject *
2447
textiowrapper_build_cookie(cookie_type *cookie)
2448
0
{
2449
0
    unsigned char buffer[COOKIE_BUF_LEN];
2450
2451
0
    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2452
0
    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2453
0
    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2454
0
    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2455
0
    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2456
2457
0
    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2458
0
                                 PY_LITTLE_ENDIAN, 0);
2459
0
}
2460
2461
static int
2462
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2463
0
{
2464
0
    PyObject *res;
2465
    /* When seeking to the start of the stream, we call decoder.reset()
2466
       rather than decoder.getstate().
2467
       This is for a few decoders such as utf-16 for which the state value
2468
       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2469
       utf-16, that we are expecting a BOM).
2470
    */
2471
0
    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2472
0
        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2473
0
    }
2474
0
    else {
2475
0
        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2476
0
                                   "((yi))", "", cookie->dec_flags);
2477
0
    }
2478
0
    if (res == NULL) {
2479
0
        return -1;
2480
0
    }
2481
0
    Py_DECREF(res);
2482
0
    return 0;
2483
0
}
2484
2485
static int
2486
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2487
0
{
2488
0
    PyObject *res;
2489
0
    if (start_of_stream) {
2490
0
        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2491
0
        self->encoding_start_of_stream = 1;
2492
0
    }
2493
0
    else {
2494
0
        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2495
0
                                        _PyLong_GetZero());
2496
0
        self->encoding_start_of_stream = 0;
2497
0
    }
2498
0
    if (res == NULL)
2499
0
        return -1;
2500
0
    Py_DECREF(res);
2501
0
    return 0;
2502
0
}
2503
2504
static int
2505
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2506
0
{
2507
    /* Same as _textiowrapper_decoder_setstate() above. */
2508
0
    return _textiowrapper_encoder_reset(
2509
0
        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2510
0
}
2511
2512
/*[clinic input]
2513
@critical_section
2514
_io.TextIOWrapper.seek
2515
    cookie as cookieObj: object
2516
      Zero or an opaque number returned by tell().
2517
    whence: int(c_default='0') = os.SEEK_SET
2518
      The relative position to seek from.
2519
    /
2520
2521
Set the stream position, and return the new stream position.
2522
2523
Four operations are supported, given by the following argument
2524
combinations:
2525
2526
- seek(0, SEEK_SET): Rewind to the start of the stream.
2527
- seek(cookie, SEEK_SET): Restore a previous position;
2528
  'cookie' must be a number returned by tell().
2529
- seek(0, SEEK_END): Fast-forward to the end of the stream.
2530
- seek(0, SEEK_CUR): Leave the current stream position unchanged.
2531
2532
Any other argument combinations are invalid,
2533
and may raise exceptions.
2534
[clinic start generated code]*/
2535
2536
static PyObject *
2537
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2538
/*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2539
0
{
2540
0
    PyObject *posobj;
2541
0
    cookie_type cookie;
2542
0
    PyObject *res;
2543
0
    int cmp;
2544
0
    PyObject *snapshot;
2545
2546
0
    CHECK_ATTACHED(self);
2547
0
    CHECK_CLOSED(self);
2548
2549
0
    Py_INCREF(cookieObj);
2550
2551
0
    if (!self->seekable) {
2552
0
        _unsupported(self->state, "underlying stream is not seekable");
2553
0
        goto fail;
2554
0
    }
2555
2556
0
    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2557
2558
0
    switch (whence) {
2559
0
    case SEEK_CUR:
2560
        /* seek relative to current position */
2561
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2562
0
        if (cmp < 0)
2563
0
            goto fail;
2564
2565
0
        if (cmp == 0) {
2566
0
            _unsupported(self->state, "can't do nonzero cur-relative seeks");
2567
0
            goto fail;
2568
0
        }
2569
2570
        /* Seeking to the current position should attempt to
2571
         * sync the underlying buffer with the current position.
2572
         */
2573
0
        Py_DECREF(cookieObj);
2574
0
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2575
0
        if (cookieObj == NULL)
2576
0
            goto fail;
2577
0
        break;
2578
2579
0
    case SEEK_END:
2580
        /* seek relative to end of file */
2581
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2582
0
        if (cmp < 0)
2583
0
            goto fail;
2584
2585
0
        if (cmp == 0) {
2586
0
            _unsupported(self->state, "can't do nonzero end-relative seeks");
2587
0
            goto fail;
2588
0
        }
2589
2590
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
2591
0
            goto fail;
2592
0
        }
2593
2594
0
        textiowrapper_set_decoded_chars(self, NULL);
2595
0
        Py_CLEAR(self->snapshot);
2596
0
        if (self->decoder) {
2597
0
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2598
0
            if (res == NULL)
2599
0
                goto fail;
2600
0
            Py_DECREF(res);
2601
0
        }
2602
2603
0
        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2604
0
        Py_CLEAR(cookieObj);
2605
0
        if (res == NULL)
2606
0
            goto fail;
2607
0
        if (self->encoder) {
2608
            /* If seek() == 0, we are at the start of stream, otherwise not */
2609
0
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2610
0
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2611
0
                Py_DECREF(res);
2612
0
                goto fail;
2613
0
            }
2614
0
        }
2615
0
        return res;
2616
2617
0
    case SEEK_SET:
2618
0
        break;
2619
2620
0
    default:
2621
0
        PyErr_Format(PyExc_ValueError,
2622
0
                     "invalid whence (%d, should be %d, %d or %d)", whence,
2623
0
                     SEEK_SET, SEEK_CUR, SEEK_END);
2624
0
        goto fail;
2625
0
    }
2626
2627
0
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2628
0
    if (cmp < 0)
2629
0
        goto fail;
2630
2631
0
    if (cmp == 1) {
2632
0
        PyErr_Format(PyExc_ValueError,
2633
0
                     "negative seek position %R", cookieObj);
2634
0
        goto fail;
2635
0
    }
2636
2637
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2638
0
        goto fail;
2639
0
    }
2640
2641
    /* The strategy of seek() is to go back to the safe start point
2642
     * and replay the effect of read(chars_to_skip) from there.
2643
     */
2644
0
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2645
0
        goto fail;
2646
2647
    /* Seek back to the safe start point. */
2648
0
    posobj = PyLong_FromOff_t(cookie.start_pos);
2649
0
    if (posobj == NULL)
2650
0
        goto fail;
2651
0
    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2652
0
    Py_DECREF(posobj);
2653
0
    if (res == NULL)
2654
0
        goto fail;
2655
0
    Py_DECREF(res);
2656
2657
0
    textiowrapper_set_decoded_chars(self, NULL);
2658
0
    Py_CLEAR(self->snapshot);
2659
2660
    /* Restore the decoder to its state from the safe start point. */
2661
0
    if (self->decoder) {
2662
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2663
0
            goto fail;
2664
0
    }
2665
2666
0
    if (cookie.chars_to_skip) {
2667
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2668
0
        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2669
0
                                                     "i", cookie.bytes_to_feed);
2670
0
        PyObject *decoded;
2671
2672
0
        if (input_chunk == NULL)
2673
0
            goto fail;
2674
2675
0
        if (!PyBytes_Check(input_chunk)) {
2676
0
            PyErr_Format(PyExc_TypeError,
2677
0
                         "underlying read() should have returned a bytes "
2678
0
                         "object, not '%.200s'",
2679
0
                         Py_TYPE(input_chunk)->tp_name);
2680
0
            Py_DECREF(input_chunk);
2681
0
            goto fail;
2682
0
        }
2683
2684
0
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2685
0
        if (snapshot == NULL) {
2686
0
            goto fail;
2687
0
        }
2688
0
        Py_XSETREF(self->snapshot, snapshot);
2689
2690
0
        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2691
0
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2692
2693
0
        if (check_decoded(decoded) < 0)
2694
0
            goto fail;
2695
2696
0
        textiowrapper_set_decoded_chars(self, decoded);
2697
2698
        /* Skip chars_to_skip of the decoded characters. */
2699
0
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2700
0
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2701
0
            goto fail;
2702
0
        }
2703
0
        self->decoded_chars_used = cookie.chars_to_skip;
2704
0
    }
2705
0
    else {
2706
0
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2707
0
        if (snapshot == NULL)
2708
0
            goto fail;
2709
0
        Py_XSETREF(self->snapshot, snapshot);
2710
0
    }
2711
2712
    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2713
0
    if (self->encoder) {
2714
0
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2715
0
            goto fail;
2716
0
    }
2717
0
    return cookieObj;
2718
0
  fail:
2719
0
    Py_XDECREF(cookieObj);
2720
0
    return NULL;
2721
2722
0
}
2723
2724
/*[clinic input]
2725
@critical_section
2726
_io.TextIOWrapper.tell
2727
2728
Return the stream position as an opaque number.
2729
2730
The return value of tell() can be given as input to seek(), to restore a
2731
previous stream position.
2732
[clinic start generated code]*/
2733
2734
static PyObject *
2735
_io_TextIOWrapper_tell_impl(textio *self)
2736
/*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2737
0
{
2738
0
    PyObject *res;
2739
0
    PyObject *posobj = NULL;
2740
0
    cookie_type cookie = {0,0,0,0,0};
2741
0
    PyObject *next_input;
2742
0
    Py_ssize_t chars_to_skip, chars_decoded;
2743
0
    Py_ssize_t skip_bytes, skip_back;
2744
0
    PyObject *saved_state = NULL;
2745
0
    const char *input, *input_end;
2746
0
    Py_ssize_t dec_buffer_len;
2747
0
    int dec_flags;
2748
2749
0
    CHECK_ATTACHED(self);
2750
0
    CHECK_CLOSED(self);
2751
2752
0
    if (!self->seekable) {
2753
0
        _unsupported(self->state, "underlying stream is not seekable");
2754
0
        goto fail;
2755
0
    }
2756
0
    if (!self->telling) {
2757
0
        PyErr_SetString(PyExc_OSError,
2758
0
                        "telling position disabled by next() call");
2759
0
        goto fail;
2760
0
    }
2761
2762
0
    if (_textiowrapper_writeflush(self) < 0)
2763
0
        return NULL;
2764
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2765
0
        goto fail;
2766
0
    }
2767
2768
0
    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2769
0
    if (posobj == NULL)
2770
0
        goto fail;
2771
2772
0
    if (self->decoder == NULL || self->snapshot == NULL) {
2773
0
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2774
0
        return posobj;
2775
0
    }
2776
2777
#if defined(HAVE_LARGEFILE_SUPPORT)
2778
    cookie.start_pos = PyLong_AsLongLong(posobj);
2779
#else
2780
0
    cookie.start_pos = PyLong_AsLong(posobj);
2781
0
#endif
2782
0
    Py_DECREF(posobj);
2783
0
    if (PyErr_Occurred())
2784
0
        goto fail;
2785
2786
    /* Skip backward to the snapshot point (see _read_chunk). */
2787
0
    assert(PyTuple_Check(self->snapshot));
2788
0
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2789
0
        goto fail;
2790
2791
0
    assert (PyBytes_Check(next_input));
2792
2793
0
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2794
2795
    /* How many decoded characters have been used up since the snapshot? */
2796
0
    if (self->decoded_chars_used == 0)  {
2797
        /* We haven't moved from the snapshot point. */
2798
0
        return textiowrapper_build_cookie(&cookie);
2799
0
    }
2800
2801
0
    chars_to_skip = self->decoded_chars_used;
2802
2803
    /* Decoder state will be restored at the end */
2804
0
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2805
0
                                             &_Py_ID(getstate));
2806
0
    if (saved_state == NULL)
2807
0
        goto fail;
2808
2809
0
#define DECODER_GETSTATE() do { \
2810
0
        PyObject *dec_buffer; \
2811
0
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2812
0
            &_Py_ID(getstate)); \
2813
0
        if (_state == NULL) \
2814
0
            goto fail; \
2815
0
        if (!PyTuple_Check(_state)) { \
2816
0
            PyErr_SetString(PyExc_TypeError, \
2817
0
                            "illegal decoder state"); \
2818
0
            Py_DECREF(_state); \
2819
0
            goto fail; \
2820
0
        } \
2821
0
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2822
0
                              &dec_buffer, &dec_flags)) \
2823
0
        { \
2824
0
            Py_DECREF(_state); \
2825
0
            goto fail; \
2826
0
        } \
2827
0
        if (!PyBytes_Check(dec_buffer)) { \
2828
0
            PyErr_Format(PyExc_TypeError, \
2829
0
                         "illegal decoder state: the first item should be a " \
2830
0
                         "bytes object, not '%.200s'", \
2831
0
                         Py_TYPE(dec_buffer)->tp_name); \
2832
0
            Py_DECREF(_state); \
2833
0
            goto fail; \
2834
0
        } \
2835
0
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2836
0
        Py_DECREF(_state); \
2837
0
    } while (0)
2838
2839
0
#define DECODER_DECODE(start, len, res) do { \
2840
0
        PyObject *_decoded = _PyObject_CallMethod( \
2841
0
            self->decoder, &_Py_ID(decode), "y#", start, len); \
2842
0
        if (check_decoded(_decoded) < 0) \
2843
0
            goto fail; \
2844
0
        res = PyUnicode_GET_LENGTH(_decoded); \
2845
0
        Py_DECREF(_decoded); \
2846
0
    } while (0)
2847
2848
    /* Fast search for an acceptable start point, close to our
2849
       current pos */
2850
0
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2851
0
    skip_back = 1;
2852
0
    assert(skip_bytes <= PyBytes_GET_SIZE(next_input));
2853
0
    input = PyBytes_AS_STRING(next_input);
2854
0
    while (skip_bytes > 0) {
2855
        /* Decode up to temptative start point */
2856
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2857
0
            goto fail;
2858
0
        DECODER_DECODE(input, skip_bytes, chars_decoded);
2859
0
        if (chars_decoded <= chars_to_skip) {
2860
0
            DECODER_GETSTATE();
2861
0
            if (dec_buffer_len == 0) {
2862
                /* Before pos and no bytes buffered in decoder => OK */
2863
0
                cookie.dec_flags = dec_flags;
2864
0
                chars_to_skip -= chars_decoded;
2865
0
                break;
2866
0
            }
2867
            /* Skip back by buffered amount and reset heuristic */
2868
0
            skip_bytes -= dec_buffer_len;
2869
0
            skip_back = 1;
2870
0
        }
2871
0
        else {
2872
            /* We're too far ahead, skip back a bit */
2873
0
            skip_bytes -= skip_back;
2874
0
            skip_back *= 2;
2875
0
        }
2876
0
    }
2877
0
    if (skip_bytes <= 0) {
2878
0
        skip_bytes = 0;
2879
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2880
0
            goto fail;
2881
0
    }
2882
2883
    /* Note our initial start point. */
2884
0
    cookie.start_pos += skip_bytes;
2885
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2886
0
    if (chars_to_skip == 0)
2887
0
        goto finally;
2888
2889
    /* We should be close to the desired position.  Now feed the decoder one
2890
     * byte at a time until we reach the `chars_to_skip` target.
2891
     * As we go, note the nearest "safe start point" before the current
2892
     * location (a point where the decoder has nothing buffered, so seek()
2893
     * can safely start from there and advance to this location).
2894
     */
2895
0
    chars_decoded = 0;
2896
0
    input = PyBytes_AS_STRING(next_input);
2897
0
    input_end = input + PyBytes_GET_SIZE(next_input);
2898
0
    input += skip_bytes;
2899
0
    while (input < input_end) {
2900
0
        Py_ssize_t n;
2901
2902
0
        DECODER_DECODE(input, (Py_ssize_t)1, n);
2903
        /* We got n chars for 1 byte */
2904
0
        chars_decoded += n;
2905
0
        cookie.bytes_to_feed += 1;
2906
0
        DECODER_GETSTATE();
2907
2908
0
        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2909
            /* Decoder buffer is empty, so this is a safe start point. */
2910
0
            cookie.start_pos += cookie.bytes_to_feed;
2911
0
            chars_to_skip -= chars_decoded;
2912
0
            cookie.dec_flags = dec_flags;
2913
0
            cookie.bytes_to_feed = 0;
2914
0
            chars_decoded = 0;
2915
0
        }
2916
0
        if (chars_decoded >= chars_to_skip)
2917
0
            break;
2918
0
        input++;
2919
0
    }
2920
0
    if (input == input_end) {
2921
        /* We didn't get enough decoded data; signal EOF to get more. */
2922
0
        PyObject *decoded = _PyObject_CallMethod(
2923
0
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2924
0
        if (check_decoded(decoded) < 0)
2925
0
            goto fail;
2926
0
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
2927
0
        Py_DECREF(decoded);
2928
0
        cookie.need_eof = 1;
2929
2930
0
        if (chars_decoded < chars_to_skip) {
2931
0
            PyErr_SetString(PyExc_OSError,
2932
0
                            "can't reconstruct logical file position");
2933
0
            goto fail;
2934
0
        }
2935
0
    }
2936
2937
0
finally:
2938
0
    res = PyObject_CallMethodOneArg(
2939
0
            self->decoder, &_Py_ID(setstate), saved_state);
2940
0
    Py_DECREF(saved_state);
2941
0
    if (res == NULL)
2942
0
        return NULL;
2943
0
    Py_DECREF(res);
2944
2945
    /* The returned cookie corresponds to the last safe start point. */
2946
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2947
0
    return textiowrapper_build_cookie(&cookie);
2948
2949
0
fail:
2950
0
    if (saved_state) {
2951
0
        PyObject *exc = PyErr_GetRaisedException();
2952
0
        res = PyObject_CallMethodOneArg(
2953
0
                self->decoder, &_Py_ID(setstate), saved_state);
2954
0
        _PyErr_ChainExceptions1(exc);
2955
0
        Py_DECREF(saved_state);
2956
0
        Py_XDECREF(res);
2957
0
    }
2958
0
    return NULL;
2959
0
}
2960
2961
/*[clinic input]
2962
@critical_section
2963
_io.TextIOWrapper.truncate
2964
    pos: object = None
2965
    /
2966
[clinic start generated code]*/
2967
2968
static PyObject *
2969
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2970
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2971
0
{
2972
0
    CHECK_ATTACHED(self)
2973
2974
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2975
0
        return NULL;
2976
0
    }
2977
2978
0
    return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2979
0
}
2980
2981
/* tp_repr slot: build "<TYPE[ name=...][ mode=...] encoding=...>".
 *
 * The "name" and "mode" parts are included only when the corresponding
 * attribute can be looked up on the object.  Returns a new reference, or
 * NULL with an exception set.
 */
static PyObject *
textiowrapper_repr(PyObject *op)
{
    textio *self = textio_CAST(op);
    const char *type_name = Py_TYPE(self)->tp_name;
    PyObject *text;           /* representation accumulated so far */
    PyObject *attr;           /* looked-up "name" / "mode" attribute */
    PyObject *piece;          /* formatted fragment to append */
    PyObject *result = NULL;  /* stays NULL on every error path */
    int status;

    CHECK_INITIALIZED(self);

    text = PyUnicode_FromFormat("<%.100s", type_name);
    if (text == NULL) {
        return NULL;
    }

    status = Py_ReprEnter(op);
    if (status != 0) {
        if (status > 0) {
            PyErr_Format(PyExc_RuntimeError,
                         "reentrant call inside %.100s.__repr__",
                         type_name);
        }
        goto done;
    }

    if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &attr) < 0) {
        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
            goto done;
        }
        /* Ignore ValueError raised if the underlying stream was detached */
        PyErr_Clear();
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" name=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto done;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto done;
        }
    }

    if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &attr) < 0) {
        goto done;
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" mode=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto done;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto done;
        }
    }

    result = PyUnicode_FromFormat("%U encoding=%R>",
                                  text, self->encoding);

  done:
    /* Shared exit: drop the partial text and leave the repr guard only if
       Py_ReprEnter() actually succeeded. */
    Py_XDECREF(text);
    if (status == 0) {
        Py_ReprLeave(op);
    }
    return result;
}
3047
3048
3049
/* Inquiries */
3050
3051
/*[clinic input]
3052
@critical_section
3053
_io.TextIOWrapper.fileno
3054
[clinic start generated code]*/
3055
3056
static PyObject *
3057
_io_TextIOWrapper_fileno_impl(textio *self)
3058
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3059
0
{
3060
0
    CHECK_ATTACHED(self);
3061
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3062
0
}
3063
3064
/*[clinic input]
3065
@critical_section
3066
_io.TextIOWrapper.seekable
3067
[clinic start generated code]*/
3068
3069
static PyObject *
3070
_io_TextIOWrapper_seekable_impl(textio *self)
3071
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3072
0
{
3073
0
    CHECK_ATTACHED(self);
3074
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3075
0
}
3076
3077
/*[clinic input]
3078
@critical_section
3079
_io.TextIOWrapper.readable
3080
[clinic start generated code]*/
3081
3082
static PyObject *
3083
_io_TextIOWrapper_readable_impl(textio *self)
3084
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3085
0
{
3086
0
    CHECK_ATTACHED(self);
3087
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3088
0
}
3089
3090
/*[clinic input]
3091
@critical_section
3092
_io.TextIOWrapper.writable
3093
[clinic start generated code]*/
3094
3095
static PyObject *
3096
_io_TextIOWrapper_writable_impl(textio *self)
3097
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3098
0
{
3099
0
    CHECK_ATTACHED(self);
3100
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3101
0
}
3102
3103
/*[clinic input]
3104
@critical_section
3105
_io.TextIOWrapper.isatty
3106
[clinic start generated code]*/
3107
3108
static PyObject *
3109
_io_TextIOWrapper_isatty_impl(textio *self)
3110
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3111
0
{
3112
0
    CHECK_ATTACHED(self);
3113
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3114
0
}
3115
3116
/*[clinic input]
3117
@critical_section
3118
_io.TextIOWrapper.flush
3119
[clinic start generated code]*/
3120
3121
static PyObject *
3122
_io_TextIOWrapper_flush_impl(textio *self)
3123
/*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3124
0
{
3125
0
    CHECK_ATTACHED(self);
3126
0
    CHECK_CLOSED(self);
3127
0
    self->telling = self->seekable;
3128
0
    if (_textiowrapper_writeflush(self) < 0)
3129
0
        return NULL;
3130
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3131
0
}
3132
3133
/*[clinic input]
3134
@critical_section
3135
_io.TextIOWrapper.close
3136
[clinic start generated code]*/
3137
3138
static PyObject *
3139
_io_TextIOWrapper_close_impl(textio *self)
3140
/*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3141
0
{
3142
0
    PyObject *res;
3143
0
    int r;
3144
0
    CHECK_ATTACHED(self);
3145
3146
0
    res = _io_TextIOWrapper_closed_get_impl(self);
3147
0
    if (res == NULL)
3148
0
        return NULL;
3149
0
    r = PyObject_IsTrue(res);
3150
0
    Py_DECREF(res);
3151
0
    if (r < 0)
3152
0
        return NULL;
3153
3154
0
    if (r > 0) {
3155
0
        Py_RETURN_NONE; /* stream already closed */
3156
0
    }
3157
0
    if (self->detached) {
3158
0
        Py_RETURN_NONE; /* gh-142594 null pointer issue */
3159
0
    }
3160
0
    else {
3161
0
        PyObject *exc = NULL;
3162
0
        if (self->finalizing) {
3163
0
            res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3164
0
                                            (PyObject *)self);
3165
0
            if (res) {
3166
0
                Py_DECREF(res);
3167
0
            }
3168
0
            else {
3169
0
                PyErr_Clear();
3170
0
            }
3171
0
        }
3172
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
3173
0
            exc = PyErr_GetRaisedException();
3174
0
        }
3175
3176
0
        res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3177
0
        if (exc != NULL) {
3178
0
            _PyErr_ChainExceptions1(exc);
3179
0
            Py_CLEAR(res);
3180
0
        }
3181
0
        return res;
3182
0
    }
3183
0
}
3184
3185
/* Body of the tp_iternext slot; asserts that op's critical section is held.
 *
 * Returns the next line as a new reference.  Returns NULL with an exception
 * set on error, and NULL without setting an exception here when readline
 * produced an empty string.
 */
static PyObject *
textiowrapper_iternext_lock_held(PyObject *op)
{
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
    textio *self = textio_CAST(op);
    PyObject *next_line;

    CHECK_ATTACHED(self);

    /* tell() reports "telling position disabled by next() call" while this
       flag is 0. */
    self->telling = 0;

    if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
        /* Not the exact type: go through a regular method call. */
        next_line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (next_line != NULL && !PyUnicode_Check(next_line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(next_line)->tp_name);
            Py_DECREF(next_line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        next_line = _textiowrapper_readline(self, -1);
    }

    if (next_line == NULL) {
        return NULL;
    }

    if (PyUnicode_GET_LENGTH(next_line) != 0) {
        return next_line;
    }

    /* Reached EOF or would have blocked */
    Py_DECREF(next_line);
    Py_CLEAR(self->snapshot);
    self->telling = self->seekable;
    return NULL;
}
3223
3224
/* tp_iternext slot: run textiowrapper_iternext_lock_held() inside a
   critical section on op and return its result. */
static PyObject *
textiowrapper_iternext(PyObject *op)
{
    PyObject *next_line;

    Py_BEGIN_CRITICAL_SECTION(op);
    next_line = textiowrapper_iternext_lock_held(op);
    Py_END_CRITICAL_SECTION();

    return next_line;
}
3233
3234
/*[clinic input]
3235
@critical_section
3236
@getter
3237
_io.TextIOWrapper.name
3238
[clinic start generated code]*/
3239
3240
static PyObject *
3241
_io_TextIOWrapper_name_get_impl(textio *self)
3242
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3243
0
{
3244
0
    CHECK_ATTACHED(self);
3245
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3246
0
}
3247
3248
/*[clinic input]
3249
@critical_section
3250
@getter
3251
_io.TextIOWrapper.closed
3252
[clinic start generated code]*/
3253
3254
static PyObject *
3255
_io_TextIOWrapper_closed_get_impl(textio *self)
3256
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3257
0
{
3258
0
    CHECK_ATTACHED(self);
3259
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3260
0
}
3261
3262
/*[clinic input]
3263
@critical_section
3264
@getter
3265
_io.TextIOWrapper.newlines
3266
[clinic start generated code]*/
3267
3268
static PyObject *
3269
_io_TextIOWrapper_newlines_get_impl(textio *self)
3270
/*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3271
0
{
3272
0
    PyObject *res;
3273
0
    CHECK_ATTACHED(self);
3274
0
    if (self->decoder == NULL ||
3275
0
        PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3276
0
    {
3277
0
        Py_RETURN_NONE;
3278
0
    }
3279
0
    return res;
3280
0
}
3281
3282
/*[clinic input]
3283
@critical_section
3284
@getter
3285
_io.TextIOWrapper.errors
3286
[clinic start generated code]*/
3287
3288
static PyObject *
3289
_io_TextIOWrapper_errors_get_impl(textio *self)
3290
/*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3291
0
{
3292
0
    CHECK_INITIALIZED(self);
3293
0
    return Py_NewRef(self->errors);
3294
0
}
3295
3296
/*[clinic input]
3297
@critical_section
3298
@getter
3299
_io.TextIOWrapper._CHUNK_SIZE
3300
[clinic start generated code]*/
3301
3302
static PyObject *
3303
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3304
/*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3305
0
{
3306
0
    CHECK_ATTACHED(self);
3307
0
    return PyLong_FromSsize_t(self->chunk_size);
3308
0
}
3309
3310
/*[clinic input]
3311
@critical_section
3312
@setter
3313
_io.TextIOWrapper._CHUNK_SIZE
3314
[clinic start generated code]*/
3315
3316
static int
3317
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3318
/*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3319
0
{
3320
0
    Py_ssize_t n;
3321
0
    CHECK_ATTACHED_INT(self);
3322
0
    if (value == NULL) {
3323
0
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3324
0
        return -1;
3325
0
    }
3326
0
    n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3327
0
    if (n == -1 && PyErr_Occurred())
3328
0
        return -1;
3329
0
    if (n <= 0) {
3330
0
        PyErr_SetString(PyExc_ValueError,
3331
0
                        "a strictly positive integer is required");
3332
0
        return -1;
3333
0
    }
3334
0
    self->chunk_size = n;
3335
0
    return 0;
3336
0
}
3337
3338
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3339
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3340
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3341
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3342
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3343
    {NULL}
3344
};
3345
3346
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3347
    {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL},
3348
    {NULL}
3349
};
3350
3351
static PyType_Slot nldecoder_slots[] = {
3352
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3353
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3354
    {Py_tp_methods, incrementalnewlinedecoder_methods},
3355
    {Py_tp_getset, incrementalnewlinedecoder_getset},
3356
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3357
    {Py_tp_clear, incrementalnewlinedecoder_clear},
3358
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3359
    {0, NULL},
3360
};
3361
3362
PyType_Spec _Py_nldecoder_spec = {
3363
    .name = "_io.IncrementalNewlineDecoder",
3364
    .basicsize = sizeof(nldecoder_object),
3365
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3366
              Py_TPFLAGS_IMMUTABLETYPE),
3367
    .slots = nldecoder_slots,
3368
};
3369
3370
3371
static PyMethodDef textiowrapper_methods[] = {
3372
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3373
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3374
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3375
    _IO_TEXTIOWRAPPER_READ_METHODDEF
3376
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3377
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3378
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3379
3380
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3381
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3382
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3383
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3384
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3385
3386
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3387
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3388
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3389
3390
    {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS},
3391
    {NULL, NULL}
3392
};
3393
3394
static PyMemberDef textiowrapper_members[] = {
3395
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3396
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3397
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3398
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3399
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3400
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3401
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3402
    {NULL}
3403
};
3404
3405
static PyGetSetDef textiowrapper_getset[] = {
3406
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3407
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3408
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3409
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3410
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3411
    {NULL}
3412
};
3413
3414
static PyType_Slot textiowrapper_slots[] = {
3415
    {Py_tp_dealloc, textiowrapper_dealloc},
3416
    {Py_tp_repr, textiowrapper_repr},
3417
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3418
    {Py_tp_traverse, textiowrapper_traverse},
3419
    {Py_tp_clear, textiowrapper_clear},
3420
    {Py_tp_iternext, textiowrapper_iternext},
3421
    {Py_tp_methods, textiowrapper_methods},
3422
    {Py_tp_members, textiowrapper_members},
3423
    {Py_tp_getset, textiowrapper_getset},
3424
    {Py_tp_init, _io_TextIOWrapper___init__},
3425
    {0, NULL},
3426
};
3427
3428
PyType_Spec _Py_textiowrapper_spec = {
3429
    .name = "_io.TextIOWrapper",
3430
    .basicsize = sizeof(textio),
3431
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3432
              Py_TPFLAGS_IMMUTABLETYPE),
3433
    .slots = textiowrapper_slots,
3434
};