Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Modules/_io/textio.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
    Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h"          // _PyObject_CallMethod()
11
#include "pycore_codecs.h"        // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
13
#include "pycore_interp.h"        // PyInterpreterState.fs_codec
14
#include "pycore_long.h"          // _PyLong_GetZero()
15
#include "pycore_object.h"        // _PyObject_GC_UNTRACK()
16
#include "pycore_pyerrors.h"      // _PyErr_ChainExceptions1()
17
#include "pycore_pystate.h"       // _PyInterpreterState_GET()
18
#include "pycore_unicodeobject.h" // _PyUnicode_AsASCIIString()
19
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
20
21
#include "_iomodule.h"
22
23
/*[clinic input]
24
module _io
25
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
26
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
27
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
28
[clinic start generated code]*/
29
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
30
31
typedef struct nldecoder_object nldecoder_object;
32
typedef struct textio textio;
33
34
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
35
#include "clinic/textio.c.h"
36
#undef clinic_state
37
38
/* TextIOBase */
39
40
PyDoc_STRVAR(textiobase_doc,
41
    "Base class for text I/O.\n"
42
    "\n"
43
    "This class provides a character and line based interface to stream\n"
44
    "I/O. There is no readinto method because Python's character strings\n"
45
    "are immutable.\n"
46
    );
47
48
static PyObject *
49
_unsupported(_PyIO_State *state, const char *message)
50
0
{
51
0
    PyErr_SetString(state->unsupported_operation, message);
52
0
    return NULL;
53
0
}
54
55
/*[clinic input]
56
_io._TextIOBase.detach
57
    cls: defining_class
58
    /
59
60
Separate the underlying buffer from the TextIOBase and return it.
61
62
After the underlying buffer has been detached, the TextIO is in an unusable state.
63
[clinic start generated code]*/
64
65
static PyObject *
66
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
67
/*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/
68
0
{
69
0
    _PyIO_State *state = get_io_state_by_cls(cls);
70
0
    return _unsupported(state, "detach");
71
0
}
72
73
/*[clinic input]
74
_io._TextIOBase.read
75
    cls: defining_class
76
    size: int(unused=True) = -1
77
    /
78
79
Read at most size characters from stream.
80
81
Read from underlying buffer until we have size characters or we hit EOF.
82
If size is negative or omitted, read until EOF.
83
[clinic start generated code]*/
84
85
static PyObject *
86
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
87
                          int Py_UNUSED(size))
88
/*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
89
0
{
90
0
    _PyIO_State *state = get_io_state_by_cls(cls);
91
0
    return _unsupported(state, "read");
92
0
}
93
94
/*[clinic input]
95
_io._TextIOBase.readline
96
    cls: defining_class
97
    size: int(unused=True) = -1
98
    /
99
100
Read until newline or EOF.
101
102
Return an empty string if EOF is hit immediately.
103
If size is specified, at most size characters will be read.
104
[clinic start generated code]*/
105
106
static PyObject *
107
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
108
                              int Py_UNUSED(size))
109
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
110
0
{
111
0
    _PyIO_State *state = get_io_state_by_cls(cls);
112
0
    return _unsupported(state, "readline");
113
0
}
114
115
/*[clinic input]
116
_io._TextIOBase.write
117
    cls: defining_class
118
    s: str(unused=True)
119
    /
120
121
Write string s to stream.
122
123
Return the number of characters written
124
(which is always equal to the length of the string).
125
[clinic start generated code]*/
126
127
static PyObject *
128
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
129
                           const char *Py_UNUSED(s))
130
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
131
0
{
132
0
    _PyIO_State *state = get_io_state_by_cls(cls);
133
0
    return _unsupported(state, "write");
134
0
}
135
136
/*[clinic input]
137
@getter
138
_io._TextIOBase.encoding
139
140
Encoding of the text stream.
141
142
Subclasses should override.
143
[clinic start generated code]*/
144
145
static PyObject *
_io__TextIOBase_encoding_get_impl(PyObject *self)
/*[clinic end generated code: output=e0f5d8f548b92432 input=4736d7621dd38f43]*/
{
    /* Base-class default: the getter simply yields None. */
    return Py_NewRef(Py_None);
}
151
152
/*[clinic input]
153
@getter
154
_io._TextIOBase.newlines
155
156
Line endings translated so far.
157
158
Only line endings translated during reading are considered.
159
160
Subclasses should override.
161
[clinic start generated code]*/
162
163
static PyObject *
_io__TextIOBase_newlines_get_impl(PyObject *self)
/*[clinic end generated code: output=46ec147fb9f00c2a input=a5b196d076af1164]*/
{
    /* Base-class default: the getter simply yields None. */
    return Py_NewRef(Py_None);
}
169
170
/*[clinic input]
171
@getter
172
_io._TextIOBase.errors
173
174
The error setting of the decoder or encoder.
175
176
Subclasses should override.
177
[clinic start generated code]*/
178
179
static PyObject *
_io__TextIOBase_errors_get_impl(PyObject *self)
/*[clinic end generated code: output=c6623d6addcd087d input=974aa52d1db93a82]*/
{
    /* Base-class default: the getter simply yields None. */
    return Py_NewRef(Py_None);
}
185
186
187
static PyMethodDef textiobase_methods[] = {
188
    _IO__TEXTIOBASE_DETACH_METHODDEF
189
    _IO__TEXTIOBASE_READ_METHODDEF
190
    _IO__TEXTIOBASE_READLINE_METHODDEF
191
    _IO__TEXTIOBASE_WRITE_METHODDEF
192
    {NULL, NULL}
193
};
194
195
static PyGetSetDef textiobase_getset[] = {
196
    _IO__TEXTIOBASE_ENCODING_GETSETDEF
197
    _IO__TEXTIOBASE_NEWLINES_GETSETDEF
198
    _IO__TEXTIOBASE_ERRORS_GETSETDEF
199
    {NULL}
200
};
201
202
static PyType_Slot textiobase_slots[] = {
203
    {Py_tp_doc, (void *)textiobase_doc},
204
    {Py_tp_methods, textiobase_methods},
205
    {Py_tp_getset, textiobase_getset},
206
    {0, NULL},
207
};
208
209
/* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
210
PyType_Spec textiobase_spec = {
211
    .name = "_io._TextIOBase",
212
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
213
              Py_TPFLAGS_IMMUTABLETYPE),
214
    .slots = textiobase_slots,
215
};
216
217
/* IncrementalNewlineDecoder */
218
219
struct nldecoder_object {
220
    PyObject_HEAD
221
    PyObject *decoder;
222
    PyObject *errors;
223
    unsigned int pendingcr: 1;
224
    unsigned int translate: 1;
225
    unsigned int seennl: 3;
226
};
227
228
104k
#define nldecoder_object_CAST(op)   ((nldecoder_object *)(op))
229
230
/*[clinic input]
231
_io.IncrementalNewlineDecoder.__init__
232
    decoder: object
233
    translate: bool
234
    errors: object(c_default="NULL") = "strict"
235
236
Codec used when reading a file in universal newlines mode.
237
238
It wraps another incremental decoder, translating \r\n and \r into \n.
239
It also records the types of newlines encountered.  When used with
240
translate=False, it ensures that the newline sequence is returned in
241
one piece. When used with decoder=None, it expects unicode strings as
242
decode input and translates newlines without first invoking an external
243
decoder.
244
[clinic start generated code]*/
245
246
static int
247
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
248
                                            PyObject *decoder, int translate,
249
                                            PyObject *errors)
250
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
251
17.4k
{
252
253
17.4k
    if (errors == NULL) {
254
17.4k
        errors = &_Py_ID(strict);
255
17.4k
    }
256
0
    else {
257
0
        errors = Py_NewRef(errors);
258
0
    }
259
260
17.4k
    Py_XSETREF(self->errors, errors);
261
17.4k
    Py_XSETREF(self->decoder, Py_NewRef(decoder));
262
17.4k
    self->translate = translate ? 1 : 0;
263
17.4k
    self->seennl = 0;
264
17.4k
    self->pendingcr = 0;
265
266
17.4k
    return 0;
267
17.4k
}
268
269
static int
270
incrementalnewlinedecoder_traverse(PyObject *op, visitproc visit, void *arg)
271
3.09k
{
272
3.09k
    nldecoder_object *self = nldecoder_object_CAST(op);
273
3.09k
    Py_VISIT(Py_TYPE(self));
274
3.09k
    Py_VISIT(self->decoder);
275
3.09k
    Py_VISIT(self->errors);
276
3.09k
    return 0;
277
3.09k
}
278
279
static int
280
incrementalnewlinedecoder_clear(PyObject *op)
281
17.4k
{
282
17.4k
    nldecoder_object *self = nldecoder_object_CAST(op);
283
17.4k
    Py_CLEAR(self->decoder);
284
17.4k
    Py_CLEAR(self->errors);
285
17.4k
    return 0;
286
17.4k
}
287
288
static void
289
incrementalnewlinedecoder_dealloc(PyObject *op)
290
17.4k
{
291
17.4k
    nldecoder_object *self = nldecoder_object_CAST(op);
292
17.4k
    PyTypeObject *tp = Py_TYPE(self);
293
17.4k
    _PyObject_GC_UNTRACK(self);
294
17.4k
    (void)incrementalnewlinedecoder_clear(op);
295
17.4k
    tp->tp_free(self);
296
17.4k
    Py_DECREF(tp);
297
17.4k
}
298
299
static int
300
check_decoded(PyObject *decoded)
301
66.0k
{
302
66.0k
    if (decoded == NULL)
303
0
        return -1;
304
66.0k
    if (!PyUnicode_Check(decoded)) {
305
0
        PyErr_Format(PyExc_TypeError,
306
0
                     "decoder should return a string result, not '%.200s'",
307
0
                     Py_TYPE(decoded)->tp_name);
308
0
        Py_DECREF(decoded);
309
0
        return -1;
310
0
    }
311
66.0k
    return 0;
312
66.0k
}
313
314
/* Guard for IncrementalNewlineDecoder methods: if __init__() has not run
 * (the `errors` field is still NULL), set ValueError and return NULL from
 * the *enclosing* function.
 *
 * Wrapped in do { ... } while (0) so that the macro behaves as a single
 * statement (safe inside an unbraced if/else, and the trailing `;` at the
 * call site does not produce a stray empty statement).  The argument is
 * parenthesized so that any pointer expression can be passed.
 */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
320
321
34.3M
/* Bit flags stored in nldecoder_object.seennl, one per newline kind that has
   been encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
/* All three kinds seen: nothing new can be learned by scanning further. */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
325
326
/* Decode `input` and post-process newlines.
 *
 * Steps, in order:
 *   1. Call decoder.decode(input, final), or use `input` itself when the
 *      wrapped decoder is None.  The result must be a str.
 *   2. If a \r was held back by a previous call, prepend it.
 *   3. Unless `final` is true, hold back a trailing \r for the next call.
 *   4. Scan the text to record which newline kinds occur (self->seennl) and,
 *      when self->translate is set, rewrite \r and \r\n as \n.
 *
 * Returns a new reference to the resulting str, or NULL with an exception
 * set.
 *
 * Note: the scanning loops read one element past the last character; this
 * relies on the string data being followed by a terminating zero element.
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = nldecoder_object_CAST(myself);

    CHECK_INITIALIZED_DECODER(self);

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder == Py_None) {
        output = Py_NewRef(input);
    }
    else {
        output = PyObject_CallMethodObjArgs(self->decoder,
            &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
    }

    /* check_decoded() releases `output` itself when it is not a str. */
    if (check_decoded(output) < 0) {
        return NULL;
    }

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        PyObject *modified = PyUnicode_New(output_len + 1,
                                           PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL) {
            goto error;
        }
        int kind = PyUnicode_KIND(modified);
        char *out = PyUnicode_DATA(modified);

        PyUnicode_WRITE(kind, out, 0, '\r');
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_SETREF(output, modified);
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final
        && output_len > 0
        && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
    {
        PyObject *modified = PyUnicode_Substring(output, 0, output_len - 1);
        if (modified == NULL) {
            goto error;
        }
        Py_SETREF(output, modified);
        self->pendingcr = 1;
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *data = PyUnicode_DATA(output);
        Py_ssize_t len = PyUnicode_GET_LENGTH(output);
        int kind = PyUnicode_KIND(output);
        int seennl = self->seennl;
        int only_lf = 0;

        if (len == 0) {
            return output;
        }

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(data, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 && memchr(data, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND) {
                    /* One byte per character: the byte hit is a real \n. */
                    seennl |= SEEN_LF;
                }
                else {
                    /* Wider kinds: the byte found may belong to another
                       character, so confirm character by character. */
                    Py_ssize_t i = 0;
                    do {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, data, i) > '\n') {
                            i++;
                        }
                        c = PyUnicode_READ(kind, data, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                    } while (i < len);
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* When all newline types have already been seen there is no
               need to scan again. */
            if (seennl != SEEN_ALL) {
                Py_ssize_t i = 0;
                do {
                    Py_UCS4 c;
                    /* Fast loop for non-control characters */
                    while (PyUnicode_READ(kind, data, i) > '\r') {
                        i++;
                    }
                    c = PyUnicode_READ(kind, data, i++);
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                    }
                    else if (c == '\r') {
                        if (PyUnicode_READ(kind, data, i) == '\n') {
                            seennl |= SEEN_CRLF;
                            i++;
                        }
                        else {
                            seennl |= SEEN_CR;
                        }
                    }
                } while (i < len && seennl != SEEN_ALL);
            }
        }
        else {
            void *translated;
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, data, in++)) > '\r') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                }
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                }
                else if (c == '\r') {
                    /* \r\n collapses to one \n; a lone \r becomes \n. */
                    if (PyUnicode_READ(kind, data, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else {
                        seennl |= SEEN_CR;
                    }
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                }
                else {
                    /* Some other character <= '\r': either the element read
                       past the end (stop) or ordinary data (copy it). */
                    if (in > len) {
                        break;
                    }
                    PyUnicode_WRITE(kind, translated, out++, c);
                }
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            if (!output) {
                return NULL;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
519
520
/*[clinic input]
521
_io.IncrementalNewlineDecoder.decode
522
    input: object
523
    final: bool = False
524
[clinic start generated code]*/
525
526
static PyObject *
527
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
528
                                          PyObject *input, int final)
529
/*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
530
0
{
531
0
    return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
532
0
}
533
534
/*[clinic input]
535
_io.IncrementalNewlineDecoder.getstate
536
[clinic start generated code]*/
537
538
static PyObject *
539
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
540
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
541
0
{
542
0
    PyObject *buffer;
543
0
    unsigned long long flag;
544
545
0
    CHECK_INITIALIZED_DECODER(self);
546
547
0
    if (self->decoder != Py_None) {
548
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
549
0
           &_Py_ID(getstate));
550
0
        if (state == NULL)
551
0
            return NULL;
552
0
        if (!PyTuple_Check(state)) {
553
0
            PyErr_SetString(PyExc_TypeError,
554
0
                            "illegal decoder state");
555
0
            Py_DECREF(state);
556
0
            return NULL;
557
0
        }
558
0
        if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
559
0
                              &buffer, &flag))
560
0
        {
561
0
            Py_DECREF(state);
562
0
            return NULL;
563
0
        }
564
0
        Py_INCREF(buffer);
565
0
        Py_DECREF(state);
566
0
    }
567
0
    else {
568
0
        buffer = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
569
0
        flag = 0;
570
0
    }
571
0
    flag <<= 1;
572
0
    if (self->pendingcr)
573
0
        flag |= 1;
574
0
    return Py_BuildValue("NK", buffer, flag);
575
0
}
576
577
/*[clinic input]
578
_io.IncrementalNewlineDecoder.setstate
579
    state: object
580
    /
581
[clinic start generated code]*/
582
583
static PyObject *
584
_io_IncrementalNewlineDecoder_setstate_impl(nldecoder_object *self,
585
                                            PyObject *state)
586
/*[clinic end generated code: output=09135cb6e78a1dc8 input=c53fb505a76dbbe2]*/
587
0
{
588
0
    PyObject *buffer;
589
0
    unsigned long long flag;
590
591
0
    CHECK_INITIALIZED_DECODER(self);
592
593
0
    if (!PyTuple_Check(state)) {
594
0
        PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
595
0
        return NULL;
596
0
    }
597
0
    if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
598
0
                          &buffer, &flag))
599
0
    {
600
0
        return NULL;
601
0
    }
602
603
0
    self->pendingcr = (int) (flag & 1);
604
0
    flag >>= 1;
605
606
0
    if (self->decoder != Py_None) {
607
0
        return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
608
0
                                    "((OK))", buffer, flag);
609
0
    }
610
0
    else {
611
0
        Py_RETURN_NONE;
612
0
    }
613
0
}
614
615
/*[clinic input]
616
_io.IncrementalNewlineDecoder.reset
617
[clinic start generated code]*/
618
619
static PyObject *
620
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
621
/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
622
0
{
623
0
    CHECK_INITIALIZED_DECODER(self);
624
625
0
    self->seennl = 0;
626
0
    self->pendingcr = 0;
627
0
    if (self->decoder != Py_None)
628
0
        return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
629
0
    else
630
0
        Py_RETURN_NONE;
631
0
}
632
633
/* Getter for the `newlines` attribute.
 *
 * Maps the seennl bitmask to: None when nothing has been seen, a single str
 * when exactly one newline kind has been seen, or a tuple of strs (ordered
 * "\r", "\n", "\r\n") when several have.
 */
static PyObject *
incrementalnewlinedecoder_newlines_get(PyObject *op, void *Py_UNUSED(context))
{
    nldecoder_object *nld = nldecoder_object_CAST(op);

    CHECK_INITIALIZED_DECODER(nld);

    switch (nld->seennl) {
    /* exactly one kind */
    case SEEN_CR:
        return PyUnicode_FromString("\r");
    case SEEN_LF:
        return PyUnicode_FromString("\n");
    case SEEN_CRLF:
        return PyUnicode_FromString("\r\n");

    /* two kinds */
    case SEEN_CR | SEEN_LF:
        return Py_BuildValue("ss", "\r", "\n");
    case SEEN_CR | SEEN_CRLF:
        return Py_BuildValue("ss", "\r", "\r\n");
    case SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("ss", "\n", "\r\n");

    /* all three */
    case SEEN_CR | SEEN_LF | SEEN_CRLF:
        return Py_BuildValue("sss", "\r", "\n", "\r\n");

    default:
        Py_RETURN_NONE;
    }
}
659
660
/* TextIOWrapper */
661
662
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
663
664
struct textio
665
{
666
    PyObject_HEAD
667
    int ok; /* initialized? */
668
    int detached;
669
    Py_ssize_t chunk_size;
670
    PyObject *buffer;
671
    PyObject *encoding;
672
    PyObject *encoder;
673
    PyObject *decoder;
674
    PyObject *readnl;
675
    PyObject *errors;
676
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
677
    char line_buffering;
678
    char write_through;
679
    char readuniversal;
680
    char readtranslate;
681
    char writetranslate;
682
    char seekable;
683
    char has_read1;
684
    char telling;
685
    char finalizing;
686
    /* Specialized encoding func (see below) */
687
    encodefunc_t encodefunc;
688
    /* Whether or not it's the start of the stream */
689
    char encoding_start_of_stream;
690
691
    /* Reads and writes are internally buffered in order to speed things up.
692
       However, any read will first flush the write buffer if itsn't empty.
693
694
       Please also note that text to be written is first encoded before being
695
       buffered. This is necessary so that encoding errors are immediately
696
       reported to the caller, but it unfortunately means that the
697
       IncrementalEncoder (whose encode() method is always written in Python)
698
       becomes a bottleneck for small writes.
699
    */
700
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
701
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
702
    PyObject *pending_bytes;       // data waiting to be written.
703
                                   // ascii unicode, bytes, or list of them.
704
    Py_ssize_t pending_bytes_count;
705
706
    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
707
     * dec_flags is the second (integer) item of the decoder state and
708
     * next_input is the chunk of input bytes that comes next after the
709
     * snapshot point.  We use this to reconstruct decoder states in tell().
710
     */
711
    PyObject *snapshot;
712
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
713
       the heuristic in tell(). */
714
    double b2cratio;
715
716
    /* Cache raw object if it's a FileIO object */
717
    PyObject *raw;
718
719
    PyObject *weakreflist;
720
    PyObject *dict;
721
722
    _PyIO_State *state;
723
};
724
725
16.7k
#define textio_CAST(op) ((textio *)(op))
726
727
static void
728
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
729
730
/* A couple of specialized cases in order to bypass the slow incremental
731
   encoding methods for the most popular encodings. */
732
733
/* Encode `text` as ASCII, using the wrapper's `errors` setting. */
static PyObject *
ascii_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsASCIIString(text, errors);
}
739
740
/* Encode `text` as UTF-16 with byteorder argument 1, using the wrapper's
   `errors` setting. */
static PyObject *
utf16be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, 1);
}
746
747
/* Encode `text` as UTF-16 with byteorder argument -1, using the wrapper's
   `errors` setting. */
static PyObject *
utf16le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF16(text, errors, -1);
}
753
754
/* Encode `text` as UTF-16.  At the start of the stream the generic encoder
   is called with byteorder argument 0; afterwards the native-endian
   specialized encoder is used instead. */
static PyObject *
utf16_encode(PyObject *op, PyObject *text)
{
    textio *wrapper = textio_CAST(op);

    if (wrapper->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF16(text,
                                      PyUnicode_AsUTF8(wrapper->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf16be_encode(op, text);
#else
    return utf16le_encode(op, text);
#endif
}
768
769
/* Encode `text` as UTF-32 with byteorder argument 1, using the wrapper's
   `errors` setting. */
static PyObject *
utf32be_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, 1);
}
775
776
/* Encode `text` as UTF-32 with byteorder argument -1, using the wrapper's
   `errors` setting. */
static PyObject *
utf32le_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_EncodeUTF32(text, errors, -1);
}
782
783
/* Encode `text` as UTF-32.  At the start of the stream the generic encoder
   is called with byteorder argument 0; afterwards the native-endian
   specialized encoder is used instead. */
static PyObject *
utf32_encode(PyObject *op, PyObject *text)
{
    textio *wrapper = textio_CAST(op);

    if (wrapper->encoding_start_of_stream) {
        return _PyUnicode_EncodeUTF32(text,
                                      PyUnicode_AsUTF8(wrapper->errors), 0);
    }

    /* Skip the BOM and use native byte ordering */
#if PY_BIG_ENDIAN
    return utf32be_encode(op, text);
#else
    return utf32le_encode(op, text);
#endif
}
797
798
/* Encode `text` as UTF-8, using the wrapper's `errors` setting. */
static PyObject *
utf8_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsUTF8String(text, errors);
}
804
805
/* Encode `text` as Latin-1, using the wrapper's `errors` setting. */
static PyObject *
latin1_encode(PyObject *op, PyObject *text)
{
    const char *errors = PyUnicode_AsUTF8(textio_CAST(op)->errors);

    return _PyUnicode_AsLatin1String(text, errors);
}
811
812
// Return true when encoding can be skipped when text is ascii.
813
static inline int
814
is_asciicompat_encoding(encodefunc_t f)
815
0
{
816
0
    return f == ascii_encode || f == latin1_encode || f == utf8_encode;
817
0
}
818
819
/* Map normalized encoding names onto the specialized encoding funcs */
820
821
typedef struct {
822
    const char *name;
823
    encodefunc_t encodefunc;
824
} encodefuncentry;
825
826
static const encodefuncentry encodefuncs[] = {
827
    {"ascii",       ascii_encode},
828
    {"iso8859-1",   latin1_encode},
829
    {"utf-8",       utf8_encode},
830
    {"utf-16-be",   utf16be_encode},
831
    {"utf-16-le",   utf16le_encode},
832
    {"utf-16",      utf16_encode},
833
    {"utf-32-be",   utf32be_encode},
834
    {"utf-32-le",   utf32le_encode},
835
    {"utf-32",      utf32_encode},
836
    {NULL, NULL}
837
};
838
839
static int
840
validate_newline(const char *newline)
841
48
{
842
48
    if (newline && newline[0] != '\0'
843
48
        && !(newline[0] == '\n' && newline[1] == '\0')
844
48
        && !(newline[0] == '\r' && newline[1] == '\0')
845
48
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
846
0
        PyErr_Format(PyExc_ValueError,
847
0
                     "illegal newline value: %s", newline);
848
0
        return -1;
849
0
    }
850
48
    return 0;
851
48
}
852
853
static int
854
set_newline(textio *self, const char *newline)
855
48
{
856
48
    PyObject *old = self->readnl;
857
48
    if (newline == NULL) {
858
0
        self->readnl = NULL;
859
0
    }
860
48
    else {
861
48
        self->readnl = PyUnicode_FromString(newline);
862
48
        if (self->readnl == NULL) {
863
0
            self->readnl = old;
864
0
            return -1;
865
0
        }
866
48
    }
867
48
    self->readuniversal = (newline == NULL || newline[0] == '\0');
868
48
    self->readtranslate = (newline == NULL);
869
48
    self->writetranslate = (newline == NULL || newline[0] != '\0');
870
48
    if (!self->readuniversal && self->readnl != NULL) {
871
        // validate_newline() accepts only ASCII newlines.
872
48
        assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
873
48
        self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
874
48
        if (strcmp(self->writenl, "\n") == 0) {
875
48
            self->writenl = NULL;
876
48
        }
877
48
    }
878
0
    else {
879
#ifdef MS_WINDOWS
880
        self->writenl = "\r\n";
881
#else
882
0
        self->writenl = NULL;
883
0
#endif
884
0
    }
885
48
    Py_XDECREF(old);
886
48
    return 0;
887
48
}
888
889
static int
890
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
891
                           const char *errors)
892
48
{
893
48
    PyObject *res;
894
48
    int r;
895
896
48
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
897
48
    if (res == NULL)
898
0
        return -1;
899
900
48
    r = PyObject_IsTrue(res);
901
48
    Py_DECREF(res);
902
48
    if (r == -1)
903
0
        return -1;
904
905
48
    if (r != 1)
906
32
        return 0;
907
908
16
    Py_CLEAR(self->decoder);
909
16
    self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
910
16
    if (self->decoder == NULL)
911
0
        return -1;
912
913
16
    if (self->readuniversal) {
914
0
        _PyIO_State *state = self->state;
915
0
        PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
916
0
            (PyObject *)state->PyIncrementalNewlineDecoder_Type,
917
0
            self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
918
0
        if (incrementalDecoder == NULL)
919
0
            return -1;
920
0
        Py_XSETREF(self->decoder, incrementalDecoder);
921
0
    }
922
923
16
    return 0;
924
16
}
925
926
static PyObject*
927
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
928
                      int eof)
929
0
{
930
0
    PyObject *chars;
931
932
0
    if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
933
0
        chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
934
0
    else
935
0
        chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
936
0
                                           eof ? Py_True : Py_False, NULL);
937
938
0
    if (check_decoded(chars) < 0)
939
        // check_decoded already decreases refcount
940
0
        return NULL;
941
942
0
    return chars;
943
0
}
944
945
static int
946
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
947
                           const char *errors)
948
48
{
949
48
    PyObject *res;
950
48
    int r;
951
952
48
    res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
953
48
    if (res == NULL)
954
0
        return -1;
955
956
48
    r = PyObject_IsTrue(res);
957
48
    Py_DECREF(res);
958
48
    if (r == -1)
959
0
        return -1;
960
961
48
    if (r != 1)
962
16
        return 0;
963
964
32
    Py_CLEAR(self->encoder);
965
32
    self->encodefunc = NULL;
966
32
    self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
967
32
    if (self->encoder == NULL)
968
0
        return -1;
969
970
    /* Get the normalized named of the codec */
971
32
    if (PyObject_GetOptionalAttr(codec_info, &_Py_ID(name), &res) < 0) {
972
0
        return -1;
973
0
    }
974
32
    if (res != NULL && PyUnicode_Check(res)) {
975
32
        const encodefuncentry *e = encodefuncs;
976
32
        while (e->name != NULL) {
977
32
            if (_PyUnicode_EqualToASCIIString(res, e->name)) {
978
32
                self->encodefunc = e->encodefunc;
979
32
                break;
980
32
            }
981
0
            e++;
982
0
        }
983
32
    }
984
32
    Py_XDECREF(res);
985
986
32
    return 0;
987
32
}
988
989
static int
990
_textiowrapper_fix_encoder_state(textio *self)
991
48
{
992
48
    if (!self->seekable || !self->encoder) {
993
16
        return 0;
994
16
    }
995
996
32
    self->encoding_start_of_stream = 1;
997
998
32
    PyObject *cookieObj = PyObject_CallMethodNoArgs(
999
32
        self->buffer, &_Py_ID(tell));
1000
32
    if (cookieObj == NULL) {
1001
0
        return -1;
1002
0
    }
1003
1004
32
    int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
1005
32
    Py_DECREF(cookieObj);
1006
32
    if (cmp < 0) {
1007
0
        return -1;
1008
0
    }
1009
1010
32
    if (cmp == 0) {
1011
16
        self->encoding_start_of_stream = 0;
1012
16
        PyObject *res = PyObject_CallMethodOneArg(
1013
16
            self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
1014
16
        if (res == NULL) {
1015
0
            return -1;
1016
0
        }
1017
16
        Py_DECREF(res);
1018
16
    }
1019
1020
32
    return 0;
1021
32
}
1022
1023
static int
1024
io_check_errors(PyObject *errors)
1025
48
{
1026
48
    assert(errors != NULL && errors != Py_None);
1027
1028
48
    PyInterpreterState *interp = _PyInterpreterState_GET();
1029
48
#ifndef Py_DEBUG
1030
    /* In release mode, only check in development mode (-X dev) */
1031
48
    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1032
48
        return 0;
1033
48
    }
1034
#else
1035
    /* Always check in debug mode */
1036
#endif
1037
1038
    /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1039
       before_PyUnicode_InitEncodings() is called. */
1040
0
    if (!interp->unicode.fs_codec.encoding) {
1041
0
        return 0;
1042
0
    }
1043
1044
0
    const char *name = _PyUnicode_AsUTF8NoNUL(errors);
1045
0
    if (name == NULL) {
1046
0
        return -1;
1047
0
    }
1048
0
    PyObject *handler = PyCodec_LookupError(name);
1049
0
    if (handler != NULL) {
1050
0
        Py_DECREF(handler);
1051
0
        return 0;
1052
0
    }
1053
0
    return -1;
1054
0
}
1055
1056
1057
1058
/*[clinic input]
1059
_io.TextIOWrapper.__init__
1060
    buffer: object
1061
    encoding: str(accept={str, NoneType}) = None
1062
    errors: object = None
1063
    newline: str(accept={str, NoneType}) = None
1064
    line_buffering: bool = False
1065
    write_through: bool = False
1066
1067
Character and line based layer over a BufferedIOBase object, buffer.
1068
1069
encoding gives the name of the encoding that the stream will be
1070
decoded or encoded with. It defaults to locale.getencoding().
1071
1072
errors determines the strictness of encoding and decoding (see
1073
help(codecs.Codec) or the documentation for codecs.register) and
1074
defaults to "strict".
1075
1076
newline controls how line endings are handled. It can be None, '',
1077
'\n', '\r', and '\r\n'.  It works as follows:
1078
1079
* On input, if newline is None, universal newlines mode is
1080
  enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1081
  these are translated into '\n' before being returned to the
1082
  caller. If it is '', universal newline mode is enabled, but line
1083
  endings are returned to the caller untranslated. If it has any of
1084
  the other legal values, input lines are only terminated by the given
1085
  string, and the line ending is returned to the caller untranslated.
1086
1087
* On output, if newline is None, any '\n' characters written are
1088
  translated to the system default line separator, os.linesep. If
1089
  newline is '' or '\n', no translation takes place. If newline is any
1090
  of the other legal values, any '\n' characters written are translated
1091
  to the given string.
1092
1093
If line_buffering is True, a call to flush is implied when a call to
1094
write contains a newline character.
1095
[clinic start generated code]*/
1096
1097
static int
1098
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1099
                                const char *encoding, PyObject *errors,
1100
                                const char *newline, int line_buffering,
1101
                                int write_through)
1102
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1103
48
{
1104
48
    PyObject *raw, *codec_info = NULL;
1105
48
    PyObject *res;
1106
48
    int r;
1107
1108
48
    self->ok = 0;
1109
48
    self->detached = 0;
1110
1111
48
    if (encoding == NULL) {
1112
0
        PyInterpreterState *interp = _PyInterpreterState_GET();
1113
0
        if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1114
0
            if (PyErr_WarnEx(PyExc_EncodingWarning,
1115
0
                             "'encoding' argument not specified", 1)) {
1116
0
                return -1;
1117
0
            }
1118
0
        }
1119
0
    }
1120
1121
48
    if (errors == Py_None) {
1122
0
        errors = &_Py_ID(strict);
1123
0
    }
1124
48
    else if (!PyUnicode_Check(errors)) {
1125
        // Check 'errors' argument here because Argument Clinic doesn't support
1126
        // 'str(accept={str, NoneType})' converter.
1127
0
        PyErr_Format(
1128
0
            PyExc_TypeError,
1129
0
            "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1130
0
            Py_TYPE(errors)->tp_name);
1131
0
        return -1;
1132
0
    }
1133
48
    else if (io_check_errors(errors)) {
1134
0
        return -1;
1135
0
    }
1136
48
    const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors);
1137
48
    if (errors_str == NULL) {
1138
0
        return -1;
1139
0
    }
1140
1141
48
    if (validate_newline(newline) < 0) {
1142
0
        return -1;
1143
0
    }
1144
1145
48
    Py_CLEAR(self->buffer);
1146
48
    Py_CLEAR(self->encoding);
1147
48
    Py_CLEAR(self->encoder);
1148
48
    Py_CLEAR(self->decoder);
1149
48
    Py_CLEAR(self->readnl);
1150
48
    Py_CLEAR(self->decoded_chars);
1151
48
    Py_CLEAR(self->pending_bytes);
1152
48
    Py_CLEAR(self->snapshot);
1153
48
    Py_CLEAR(self->errors);
1154
48
    Py_CLEAR(self->raw);
1155
48
    self->decoded_chars_used = 0;
1156
48
    self->pending_bytes_count = 0;
1157
48
    self->encodefunc = NULL;
1158
48
    self->b2cratio = 0.0;
1159
1160
48
    if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1161
0
        _Py_DECLARE_STR(utf_8, "utf-8");
1162
0
        self->encoding = &_Py_STR(utf_8);
1163
0
    }
1164
48
    else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1165
0
        self->encoding = _Py_GetLocaleEncodingObject();
1166
0
        if (self->encoding == NULL) {
1167
0
            goto error;
1168
0
        }
1169
0
        assert(PyUnicode_Check(self->encoding));
1170
0
    }
1171
1172
48
    if (self->encoding != NULL) {
1173
0
        encoding = PyUnicode_AsUTF8(self->encoding);
1174
0
        if (encoding == NULL)
1175
0
            goto error;
1176
0
    }
1177
48
    else if (encoding != NULL) {
1178
48
        self->encoding = PyUnicode_FromString(encoding);
1179
48
        if (self->encoding == NULL)
1180
0
            goto error;
1181
48
    }
1182
0
    else {
1183
0
        PyErr_SetString(PyExc_OSError,
1184
0
                        "could not determine default encoding");
1185
0
        goto error;
1186
0
    }
1187
1188
    /* Check we have been asked for a real text encoding */
1189
48
    codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
1190
48
    if (codec_info == NULL) {
1191
0
        Py_CLEAR(self->encoding);
1192
0
        goto error;
1193
0
    }
1194
1195
    /* XXX: Failures beyond this point have the potential to leak elements
1196
     * of the partially constructed object (like self->encoding)
1197
     */
1198
1199
48
    self->errors = Py_NewRef(errors);
1200
48
    self->chunk_size = 8192;
1201
48
    self->line_buffering = line_buffering;
1202
48
    self->write_through = write_through;
1203
48
    if (set_newline(self, newline) < 0) {
1204
0
        goto error;
1205
0
    }
1206
1207
48
    self->buffer = Py_NewRef(buffer);
1208
1209
    /* Build the decoder object */
1210
48
    _PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1211
48
    self->state = state;
1212
48
    if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
1213
0
        goto error;
1214
1215
    /* Build the encoder object */
1216
48
    if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
1217
0
        goto error;
1218
1219
    /* Finished sorting out the codec details */
1220
48
    Py_CLEAR(codec_info);
1221
1222
48
    if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1223
48
        Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1224
48
        Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1225
48
    {
1226
48
        if (PyObject_GetOptionalAttr(buffer, &_Py_ID(raw), &raw) < 0)
1227
0
            goto error;
1228
        /* Cache the raw FileIO object to speed up 'closed' checks */
1229
48
        if (raw != NULL) {
1230
48
            if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1231
48
                self->raw = raw;
1232
0
            else
1233
0
                Py_DECREF(raw);
1234
48
        }
1235
48
    }
1236
1237
48
    res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1238
48
    if (res == NULL)
1239
0
        goto error;
1240
48
    r = PyObject_IsTrue(res);
1241
48
    Py_DECREF(res);
1242
48
    if (r < 0)
1243
0
        goto error;
1244
48
    self->seekable = self->telling = r;
1245
1246
48
    r = PyObject_HasAttrWithError(buffer, &_Py_ID(read1));
1247
48
    if (r < 0) {
1248
0
        goto error;
1249
0
    }
1250
48
    self->has_read1 = r;
1251
1252
48
    self->encoding_start_of_stream = 0;
1253
48
    if (_textiowrapper_fix_encoder_state(self) < 0) {
1254
0
        goto error;
1255
0
    }
1256
1257
48
    self->ok = 1;
1258
48
    return 0;
1259
1260
0
  error:
1261
0
    Py_XDECREF(codec_info);
1262
0
    return -1;
1263
48
}
1264
1265
/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1266
 * -1 on error.
1267
 */
1268
static int
1269
convert_optional_bool(PyObject *obj, int default_value)
1270
0
{
1271
0
    long v;
1272
0
    if (obj == Py_None) {
1273
0
        v = default_value;
1274
0
    }
1275
0
    else {
1276
0
        v = PyLong_AsLong(obj);
1277
0
        if (v == -1 && PyErr_Occurred())
1278
0
            return -1;
1279
0
    }
1280
0
    return v != 0;
1281
0
}
1282
1283
static int
1284
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1285
                              PyObject *errors, int newline_changed)
1286
0
{
1287
    /* Use existing settings where new settings are not specified */
1288
0
    if (encoding == Py_None && errors == Py_None && !newline_changed) {
1289
0
        return 0;  // no change
1290
0
    }
1291
1292
0
    if (encoding == Py_None) {
1293
0
        encoding = self->encoding;
1294
0
        if (errors == Py_None) {
1295
0
            errors = self->errors;
1296
0
        }
1297
0
        Py_INCREF(encoding);
1298
0
    }
1299
0
    else {
1300
0
        if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1301
0
            encoding = _Py_GetLocaleEncodingObject();
1302
0
            if (encoding == NULL) {
1303
0
                return -1;
1304
0
            }
1305
0
        } else {
1306
0
            Py_INCREF(encoding);
1307
0
        }
1308
0
        if (errors == Py_None) {
1309
0
            errors = &_Py_ID(strict);
1310
0
        }
1311
0
    }
1312
0
    Py_INCREF(errors);
1313
1314
0
    const char *c_encoding = PyUnicode_AsUTF8(encoding);
1315
0
    if (c_encoding == NULL) {
1316
0
        Py_DECREF(encoding);
1317
0
        Py_DECREF(errors);
1318
0
        return -1;
1319
0
    }
1320
0
    const char *c_errors = PyUnicode_AsUTF8(errors);
1321
0
    if (c_errors == NULL) {
1322
0
        Py_DECREF(encoding);
1323
0
        Py_DECREF(errors);
1324
0
        return -1;
1325
0
    }
1326
1327
    // Create new encoder & decoder
1328
0
    PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
1329
0
    if (codec_info == NULL) {
1330
0
        Py_DECREF(encoding);
1331
0
        Py_DECREF(errors);
1332
0
        return -1;
1333
0
    }
1334
0
    if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1335
0
            _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1336
0
        Py_DECREF(codec_info);
1337
0
        Py_DECREF(encoding);
1338
0
        Py_DECREF(errors);
1339
0
        return -1;
1340
0
    }
1341
0
    Py_DECREF(codec_info);
1342
1343
0
    Py_SETREF(self->encoding, encoding);
1344
0
    Py_SETREF(self->errors, errors);
1345
1346
0
    return _textiowrapper_fix_encoder_state(self);
1347
0
}
1348
1349
/*[clinic input]
1350
@critical_section
1351
_io.TextIOWrapper.reconfigure
1352
    *
1353
    encoding: object = None
1354
    errors: object = None
1355
    newline as newline_obj: object(c_default="NULL") = None
1356
    line_buffering as line_buffering_obj: object = None
1357
    write_through as write_through_obj: object = None
1358
1359
Reconfigure the text stream with new parameters.
1360
1361
This also does an implicit stream flush.
1362
1363
[clinic start generated code]*/
1364
1365
static PyObject *
1366
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1367
                                   PyObject *errors, PyObject *newline_obj,
1368
                                   PyObject *line_buffering_obj,
1369
                                   PyObject *write_through_obj)
1370
/*[clinic end generated code: output=52b812ff4b3d4b0f input=dc3bd35ebda702a7]*/
1371
0
{
1372
0
    int line_buffering;
1373
0
    int write_through;
1374
0
    const char *newline = NULL;
1375
1376
0
    if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1377
0
        PyErr_Format(PyExc_TypeError,
1378
0
                "reconfigure() argument 'encoding' must be str or None, not %s",
1379
0
                Py_TYPE(encoding)->tp_name);
1380
0
        return NULL;
1381
0
    }
1382
0
    if (errors != Py_None && !PyUnicode_Check(errors)) {
1383
0
        PyErr_Format(PyExc_TypeError,
1384
0
                "reconfigure() argument 'errors' must be str or None, not %s",
1385
0
                Py_TYPE(errors)->tp_name);
1386
0
        return NULL;
1387
0
    }
1388
0
    if (newline_obj != NULL && newline_obj != Py_None &&
1389
0
        !PyUnicode_Check(newline_obj))
1390
0
    {
1391
0
        PyErr_Format(PyExc_TypeError,
1392
0
                "reconfigure() argument 'newline' must be str or None, not %s",
1393
0
                Py_TYPE(newline_obj)->tp_name);
1394
0
        return NULL;
1395
0
    }
1396
    /* Check if something is in the read buffer */
1397
0
    if (self->decoded_chars != NULL) {
1398
0
        if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1399
0
            _unsupported(self->state,
1400
0
                         "It is not possible to set the encoding or newline "
1401
0
                         "of stream after the first read");
1402
0
            return NULL;
1403
0
        }
1404
0
    }
1405
1406
0
    if (newline_obj != NULL && newline_obj != Py_None) {
1407
0
        newline = PyUnicode_AsUTF8(newline_obj);
1408
0
        if (newline == NULL || validate_newline(newline) < 0) {
1409
0
            return NULL;
1410
0
        }
1411
0
    }
1412
1413
0
    line_buffering = convert_optional_bool(line_buffering_obj,
1414
0
                                           self->line_buffering);
1415
0
    if (line_buffering < 0) {
1416
0
        return NULL;
1417
0
    }
1418
0
    write_through = convert_optional_bool(write_through_obj,
1419
0
                                          self->write_through);
1420
0
    if (write_through < 0) {
1421
0
        return NULL;
1422
0
    }
1423
1424
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1425
0
        return NULL;
1426
0
    }
1427
0
    self->b2cratio = 0;
1428
1429
0
    if (newline_obj != NULL && set_newline(self, newline) < 0) {
1430
0
        return NULL;
1431
0
    }
1432
1433
0
    if (textiowrapper_change_encoding(
1434
0
            self, encoding, errors, newline_obj != NULL) < 0) {
1435
0
        return NULL;
1436
0
    }
1437
1438
0
    self->line_buffering = line_buffering;
1439
0
    self->write_through = write_through;
1440
0
    Py_RETURN_NONE;
1441
0
}
1442
1443
static int
1444
textiowrapper_clear(PyObject *op)
1445
0
{
1446
0
    textio *self = textio_CAST(op);
1447
0
    self->ok = 0;
1448
0
    Py_CLEAR(self->buffer);
1449
0
    Py_CLEAR(self->encoding);
1450
0
    Py_CLEAR(self->encoder);
1451
0
    Py_CLEAR(self->decoder);
1452
0
    Py_CLEAR(self->readnl);
1453
0
    Py_CLEAR(self->decoded_chars);
1454
0
    Py_CLEAR(self->pending_bytes);
1455
0
    Py_CLEAR(self->snapshot);
1456
0
    Py_CLEAR(self->errors);
1457
0
    Py_CLEAR(self->raw);
1458
1459
0
    Py_CLEAR(self->dict);
1460
0
    return 0;
1461
0
}
1462
1463
static void
1464
textiowrapper_dealloc(PyObject *op)
1465
0
{
1466
0
    textio *self = textio_CAST(op);
1467
0
    PyTypeObject *tp = Py_TYPE(self);
1468
0
    self->finalizing = 1;
1469
0
    if (_PyIOBase_finalize(op) < 0)
1470
0
        return;
1471
0
    self->ok = 0;
1472
0
    _PyObject_GC_UNTRACK(self);
1473
0
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
1474
0
    (void)textiowrapper_clear(op);
1475
0
    tp->tp_free(self);
1476
0
    Py_DECREF(tp);
1477
0
}
1478
1479
static int
1480
textiowrapper_traverse(PyObject *op, visitproc visit, void *arg)
1481
16.7k
{
1482
16.7k
    textio *self = textio_CAST(op);
1483
16.7k
    Py_VISIT(Py_TYPE(self));
1484
16.7k
    Py_VISIT(self->buffer);
1485
16.7k
    Py_VISIT(self->encoding);
1486
16.7k
    Py_VISIT(self->encoder);
1487
16.7k
    Py_VISIT(self->decoder);
1488
16.7k
    Py_VISIT(self->readnl);
1489
16.7k
    Py_VISIT(self->decoded_chars);
1490
16.7k
    Py_VISIT(self->pending_bytes);
1491
16.7k
    Py_VISIT(self->snapshot);
1492
16.7k
    Py_VISIT(self->errors);
1493
16.7k
    Py_VISIT(self->raw);
1494
1495
16.7k
    Py_VISIT(self->dict);
1496
16.7k
    return 0;
1497
16.7k
}
1498
1499
static PyObject *
1500
_io_TextIOWrapper_closed_get_impl(textio *self);
1501
1502
/* Raise ValueError("I/O operation on closed file.") and `return NULL` from
 * the enclosing function if the stream is closed; also returns NULL if the
 * closed check itself fails.
 *
 * This macro takes some shortcuts to make the common case faster: for an
 * exact TextIOWrapper instance with a cached raw FileIO object
 * (self->raw != NULL) the closed state is read via _PyFileIO_closed();
 * without a cached raw object the `closed` getter implementation is called
 * and its result tested for truth.  Any other type (i.e. a subclass) goes
 * through _PyIOBase_check_closed().
 *
 * Only usable in functions returning a pointer.  `self` is evaluated
 * several times.
 */
1503
#define CHECK_CLOSED(self) \
1504
0
    do { \
1505
0
        int r; \
1506
0
        PyObject *_res; \
1507
0
        if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
1508
0
            if (self->raw != NULL) \
1509
0
                r = _PyFileIO_closed(self->raw); \
1510
0
            else { \
1511
0
                _res = _io_TextIOWrapper_closed_get_impl(self); \
1512
0
                if (_res == NULL) \
1513
0
                    return NULL; \
1514
0
                r = PyObject_IsTrue(_res); \
1515
0
                Py_DECREF(_res); \
1516
0
                if (r < 0) \
1517
0
                    return NULL; \
1518
0
            } \
1519
0
            if (r > 0) { \
1520
0
                PyErr_SetString(PyExc_ValueError, \
1521
0
                                "I/O operation on closed file."); \
1522
0
                return NULL; \
1523
0
            } \
1524
0
        } \
1525
0
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1526
0
            return NULL; \
1527
0
    } while (0)
1528
1529
/* Raise ValueError and `return NULL` from the enclosing function when
 * self->ok <= 0, i.e. the object has not been successfully initialised.
 *
 * Note: this expands to a bare `if` statement (no do/while wrapper), so it
 * must be used as a stand-alone statement.
 */
#define CHECK_INITIALIZED(self) \
1530
0
    if (self->ok <= 0) { \
1531
0
        PyErr_SetString(PyExc_ValueError, \
1532
0
            "I/O operation on uninitialized object"); \
1533
0
        return NULL; \
1534
0
    }
1535
1536
/* CHECK_INITIALIZED() followed by a check that the underlying buffer has
 * not been detached; raises ValueError and returns NULL otherwise.
 * Expands to two statements, so it must not be used as the body of an
 * unbraced if/else/loop.
 */
#define CHECK_ATTACHED(self) \
1537
0
    CHECK_INITIALIZED(self); \
1538
0
    if (self->detached) { \
1539
0
        PyErr_SetString(PyExc_ValueError, \
1540
0
             "underlying buffer has been detached"); \
1541
0
        return NULL; \
1542
0
    }
1543
1544
/* Same checks as CHECK_ATTACHED(), for functions that report errors by
 * returning -1 instead of NULL.
 */
#define CHECK_ATTACHED_INT(self) \
1545
0
    if (self->ok <= 0) { \
1546
0
        PyErr_SetString(PyExc_ValueError, \
1547
0
            "I/O operation on uninitialized object"); \
1548
0
        return -1; \
1549
0
    } else if (self->detached) { \
1550
0
        PyErr_SetString(PyExc_ValueError, \
1551
0
             "underlying buffer has been detached"); \
1552
0
        return -1; \
1553
0
    }
1554
1555
1556
/*[clinic input]
1557
@critical_section
1558
_io.TextIOWrapper.detach
1559
[clinic start generated code]*/
1560
1561
static PyObject *
1562
_io_TextIOWrapper_detach_impl(textio *self)
1563
/*[clinic end generated code: output=7ba3715cd032d5f2 input=c908a3b4ef203b0f]*/
1564
0
{
1565
0
    PyObject *buffer;
1566
0
    CHECK_ATTACHED(self);
1567
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
1568
0
        return NULL;
1569
0
    }
1570
0
    buffer = self->buffer;
1571
0
    self->buffer = NULL;
1572
0
    self->detached = 1;
1573
0
    return buffer;
1574
0
}
1575
1576
/* Flush the internal write buffer. This doesn't explicitly flush the
1577
   underlying buffered object, though. */
1578
static int
1579
_textiowrapper_writeflush(textio *self)
1580
0
{
1581
0
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
1582
1583
0
    if (self->pending_bytes == NULL)
1584
0
        return 0;
1585
1586
0
    PyObject *pending = self->pending_bytes;
1587
0
    PyObject *b;
1588
1589
0
    if (PyBytes_Check(pending)) {
1590
0
        b = Py_NewRef(pending);
1591
0
    }
1592
0
    else if (PyUnicode_Check(pending)) {
1593
0
        assert(PyUnicode_IS_ASCII(pending));
1594
0
        assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1595
0
        b = PyBytes_FromStringAndSize(
1596
0
                PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1597
0
        if (b == NULL) {
1598
0
            return -1;
1599
0
        }
1600
0
    }
1601
0
    else {
1602
0
        assert(PyList_Check(pending));
1603
0
        b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1604
0
        if (b == NULL) {
1605
0
            return -1;
1606
0
        }
1607
1608
0
        char *buf = PyBytes_AsString(b);
1609
0
        Py_ssize_t pos = 0;
1610
1611
0
        for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1612
0
            PyObject *obj = PyList_GET_ITEM(pending, i);
1613
0
            char *src;
1614
0
            Py_ssize_t len;
1615
0
            if (PyUnicode_Check(obj)) {
1616
0
                assert(PyUnicode_IS_ASCII(obj));
1617
0
                src = PyUnicode_DATA(obj);
1618
0
                len = PyUnicode_GET_LENGTH(obj);
1619
0
            }
1620
0
            else {
1621
0
                assert(PyBytes_Check(obj));
1622
0
                if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1623
0
                    Py_DECREF(b);
1624
0
                    return -1;
1625
0
                }
1626
0
            }
1627
0
            memcpy(buf + pos, src, len);
1628
0
            pos += len;
1629
0
        }
1630
0
        assert(pos == self->pending_bytes_count);
1631
0
    }
1632
1633
0
    self->pending_bytes_count = 0;
1634
0
    self->pending_bytes = NULL;
1635
0
    Py_DECREF(pending);
1636
1637
0
    PyObject *ret;
1638
0
    do {
1639
0
        ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1640
0
    } while (ret == NULL && _PyIO_trap_eintr());
1641
0
    Py_DECREF(b);
1642
    // NOTE: We cleared buffer but we don't know how many bytes are actually written
1643
    // when an error occurred.
1644
0
    if (ret == NULL)
1645
0
        return -1;
1646
0
    Py_DECREF(ret);
1647
0
    return 0;
1648
0
}
1649
1650
/*[clinic input]
1651
@critical_section
1652
_io.TextIOWrapper.write
1653
    text: unicode
1654
    /
1655
[clinic start generated code]*/
1656
1657
static PyObject *
1658
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1659
/*[clinic end generated code: output=d2deb0d50771fcec input=73ec95c5c4a3489c]*/
1660
0
{
1661
0
    PyObject *ret;
1662
0
    PyObject *b;
1663
0
    Py_ssize_t textlen;
1664
0
    int haslf = 0;
1665
0
    int needflush = 0, text_needflush = 0;
1666
1667
0
    CHECK_ATTACHED(self);
1668
0
    CHECK_CLOSED(self);
1669
1670
0
    if (self->encoder == NULL) {
1671
0
        return _unsupported(self->state, "not writable");
1672
0
    }
1673
1674
0
    Py_INCREF(text);
1675
1676
0
    textlen = PyUnicode_GET_LENGTH(text);
1677
1678
0
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1679
0
        if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1680
0
            haslf = 1;
1681
1682
0
    if (haslf && self->writetranslate && self->writenl != NULL) {
1683
0
        PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1684
0
                                                 "ss", "\n", self->writenl);
1685
0
        Py_DECREF(text);
1686
0
        if (newtext == NULL)
1687
0
            return NULL;
1688
0
        text = newtext;
1689
0
    }
1690
1691
0
    if (self->write_through)
1692
0
        text_needflush = 1;
1693
0
    if (self->line_buffering &&
1694
0
        (haslf ||
1695
0
         PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1696
0
        needflush = 1;
1697
1698
    /* XXX What if we were just reading? */
1699
0
    if (self->encodefunc != NULL) {
1700
0
        if (PyUnicode_IS_ASCII(text) &&
1701
                // See bpo-43260
1702
0
                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1703
0
                is_asciicompat_encoding(self->encodefunc)) {
1704
0
            b = Py_NewRef(text);
1705
0
        }
1706
0
        else {
1707
0
            b = (*self->encodefunc)((PyObject *) self, text);
1708
0
        }
1709
0
        self->encoding_start_of_stream = 0;
1710
0
    }
1711
0
    else {
1712
0
        b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1713
0
    }
1714
1715
0
    Py_DECREF(text);
1716
0
    if (b == NULL)
1717
0
        return NULL;
1718
0
    if (b != text && !PyBytes_Check(b)) {
1719
0
        PyErr_Format(PyExc_TypeError,
1720
0
                     "encoder should return a bytes object, not '%.200s'",
1721
0
                     Py_TYPE(b)->tp_name);
1722
0
        Py_DECREF(b);
1723
0
        return NULL;
1724
0
    }
1725
1726
0
    Py_ssize_t bytes_len;
1727
0
    if (b == text) {
1728
0
        bytes_len = PyUnicode_GET_LENGTH(b);
1729
0
    }
1730
0
    else {
1731
0
        bytes_len = PyBytes_GET_SIZE(b);
1732
0
    }
1733
1734
    // We should avoid concatenating huge data.
1735
    // Flush the buffer before adding b to the buffer if b is not small.
1736
    // https://github.com/python/cpython/issues/87426
1737
0
    if (bytes_len >= self->chunk_size) {
1738
        // _textiowrapper_writeflush() calls buffer.write().
1739
        // self->pending_bytes can be appended during buffer->write()
1740
        // or other thread.
1741
        // We need to loop until buffer becomes empty.
1742
        // https://github.com/python/cpython/issues/118138
1743
        // https://github.com/python/cpython/issues/119506
1744
0
        while (self->pending_bytes != NULL) {
1745
0
            if (_textiowrapper_writeflush(self) < 0) {
1746
0
                Py_DECREF(b);
1747
0
                return NULL;
1748
0
            }
1749
0
        }
1750
0
    }
1751
1752
0
    if (self->pending_bytes == NULL) {
1753
0
        assert(self->pending_bytes_count == 0);
1754
0
        self->pending_bytes = b;
1755
0
    }
1756
0
    else if (!PyList_CheckExact(self->pending_bytes)) {
1757
0
        PyObject *list = PyList_New(2);
1758
0
        if (list == NULL) {
1759
0
            Py_DECREF(b);
1760
0
            return NULL;
1761
0
        }
1762
        // Since Python 3.12, allocating GC object won't trigger GC and release
1763
        // GIL. See https://github.com/python/cpython/issues/97922
1764
0
        assert(!PyList_CheckExact(self->pending_bytes));
1765
0
        PyList_SET_ITEM(list, 0, self->pending_bytes);
1766
0
        PyList_SET_ITEM(list, 1, b);
1767
0
        self->pending_bytes = list;
1768
0
    }
1769
0
    else {
1770
0
        if (PyList_Append(self->pending_bytes, b) < 0) {
1771
0
            Py_DECREF(b);
1772
0
            return NULL;
1773
0
        }
1774
0
        Py_DECREF(b);
1775
0
    }
1776
1777
0
    self->pending_bytes_count += bytes_len;
1778
0
    if (self->pending_bytes_count >= self->chunk_size || needflush ||
1779
0
        text_needflush) {
1780
0
        if (_textiowrapper_writeflush(self) < 0)
1781
0
            return NULL;
1782
0
    }
1783
1784
0
    if (needflush) {
1785
0
        if (_PyFile_Flush(self->buffer) < 0) {
1786
0
            return NULL;
1787
0
        }
1788
0
    }
1789
1790
0
    if (self->snapshot != NULL) {
1791
0
        textiowrapper_set_decoded_chars(self, NULL);
1792
0
        Py_CLEAR(self->snapshot);
1793
0
    }
1794
1795
0
    if (self->decoder) {
1796
0
        ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1797
0
        if (ret == NULL)
1798
0
            return NULL;
1799
0
        Py_DECREF(ret);
1800
0
    }
1801
1802
0
    return PyLong_FromSsize_t(textlen);
1803
0
}
1804
1805
/* Steal a reference to chars and store it in the decoded_char buffer;
1806
 */
1807
static void
1808
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1809
0
{
1810
0
    Py_XSETREF(self->decoded_chars, chars);
1811
0
    self->decoded_chars_used = 0;
1812
0
}
1813
1814
static PyObject *
1815
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1816
0
{
1817
0
    PyObject *chars;
1818
0
    Py_ssize_t avail;
1819
1820
0
    if (self->decoded_chars == NULL)
1821
0
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
1822
1823
0
    avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1824
0
             - self->decoded_chars_used);
1825
1826
0
    assert(avail >= 0);
1827
1828
0
    if (n < 0 || n > avail)
1829
0
        n = avail;
1830
1831
0
    if (self->decoded_chars_used > 0 || n < avail) {
1832
0
        chars = PyUnicode_Substring(self->decoded_chars,
1833
0
                                    self->decoded_chars_used,
1834
0
                                    self->decoded_chars_used + n);
1835
0
        if (chars == NULL)
1836
0
            return NULL;
1837
0
    }
1838
0
    else {
1839
0
        chars = Py_NewRef(self->decoded_chars);
1840
0
    }
1841
1842
0
    self->decoded_chars_used += n;
1843
0
    return chars;
1844
0
}
1845
1846
/* Read and decode the next chunk of data from the BufferedReader.
1847
 */
1848
static int
1849
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1850
0
{
1851
0
    PyObject *dec_buffer = NULL;
1852
0
    PyObject *dec_flags = NULL;
1853
0
    PyObject *input_chunk = NULL;
1854
0
    Py_buffer input_chunk_buf;
1855
0
    PyObject *decoded_chars, *chunk_size;
1856
0
    Py_ssize_t nbytes, nchars;
1857
0
    int eof;
1858
1859
    /* The return value is True unless EOF was reached.  The decoded string is
1860
     * placed in self._decoded_chars (replacing its previous value).  The
1861
     * entire input chunk is sent to the decoder, though some of it may remain
1862
     * buffered in the decoder, yet to be converted.
1863
     */
1864
1865
0
    if (self->decoder == NULL) {
1866
0
        _unsupported(self->state, "not readable");
1867
0
        return -1;
1868
0
    }
1869
1870
0
    if (self->telling) {
1871
        /* To prepare for tell(), we need to snapshot a point in the file
1872
         * where the decoder's input buffer is empty.
1873
         */
1874
0
        PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1875
0
                                                     &_Py_ID(getstate));
1876
0
        if (state == NULL)
1877
0
            return -1;
1878
        /* Given this, we know there was a valid snapshot point
1879
         * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1880
         */
1881
0
        if (!PyTuple_Check(state)) {
1882
0
            PyErr_SetString(PyExc_TypeError,
1883
0
                            "illegal decoder state");
1884
0
            Py_DECREF(state);
1885
0
            return -1;
1886
0
        }
1887
0
        if (!PyArg_ParseTuple(state,
1888
0
                              "OO;illegal decoder state", &dec_buffer, &dec_flags))
1889
0
        {
1890
0
            Py_DECREF(state);
1891
0
            return -1;
1892
0
        }
1893
1894
0
        if (!PyBytes_Check(dec_buffer)) {
1895
0
            PyErr_Format(PyExc_TypeError,
1896
0
                         "illegal decoder state: the first item should be a "
1897
0
                         "bytes object, not '%.200s'",
1898
0
                         Py_TYPE(dec_buffer)->tp_name);
1899
0
            Py_DECREF(state);
1900
0
            return -1;
1901
0
        }
1902
0
        Py_INCREF(dec_buffer);
1903
0
        Py_INCREF(dec_flags);
1904
0
        Py_DECREF(state);
1905
0
    }
1906
1907
    /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1908
0
    if (size_hint > 0) {
1909
0
        size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1910
0
    }
1911
0
    chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1912
0
    if (chunk_size == NULL)
1913
0
        goto fail;
1914
1915
0
    input_chunk = PyObject_CallMethodOneArg(self->buffer,
1916
0
        (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1917
0
        chunk_size);
1918
0
    Py_DECREF(chunk_size);
1919
0
    if (input_chunk == NULL)
1920
0
        goto fail;
1921
1922
0
    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1923
0
        PyErr_Format(PyExc_TypeError,
1924
0
                     "underlying %s() should have returned a bytes-like object, "
1925
0
                     "not '%.200s'", (self->has_read1 ? "read1": "read"),
1926
0
                     Py_TYPE(input_chunk)->tp_name);
1927
0
        goto fail;
1928
0
    }
1929
1930
0
    nbytes = input_chunk_buf.len;
1931
0
    eof = (nbytes == 0);
1932
1933
0
    decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1934
0
                                          input_chunk, eof);
1935
0
    PyBuffer_Release(&input_chunk_buf);
1936
0
    if (decoded_chars == NULL)
1937
0
        goto fail;
1938
1939
0
    textiowrapper_set_decoded_chars(self, decoded_chars);
1940
0
    nchars = PyUnicode_GET_LENGTH(decoded_chars);
1941
0
    if (nchars > 0)
1942
0
        self->b2cratio = (double) nbytes / nchars;
1943
0
    else
1944
0
        self->b2cratio = 0.0;
1945
0
    if (nchars > 0)
1946
0
        eof = 0;
1947
1948
0
    if (self->telling) {
1949
        /* At the snapshot point, len(dec_buffer) bytes before the read, the
1950
         * next input to be decoded is dec_buffer + input_chunk.
1951
         */
1952
0
        PyObject *next_input = dec_buffer;
1953
0
        PyBytes_Concat(&next_input, input_chunk);
1954
0
        dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1955
0
        if (next_input == NULL) {
1956
0
            goto fail;
1957
0
        }
1958
0
        PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1959
0
        if (snapshot == NULL) {
1960
0
            dec_flags = NULL;
1961
0
            goto fail;
1962
0
        }
1963
0
        Py_XSETREF(self->snapshot, snapshot);
1964
0
    }
1965
0
    Py_DECREF(input_chunk);
1966
1967
0
    return (eof == 0);
1968
1969
0
  fail:
1970
0
    Py_XDECREF(dec_buffer);
1971
0
    Py_XDECREF(dec_flags);
1972
0
    Py_XDECREF(input_chunk);
1973
0
    return -1;
1974
0
}
1975
1976
/*[clinic input]
1977
@critical_section
1978
_io.TextIOWrapper.read
1979
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
1980
    /
1981
[clinic start generated code]*/
1982
1983
static PyObject *
1984
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1985
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=67d14c5661121377]*/
1986
0
{
1987
0
    PyObject *result = NULL, *chunks = NULL;
1988
1989
0
    CHECK_ATTACHED(self);
1990
0
    CHECK_CLOSED(self);
1991
1992
0
    if (self->decoder == NULL) {
1993
0
        return _unsupported(self->state, "not readable");
1994
0
    }
1995
1996
0
    if (_textiowrapper_writeflush(self) < 0)
1997
0
        return NULL;
1998
1999
0
    if (n < 0) {
2000
        /* Read everything */
2001
0
        PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
2002
0
        PyObject *decoded;
2003
0
        if (bytes == NULL)
2004
0
            goto fail;
2005
2006
0
        if (bytes == Py_None){
2007
0
            Py_DECREF(bytes);
2008
0
            PyErr_SetString(PyExc_BlockingIOError, "Read returned None.");
2009
0
            return NULL;
2010
0
        }
2011
2012
0
        _PyIO_State *state = self->state;
2013
0
        if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
2014
0
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
2015
0
                                                          bytes, 1);
2016
0
        else
2017
0
            decoded = PyObject_CallMethodObjArgs(
2018
0
                self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
2019
0
        Py_DECREF(bytes);
2020
0
        if (check_decoded(decoded) < 0)
2021
0
            goto fail;
2022
2023
0
        result = textiowrapper_get_decoded_chars(self, -1);
2024
2025
0
        if (result == NULL) {
2026
0
            Py_DECREF(decoded);
2027
0
            return NULL;
2028
0
        }
2029
2030
0
        PyUnicode_AppendAndDel(&result, decoded);
2031
0
        if (result == NULL)
2032
0
            goto fail;
2033
2034
0
        if (self->snapshot != NULL) {
2035
0
            textiowrapper_set_decoded_chars(self, NULL);
2036
0
            Py_CLEAR(self->snapshot);
2037
0
        }
2038
0
        return result;
2039
0
    }
2040
0
    else {
2041
0
        int res = 1;
2042
0
        Py_ssize_t remaining = n;
2043
2044
0
        result = textiowrapper_get_decoded_chars(self, n);
2045
0
        if (result == NULL)
2046
0
            goto fail;
2047
0
        remaining -= PyUnicode_GET_LENGTH(result);
2048
2049
        /* Keep reading chunks until we have n characters to return */
2050
0
        while (remaining > 0) {
2051
0
            res = textiowrapper_read_chunk(self, remaining);
2052
0
            if (res < 0) {
2053
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2054
                   when EINTR occurs so we needn't do it ourselves. */
2055
0
                if (_PyIO_trap_eintr()) {
2056
0
                    continue;
2057
0
                }
2058
0
                goto fail;
2059
0
            }
2060
0
            if (res == 0)  /* EOF */
2061
0
                break;
2062
0
            if (chunks == NULL) {
2063
0
                chunks = PyList_New(0);
2064
0
                if (chunks == NULL)
2065
0
                    goto fail;
2066
0
            }
2067
0
            if (PyUnicode_GET_LENGTH(result) > 0 &&
2068
0
                PyList_Append(chunks, result) < 0)
2069
0
                goto fail;
2070
0
            Py_DECREF(result);
2071
0
            result = textiowrapper_get_decoded_chars(self, remaining);
2072
0
            if (result == NULL)
2073
0
                goto fail;
2074
0
            remaining -= PyUnicode_GET_LENGTH(result);
2075
0
        }
2076
0
        if (chunks != NULL) {
2077
0
            if (result != NULL && PyList_Append(chunks, result) < 0)
2078
0
                goto fail;
2079
0
            _Py_DECLARE_STR(empty, "");
2080
0
            Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2081
0
            if (result == NULL)
2082
0
                goto fail;
2083
0
            Py_CLEAR(chunks);
2084
0
        }
2085
0
        return result;
2086
0
    }
2087
0
  fail:
2088
0
    Py_XDECREF(result);
2089
0
    Py_XDECREF(chunks);
2090
0
    return NULL;
2091
0
}
2092
2093
2094
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2095
   that is to the NUL character. Otherwise the function will produce
2096
   incorrect results. */
2097
static const char *
2098
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2099
0
{
2100
0
    if (kind == PyUnicode_1BYTE_KIND) {
2101
0
        assert(ch < 256);
2102
0
        return (char *) memchr((const void *) s, (char) ch, end - s);
2103
0
    }
2104
0
    for (;;) {
2105
0
        while (PyUnicode_READ(kind, s, 0) > ch)
2106
0
            s += kind;
2107
0
        if (PyUnicode_READ(kind, s, 0) == ch)
2108
0
            return s;
2109
0
        if (s == end)
2110
0
            return NULL;
2111
0
        s += kind;
2112
0
    }
2113
0
}
2114
2115
Py_ssize_t
2116
_PyIO_find_line_ending(
2117
    int translated, int universal, PyObject *readnl,
2118
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
2119
20.8M
{
2120
20.8M
    Py_ssize_t len = (end - start)/kind;
2121
2122
20.8M
    if (translated) {
2123
        /* Newlines are already translated, only search for \n */
2124
0
        const char *pos = find_control_char(kind, start, end, '\n');
2125
0
        if (pos != NULL)
2126
0
            return (pos - start)/kind + 1;
2127
0
        else {
2128
0
            *consumed = len;
2129
0
            return -1;
2130
0
        }
2131
0
    }
2132
20.8M
    else if (universal) {
2133
        /* Universal newline search. Find any of \r, \r\n, \n
2134
         * The decoder ensures that \r\n are not split in two pieces
2135
         */
2136
20.8M
        const char *s = start;
2137
83.3M
        for (;;) {
2138
83.3M
            Py_UCS4 ch;
2139
            /* Fast path for non-control chars. The loop always ends
2140
               since the Unicode string is NUL-terminated. */
2141
257M
            while (PyUnicode_READ(kind, s, 0) > '\r')
2142
174M
                s += kind;
2143
83.3M
            if (s >= end) {
2144
33.4k
                *consumed = len;
2145
33.4k
                return -1;
2146
33.4k
            }
2147
83.3M
            ch = PyUnicode_READ(kind, s, 0);
2148
83.3M
            s += kind;
2149
83.3M
            if (ch == '\n')
2150
5.19M
                return (s - start)/kind;
2151
78.1M
            if (ch == '\r') {
2152
15.6M
                if (PyUnicode_READ(kind, s, 0) == '\n')
2153
439k
                    return (s - start)/kind + 1;
2154
15.1M
                else
2155
15.1M
                    return (s - start)/kind;
2156
15.6M
            }
2157
78.1M
        }
2158
20.8M
    }
2159
0
    else {
2160
        /* Non-universal mode. */
2161
0
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2162
0
        const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2163
        /* Assume that readnl is an ASCII character. */
2164
0
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2165
0
        if (readnl_len == 1) {
2166
0
            const char *pos = find_control_char(kind, start, end, nl[0]);
2167
0
            if (pos != NULL)
2168
0
                return (pos - start)/kind + 1;
2169
0
            *consumed = len;
2170
0
            return -1;
2171
0
        }
2172
0
        else {
2173
0
            const char *s = start;
2174
0
            const char *e = end - (readnl_len - 1)*kind;
2175
0
            const char *pos;
2176
0
            if (e < s)
2177
0
                e = s;
2178
0
            while (s < e) {
2179
0
                Py_ssize_t i;
2180
0
                const char *pos = find_control_char(kind, s, end, nl[0]);
2181
0
                if (pos == NULL || pos >= e)
2182
0
                    break;
2183
0
                for (i = 1; i < readnl_len; i++) {
2184
0
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
2185
0
                        break;
2186
0
                }
2187
0
                if (i == readnl_len)
2188
0
                    return (pos - start)/kind + readnl_len;
2189
0
                s = pos + kind;
2190
0
            }
2191
0
            pos = find_control_char(kind, e, end, nl[0]);
2192
0
            if (pos == NULL)
2193
0
                *consumed = len;
2194
0
            else
2195
0
                *consumed = (pos - start)/kind;
2196
0
            return -1;
2197
0
        }
2198
0
    }
2199
20.8M
}
2200
2201
static PyObject *
2202
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2203
0
{
2204
0
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2205
0
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
2206
0
    int res;
2207
2208
0
    CHECK_CLOSED(self);
2209
2210
0
    if (_textiowrapper_writeflush(self) < 0)
2211
0
        return NULL;
2212
2213
0
    chunked = 0;
2214
2215
0
    while (1) {
2216
0
        const char *ptr;
2217
0
        Py_ssize_t line_len;
2218
0
        int kind;
2219
0
        Py_ssize_t consumed = 0;
2220
2221
        /* First, get some data if necessary */
2222
0
        res = 1;
2223
0
        while (!self->decoded_chars ||
2224
0
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2225
0
            res = textiowrapper_read_chunk(self, 0);
2226
0
            if (res < 0) {
2227
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2228
                   when EINTR occurs so we needn't do it ourselves. */
2229
0
                if (_PyIO_trap_eintr()) {
2230
0
                    continue;
2231
0
                }
2232
0
                goto error;
2233
0
            }
2234
0
            if (res == 0)
2235
0
                break;
2236
0
        }
2237
0
        if (res == 0) {
2238
            /* end of file */
2239
0
            textiowrapper_set_decoded_chars(self, NULL);
2240
0
            Py_CLEAR(self->snapshot);
2241
0
            start = endpos = offset_to_buffer = 0;
2242
0
            break;
2243
0
        }
2244
2245
0
        if (remaining == NULL) {
2246
0
            line = Py_NewRef(self->decoded_chars);
2247
0
            start = self->decoded_chars_used;
2248
0
            offset_to_buffer = 0;
2249
0
        }
2250
0
        else {
2251
0
            assert(self->decoded_chars_used == 0);
2252
0
            line = PyUnicode_Concat(remaining, self->decoded_chars);
2253
0
            start = 0;
2254
0
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2255
0
            Py_CLEAR(remaining);
2256
0
            if (line == NULL)
2257
0
                goto error;
2258
0
        }
2259
2260
0
        ptr = PyUnicode_DATA(line);
2261
0
        line_len = PyUnicode_GET_LENGTH(line);
2262
0
        kind = PyUnicode_KIND(line);
2263
2264
0
        endpos = _PyIO_find_line_ending(
2265
0
            self->readtranslate, self->readuniversal, self->readnl,
2266
0
            kind,
2267
0
            ptr + kind * start,
2268
0
            ptr + kind * line_len,
2269
0
            &consumed);
2270
0
        if (endpos >= 0) {
2271
0
            endpos += start;
2272
0
            if (limit >= 0 && (endpos - start) + chunked >= limit)
2273
0
                endpos = start + limit - chunked;
2274
0
            break;
2275
0
        }
2276
2277
        /* We can put aside up to `endpos` */
2278
0
        endpos = consumed + start;
2279
0
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
2280
            /* Didn't find line ending, but reached length limit */
2281
0
            endpos = start + limit - chunked;
2282
0
            break;
2283
0
        }
2284
2285
0
        if (endpos > start) {
2286
            /* No line ending seen yet - put aside current data */
2287
0
            PyObject *s;
2288
0
            if (chunks == NULL) {
2289
0
                chunks = PyList_New(0);
2290
0
                if (chunks == NULL)
2291
0
                    goto error;
2292
0
            }
2293
0
            s = PyUnicode_Substring(line, start, endpos);
2294
0
            if (s == NULL)
2295
0
                goto error;
2296
0
            if (PyList_Append(chunks, s) < 0) {
2297
0
                Py_DECREF(s);
2298
0
                goto error;
2299
0
            }
2300
0
            chunked += PyUnicode_GET_LENGTH(s);
2301
0
            Py_DECREF(s);
2302
0
        }
2303
        /* There may be some remaining bytes we'll have to prepend to the
2304
           next chunk of data */
2305
0
        if (endpos < line_len) {
2306
0
            remaining = PyUnicode_Substring(line, endpos, line_len);
2307
0
            if (remaining == NULL)
2308
0
                goto error;
2309
0
        }
2310
0
        Py_CLEAR(line);
2311
        /* We have consumed the buffer */
2312
0
        textiowrapper_set_decoded_chars(self, NULL);
2313
0
    }
2314
2315
0
    if (line != NULL) {
2316
        /* Our line ends in the current buffer */
2317
0
        self->decoded_chars_used = endpos - offset_to_buffer;
2318
0
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2319
0
            PyObject *s = PyUnicode_Substring(line, start, endpos);
2320
0
            Py_CLEAR(line);
2321
0
            if (s == NULL)
2322
0
                goto error;
2323
0
            line = s;
2324
0
        }
2325
0
    }
2326
0
    if (remaining != NULL) {
2327
0
        if (chunks == NULL) {
2328
0
            chunks = PyList_New(0);
2329
0
            if (chunks == NULL)
2330
0
                goto error;
2331
0
        }
2332
0
        if (PyList_Append(chunks, remaining) < 0)
2333
0
            goto error;
2334
0
        Py_CLEAR(remaining);
2335
0
    }
2336
0
    if (chunks != NULL) {
2337
0
        if (line != NULL) {
2338
0
            if (PyList_Append(chunks, line) < 0)
2339
0
                goto error;
2340
0
            Py_DECREF(line);
2341
0
        }
2342
0
        line = PyUnicode_Join(&_Py_STR(empty), chunks);
2343
0
        if (line == NULL)
2344
0
            goto error;
2345
0
        Py_CLEAR(chunks);
2346
0
    }
2347
0
    if (line == NULL) {
2348
0
        line = &_Py_STR(empty);
2349
0
    }
2350
2351
0
    return line;
2352
2353
0
  error:
2354
0
    Py_XDECREF(chunks);
2355
0
    Py_XDECREF(remaining);
2356
0
    Py_XDECREF(line);
2357
0
    return NULL;
2358
0
}
2359
2360
/*[clinic input]
2361
@critical_section
2362
_io.TextIOWrapper.readline
2363
    size: Py_ssize_t = -1
2364
    /
2365
[clinic start generated code]*/
2366
2367
static PyObject *
2368
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2369
/*[clinic end generated code: output=344afa98804e8b25 input=b65bab871dc3ddba]*/
2370
0
{
2371
0
    CHECK_ATTACHED(self);
2372
0
    return _textiowrapper_readline(self, size);
2373
0
}
2374
2375
/* Seek and Tell */
2376
2377
/* Unpacked form of the opaque position number accepted by seek(). */
typedef struct {
2378
    /* Position given to the underlying buffer's seek() (the safe start point). */
    Py_off_t start_pos;
2379
    /* Decoder flags restored through decoder.setstate() at start_pos. */
    int dec_flags;
2380
    /* Number of bytes seek() reads from the buffer and feeds to the decoder. */
    int bytes_to_feed;
2381
    /* Number of decoded characters seek() marks as already consumed. */
    int chars_to_skip;
2382
    /* Nonzero if seek() must pass True as the second argument of decode(). */
    char need_eof;
2383
} cookie_type;
2384
2385
/*
2386
   To speed up cookie packing/unpacking, we store the fields in a temporary
2387
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
2388
   (resp.).  The following macros define at which offsets in the intermediary
2389
   byte string the various cookie_type fields will be stored.
2390
 */
2391
2392
/* Total size of the intermediary byte string: one slot per cookie_type field. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2393
2394
#if PY_BIG_ENDIAN
2395
/* We want the least significant byte of start_pos to also be the least
2396
   significant byte of the cookie, which means that in big-endian mode we
2397
   must copy the fields in reverse order. */
2398
2399
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
2400
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
2401
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
2402
# define OFF_CHARS_TO_SKIP  (sizeof(char))
2403
# define OFF_NEED_EOF       0
2404
2405
#else
2406
/* Little-endian mode: the least significant byte of start_pos will
2407
   naturally end up the least significant byte of the cookie. */
2408
2409
0
# define OFF_START_POS      0
2410
0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
2411
0
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
2412
0
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
2413
0
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))
2414
2415
#endif
2416
2417
static int
2418
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2419
0
{
2420
0
    unsigned char buffer[COOKIE_BUF_LEN];
2421
0
    PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2422
0
    if (cookieLong == NULL)
2423
0
        return -1;
2424
2425
0
    if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2426
0
                            PY_LITTLE_ENDIAN, 0, 1) < 0) {
2427
0
        Py_DECREF(cookieLong);
2428
0
        return -1;
2429
0
    }
2430
0
    Py_DECREF(cookieLong);
2431
2432
0
    memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2433
0
    memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2434
0
    memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2435
0
    memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2436
0
    memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2437
2438
0
    return 0;
2439
0
}
2440
2441
static PyObject *
2442
textiowrapper_build_cookie(cookie_type *cookie)
2443
0
{
2444
0
    unsigned char buffer[COOKIE_BUF_LEN];
2445
2446
0
    memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2447
0
    memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2448
0
    memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2449
0
    memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2450
0
    memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2451
2452
0
    return _PyLong_FromByteArray(buffer, sizeof(buffer),
2453
0
                                 PY_LITTLE_ENDIAN, 0);
2454
0
}
2455
2456
static int
2457
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2458
0
{
2459
0
    PyObject *res;
2460
    /* When seeking to the start of the stream, we call decoder.reset()
2461
       rather than decoder.getstate().
2462
       This is for a few decoders such as utf-16 for which the state value
2463
       at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2464
       utf-16, that we are expecting a BOM).
2465
    */
2466
0
    if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2467
0
        res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2468
0
    }
2469
0
    else {
2470
0
        res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2471
0
                                   "((yi))", "", cookie->dec_flags);
2472
0
    }
2473
0
    if (res == NULL) {
2474
0
        return -1;
2475
0
    }
2476
0
    Py_DECREF(res);
2477
0
    return 0;
2478
0
}
2479
2480
static int
2481
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2482
0
{
2483
0
    PyObject *res;
2484
0
    if (start_of_stream) {
2485
0
        res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2486
0
        self->encoding_start_of_stream = 1;
2487
0
    }
2488
0
    else {
2489
0
        res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2490
0
                                        _PyLong_GetZero());
2491
0
        self->encoding_start_of_stream = 0;
2492
0
    }
2493
0
    if (res == NULL)
2494
0
        return -1;
2495
0
    Py_DECREF(res);
2496
0
    return 0;
2497
0
}
2498
2499
static int
2500
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2501
0
{
2502
    /* Same as _textiowrapper_decoder_setstate() above. */
2503
0
    return _textiowrapper_encoder_reset(
2504
0
        self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2505
0
}
2506
2507
/*[clinic input]
2508
@critical_section
2509
_io.TextIOWrapper.seek
2510
    cookie as cookieObj: object
2511
      Zero or an opaque number returned by tell().
2512
    whence: int(c_default='0') = os.SEEK_SET
2513
      The relative position to seek from.
2514
    /
2515
2516
Set the stream position, and return the new stream position.
2517
2518
Four operations are supported, given by the following argument
2519
combinations:
2520
2521
- seek(0, SEEK_SET): Rewind to the start of the stream.
2522
- seek(cookie, SEEK_SET): Restore a previous position;
2523
  'cookie' must be a number returned by tell().
2524
- seek(0, SEEK_END): Fast-forward to the end of the stream.
2525
- seek(0, SEEK_CUR): Leave the current stream position unchanged.
2526
2527
Any other argument combinations are invalid,
2528
and may raise exceptions.
2529
[clinic start generated code]*/
2530
2531
static PyObject *
2532
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2533
/*[clinic end generated code: output=0a15679764e2d04d input=4bea78698be23d7e]*/
2534
0
{
2535
0
    PyObject *posobj;
2536
0
    cookie_type cookie;
2537
0
    PyObject *res;
2538
0
    int cmp;
2539
0
    PyObject *snapshot;
2540
2541
0
    CHECK_ATTACHED(self);
2542
0
    CHECK_CLOSED(self);
2543
2544
0
    Py_INCREF(cookieObj);
2545
2546
0
    if (!self->seekable) {
2547
0
        _unsupported(self->state, "underlying stream is not seekable");
2548
0
        goto fail;
2549
0
    }
2550
2551
0
    PyObject *zero = _PyLong_GetZero();  // borrowed reference
2552
2553
0
    switch (whence) {
2554
0
    case SEEK_CUR:
2555
        /* seek relative to current position */
2556
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2557
0
        if (cmp < 0)
2558
0
            goto fail;
2559
2560
0
        if (cmp == 0) {
2561
0
            _unsupported(self->state, "can't do nonzero cur-relative seeks");
2562
0
            goto fail;
2563
0
        }
2564
2565
        /* Seeking to the current position should attempt to
2566
         * sync the underlying buffer with the current position.
2567
         */
2568
0
        Py_DECREF(cookieObj);
2569
0
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2570
0
        if (cookieObj == NULL)
2571
0
            goto fail;
2572
0
        break;
2573
2574
0
    case SEEK_END:
2575
        /* seek relative to end of file */
2576
0
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2577
0
        if (cmp < 0)
2578
0
            goto fail;
2579
2580
0
        if (cmp == 0) {
2581
0
            _unsupported(self->state, "can't do nonzero end-relative seeks");
2582
0
            goto fail;
2583
0
        }
2584
2585
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
2586
0
            goto fail;
2587
0
        }
2588
2589
0
        textiowrapper_set_decoded_chars(self, NULL);
2590
0
        Py_CLEAR(self->snapshot);
2591
0
        if (self->decoder) {
2592
0
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2593
0
            if (res == NULL)
2594
0
                goto fail;
2595
0
            Py_DECREF(res);
2596
0
        }
2597
2598
0
        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2599
0
        Py_CLEAR(cookieObj);
2600
0
        if (res == NULL)
2601
0
            goto fail;
2602
0
        if (self->encoder) {
2603
            /* If seek() == 0, we are at the start of stream, otherwise not */
2604
0
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2605
0
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2606
0
                Py_DECREF(res);
2607
0
                goto fail;
2608
0
            }
2609
0
        }
2610
0
        return res;
2611
2612
0
    case SEEK_SET:
2613
0
        break;
2614
2615
0
    default:
2616
0
        PyErr_Format(PyExc_ValueError,
2617
0
                     "invalid whence (%d, should be %d, %d or %d)", whence,
2618
0
                     SEEK_SET, SEEK_CUR, SEEK_END);
2619
0
        goto fail;
2620
0
    }
2621
2622
0
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2623
0
    if (cmp < 0)
2624
0
        goto fail;
2625
2626
0
    if (cmp == 1) {
2627
0
        PyErr_Format(PyExc_ValueError,
2628
0
                     "negative seek position %R", cookieObj);
2629
0
        goto fail;
2630
0
    }
2631
2632
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2633
0
        goto fail;
2634
0
    }
2635
2636
    /* The strategy of seek() is to go back to the safe start point
2637
     * and replay the effect of read(chars_to_skip) from there.
2638
     */
2639
0
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2640
0
        goto fail;
2641
2642
    /* Seek back to the safe start point. */
2643
0
    posobj = PyLong_FromOff_t(cookie.start_pos);
2644
0
    if (posobj == NULL)
2645
0
        goto fail;
2646
0
    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2647
0
    Py_DECREF(posobj);
2648
0
    if (res == NULL)
2649
0
        goto fail;
2650
0
    Py_DECREF(res);
2651
2652
0
    textiowrapper_set_decoded_chars(self, NULL);
2653
0
    Py_CLEAR(self->snapshot);
2654
2655
    /* Restore the decoder to its state from the safe start point. */
2656
0
    if (self->decoder) {
2657
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2658
0
            goto fail;
2659
0
    }
2660
2661
0
    if (cookie.chars_to_skip) {
2662
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
2663
0
        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2664
0
                                                     "i", cookie.bytes_to_feed);
2665
0
        PyObject *decoded;
2666
2667
0
        if (input_chunk == NULL)
2668
0
            goto fail;
2669
2670
0
        if (!PyBytes_Check(input_chunk)) {
2671
0
            PyErr_Format(PyExc_TypeError,
2672
0
                         "underlying read() should have returned a bytes "
2673
0
                         "object, not '%.200s'",
2674
0
                         Py_TYPE(input_chunk)->tp_name);
2675
0
            Py_DECREF(input_chunk);
2676
0
            goto fail;
2677
0
        }
2678
2679
0
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2680
0
        if (snapshot == NULL) {
2681
0
            goto fail;
2682
0
        }
2683
0
        Py_XSETREF(self->snapshot, snapshot);
2684
2685
0
        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2686
0
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2687
2688
0
        if (check_decoded(decoded) < 0)
2689
0
            goto fail;
2690
2691
0
        textiowrapper_set_decoded_chars(self, decoded);
2692
2693
        /* Skip chars_to_skip of the decoded characters. */
2694
0
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2695
0
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2696
0
            goto fail;
2697
0
        }
2698
0
        self->decoded_chars_used = cookie.chars_to_skip;
2699
0
    }
2700
0
    else {
2701
0
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2702
0
        if (snapshot == NULL)
2703
0
            goto fail;
2704
0
        Py_XSETREF(self->snapshot, snapshot);
2705
0
    }
2706
2707
    /* Finally, reset the encoder (merely useful for proper BOM handling) */
2708
0
    if (self->encoder) {
2709
0
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2710
0
            goto fail;
2711
0
    }
2712
0
    return cookieObj;
2713
0
  fail:
2714
0
    Py_XDECREF(cookieObj);
2715
0
    return NULL;
2716
2717
0
}
2718
2719
/*[clinic input]
2720
@critical_section
2721
_io.TextIOWrapper.tell
2722
2723
Return the stream position as an opaque number.
2724
2725
The return value of tell() can be given as input to seek(), to restore a
2726
previous stream position.
2727
[clinic start generated code]*/
2728
2729
static PyObject *
2730
_io_TextIOWrapper_tell_impl(textio *self)
2731
/*[clinic end generated code: output=4f168c08bf34ad5f input=415d6b4e4f8e6e8c]*/
2732
0
{
2733
0
    PyObject *res;
2734
0
    PyObject *posobj = NULL;
2735
0
    cookie_type cookie = {0,0,0,0,0};
2736
0
    PyObject *next_input;
2737
0
    Py_ssize_t chars_to_skip, chars_decoded;
2738
0
    Py_ssize_t skip_bytes, skip_back;
2739
0
    PyObject *saved_state = NULL;
2740
0
    const char *input, *input_end;
2741
0
    Py_ssize_t dec_buffer_len;
2742
0
    int dec_flags;
2743
2744
0
    CHECK_ATTACHED(self);
2745
0
    CHECK_CLOSED(self);
2746
2747
0
    if (!self->seekable) {
2748
0
        _unsupported(self->state, "underlying stream is not seekable");
2749
0
        goto fail;
2750
0
    }
2751
0
    if (!self->telling) {
2752
0
        PyErr_SetString(PyExc_OSError,
2753
0
                        "telling position disabled by next() call");
2754
0
        goto fail;
2755
0
    }
2756
2757
0
    if (_textiowrapper_writeflush(self) < 0)
2758
0
        return NULL;
2759
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2760
0
        goto fail;
2761
0
    }
2762
2763
0
    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2764
0
    if (posobj == NULL)
2765
0
        goto fail;
2766
2767
0
    if (self->decoder == NULL || self->snapshot == NULL) {
2768
0
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2769
0
        return posobj;
2770
0
    }
2771
2772
#if defined(HAVE_LARGEFILE_SUPPORT)
2773
    cookie.start_pos = PyLong_AsLongLong(posobj);
2774
#else
2775
0
    cookie.start_pos = PyLong_AsLong(posobj);
2776
0
#endif
2777
0
    Py_DECREF(posobj);
2778
0
    if (PyErr_Occurred())
2779
0
        goto fail;
2780
2781
    /* Skip backward to the snapshot point (see _read_chunk). */
2782
0
    assert(PyTuple_Check(self->snapshot));
2783
0
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2784
0
        goto fail;
2785
2786
0
    assert (PyBytes_Check(next_input));
2787
2788
0
    cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2789
2790
    /* How many decoded characters have been used up since the snapshot? */
2791
0
    if (self->decoded_chars_used == 0)  {
2792
        /* We haven't moved from the snapshot point. */
2793
0
        return textiowrapper_build_cookie(&cookie);
2794
0
    }
2795
2796
0
    chars_to_skip = self->decoded_chars_used;
2797
2798
    /* Decoder state will be restored at the end */
2799
0
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
2800
0
                                             &_Py_ID(getstate));
2801
0
    if (saved_state == NULL)
2802
0
        goto fail;
2803
2804
0
#define DECODER_GETSTATE() do { \
2805
0
        PyObject *dec_buffer; \
2806
0
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2807
0
            &_Py_ID(getstate)); \
2808
0
        if (_state == NULL) \
2809
0
            goto fail; \
2810
0
        if (!PyTuple_Check(_state)) { \
2811
0
            PyErr_SetString(PyExc_TypeError, \
2812
0
                            "illegal decoder state"); \
2813
0
            Py_DECREF(_state); \
2814
0
            goto fail; \
2815
0
        } \
2816
0
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2817
0
                              &dec_buffer, &dec_flags)) \
2818
0
        { \
2819
0
            Py_DECREF(_state); \
2820
0
            goto fail; \
2821
0
        } \
2822
0
        if (!PyBytes_Check(dec_buffer)) { \
2823
0
            PyErr_Format(PyExc_TypeError, \
2824
0
                         "illegal decoder state: the first item should be a " \
2825
0
                         "bytes object, not '%.200s'", \
2826
0
                         Py_TYPE(dec_buffer)->tp_name); \
2827
0
            Py_DECREF(_state); \
2828
0
            goto fail; \
2829
0
        } \
2830
0
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2831
0
        Py_DECREF(_state); \
2832
0
    } while (0)
2833
2834
0
#define DECODER_DECODE(start, len, res) do { \
2835
0
        PyObject *_decoded = _PyObject_CallMethod( \
2836
0
            self->decoder, &_Py_ID(decode), "y#", start, len); \
2837
0
        if (check_decoded(_decoded) < 0) \
2838
0
            goto fail; \
2839
0
        res = PyUnicode_GET_LENGTH(_decoded); \
2840
0
        Py_DECREF(_decoded); \
2841
0
    } while (0)
2842
2843
    /* Fast search for an acceptable start point, close to our
2844
       current pos */
2845
0
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2846
0
    skip_back = 1;
2847
0
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
2848
0
    input = PyBytes_AS_STRING(next_input);
2849
0
    while (skip_bytes > 0) {
2850
        /* Decode up to temptative start point */
2851
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2852
0
            goto fail;
2853
0
        DECODER_DECODE(input, skip_bytes, chars_decoded);
2854
0
        if (chars_decoded <= chars_to_skip) {
2855
0
            DECODER_GETSTATE();
2856
0
            if (dec_buffer_len == 0) {
2857
                /* Before pos and no bytes buffered in decoder => OK */
2858
0
                cookie.dec_flags = dec_flags;
2859
0
                chars_to_skip -= chars_decoded;
2860
0
                break;
2861
0
            }
2862
            /* Skip back by buffered amount and reset heuristic */
2863
0
            skip_bytes -= dec_buffer_len;
2864
0
            skip_back = 1;
2865
0
        }
2866
0
        else {
2867
            /* We're too far ahead, skip back a bit */
2868
0
            skip_bytes -= skip_back;
2869
0
            skip_back *= 2;
2870
0
        }
2871
0
    }
2872
0
    if (skip_bytes <= 0) {
2873
0
        skip_bytes = 0;
2874
0
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2875
0
            goto fail;
2876
0
    }
2877
2878
    /* Note our initial start point. */
2879
0
    cookie.start_pos += skip_bytes;
2880
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2881
0
    if (chars_to_skip == 0)
2882
0
        goto finally;
2883
2884
    /* We should be close to the desired position.  Now feed the decoder one
2885
     * byte at a time until we reach the `chars_to_skip` target.
2886
     * As we go, note the nearest "safe start point" before the current
2887
     * location (a point where the decoder has nothing buffered, so seek()
2888
     * can safely start from there and advance to this location).
2889
     */
2890
0
    chars_decoded = 0;
2891
0
    input = PyBytes_AS_STRING(next_input);
2892
0
    input_end = input + PyBytes_GET_SIZE(next_input);
2893
0
    input += skip_bytes;
2894
0
    while (input < input_end) {
2895
0
        Py_ssize_t n;
2896
2897
0
        DECODER_DECODE(input, (Py_ssize_t)1, n);
2898
        /* We got n chars for 1 byte */
2899
0
        chars_decoded += n;
2900
0
        cookie.bytes_to_feed += 1;
2901
0
        DECODER_GETSTATE();
2902
2903
0
        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2904
            /* Decoder buffer is empty, so this is a safe start point. */
2905
0
            cookie.start_pos += cookie.bytes_to_feed;
2906
0
            chars_to_skip -= chars_decoded;
2907
0
            cookie.dec_flags = dec_flags;
2908
0
            cookie.bytes_to_feed = 0;
2909
0
            chars_decoded = 0;
2910
0
        }
2911
0
        if (chars_decoded >= chars_to_skip)
2912
0
            break;
2913
0
        input++;
2914
0
    }
2915
0
    if (input == input_end) {
2916
        /* We didn't get enough decoded data; signal EOF to get more. */
2917
0
        PyObject *decoded = _PyObject_CallMethod(
2918
0
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2919
0
        if (check_decoded(decoded) < 0)
2920
0
            goto fail;
2921
0
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
2922
0
        Py_DECREF(decoded);
2923
0
        cookie.need_eof = 1;
2924
2925
0
        if (chars_decoded < chars_to_skip) {
2926
0
            PyErr_SetString(PyExc_OSError,
2927
0
                            "can't reconstruct logical file position");
2928
0
            goto fail;
2929
0
        }
2930
0
    }
2931
2932
0
finally:
2933
0
    res = PyObject_CallMethodOneArg(
2934
0
            self->decoder, &_Py_ID(setstate), saved_state);
2935
0
    Py_DECREF(saved_state);
2936
0
    if (res == NULL)
2937
0
        return NULL;
2938
0
    Py_DECREF(res);
2939
2940
    /* The returned cookie corresponds to the last safe start point. */
2941
0
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2942
0
    return textiowrapper_build_cookie(&cookie);
2943
2944
0
fail:
2945
0
    if (saved_state) {
2946
0
        PyObject *exc = PyErr_GetRaisedException();
2947
0
        res = PyObject_CallMethodOneArg(
2948
0
                self->decoder, &_Py_ID(setstate), saved_state);
2949
0
        _PyErr_ChainExceptions1(exc);
2950
0
        Py_DECREF(saved_state);
2951
0
        Py_XDECREF(res);
2952
0
    }
2953
0
    return NULL;
2954
0
}
2955
2956
/*[clinic input]
2957
@critical_section
2958
_io.TextIOWrapper.truncate
2959
    pos: object = None
2960
    /
2961
[clinic start generated code]*/
2962
2963
static PyObject *
2964
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2965
/*[clinic end generated code: output=90ec2afb9bb7745f input=8bddb320834c93ee]*/
2966
0
{
2967
0
    CHECK_ATTACHED(self)
2968
2969
0
    if (_PyFile_Flush((PyObject *)self) < 0) {
2970
0
        return NULL;
2971
0
    }
2972
2973
0
    return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2974
0
}
2975
2976
/* tp_repr slot: build "<TYPE[ name=...][ mode=...] encoding=...>".
 *
 * The optional "name" and "mode" attributes are looked up on the object
 * itself; a ValueError raised while fetching "name" is swallowed.
 * Recursive invocation on the same object is reported as RuntimeError.
 * Returns a new str, or NULL with an exception set.
 */
static PyObject *
textiowrapper_repr(PyObject *op)
{
    textio *self = textio_CAST(op);
    const char *type_name = Py_TYPE(self)->tp_name;
    PyObject *attr, *piece, *text, *result;
    int entered;

    CHECK_INITIALIZED(self);

    text = PyUnicode_FromFormat("<%.100s", type_name);
    if (text == NULL) {
        return NULL;
    }

    entered = Py_ReprEnter(op);
    if (entered > 0) {
        /* Already inside this object's repr. */
        PyErr_Format(PyExc_RuntimeError,
                     "reentrant call inside %.100s.__repr__",
                     type_name);
        goto error;
    }
    if (entered < 0) {
        goto error;
    }

    /* Optional " name=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(name), &attr) < 0) {
        if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
            goto error;
        }
        /* Ignore ValueError raised if the underlying stream was detached */
        PyErr_Clear();
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" name=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto error;
        }
    }

    /* Optional " mode=..." part. */
    if (PyObject_GetOptionalAttr(op, &_Py_ID(mode), &attr) < 0) {
        goto error;
    }
    if (attr != NULL) {
        piece = PyUnicode_FromFormat(" mode=%R", attr);
        Py_DECREF(attr);
        if (piece == NULL) {
            goto error;
        }
        PyUnicode_AppendAndDel(&text, piece);
        if (text == NULL) {
            goto error;
        }
    }

    result = PyUnicode_FromFormat("%U encoding=%R>",
                                  text, self->encoding);
    Py_DECREF(text);
    /* Py_ReprEnter() succeeded on every path that reaches this point. */
    Py_ReprLeave(op);
    return result;

  error:
    Py_XDECREF(text);
    if (entered == 0) {
        Py_ReprLeave(op);
    }
    return NULL;
}
3042
3043
3044
/* Inquiries */
3045
3046
/*[clinic input]
3047
@critical_section
3048
_io.TextIOWrapper.fileno
3049
[clinic start generated code]*/
3050
3051
static PyObject *
3052
_io_TextIOWrapper_fileno_impl(textio *self)
3053
/*[clinic end generated code: output=21490a4c3da13e6c input=515e1196aceb97ab]*/
3054
0
{
3055
0
    CHECK_ATTACHED(self);
3056
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
3057
0
}
3058
3059
/*[clinic input]
3060
@critical_section
3061
_io.TextIOWrapper.seekable
3062
[clinic start generated code]*/
3063
3064
static PyObject *
3065
_io_TextIOWrapper_seekable_impl(textio *self)
3066
/*[clinic end generated code: output=ab223dbbcffc0f00 input=71c4c092736c549b]*/
3067
0
{
3068
0
    CHECK_ATTACHED(self);
3069
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
3070
0
}
3071
3072
/*[clinic input]
3073
@critical_section
3074
_io.TextIOWrapper.readable
3075
[clinic start generated code]*/
3076
3077
static PyObject *
3078
_io_TextIOWrapper_readable_impl(textio *self)
3079
/*[clinic end generated code: output=72ff7ba289a8a91b input=80438d1f01b0a89b]*/
3080
0
{
3081
0
    CHECK_ATTACHED(self);
3082
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
3083
0
}
3084
3085
/*[clinic input]
3086
@critical_section
3087
_io.TextIOWrapper.writable
3088
[clinic start generated code]*/
3089
3090
static PyObject *
3091
_io_TextIOWrapper_writable_impl(textio *self)
3092
/*[clinic end generated code: output=a728c71790d03200 input=9d6c22befb0c340a]*/
3093
0
{
3094
0
    CHECK_ATTACHED(self);
3095
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
3096
0
}
3097
3098
/*[clinic input]
3099
@critical_section
3100
_io.TextIOWrapper.isatty
3101
[clinic start generated code]*/
3102
3103
static PyObject *
3104
_io_TextIOWrapper_isatty_impl(textio *self)
3105
/*[clinic end generated code: output=12be1a35bace882e input=7f83ff04d4d1733d]*/
3106
0
{
3107
0
    CHECK_ATTACHED(self);
3108
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3109
0
}
3110
3111
/*[clinic input]
3112
@critical_section
3113
_io.TextIOWrapper.flush
3114
[clinic start generated code]*/
3115
3116
static PyObject *
3117
_io_TextIOWrapper_flush_impl(textio *self)
3118
/*[clinic end generated code: output=59de9165f9c2e4d2 input=3ac3bf521bfed59d]*/
3119
0
{
3120
0
    CHECK_ATTACHED(self);
3121
0
    CHECK_CLOSED(self);
3122
0
    self->telling = self->seekable;
3123
0
    if (_textiowrapper_writeflush(self) < 0)
3124
0
        return NULL;
3125
0
    return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3126
0
}
3127
3128
/*[clinic input]
3129
@critical_section
3130
_io.TextIOWrapper.close
3131
[clinic start generated code]*/
3132
3133
static PyObject *
3134
_io_TextIOWrapper_close_impl(textio *self)
3135
/*[clinic end generated code: output=056ccf8b4876e4f4 input=8e12d7079d5ac5c1]*/
3136
0
{
3137
0
    PyObject *res;
3138
0
    int r;
3139
0
    CHECK_ATTACHED(self);
3140
3141
0
    res = _io_TextIOWrapper_closed_get_impl(self);
3142
0
    if (res == NULL)
3143
0
        return NULL;
3144
0
    r = PyObject_IsTrue(res);
3145
0
    Py_DECREF(res);
3146
0
    if (r < 0)
3147
0
        return NULL;
3148
3149
0
    if (r > 0) {
3150
0
        Py_RETURN_NONE; /* stream already closed */
3151
0
    }
3152
0
    else {
3153
0
        PyObject *exc = NULL;
3154
0
        if (self->finalizing) {
3155
0
            res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3156
0
                                            (PyObject *)self);
3157
0
            if (res) {
3158
0
                Py_DECREF(res);
3159
0
            }
3160
0
            else {
3161
0
                PyErr_Clear();
3162
0
            }
3163
0
        }
3164
0
        if (_PyFile_Flush((PyObject *)self) < 0) {
3165
0
            exc = PyErr_GetRaisedException();
3166
0
        }
3167
3168
0
        res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3169
0
        if (exc != NULL) {
3170
0
            _PyErr_ChainExceptions1(exc);
3171
0
            Py_CLEAR(res);
3172
0
        }
3173
0
        return res;
3174
0
    }
3175
0
}
3176
3177
/* Produce the next line for iteration; the caller must hold the object's
 * critical section.
 *
 * tell() is disabled (`telling = 0`) while iterating.  An empty line ends
 * the iteration: the snapshot is dropped, `telling` is reset from
 * `seekable`, and NULL is returned without setting an exception here.
 */
static PyObject *
textiowrapper_iternext_lock_held(PyObject *op)
{
    _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op);
    textio *self = textio_CAST(op);
    PyObject *line;

    CHECK_ATTACHED(self);

    self->telling = 0;
    if (!Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
        /* Not the exact type: go through the (possibly overridden)
           readline() method and validate what it returns. */
        line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Skip method call overhead for speed */
        line = _textiowrapper_readline(self, -1);
    }

    if (line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(line) != 0) {
        return line;
    }

    /* Reached EOF or would have blocked */
    Py_DECREF(line);
    Py_CLEAR(self->snapshot);
    self->telling = self->seekable;
    return NULL;
}
3215
3216
/* tp_iternext slot: run textiowrapper_iternext_lock_held() inside the
 * object's critical section and hand back its result unchanged.
 */
static PyObject *
textiowrapper_iternext(PyObject *op)
{
    PyObject *line = NULL;

    Py_BEGIN_CRITICAL_SECTION(op);
    line = textiowrapper_iternext_lock_held(op);
    Py_END_CRITICAL_SECTION();

    return line;
}
3225
3226
/*[clinic input]
3227
@critical_section
3228
@getter
3229
_io.TextIOWrapper.name
3230
[clinic start generated code]*/
3231
3232
static PyObject *
3233
_io_TextIOWrapper_name_get_impl(textio *self)
3234
/*[clinic end generated code: output=8c2f1d6d8756af40 input=26ecec9b39e30e07]*/
3235
0
{
3236
0
    CHECK_ATTACHED(self);
3237
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3238
0
}
3239
3240
/*[clinic input]
3241
@critical_section
3242
@getter
3243
_io.TextIOWrapper.closed
3244
[clinic start generated code]*/
3245
3246
static PyObject *
3247
_io_TextIOWrapper_closed_get_impl(textio *self)
3248
/*[clinic end generated code: output=b49b68f443a85e3c input=7dfcf43f63c7003d]*/
3249
0
{
3250
0
    CHECK_ATTACHED(self);
3251
0
    return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3252
0
}
3253
3254
/*[clinic input]
3255
@critical_section
3256
@getter
3257
_io.TextIOWrapper.newlines
3258
[clinic start generated code]*/
3259
3260
static PyObject *
3261
_io_TextIOWrapper_newlines_get_impl(textio *self)
3262
/*[clinic end generated code: output=53aa03ac35573180 input=610df647e514b3e8]*/
3263
0
{
3264
0
    PyObject *res;
3265
0
    CHECK_ATTACHED(self);
3266
0
    if (self->decoder == NULL ||
3267
0
        PyObject_GetOptionalAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3268
0
    {
3269
0
        Py_RETURN_NONE;
3270
0
    }
3271
0
    return res;
3272
0
}
3273
3274
/*[clinic input]
3275
@critical_section
3276
@getter
3277
_io.TextIOWrapper.errors
3278
[clinic start generated code]*/
3279
3280
static PyObject *
3281
_io_TextIOWrapper_errors_get_impl(textio *self)
3282
/*[clinic end generated code: output=dca3a3ef21b09484 input=b45f983e6d43c4d8]*/
3283
0
{
3284
0
    CHECK_INITIALIZED(self);
3285
0
    return Py_NewRef(self->errors);
3286
0
}
3287
3288
/*[clinic input]
3289
@critical_section
3290
@getter
3291
_io.TextIOWrapper._CHUNK_SIZE
3292
[clinic start generated code]*/
3293
3294
static PyObject *
3295
_io_TextIOWrapper__CHUNK_SIZE_get_impl(textio *self)
3296
/*[clinic end generated code: output=039925cd2df375bc input=e9715b0e06ff0fa6]*/
3297
0
{
3298
0
    CHECK_ATTACHED(self);
3299
0
    return PyLong_FromSsize_t(self->chunk_size);
3300
0
}
3301
3302
/*[clinic input]
3303
@critical_section
3304
@setter
3305
_io.TextIOWrapper._CHUNK_SIZE
3306
[clinic start generated code]*/
3307
3308
static int
3309
_io_TextIOWrapper__CHUNK_SIZE_set_impl(textio *self, PyObject *value)
3310
/*[clinic end generated code: output=edb86d2db660a5ab input=32fc99861db02a0a]*/
3311
0
{
3312
0
    Py_ssize_t n;
3313
0
    CHECK_ATTACHED_INT(self);
3314
0
    if (value == NULL) {
3315
0
        PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3316
0
        return -1;
3317
0
    }
3318
0
    n = PyNumber_AsSsize_t(value, PyExc_ValueError);
3319
0
    if (n == -1 && PyErr_Occurred())
3320
0
        return -1;
3321
0
    if (n <= 0) {
3322
0
        PyErr_SetString(PyExc_ValueError,
3323
0
                        "a strictly positive integer is required");
3324
0
        return -1;
3325
0
    }
3326
0
    self->chunk_size = n;
3327
0
    return 0;
3328
0
}
3329
3330
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3331
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3332
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3333
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3334
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3335
    {NULL}
3336
};
3337
3338
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3339
    {"newlines", incrementalnewlinedecoder_newlines_get, NULL, NULL},
3340
    {NULL}
3341
};
3342
3343
static PyType_Slot nldecoder_slots[] = {
3344
    {Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3345
    {Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3346
    {Py_tp_methods, incrementalnewlinedecoder_methods},
3347
    {Py_tp_getset, incrementalnewlinedecoder_getset},
3348
    {Py_tp_traverse, incrementalnewlinedecoder_traverse},
3349
    {Py_tp_clear, incrementalnewlinedecoder_clear},
3350
    {Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3351
    {0, NULL},
3352
};
3353
3354
PyType_Spec nldecoder_spec = {
3355
    .name = "_io.IncrementalNewlineDecoder",
3356
    .basicsize = sizeof(nldecoder_object),
3357
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3358
              Py_TPFLAGS_IMMUTABLETYPE),
3359
    .slots = nldecoder_slots,
3360
};
3361
3362
3363
static PyMethodDef textiowrapper_methods[] = {
3364
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
3365
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3366
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3367
    _IO_TEXTIOWRAPPER_READ_METHODDEF
3368
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3369
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3370
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3371
3372
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3373
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3374
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3375
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3376
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3377
3378
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3379
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
3380
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3381
3382
    {"__getstate__", _PyIOBase_cannot_pickle, METH_NOARGS},
3383
    {NULL, NULL}
3384
};
3385
3386
static PyMemberDef textiowrapper_members[] = {
3387
    {"encoding", _Py_T_OBJECT, offsetof(textio, encoding), Py_READONLY},
3388
    {"buffer", _Py_T_OBJECT, offsetof(textio, buffer), Py_READONLY},
3389
    {"line_buffering", Py_T_BOOL, offsetof(textio, line_buffering), Py_READONLY},
3390
    {"write_through", Py_T_BOOL, offsetof(textio, write_through), Py_READONLY},
3391
    {"_finalizing", Py_T_BOOL, offsetof(textio, finalizing), 0},
3392
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(textio, weakreflist), Py_READONLY},
3393
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(textio, dict), Py_READONLY},
3394
    {NULL}
3395
};
3396
3397
static PyGetSetDef textiowrapper_getset[] = {
3398
    _IO_TEXTIOWRAPPER_NAME_GETSETDEF
3399
    _IO_TEXTIOWRAPPER_CLOSED_GETSETDEF
3400
    _IO_TEXTIOWRAPPER_NEWLINES_GETSETDEF
3401
    _IO_TEXTIOWRAPPER_ERRORS_GETSETDEF
3402
    _IO_TEXTIOWRAPPER__CHUNK_SIZE_GETSETDEF
3403
    {NULL}
3404
};
3405
3406
PyType_Slot textiowrapper_slots[] = {
3407
    {Py_tp_dealloc, textiowrapper_dealloc},
3408
    {Py_tp_repr, textiowrapper_repr},
3409
    {Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3410
    {Py_tp_traverse, textiowrapper_traverse},
3411
    {Py_tp_clear, textiowrapper_clear},
3412
    {Py_tp_iternext, textiowrapper_iternext},
3413
    {Py_tp_methods, textiowrapper_methods},
3414
    {Py_tp_members, textiowrapper_members},
3415
    {Py_tp_getset, textiowrapper_getset},
3416
    {Py_tp_init, _io_TextIOWrapper___init__},
3417
    {0, NULL},
3418
};
3419
3420
PyType_Spec textiowrapper_spec = {
3421
    .name = "_io.TextIOWrapper",
3422
    .basicsize = sizeof(textio),
3423
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3424
              Py_TPFLAGS_IMMUTABLETYPE),
3425
    .slots = textiowrapper_slots,
3426
};