Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_io/stringio.c
Line
Count
Source
1
#include "Python.h"
2
#include <stddef.h>               // offsetof()
3
#include "pycore_object.h"
4
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
5
#include "_iomodule.h"
6
7
/* Implementation note: the buffer is always at least one character longer
8
   than the enclosed string, for proper functioning of _PyIO_find_line_ending.
9
*/
10
11
19.6M
#define STATE_REALIZED 1
12
207k
#define STATE_ACCUMULATING 2
13
14
/*[clinic input]
15
module _io
16
class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type"
17
[clinic start generated code]*/
18
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/
19
20
typedef struct {
21
    PyObject_HEAD
22
    Py_UCS4 *buf;
23
    Py_ssize_t pos;
24
    Py_ssize_t string_size;
25
    size_t buf_size;
26
27
    /* The stringio object can be in two states: accumulating or realized.
28
       In accumulating state, the internal buffer contains nothing and
29
       the contents are given by the embedded _PyUnicodeWriter structure.
30
       In realized state, the internal buffer is meaningful and the
31
       _PyUnicodeWriter is destroyed.
32
    */
33
    int state;
34
    PyUnicodeWriter *writer;
35
36
    char ok; /* initialized? */
37
    char closed;
38
    char readuniversal;
39
    char readtranslate;
40
    PyObject *decoder;
41
    PyObject *readnl;
42
    PyObject *writenl;
43
44
    PyObject *dict;
45
    PyObject *weakreflist;
46
    _PyIO_State *module_state;
47
} stringio;
48
49
19.5M
#define stringio_CAST(op)   ((stringio *)(op))
50
51
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
52
#include "clinic/stringio.c.h"
53
#undef clinic_state
54
55
static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
56
57
/* Return NULL with ValueError from the enclosing function unless `self`
   has been initialized (self->ok > 0).  Wrapped in do/while(0) so the
   macro behaves as a single statement; use it as `CHECK_INITIALIZED(x);`. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
63
64
/* Return NULL with ValueError from the enclosing function when `self` is
   closed.  Wrapped in do/while(0) so the macro behaves as a single
   statement; use it as `CHECK_CLOSED(x);`. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
70
71
/* Switch `self` to realized state via realize(); when that fails, return
   NULL from the enclosing function (realize() has set the exception).
   Wrapped in do/while(0) so the macro behaves as a single statement. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
75
76
77
/* Internal routine for changing the size, in terms of characters, of the
78
   buffer of StringIO objects.  The caller should ensure that the 'size'
79
   argument is non-negative.  Returns 0 on success, -1 otherwise. */
80
static int
81
resize_buffer(stringio *self, size_t size)
82
138k
{
83
    /* Here, unsigned types are used to avoid dealing with signed integer
84
       overflow, which is undefined in C. */
85
138k
    size_t alloc = self->buf_size;
86
138k
    Py_UCS4 *new_buf = NULL;
87
88
138k
    assert(self->buf != NULL);
89
90
    /* Reserve one more char for line ending detection. */
91
138k
    size = size + 1;
92
    /* For simplicity, stay in the range of the signed type. Anyway, Python
93
       doesn't allow strings to be longer than this. */
94
138k
    if (size > PY_SSIZE_T_MAX)
95
0
        goto overflow;
96
97
138k
    if (size < alloc / 2) {
98
        /* Major downsize; resize down to exact size. */
99
36.4k
        alloc = size + 1;
100
36.4k
    }
101
102k
    else if (size < alloc) {
102
        /* Within allocated size; quick exit */
103
1.83k
        return 0;
104
1.83k
    }
105
100k
    else if (size <= alloc * 1.125) {
106
        /* Moderate upsize; overallocate similar to list_resize() */
107
4.80k
        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
108
4.80k
    }
109
95.6k
    else {
110
        /* Major upsize; resize up to exact size */
111
95.6k
        alloc = size + 1;
112
95.6k
    }
113
114
136k
    if (alloc > SIZE_MAX / sizeof(Py_UCS4))
115
0
        goto overflow;
116
136k
    new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
117
136k
    if (new_buf == NULL) {
118
0
        PyErr_NoMemory();
119
0
        return -1;
120
0
    }
121
136k
    self->buf_size = alloc;
122
136k
    self->buf = new_buf;
123
124
136k
    return 0;
125
126
0
  overflow:
127
0
    PyErr_SetString(PyExc_OverflowError,
128
0
                    "new buffer size too large");
129
0
    return -1;
130
136k
}
131
132
static PyObject *
133
make_intermediate(stringio *self)
134
28
{
135
28
    PyObject *intermediate = PyUnicodeWriter_Finish(self->writer);
136
28
    self->writer = NULL;
137
28
    self->state = STATE_REALIZED;
138
28
    if (intermediate == NULL)
139
0
        return NULL;
140
141
28
    self->writer = PyUnicodeWriter_Create(0);
142
28
    if (self->writer == NULL) {
143
0
        Py_DECREF(intermediate);
144
0
        return NULL;
145
0
    }
146
28
    if (PyUnicodeWriter_WriteStr(self->writer, intermediate)) {
147
0
        Py_DECREF(intermediate);
148
0
        return NULL;
149
0
    }
150
28
    self->state = STATE_ACCUMULATING;
151
28
    return intermediate;
152
28
}
153
154
static int
155
realize(stringio *self)
156
19.6M
{
157
19.6M
    Py_ssize_t len;
158
19.6M
    PyObject *intermediate;
159
160
19.6M
    if (self->state == STATE_REALIZED)
161
19.5M
        return 0;
162
19.6M
    assert(self->state == STATE_ACCUMULATING);
163
15.8k
    self->state = STATE_REALIZED;
164
165
15.8k
    intermediate = PyUnicodeWriter_Finish(self->writer);
166
15.8k
    self->writer = NULL;
167
15.8k
    if (intermediate == NULL)
168
0
        return -1;
169
170
    /* Append the intermediate string to the internal buffer.
171
       The length should be equal to the current cursor position.
172
     */
173
15.8k
    len = PyUnicode_GET_LENGTH(intermediate);
174
15.8k
    if (resize_buffer(self, len) < 0) {
175
0
        Py_DECREF(intermediate);
176
0
        return -1;
177
0
    }
178
15.8k
    if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
179
0
        Py_DECREF(intermediate);
180
0
        return -1;
181
0
    }
182
183
15.8k
    Py_DECREF(intermediate);
184
15.8k
    return 0;
185
15.8k
}
186
187
/* Internal routine for writing a whole PyUnicode object to the buffer of a
188
   StringIO object. Returns 0 on success, or -1 on error. */
189
static Py_ssize_t
190
write_str(stringio *self, PyObject *obj)
191
77.4k
{
192
77.4k
    Py_ssize_t len;
193
77.4k
    PyObject *decoded = NULL;
194
195
77.4k
    assert(self->buf != NULL);
196
77.4k
    assert(self->pos >= 0);
197
198
77.4k
    if (self->decoder != NULL) {
199
61.5k
        decoded = _PyIncrementalNewlineDecoder_decode(
200
61.5k
            self->decoder, obj, 1 /* always final */);
201
61.5k
    }
202
15.8k
    else {
203
15.8k
        decoded = Py_NewRef(obj);
204
15.8k
    }
205
77.4k
    if (self->writenl) {
206
0
        PyObject *translated = PyUnicode_Replace(
207
0
            decoded, _Py_LATIN1_CHR('\n'), self->writenl, -1);
208
0
        Py_SETREF(decoded, translated);
209
0
    }
210
77.4k
    if (decoded == NULL)
211
0
        return -1;
212
213
77.4k
    assert(PyUnicode_Check(decoded));
214
77.4k
    len = PyUnicode_GET_LENGTH(decoded);
215
77.4k
    assert(len >= 0);
216
217
    /* This overflow check is not strictly necessary. However, it avoids us to
218
       deal with funky things like comparing an unsigned and a signed
219
       integer. */
220
77.4k
    if (self->pos > PY_SSIZE_T_MAX - len) {
221
0
        PyErr_SetString(PyExc_OverflowError,
222
0
                        "new position too large");
223
0
        goto fail;
224
0
    }
225
226
77.4k
    if (self->state == STATE_ACCUMULATING) {
227
22.8k
        if (self->string_size == self->pos) {
228
            // gh-149046: Avoid PyUnicodeWriter_WriteStr() which calls str(obj)
229
            // on str subclasses
230
22.8k
            if (_PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)self->writer, decoded))
231
0
                goto fail;
232
22.8k
            goto success;
233
22.8k
        }
234
0
        if (realize(self))
235
0
            goto fail;
236
0
    }
237
238
54.6k
    if (self->pos + len > self->string_size) {
239
54.6k
        if (resize_buffer(self, self->pos + len) < 0)
240
0
            goto fail;
241
54.6k
    }
242
243
54.6k
    if (self->pos > self->string_size) {
244
        /* In case of overseek, pad with null bytes the buffer region between
245
           the end of stream and the current position.
246
247
          0   lo      string_size                           hi
248
          |   |<---used--->|<----------available----------->|
249
          |   |            <--to pad-->|<---to write--->    |
250
          0   buf                   position
251
252
        */
253
0
        memset(self->buf + self->string_size, '\0',
254
0
               (self->pos - self->string_size) * sizeof(Py_UCS4));
255
0
    }
256
257
    /* Copy the data to the internal buffer, overwriting some of the
258
       existing data if self->pos < self->string_size. */
259
54.6k
    if (!PyUnicode_AsUCS4(decoded,
260
54.6k
                          self->buf + self->pos,
261
54.6k
                          self->buf_size - self->pos,
262
54.6k
                          0))
263
0
        goto fail;
264
265
77.4k
success:
266
    /* Set the new length of the internal string if it has changed. */
267
77.4k
    self->pos += len;
268
77.4k
    if (self->string_size < self->pos)
269
77.4k
        self->string_size = self->pos;
270
271
77.4k
    Py_DECREF(decoded);
272
77.4k
    return 0;
273
274
0
fail:
275
0
    Py_XDECREF(decoded);
276
0
    return -1;
277
54.6k
}
278
279
/*[clinic input]
280
@critical_section
281
_io.StringIO.getvalue
282
283
Retrieve the entire contents of the object.
284
[clinic start generated code]*/
285
286
static PyObject *
287
_io_StringIO_getvalue_impl(stringio *self)
288
/*[clinic end generated code: output=27b6a7bfeaebce01 input=fb5dee06b8d467f3]*/
289
0
{
290
0
    CHECK_INITIALIZED(self);
291
0
    CHECK_CLOSED(self);
292
0
    if (self->state == STATE_ACCUMULATING)
293
0
        return make_intermediate(self);
294
0
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
295
0
                                     self->string_size);
296
0
}
297
298
/*[clinic input]
299
@critical_section
300
_io.StringIO.tell
301
302
Tell the current file position.
303
[clinic start generated code]*/
304
305
static PyObject *
306
_io_StringIO_tell_impl(stringio *self)
307
/*[clinic end generated code: output=2e87ac67b116c77b input=98a08f3e2dae3550]*/
308
0
{
309
0
    CHECK_INITIALIZED(self);
310
0
    CHECK_CLOSED(self);
311
0
    return PyLong_FromSsize_t(self->pos);
312
0
}
313
314
/*[clinic input]
315
@critical_section
316
_io.StringIO.read
317
    size: Py_ssize_t(accept={int, NoneType}) = -1
318
    /
319
320
Read at most size characters, returned as a string.
321
322
If the argument is negative or omitted, read until EOF
323
is reached. Return an empty string at EOF.
324
[clinic start generated code]*/
325
326
static PyObject *
327
_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
328
/*[clinic end generated code: output=ae8cf6002f71626c input=9fbef45d8aece8e7]*/
329
57.0k
{
330
57.0k
    Py_ssize_t n;
331
57.0k
    Py_UCS4 *output;
332
333
57.0k
    CHECK_INITIALIZED(self);
334
57.0k
    CHECK_CLOSED(self);
335
336
    /* adjust invalid sizes */
337
57.0k
    n = self->string_size - self->pos;
338
57.0k
    if (size < 0 || size > n) {
339
31.3k
        size = n;
340
31.3k
        if (size < 0)
341
0
            size = 0;
342
31.3k
    }
343
344
    /* Optimization for seek(0); read() */
345
57.0k
    if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
346
28
        PyObject *result = make_intermediate(self);
347
28
        self->pos = self->string_size;
348
28
        return result;
349
28
    }
350
351
57.0k
    ENSURE_REALIZED(self);
352
57.0k
    output = self->buf + self->pos;
353
57.0k
    self->pos += size;
354
57.0k
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
355
57.0k
}
356
357
/* Internal helper, used by stringio_readline and stringio_iternext */
358
static PyObject *
359
_stringio_readline(stringio *self, Py_ssize_t limit)
360
19.5M
{
361
19.5M
    Py_UCS4 *start, *end, old_char;
362
19.5M
    Py_ssize_t len, consumed;
363
364
    /* In case of overseek, return the empty string */
365
19.5M
    if (self->pos >= self->string_size)
366
36.9k
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
367
368
19.4M
    start = self->buf + self->pos;
369
19.4M
    if (limit < 0 || limit > self->string_size - self->pos)
370
19.4M
        limit = self->string_size - self->pos;
371
372
19.4M
    end = start + limit;
373
19.4M
    old_char = *end;
374
19.4M
    *end = '\0';
375
19.4M
    len = _PyIO_find_line_ending(
376
19.4M
        self->readtranslate, self->readuniversal, self->readnl,
377
19.4M
        PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
378
19.4M
    *end = old_char;
379
    /* If we haven't found any line ending, we just return everything
380
       (`consumed` is ignored). */
381
19.4M
    if (len < 0)
382
30.7k
        len = limit;
383
19.4M
    self->pos += len;
384
19.4M
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
385
19.5M
}
386
387
/*[clinic input]
388
@critical_section
389
_io.StringIO.readline
390
    size: Py_ssize_t(accept={int, NoneType}) = -1
391
    /
392
393
Read until newline or EOF.
394
395
Returns an empty string if EOF is hit immediately.
396
[clinic start generated code]*/
397
398
static PyObject *
399
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
400
/*[clinic end generated code: output=cabd6452f1b7e85d input=4d14b8495dea1d98]*/
401
68
{
402
68
    CHECK_INITIALIZED(self);
403
68
    CHECK_CLOSED(self);
404
68
    ENSURE_REALIZED(self);
405
406
68
    return _stringio_readline(self, size);
407
68
}
408
409
/* Iterator step: return the next line, or NULL when the end of the
   contents is reached (no exception set) or on error (exception set). */
static PyObject *
stringio_iternext(PyObject *op)
{
    stringio *self = stringio_CAST(op);
    PyObject *line;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    ENSURE_REALIZED(self);

    if (!Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
        /* XXX is subclassing StringIO really supported? */
        line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Exact StringIO: skip method call overhead for speed. */
        line = _stringio_readline(self, -1);
    }

    if (line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(line) != 0) {
        return line;
    }

    /* An empty line means EOF. */
    Py_DECREF(line);
    return NULL;
}
446
447
/*[clinic input]
448
@critical_section
449
_io.StringIO.truncate
450
    pos: object = None
451
    /
452
453
Truncate size to pos.
454
455
The pos argument defaults to the current file position, as
456
returned by tell().  The current file position is unchanged.
457
Returns the new absolute position.
458
[clinic start generated code]*/
459
460
static PyObject *
461
_io_StringIO_truncate_impl(stringio *self, PyObject *pos)
462
/*[clinic end generated code: output=c76c43b5ecfaf4e2 input=d59fd2ee49757ae6]*/
463
36.9k
{
464
36.9k
    CHECK_INITIALIZED(self);
465
36.9k
    CHECK_CLOSED(self);
466
467
36.9k
    Py_ssize_t size;
468
36.9k
    if (pos == Py_None) {
469
36.9k
        size = self->pos;
470
36.9k
    }
471
0
    else {
472
0
        size = PyLong_AsLong(pos);
473
0
        if (size == -1 && PyErr_Occurred()) {
474
0
            return NULL;
475
0
        }
476
0
        if (size < 0) {
477
0
            PyErr_Format(PyExc_ValueError,
478
0
                         "negative pos value %zd", size);
479
0
            return NULL;
480
0
        }
481
0
    }
482
483
36.9k
    if (size < self->string_size) {
484
36.4k
        ENSURE_REALIZED(self);
485
36.4k
        if (resize_buffer(self, size) < 0)
486
0
            return NULL;
487
36.4k
        self->string_size = size;
488
36.4k
    }
489
490
36.9k
    return PyLong_FromSsize_t(size);
491
36.9k
}
492
493
/*[clinic input]
494
@critical_section
495
_io.StringIO.seek
496
    pos: Py_ssize_t
497
    whence: int = 0
498
    /
499
500
Change stream position.
501
502
Seek to character offset pos relative to position indicated by
503
whence:
504
    0  Start of stream (the default).  pos should be >= 0;
505
    1  Current position - pos must be 0;
506
    2  End of stream - pos must be 0.
507
Returns the new absolute position.
508
[clinic start generated code]*/
509
510
static PyObject *
511
_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
512
/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=ffef24668fd71a5d]*/
513
73.8k
{
514
73.8k
    CHECK_INITIALIZED(self);
515
73.8k
    CHECK_CLOSED(self);
516
517
73.8k
    if (whence != 0 && whence != 1 && whence != 2) {
518
0
        PyErr_Format(PyExc_ValueError,
519
0
                     "Invalid whence (%i, should be 0, 1 or 2)", whence);
520
0
        return NULL;
521
0
    }
522
73.8k
    else if (pos < 0 && whence == 0) {
523
0
        PyErr_Format(PyExc_ValueError,
524
0
                     "Negative seek position %zd", pos);
525
0
        return NULL;
526
0
    }
527
73.8k
    else if (whence != 0 && pos != 0) {
528
0
        PyErr_SetString(PyExc_OSError,
529
0
                        "Can't do nonzero cur-relative seeks");
530
0
        return NULL;
531
0
    }
532
533
    /* whence = 0: offset relative to beginning of the string.
534
       whence = 1: no change to current position.
535
       whence = 2: change position to end of file. */
536
73.8k
    if (whence == 1) {
537
0
        pos = self->pos;
538
0
    }
539
73.8k
    else if (whence == 2) {
540
0
        pos = self->string_size;
541
0
    }
542
543
73.8k
    self->pos = pos;
544
545
73.8k
    return PyLong_FromSsize_t(self->pos);
546
73.8k
}
547
548
/*[clinic input]
549
@critical_section
550
_io.StringIO.write
551
    s as obj: object
552
    /
553
554
Write string to file.
555
556
Returns the number of characters written, which is always equal to
557
the length of the string.
558
[clinic start generated code]*/
559
560
static PyObject *
561
_io_StringIO_write_impl(stringio *self, PyObject *obj)
562
/*[clinic end generated code: output=d53b1d841d7db288 input=1561272c0da4651f]*/
563
61.5k
{
564
61.5k
    Py_ssize_t size;
565
566
61.5k
    CHECK_INITIALIZED(self);
567
61.5k
    if (!PyUnicode_Check(obj)) {
568
0
        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
569
0
                     Py_TYPE(obj)->tp_name);
570
0
        return NULL;
571
0
    }
572
61.5k
    CHECK_CLOSED(self);
573
61.5k
    size = PyUnicode_GET_LENGTH(obj);
574
575
61.5k
    if (size > 0 && write_str(self, obj) < 0)
576
0
        return NULL;
577
578
61.5k
    return PyLong_FromSsize_t(size);
579
61.5k
}
580
581
/*[clinic input]
582
@critical_section
583
_io.StringIO.close
584
585
Close the IO object.
586
587
Attempting any further operation after the object is closed
588
will raise a ValueError.
589
590
This method has no effect if the file is already closed.
591
[clinic start generated code]*/
592
593
static PyObject *
594
_io_StringIO_close_impl(stringio *self)
595
/*[clinic end generated code: output=04399355cbe518f1 input=305d19aa29cc40b9]*/
596
0
{
597
0
    self->closed = 1;
598
    /* Free up some memory */
599
0
    if (resize_buffer(self, 0) < 0)
600
0
        return NULL;
601
0
    PyUnicodeWriter_Discard(self->writer);
602
0
    self->writer = NULL;
603
0
    Py_CLEAR(self->readnl);
604
0
    Py_CLEAR(self->writenl);
605
0
    Py_CLEAR(self->decoder);
606
0
    Py_RETURN_NONE;
607
0
}
608
609
static int
610
stringio_traverse(PyObject *op, visitproc visit, void *arg)
611
11.4k
{
612
11.4k
    stringio *self = stringio_CAST(op);
613
11.4k
    Py_VISIT(Py_TYPE(self));
614
11.4k
    Py_VISIT(self->readnl);
615
11.4k
    Py_VISIT(self->writenl);
616
11.4k
    Py_VISIT(self->decoder);
617
11.4k
    Py_VISIT(self->dict);
618
11.4k
    return 0;
619
11.4k
}
620
621
static int
622
stringio_clear(PyObject *op)
623
31.7k
{
624
31.7k
    stringio *self = stringio_CAST(op);
625
31.7k
    Py_CLEAR(self->readnl);
626
31.7k
    Py_CLEAR(self->writenl);
627
31.7k
    Py_CLEAR(self->decoder);
628
31.7k
    Py_CLEAR(self->dict);
629
31.7k
    return 0;
630
31.7k
}
631
632
static void
633
stringio_dealloc(PyObject *op)
634
31.7k
{
635
31.7k
    stringio *self = stringio_CAST(op);
636
31.7k
    PyTypeObject *tp = Py_TYPE(self);
637
31.7k
    _PyObject_GC_UNTRACK(self);
638
31.7k
    self->ok = 0;
639
31.7k
    if (self->buf) {
640
31.7k
        PyMem_Free(self->buf);
641
31.7k
        self->buf = NULL;
642
31.7k
    }
643
31.7k
    PyUnicodeWriter_Discard(self->writer);
644
31.7k
    (void)stringio_clear(op);
645
31.7k
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
646
31.7k
    tp->tp_free(self);
647
31.7k
    Py_DECREF(tp);
648
31.7k
}
649
650
static PyObject *
651
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
652
31.7k
{
653
31.7k
    stringio *self;
654
655
31.7k
    assert(type != NULL && type->tp_alloc != NULL);
656
31.7k
    self = (stringio *)type->tp_alloc(type, 0);
657
31.7k
    if (self == NULL)
658
0
        return NULL;
659
660
    /* tp_alloc initializes all the fields to zero. So we don't have to
661
       initialize them here. */
662
663
31.7k
    self->buf = (Py_UCS4 *)PyMem_Malloc(0);
664
31.7k
    if (self->buf == NULL) {
665
0
        Py_DECREF(self);
666
0
        return PyErr_NoMemory();
667
0
    }
668
669
31.7k
    return (PyObject *)self;
670
31.7k
}
671
672
/*[clinic input]
673
_io.StringIO.__init__
674
    initial_value as value: object(c_default="NULL") = ''
675
    newline as newline_obj: object(c_default="NULL") = '\n'
676
677
Text I/O implementation using an in-memory buffer.
678
679
The initial_value argument sets the value of object.  The newline
680
argument is like the one of TextIOWrapper's constructor.
681
[clinic start generated code]*/
682
683
static int
684
_io_StringIO___init___impl(stringio *self, PyObject *value,
685
                           PyObject *newline_obj)
686
/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
687
31.7k
{
688
31.7k
    const char *newline = "\n";
689
31.7k
    Py_ssize_t value_len;
690
691
    /* Parse the newline argument. We only want to allow unicode objects or
692
       None. */
693
31.7k
    if (newline_obj == Py_None) {
694
0
        newline = NULL;
695
0
    }
696
31.7k
    else if (newline_obj) {
697
15.8k
        if (!PyUnicode_Check(newline_obj)) {
698
0
            PyErr_Format(PyExc_TypeError,
699
0
                         "newline must be str or None, not %.200s",
700
0
                         Py_TYPE(newline_obj)->tp_name);
701
0
            return -1;
702
0
        }
703
15.8k
        newline = PyUnicode_AsUTF8(newline_obj);
704
15.8k
        if (newline == NULL)
705
0
            return -1;
706
15.8k
    }
707
708
31.7k
    if (newline && newline[0] != '\0'
709
15.8k
        && !(newline[0] == '\n' && newline[1] == '\0')
710
0
        && !(newline[0] == '\r' && newline[1] == '\0')
711
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
712
0
        PyErr_Format(PyExc_ValueError,
713
0
                     "illegal newline value: %R", newline_obj);
714
0
        return -1;
715
0
    }
716
31.7k
    if (value && value != Py_None && !PyUnicode_Check(value)) {
717
0
        PyErr_Format(PyExc_TypeError,
718
0
                     "initial_value must be str or None, not %.200s",
719
0
                     Py_TYPE(value)->tp_name);
720
0
        return -1;
721
0
    }
722
723
31.7k
    self->ok = 0;
724
725
31.7k
    PyUnicodeWriter_Discard(self->writer);
726
31.7k
    self->writer = NULL;
727
31.7k
    Py_CLEAR(self->readnl);
728
31.7k
    Py_CLEAR(self->writenl);
729
31.7k
    Py_CLEAR(self->decoder);
730
731
31.7k
    assert((newline != NULL && newline_obj != Py_None) ||
732
31.7k
           (newline == NULL && newline_obj == Py_None));
733
734
31.7k
    if (newline) {
735
31.7k
        self->readnl = PyUnicode_FromString(newline);
736
31.7k
        if (self->readnl == NULL)
737
0
            return -1;
738
31.7k
    }
739
31.7k
    self->readuniversal = (newline == NULL || newline[0] == '\0');
740
31.7k
    self->readtranslate = (newline == NULL);
741
    /* If newline == "", we don't translate anything.
742
       If newline == "\n" or newline == None, we translate to "\n", which is
743
       a no-op.
744
       (for newline == None, TextIOWrapper translates to os.linesep, but it
745
       is pointless for StringIO)
746
    */
747
31.7k
    if (newline != NULL && newline[0] == '\r') {
748
0
        self->writenl = Py_NewRef(self->readnl);
749
0
    }
750
751
31.7k
    _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
752
31.7k
    if (self->readuniversal) {
753
15.8k
        self->decoder = PyObject_CallFunctionObjArgs(
754
15.8k
            (PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
755
15.8k
            Py_None, self->readtranslate ? Py_True : Py_False, NULL);
756
15.8k
        if (self->decoder == NULL)
757
0
            return -1;
758
15.8k
    }
759
760
    /* Now everything is set up, resize buffer to size of initial value,
761
       and copy it */
762
31.7k
    self->string_size = 0;
763
31.7k
    if (value && value != Py_None)
764
15.8k
        value_len = PyUnicode_GetLength(value);
765
15.8k
    else
766
15.8k
        value_len = 0;
767
31.7k
    if (value_len > 0) {
768
        /* This is a heuristic, for newline translation might change
769
           the string length. */
770
15.8k
        if (resize_buffer(self, 0) < 0)
771
0
            return -1;
772
15.8k
        self->state = STATE_REALIZED;
773
15.8k
        self->pos = 0;
774
15.8k
        if (write_str(self, value) < 0)
775
0
            return -1;
776
15.8k
    }
777
15.9k
    else {
778
        /* Empty stringio object, we can start by accumulating */
779
15.9k
        if (resize_buffer(self, 0) < 0)
780
0
            return -1;
781
15.9k
        self->writer = PyUnicodeWriter_Create(0);
782
15.9k
        if (self->writer == NULL) {
783
0
            return -1;
784
0
        }
785
15.9k
        self->state = STATE_ACCUMULATING;
786
15.9k
    }
787
31.7k
    self->pos = 0;
788
31.7k
    self->module_state = module_state;
789
31.7k
    self->closed = 0;
790
31.7k
    self->ok = 1;
791
31.7k
    return 0;
792
31.7k
}
793
794
/* Properties and pseudo-properties */
795
796
/*[clinic input]
797
@critical_section
798
_io.StringIO.readable
799
800
Returns True if the IO object can be read.
801
[clinic start generated code]*/
802
803
static PyObject *
804
_io_StringIO_readable_impl(stringio *self)
805
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=6cd2ffd65a8e8763]*/
806
0
{
807
0
    CHECK_INITIALIZED(self);
808
0
    CHECK_CLOSED(self);
809
0
    Py_RETURN_TRUE;
810
0
}
811
812
/*[clinic input]
813
@critical_section
814
_io.StringIO.writable
815
816
Returns True if the IO object can be written.
817
[clinic start generated code]*/
818
819
static PyObject *
820
_io_StringIO_writable_impl(stringio *self)
821
/*[clinic end generated code: output=13e4dd77187074ca input=1b3c63dbaa761c69]*/
822
0
{
823
0
    CHECK_INITIALIZED(self);
824
0
    CHECK_CLOSED(self);
825
0
    Py_RETURN_TRUE;
826
0
}
827
828
/*[clinic input]
829
@critical_section
830
_io.StringIO.seekable
831
832
Returns True if the IO object can be seeked.
833
[clinic start generated code]*/
834
835
static PyObject *
836
_io_StringIO_seekable_impl(stringio *self)
837
/*[clinic end generated code: output=4d20b4641c756879 input=a820fad2cf085fc3]*/
838
0
{
839
0
    CHECK_INITIALIZED(self);
840
0
    CHECK_CLOSED(self);
841
0
    Py_RETURN_TRUE;
842
0
}
843
844
/* Pickling support.
845
846
   The implementation of __getstate__ is similar to the one for BytesIO,
847
   except that we also save the newline parameter. For __setstate__ and unlike
848
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
849
   to avoid decoding the complex newline state while keeping the object
850
   representation compact.
851
852
   See comment in bytesio.c regarding why only pickle protocol 2 and onward are
853
   supported.
854
*/
855
856
/*[clinic input]
857
@critical_section
858
_io.StringIO.__getstate__
859
860
[clinic start generated code]*/
861
862
static PyObject *
863
_io_StringIO___getstate___impl(stringio *self)
864
/*[clinic end generated code: output=780be4a996410199 input=76f27255ef83bb92]*/
865
0
{
866
0
    PyObject *initvalue = _io_StringIO_getvalue_impl(self);
867
0
    PyObject *dict;
868
0
    PyObject *state;
869
870
0
    if (initvalue == NULL)
871
0
        return NULL;
872
0
    if (self->dict == NULL) {
873
0
        dict = Py_NewRef(Py_None);
874
0
    }
875
0
    else {
876
0
        dict = PyDict_Copy(self->dict);
877
0
        if (dict == NULL) {
878
0
            Py_DECREF(initvalue);
879
0
            return NULL;
880
0
        }
881
0
    }
882
883
0
    state = Py_BuildValue("(OOnN)", initvalue,
884
0
                          self->readnl ? self->readnl : Py_None,
885
0
                          self->pos, dict);
886
0
    Py_DECREF(initvalue);
887
0
    return state;
888
0
}
889
890
/*[clinic input]
891
@critical_section
892
_io.StringIO.__setstate__
893
894
    state: object
895
    /
896
[clinic start generated code]*/
897
898
static PyObject *
899
_io_StringIO___setstate___impl(stringio *self, PyObject *state)
900
/*[clinic end generated code: output=cb3962bc6d5c5609 input=8a27784b11b82e47]*/
901
0
{
902
0
    PyObject *initarg;
903
0
    PyObject *position_obj;
904
0
    PyObject *dict;
905
0
    Py_ssize_t pos;
906
907
0
    assert(state != NULL);
908
0
    CHECK_CLOSED(self);
909
910
    /* We allow the state tuple to be longer than 4, because we may need
911
       someday to extend the object's state without breaking
912
       backward-compatibility. */
913
0
    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
914
0
        PyErr_Format(PyExc_TypeError,
915
0
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
916
0
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
917
0
        return NULL;
918
0
    }
919
920
    /* Initialize the object's state. */
921
0
    initarg = PyTuple_GetSlice(state, 0, 2);
922
0
    if (initarg == NULL)
923
0
        return NULL;
924
0
    if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
925
0
        Py_DECREF(initarg);
926
0
        return NULL;
927
0
    }
928
0
    Py_DECREF(initarg);
929
930
    /* Restore the buffer state. Even if __init__ did initialize the buffer,
931
       we have to initialize it again since __init__ may translate the
932
       newlines in the initial_value string. We clearly do not want that
933
       because the string value in the state tuple has already been translated
934
       once by __init__. So we do not take any chance and replace object's
935
       buffer completely. */
936
0
    {
937
0
        PyObject *item = PyTuple_GET_ITEM(state, 0);
938
0
        if (PyUnicode_Check(item)) {
939
0
            Py_UCS4 *buf = PyUnicode_AsUCS4Copy(item);
940
0
            if (buf == NULL)
941
0
                return NULL;
942
0
            Py_ssize_t bufsize = PyUnicode_GET_LENGTH(item);
943
944
0
            if (resize_buffer(self, bufsize) < 0) {
945
0
                PyMem_Free(buf);
946
0
                return NULL;
947
0
            }
948
0
            memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
949
0
            PyMem_Free(buf);
950
0
            self->string_size = bufsize;
951
0
        }
952
0
        else {
953
0
            assert(item == Py_None);
954
0
            self->string_size = 0;
955
0
        }
956
0
    }
957
958
    /* Set carefully the position value. Alternatively, we could use the seek
959
       method instead of modifying self->pos directly to better protect the
960
       object internal state against erroneous (or malicious) inputs. */
961
0
    position_obj = PyTuple_GET_ITEM(state, 2);
962
0
    if (!PyLong_Check(position_obj)) {
963
0
        PyErr_Format(PyExc_TypeError,
964
0
                     "third item of state must be an integer, got %.200s",
965
0
                     Py_TYPE(position_obj)->tp_name);
966
0
        return NULL;
967
0
    }
968
0
    pos = PyLong_AsSsize_t(position_obj);
969
0
    if (pos == -1 && PyErr_Occurred())
970
0
        return NULL;
971
0
    if (pos < 0) {
972
0
        PyErr_SetString(PyExc_ValueError,
973
0
                        "position value cannot be negative");
974
0
        return NULL;
975
0
    }
976
0
    self->pos = pos;
977
978
    /* Set the dictionary of the instance variables. */
979
0
    dict = PyTuple_GET_ITEM(state, 3);
980
0
    if (dict != Py_None) {
981
0
        if (!PyDict_Check(dict)) {
982
0
            PyErr_Format(PyExc_TypeError,
983
0
                         "fourth item of state should be a dict, got a %.200s",
984
0
                         Py_TYPE(dict)->tp_name);
985
0
            return NULL;
986
0
        }
987
0
        if (self->dict) {
988
            /* Alternatively, we could replace the internal dictionary
989
               completely. However, it seems more practical to just update it. */
990
0
            if (PyDict_Update(self->dict, dict) < 0)
991
0
                return NULL;
992
0
        }
993
0
        else {
994
0
            self->dict = Py_NewRef(dict);
995
0
        }
996
0
    }
997
998
0
    Py_RETURN_NONE;
999
0
}
1000
1001
/*[clinic input]
1002
@critical_section
1003
@getter
1004
_io.StringIO.closed
1005
[clinic start generated code]*/
1006
1007
static PyObject *
1008
_io_StringIO_closed_get_impl(stringio *self)
1009
/*[clinic end generated code: output=531ddca7954331d6 input=178d2ef24395fd49]*/
1010
36.9k
{
1011
36.9k
    CHECK_INITIALIZED(self);
1012
36.9k
    return PyBool_FromLong(self->closed);
1013
36.9k
}
1014
1015
/*[clinic input]
1016
@critical_section
1017
@getter
1018
_io.StringIO.line_buffering
1019
[clinic start generated code]*/
1020
1021
static PyObject *
1022
_io_StringIO_line_buffering_get_impl(stringio *self)
1023
/*[clinic end generated code: output=360710e0112966ae input=6a7634e7f890745e]*/
1024
0
{
1025
0
    CHECK_INITIALIZED(self);
1026
0
    CHECK_CLOSED(self);
1027
0
    Py_RETURN_FALSE;
1028
0
}
1029
1030
/*[clinic input]
1031
@critical_section
1032
@getter
1033
_io.StringIO.newlines
1034
[clinic start generated code]*/
1035
1036
static PyObject *
1037
_io_StringIO_newlines_get_impl(stringio *self)
1038
/*[clinic end generated code: output=35d7c0b66d7e0160 input=092a14586718244b]*/
1039
0
{
1040
0
    CHECK_INITIALIZED(self);
1041
0
    CHECK_CLOSED(self);
1042
0
    if (self->decoder == NULL) {
1043
0
        Py_RETURN_NONE;
1044
0
    }
1045
0
    return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
1046
0
}
1047
1048
static struct PyMethodDef stringio_methods[] = {
1049
    _IO_STRINGIO_CLOSE_METHODDEF
1050
    _IO_STRINGIO_GETVALUE_METHODDEF
1051
    _IO_STRINGIO_READ_METHODDEF
1052
    _IO_STRINGIO_READLINE_METHODDEF
1053
    _IO_STRINGIO_TELL_METHODDEF
1054
    _IO_STRINGIO_TRUNCATE_METHODDEF
1055
    _IO_STRINGIO_SEEK_METHODDEF
1056
    _IO_STRINGIO_WRITE_METHODDEF
1057
1058
    _IO_STRINGIO_SEEKABLE_METHODDEF
1059
    _IO_STRINGIO_READABLE_METHODDEF
1060
    _IO_STRINGIO_WRITABLE_METHODDEF
1061
1062
    _IO_STRINGIO___GETSTATE___METHODDEF
1063
    _IO_STRINGIO___SETSTATE___METHODDEF
1064
    {NULL, NULL}        /* sentinel */
1065
};
1066
1067
static PyGetSetDef stringio_getset[] = {
1068
    _IO_STRINGIO_CLOSED_GETSETDEF
1069
    _IO_STRINGIO_NEWLINES_GETSETDEF
1070
    /*  (following comments straight off of the original Python wrapper:)
1071
        XXX Cruft to support the TextIOWrapper API. This would only
1072
        be meaningful if StringIO supported the buffer attribute.
1073
        Hopefully, a better solution, than adding these pseudo-attributes,
1074
        will be found.
1075
    */
1076
    _IO_STRINGIO_LINE_BUFFERING_GETSETDEF
1077
    {NULL}
1078
};
1079
1080
static struct PyMemberDef stringio_members[] = {
1081
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(stringio, weakreflist), Py_READONLY},
1082
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(stringio, dict), Py_READONLY},
1083
    {NULL},
1084
};
1085
1086
static PyType_Slot stringio_slots[] = {
1087
    {Py_tp_dealloc, stringio_dealloc},
1088
    {Py_tp_doc, (void *)_io_StringIO___init____doc__},
1089
    {Py_tp_traverse, stringio_traverse},
1090
    {Py_tp_clear, stringio_clear},
1091
    {Py_tp_iternext, stringio_iternext},
1092
    {Py_tp_methods, stringio_methods},
1093
    {Py_tp_members, stringio_members},
1094
    {Py_tp_getset, stringio_getset},
1095
    {Py_tp_init, _io_StringIO___init__},
1096
    {Py_tp_new, stringio_new},
1097
    {0, NULL},
1098
};
1099
1100
PyType_Spec _Py_stringio_spec = {
1101
    .name = "_io.StringIO",
1102
    .basicsize = sizeof(stringio),
1103
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1104
              Py_TPFLAGS_IMMUTABLETYPE),
1105
    .slots = stringio_slots,
1106
};