Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_io/stringio.c
Line
Count
Source
1
#include "Python.h"
2
#include <stddef.h>               // offsetof()
3
#include "pycore_object.h"
4
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
5
#include "_iomodule.h"
6
7
/* Implementation note: the buffer is always at least one character longer
8
   than the enclosed string, for proper functioning of _PyIO_find_line_ending.
9
*/
10
11
26.0M
#define STATE_REALIZED 1
12
215k
#define STATE_ACCUMULATING 2
13
14
/*[clinic input]
15
module _io
16
class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type"
17
[clinic start generated code]*/
18
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/
19
20
typedef struct {
21
    PyObject_HEAD
22
    Py_UCS4 *buf;
23
    Py_ssize_t pos;
24
    Py_ssize_t string_size;
25
    size_t buf_size;
26
27
    /* The stringio object can be in two states: accumulating or realized.
28
       In accumulating state, the internal buffer contains nothing and
29
       the contents are given by the embedded _PyUnicodeWriter structure.
30
       In realized state, the internal buffer is meaningful and the
31
       _PyUnicodeWriter is destroyed.
32
    */
33
    int state;
34
    PyUnicodeWriter *writer;
35
36
    char ok; /* initialized? */
37
    char closed;
38
    char readuniversal;
39
    char readtranslate;
40
    PyObject *decoder;
41
    PyObject *readnl;
42
    PyObject *writenl;
43
44
    PyObject *dict;
45
    PyObject *weakreflist;
46
    _PyIO_State *module_state;
47
} stringio;
48
49
25.9M
#define stringio_CAST(op)   ((stringio *)(op))
50
51
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
52
#include "clinic/stringio.c.h"
53
#undef clinic_state
54
55
static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
56
57
/* Guard macros for the method implementations.  Each one returns NULL from
   the *calling* function when its check fails, so they may only be used in
   functions that return a pointer.  They are wrapped in do { } while (0) so
   that `MACRO(self);` is always exactly one statement, which keeps them safe
   inside an unbraced if/else. */

/* Fail with ValueError unless the object's `ok` flag is positive. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Fail with ValueError if the object's `closed` flag is set. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

/* Switch the object to the realized state; propagate the error set by
   realize() on failure. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
75
76
77
/* Internal routine for changing the size, in terms of characters, of the
78
   buffer of StringIO objects.  The caller should ensure that the 'size'
79
   argument is non-negative.  Returns 0 on success, -1 otherwise. */
80
static int
81
resize_buffer(stringio *self, size_t size)
82
152k
{
83
    /* Here, unsigned types are used to avoid dealing with signed integer
84
       overflow, which is undefined in C. */
85
152k
    size_t alloc = self->buf_size;
86
152k
    Py_UCS4 *new_buf = NULL;
87
88
152k
    assert(self->buf != NULL);
89
90
    /* Reserve one more char for line ending detection. */
91
152k
    size = size + 1;
92
    /* For simplicity, stay in the range of the signed type. Anyway, Python
93
       doesn't allow strings to be longer than this. */
94
152k
    if (size > PY_SSIZE_T_MAX)
95
0
        goto overflow;
96
97
152k
    if (size < alloc / 2) {
98
        /* Major downsize; resize down to exact size. */
99
37.0k
        alloc = size + 1;
100
37.0k
    }
101
115k
    else if (size < alloc) {
102
        /* Within allocated size; quick exit */
103
1.61k
        return 0;
104
1.61k
    }
105
113k
    else if (size <= alloc * 1.125) {
106
        /* Moderate upsize; overallocate similar to list_resize() */
107
5.05k
        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
108
5.05k
    }
109
108k
    else {
110
        /* Major upsize; resize up to exact size */
111
108k
        alloc = size + 1;
112
108k
    }
113
114
150k
    if (alloc > SIZE_MAX / sizeof(Py_UCS4))
115
0
        goto overflow;
116
150k
    new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
117
150k
    if (new_buf == NULL) {
118
0
        PyErr_NoMemory();
119
0
        return -1;
120
0
    }
121
150k
    self->buf_size = alloc;
122
150k
    self->buf = new_buf;
123
124
150k
    return 0;
125
126
0
  overflow:
127
0
    PyErr_SetString(PyExc_OverflowError,
128
0
                    "new buffer size too large");
129
0
    return -1;
130
150k
}
131
132
static PyObject *
133
make_intermediate(stringio *self)
134
28
{
135
28
    PyObject *intermediate = PyUnicodeWriter_Finish(self->writer);
136
28
    self->writer = NULL;
137
28
    self->state = STATE_REALIZED;
138
28
    if (intermediate == NULL)
139
0
        return NULL;
140
141
28
    self->writer = PyUnicodeWriter_Create(0);
142
28
    if (self->writer == NULL) {
143
0
        Py_DECREF(intermediate);
144
0
        return NULL;
145
0
    }
146
28
    if (PyUnicodeWriter_WriteStr(self->writer, intermediate)) {
147
0
        Py_DECREF(intermediate);
148
0
        return NULL;
149
0
    }
150
28
    self->state = STATE_ACCUMULATING;
151
28
    return intermediate;
152
28
}
153
154
static int
155
realize(stringio *self)
156
25.9M
{
157
25.9M
    Py_ssize_t len;
158
25.9M
    PyObject *intermediate;
159
160
25.9M
    if (self->state == STATE_REALIZED)
161
25.9M
        return 0;
162
25.9M
    assert(self->state == STATE_ACCUMULATING);
163
16.0k
    self->state = STATE_REALIZED;
164
165
16.0k
    intermediate = PyUnicodeWriter_Finish(self->writer);
166
16.0k
    self->writer = NULL;
167
16.0k
    if (intermediate == NULL)
168
0
        return -1;
169
170
    /* Append the intermediate string to the internal buffer.
171
       The length should be equal to the current cursor position.
172
     */
173
16.0k
    len = PyUnicode_GET_LENGTH(intermediate);
174
16.0k
    if (resize_buffer(self, len) < 0) {
175
0
        Py_DECREF(intermediate);
176
0
        return -1;
177
0
    }
178
16.0k
    if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
179
0
        Py_DECREF(intermediate);
180
0
        return -1;
181
0
    }
182
183
16.0k
    Py_DECREF(intermediate);
184
16.0k
    return 0;
185
16.0k
}
186
187
/* Internal routine for writing a whole PyUnicode object to the buffer of a
188
   StringIO object. Returns 0 on success, or -1 on error. */
189
static Py_ssize_t
190
write_str(stringio *self, PyObject *obj)
191
84.4k
{
192
84.4k
    Py_ssize_t len;
193
84.4k
    PyObject *decoded = NULL;
194
195
84.4k
    assert(self->buf != NULL);
196
84.4k
    assert(self->pos >= 0);
197
198
84.4k
    if (self->decoder != NULL) {
199
62.3k
        decoded = _PyIncrementalNewlineDecoder_decode(
200
62.3k
            self->decoder, obj, 1 /* always final */);
201
62.3k
    }
202
22.0k
    else {
203
22.0k
        decoded = Py_NewRef(obj);
204
22.0k
    }
205
84.4k
    if (self->writenl) {
206
0
        PyObject *translated = PyUnicode_Replace(
207
0
            decoded, _Py_LATIN1_CHR('\n'), self->writenl, -1);
208
0
        Py_SETREF(decoded, translated);
209
0
    }
210
84.4k
    if (decoded == NULL)
211
0
        return -1;
212
213
84.4k
    assert(PyUnicode_Check(decoded));
214
84.4k
    len = PyUnicode_GET_LENGTH(decoded);
215
84.4k
    assert(len >= 0);
216
217
    /* This overflow check is not strictly necessary. However, it avoids us to
218
       deal with funky things like comparing an unsigned and a signed
219
       integer. */
220
84.4k
    if (self->pos > PY_SSIZE_T_MAX - len) {
221
0
        PyErr_SetString(PyExc_OverflowError,
222
0
                        "new position too large");
223
0
        goto fail;
224
0
    }
225
226
84.4k
    if (self->state == STATE_ACCUMULATING) {
227
23.1k
        if (self->string_size == self->pos) {
228
23.1k
            if (PyUnicodeWriter_WriteStr(self->writer, decoded))
229
0
                goto fail;
230
23.1k
            goto success;
231
23.1k
        }
232
0
        if (realize(self))
233
0
            goto fail;
234
0
    }
235
236
61.3k
    if (self->pos + len > self->string_size) {
237
61.3k
        if (resize_buffer(self, self->pos + len) < 0)
238
0
            goto fail;
239
61.3k
    }
240
241
61.3k
    if (self->pos > self->string_size) {
242
        /* In case of overseek, pad with null bytes the buffer region between
243
           the end of stream and the current position.
244
245
          0   lo      string_size                           hi
246
          |   |<---used--->|<----------available----------->|
247
          |   |            <--to pad-->|<---to write--->    |
248
          0   buf                   position
249
250
        */
251
0
        memset(self->buf + self->string_size, '\0',
252
0
               (self->pos - self->string_size) * sizeof(Py_UCS4));
253
0
    }
254
255
    /* Copy the data to the internal buffer, overwriting some of the
256
       existing data if self->pos < self->string_size. */
257
61.3k
    if (!PyUnicode_AsUCS4(decoded,
258
61.3k
                          self->buf + self->pos,
259
61.3k
                          self->buf_size - self->pos,
260
61.3k
                          0))
261
0
        goto fail;
262
263
84.4k
success:
264
    /* Set the new length of the internal string if it has changed. */
265
84.4k
    self->pos += len;
266
84.4k
    if (self->string_size < self->pos)
267
84.4k
        self->string_size = self->pos;
268
269
84.4k
    Py_DECREF(decoded);
270
84.4k
    return 0;
271
272
0
fail:
273
0
    Py_XDECREF(decoded);
274
0
    return -1;
275
61.3k
}
276
277
/*[clinic input]
278
@critical_section
279
_io.StringIO.getvalue
280
281
Retrieve the entire contents of the object.
282
[clinic start generated code]*/
283
284
static PyObject *
285
_io_StringIO_getvalue_impl(stringio *self)
286
/*[clinic end generated code: output=27b6a7bfeaebce01 input=fb5dee06b8d467f3]*/
287
0
{
288
0
    CHECK_INITIALIZED(self);
289
0
    CHECK_CLOSED(self);
290
0
    if (self->state == STATE_ACCUMULATING)
291
0
        return make_intermediate(self);
292
0
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
293
0
                                     self->string_size);
294
0
}
295
296
/*[clinic input]
297
@critical_section
298
_io.StringIO.tell
299
300
Tell the current file position.
301
[clinic start generated code]*/
302
303
static PyObject *
304
_io_StringIO_tell_impl(stringio *self)
305
/*[clinic end generated code: output=2e87ac67b116c77b input=98a08f3e2dae3550]*/
306
0
{
307
0
    CHECK_INITIALIZED(self);
308
0
    CHECK_CLOSED(self);
309
0
    return PyLong_FromSsize_t(self->pos);
310
0
}
311
312
/*[clinic input]
313
@critical_section
314
_io.StringIO.read
315
    size: Py_ssize_t(accept={int, NoneType}) = -1
316
    /
317
318
Read at most size characters, returned as a string.
319
320
If the argument is negative or omitted, read until EOF
321
is reached. Return an empty string at EOF.
322
[clinic start generated code]*/
323
324
static PyObject *
325
_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
326
/*[clinic end generated code: output=ae8cf6002f71626c input=9fbef45d8aece8e7]*/
327
57.6k
{
328
57.6k
    Py_ssize_t n;
329
57.6k
    Py_UCS4 *output;
330
331
57.6k
    CHECK_INITIALIZED(self);
332
57.6k
    CHECK_CLOSED(self);
333
334
    /* adjust invalid sizes */
335
57.6k
    n = self->string_size - self->pos;
336
57.6k
    if (size < 0 || size > n) {
337
31.7k
        size = n;
338
31.7k
        if (size < 0)
339
0
            size = 0;
340
31.7k
    }
341
342
    /* Optimization for seek(0); read() */
343
57.6k
    if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
344
28
        PyObject *result = make_intermediate(self);
345
28
        self->pos = self->string_size;
346
28
        return result;
347
28
    }
348
349
57.5k
    ENSURE_REALIZED(self);
350
57.5k
    output = self->buf + self->pos;
351
57.5k
    self->pos += size;
352
57.5k
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
353
57.5k
}
354
355
/* Internal helper, used by stringio_readline and stringio_iternext */
356
static PyObject *
357
_stringio_readline(stringio *self, Py_ssize_t limit)
358
25.9M
{
359
25.9M
    Py_UCS4 *start, *end, old_char;
360
25.9M
    Py_ssize_t len, consumed;
361
362
    /* In case of overseek, return the empty string */
363
25.9M
    if (self->pos >= self->string_size)
364
41.4k
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
365
366
25.8M
    start = self->buf + self->pos;
367
25.8M
    if (limit < 0 || limit > self->string_size - self->pos)
368
25.8M
        limit = self->string_size - self->pos;
369
370
25.8M
    end = start + limit;
371
25.8M
    old_char = *end;
372
25.8M
    *end = '\0';
373
25.8M
    len = _PyIO_find_line_ending(
374
25.8M
        self->readtranslate, self->readuniversal, self->readnl,
375
25.8M
        PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
376
25.8M
    *end = old_char;
377
    /* If we haven't found any line ending, we just return everything
378
       (`consumed` is ignored). */
379
25.8M
    if (len < 0)
380
36.0k
        len = limit;
381
25.8M
    self->pos += len;
382
25.8M
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
383
25.9M
}
384
385
/*[clinic input]
386
@critical_section
387
_io.StringIO.readline
388
    size: Py_ssize_t(accept={int, NoneType}) = -1
389
    /
390
391
Read until newline or EOF.
392
393
Returns an empty string if EOF is hit immediately.
394
[clinic start generated code]*/
395
396
static PyObject *
397
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
398
/*[clinic end generated code: output=cabd6452f1b7e85d input=4d14b8495dea1d98]*/
399
68
{
400
68
    CHECK_INITIALIZED(self);
401
68
    CHECK_CLOSED(self);
402
68
    ENSURE_REALIZED(self);
403
404
68
    return _stringio_readline(self, size);
405
68
}
406
407
/* Iterator step: return the next line, or NULL (without setting an
   exception here) once an empty line signals EOF.  Also returns NULL with
   an exception set on error. */
static PyObject *
stringio_iternext(PyObject *op)
{
    stringio *self = stringio_CAST(op);
    PyObject *line;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    ENSURE_REALIZED(self);

    if (!Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
        /* XXX is subclassing StringIO really supported? */
        line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Exact type: skip method call overhead for speed */
        line = _stringio_readline(self, -1);
    }

    if (line == NULL) {
        return NULL;
    }
    if (PyUnicode_GET_LENGTH(line) != 0) {
        return line;
    }

    /* Reached EOF */
    Py_DECREF(line);
    return NULL;
}
444
445
/*[clinic input]
446
@critical_section
447
_io.StringIO.truncate
448
    pos: object = None
449
    /
450
451
Truncate size to pos.
452
453
The pos argument defaults to the current file position, as
454
returned by tell().  The current file position is unchanged.
455
Returns the new absolute position.
456
[clinic start generated code]*/
457
458
static PyObject *
459
_io_StringIO_truncate_impl(stringio *self, PyObject *pos)
460
/*[clinic end generated code: output=c76c43b5ecfaf4e2 input=d59fd2ee49757ae6]*/
461
37.5k
{
462
37.5k
    CHECK_INITIALIZED(self);
463
37.5k
    CHECK_CLOSED(self);
464
465
37.5k
    Py_ssize_t size;
466
37.5k
    if (pos == Py_None) {
467
37.5k
        size = self->pos;
468
37.5k
    }
469
0
    else {
470
0
        size = PyLong_AsLong(pos);
471
0
        if (size == -1 && PyErr_Occurred()) {
472
0
            return NULL;
473
0
        }
474
0
        if (size < 0) {
475
0
            PyErr_Format(PyExc_ValueError,
476
0
                         "negative pos value %zd", size);
477
0
            return NULL;
478
0
        }
479
0
    }
480
481
37.5k
    if (size < self->string_size) {
482
37.0k
        ENSURE_REALIZED(self);
483
37.0k
        if (resize_buffer(self, size) < 0)
484
0
            return NULL;
485
37.0k
        self->string_size = size;
486
37.0k
    }
487
488
37.5k
    return PyLong_FromSsize_t(size);
489
37.5k
}
490
491
/*[clinic input]
492
@critical_section
493
_io.StringIO.seek
494
    pos: Py_ssize_t
495
    whence: int = 0
496
    /
497
498
Change stream position.
499
500
Seek to character offset pos relative to position indicated by
501
whence:
502
    0  Start of stream (the default).  pos should be >= 0;
503
    1  Current position - pos must be 0;
504
    2  End of stream - pos must be 0.
505
Returns the new absolute position.
506
[clinic start generated code]*/
507
508
static PyObject *
509
_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
510
/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=ffef24668fd71a5d]*/
511
75.1k
{
512
75.1k
    CHECK_INITIALIZED(self);
513
75.1k
    CHECK_CLOSED(self);
514
515
75.1k
    if (whence != 0 && whence != 1 && whence != 2) {
516
0
        PyErr_Format(PyExc_ValueError,
517
0
                     "Invalid whence (%i, should be 0, 1 or 2)", whence);
518
0
        return NULL;
519
0
    }
520
75.1k
    else if (pos < 0 && whence == 0) {
521
0
        PyErr_Format(PyExc_ValueError,
522
0
                     "Negative seek position %zd", pos);
523
0
        return NULL;
524
0
    }
525
75.1k
    else if (whence != 0 && pos != 0) {
526
0
        PyErr_SetString(PyExc_OSError,
527
0
                        "Can't do nonzero cur-relative seeks");
528
0
        return NULL;
529
0
    }
530
531
    /* whence = 0: offset relative to beginning of the string.
532
       whence = 1: no change to current position.
533
       whence = 2: change position to end of file. */
534
75.1k
    if (whence == 1) {
535
0
        pos = self->pos;
536
0
    }
537
75.1k
    else if (whence == 2) {
538
0
        pos = self->string_size;
539
0
    }
540
541
75.1k
    self->pos = pos;
542
543
75.1k
    return PyLong_FromSsize_t(self->pos);
544
75.1k
}
545
546
/*[clinic input]
547
@critical_section
548
_io.StringIO.write
549
    s as obj: object
550
    /
551
552
Write string to file.
553
554
Returns the number of characters written, which is always equal to
555
the length of the string.
556
[clinic start generated code]*/
557
558
static PyObject *
559
_io_StringIO_write_impl(stringio *self, PyObject *obj)
560
/*[clinic end generated code: output=d53b1d841d7db288 input=1561272c0da4651f]*/
561
62.3k
{
562
62.3k
    Py_ssize_t size;
563
564
62.3k
    CHECK_INITIALIZED(self);
565
62.3k
    if (!PyUnicode_Check(obj)) {
566
0
        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
567
0
                     Py_TYPE(obj)->tp_name);
568
0
        return NULL;
569
0
    }
570
62.3k
    CHECK_CLOSED(self);
571
62.3k
    size = PyUnicode_GET_LENGTH(obj);
572
573
62.3k
    if (size > 0 && write_str(self, obj) < 0)
574
0
        return NULL;
575
576
62.3k
    return PyLong_FromSsize_t(size);
577
62.3k
}
578
579
/*[clinic input]
580
@critical_section
581
_io.StringIO.close
582
583
Close the IO object.
584
585
Attempting any further operation after the object is closed
586
will raise a ValueError.
587
588
This method has no effect if the file is already closed.
589
[clinic start generated code]*/
590
591
static PyObject *
592
_io_StringIO_close_impl(stringio *self)
593
/*[clinic end generated code: output=04399355cbe518f1 input=305d19aa29cc40b9]*/
594
0
{
595
0
    self->closed = 1;
596
    /* Free up some memory */
597
0
    if (resize_buffer(self, 0) < 0)
598
0
        return NULL;
599
0
    PyUnicodeWriter_Discard(self->writer);
600
0
    self->writer = NULL;
601
0
    Py_CLEAR(self->readnl);
602
0
    Py_CLEAR(self->writenl);
603
0
    Py_CLEAR(self->decoder);
604
0
    Py_RETURN_NONE;
605
0
}
606
607
static int
608
stringio_traverse(PyObject *op, visitproc visit, void *arg)
609
14.0k
{
610
14.0k
    stringio *self = stringio_CAST(op);
611
14.0k
    Py_VISIT(Py_TYPE(self));
612
14.0k
    Py_VISIT(self->readnl);
613
14.0k
    Py_VISIT(self->writenl);
614
14.0k
    Py_VISIT(self->decoder);
615
14.0k
    Py_VISIT(self->dict);
616
14.0k
    return 0;
617
14.0k
}
618
619
static int
620
stringio_clear(PyObject *op)
621
38.1k
{
622
38.1k
    stringio *self = stringio_CAST(op);
623
38.1k
    Py_CLEAR(self->readnl);
624
38.1k
    Py_CLEAR(self->writenl);
625
38.1k
    Py_CLEAR(self->decoder);
626
38.1k
    Py_CLEAR(self->dict);
627
38.1k
    return 0;
628
38.1k
}
629
630
static void
631
stringio_dealloc(PyObject *op)
632
38.1k
{
633
38.1k
    stringio *self = stringio_CAST(op);
634
38.1k
    PyTypeObject *tp = Py_TYPE(self);
635
38.1k
    _PyObject_GC_UNTRACK(self);
636
38.1k
    self->ok = 0;
637
38.1k
    if (self->buf) {
638
38.1k
        PyMem_Free(self->buf);
639
38.1k
        self->buf = NULL;
640
38.1k
    }
641
38.1k
    PyUnicodeWriter_Discard(self->writer);
642
38.1k
    (void)stringio_clear(op);
643
38.1k
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
644
38.1k
    tp->tp_free(self);
645
38.1k
    Py_DECREF(tp);
646
38.1k
}
647
648
static PyObject *
649
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
650
38.1k
{
651
38.1k
    stringio *self;
652
653
38.1k
    assert(type != NULL && type->tp_alloc != NULL);
654
38.1k
    self = (stringio *)type->tp_alloc(type, 0);
655
38.1k
    if (self == NULL)
656
0
        return NULL;
657
658
    /* tp_alloc initializes all the fields to zero. So we don't have to
659
       initialize them here. */
660
661
38.1k
    self->buf = (Py_UCS4 *)PyMem_Malloc(0);
662
38.1k
    if (self->buf == NULL) {
663
0
        Py_DECREF(self);
664
0
        return PyErr_NoMemory();
665
0
    }
666
667
38.1k
    return (PyObject *)self;
668
38.1k
}
669
670
/*[clinic input]
671
_io.StringIO.__init__
672
    initial_value as value: object(c_default="NULL") = ''
673
    newline as newline_obj: object(c_default="NULL") = '\n'
674
675
Text I/O implementation using an in-memory buffer.
676
677
The initial_value argument sets the value of object.  The newline
678
argument is like the one of TextIOWrapper's constructor.
679
[clinic start generated code]*/
680
681
static int
682
_io_StringIO___init___impl(stringio *self, PyObject *value,
683
                           PyObject *newline_obj)
684
/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
685
38.1k
{
686
38.1k
    const char *newline = "\n";
687
38.1k
    Py_ssize_t value_len;
688
689
    /* Parse the newline argument. We only want to allow unicode objects or
690
       None. */
691
38.1k
    if (newline_obj == Py_None) {
692
0
        newline = NULL;
693
0
    }
694
38.1k
    else if (newline_obj) {
695
16.0k
        if (!PyUnicode_Check(newline_obj)) {
696
0
            PyErr_Format(PyExc_TypeError,
697
0
                         "newline must be str or None, not %.200s",
698
0
                         Py_TYPE(newline_obj)->tp_name);
699
0
            return -1;
700
0
        }
701
16.0k
        newline = PyUnicode_AsUTF8(newline_obj);
702
16.0k
        if (newline == NULL)
703
0
            return -1;
704
16.0k
    }
705
706
38.1k
    if (newline && newline[0] != '\0'
707
22.0k
        && !(newline[0] == '\n' && newline[1] == '\0')
708
0
        && !(newline[0] == '\r' && newline[1] == '\0')
709
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
710
0
        PyErr_Format(PyExc_ValueError,
711
0
                     "illegal newline value: %R", newline_obj);
712
0
        return -1;
713
0
    }
714
38.1k
    if (value && value != Py_None && !PyUnicode_Check(value)) {
715
0
        PyErr_Format(PyExc_TypeError,
716
0
                     "initial_value must be str or None, not %.200s",
717
0
                     Py_TYPE(value)->tp_name);
718
0
        return -1;
719
0
    }
720
721
38.1k
    self->ok = 0;
722
723
38.1k
    PyUnicodeWriter_Discard(self->writer);
724
38.1k
    self->writer = NULL;
725
38.1k
    Py_CLEAR(self->readnl);
726
38.1k
    Py_CLEAR(self->writenl);
727
38.1k
    Py_CLEAR(self->decoder);
728
729
38.1k
    assert((newline != NULL && newline_obj != Py_None) ||
730
38.1k
           (newline == NULL && newline_obj == Py_None));
731
732
38.1k
    if (newline) {
733
38.1k
        self->readnl = PyUnicode_FromString(newline);
734
38.1k
        if (self->readnl == NULL)
735
0
            return -1;
736
38.1k
    }
737
38.1k
    self->readuniversal = (newline == NULL || newline[0] == '\0');
738
38.1k
    self->readtranslate = (newline == NULL);
739
    /* If newline == "", we don't translate anything.
740
       If newline == "\n" or newline == None, we translate to "\n", which is
741
       a no-op.
742
       (for newline == None, TextIOWrapper translates to os.linesep, but it
743
       is pointless for StringIO)
744
    */
745
38.1k
    if (newline != NULL && newline[0] == '\r') {
746
0
        self->writenl = Py_NewRef(self->readnl);
747
0
    }
748
749
38.1k
    _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
750
38.1k
    if (self->readuniversal) {
751
16.0k
        self->decoder = PyObject_CallFunctionObjArgs(
752
16.0k
            (PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
753
16.0k
            Py_None, self->readtranslate ? Py_True : Py_False, NULL);
754
16.0k
        if (self->decoder == NULL)
755
0
            return -1;
756
16.0k
    }
757
758
    /* Now everything is set up, resize buffer to size of initial value,
759
       and copy it */
760
38.1k
    self->string_size = 0;
761
38.1k
    if (value && value != Py_None)
762
22.0k
        value_len = PyUnicode_GetLength(value);
763
16.0k
    else
764
16.0k
        value_len = 0;
765
38.1k
    if (value_len > 0) {
766
        /* This is a heuristic, for newline translation might change
767
           the string length. */
768
22.0k
        if (resize_buffer(self, 0) < 0)
769
0
            return -1;
770
22.0k
        self->state = STATE_REALIZED;
771
22.0k
        self->pos = 0;
772
22.0k
        if (write_str(self, value) < 0)
773
0
            return -1;
774
22.0k
    }
775
16.0k
    else {
776
        /* Empty stringio object, we can start by accumulating */
777
16.0k
        if (resize_buffer(self, 0) < 0)
778
0
            return -1;
779
16.0k
        self->writer = PyUnicodeWriter_Create(0);
780
16.0k
        if (self->writer == NULL) {
781
0
            return -1;
782
0
        }
783
16.0k
        self->state = STATE_ACCUMULATING;
784
16.0k
    }
785
38.1k
    self->pos = 0;
786
38.1k
    self->module_state = module_state;
787
38.1k
    self->closed = 0;
788
38.1k
    self->ok = 1;
789
38.1k
    return 0;
790
38.1k
}
791
792
/* Properties and pseudo-properties */
793
794
/*[clinic input]
795
@critical_section
796
_io.StringIO.readable
797
798
Returns True if the IO object can be read.
799
[clinic start generated code]*/
800
801
static PyObject *
802
_io_StringIO_readable_impl(stringio *self)
803
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=6cd2ffd65a8e8763]*/
804
0
{
805
0
    CHECK_INITIALIZED(self);
806
0
    CHECK_CLOSED(self);
807
0
    Py_RETURN_TRUE;
808
0
}
809
810
/*[clinic input]
811
@critical_section
812
_io.StringIO.writable
813
814
Returns True if the IO object can be written.
815
[clinic start generated code]*/
816
817
static PyObject *
818
_io_StringIO_writable_impl(stringio *self)
819
/*[clinic end generated code: output=13e4dd77187074ca input=1b3c63dbaa761c69]*/
820
0
{
821
0
    CHECK_INITIALIZED(self);
822
0
    CHECK_CLOSED(self);
823
0
    Py_RETURN_TRUE;
824
0
}
825
826
/*[clinic input]
827
@critical_section
828
_io.StringIO.seekable
829
830
Returns True if the IO object can be seeked.
831
[clinic start generated code]*/
832
833
static PyObject *
834
_io_StringIO_seekable_impl(stringio *self)
835
/*[clinic end generated code: output=4d20b4641c756879 input=a820fad2cf085fc3]*/
836
0
{
837
0
    CHECK_INITIALIZED(self);
838
0
    CHECK_CLOSED(self);
839
0
    Py_RETURN_TRUE;
840
0
}
841
842
/* Pickling support.
843
844
   The implementation of __getstate__ is similar to the one for BytesIO,
845
   except that we also save the newline parameter. For __setstate__ and unlike
846
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
847
   to avoid decoding the complex newline state while keeping the object
848
   representation compact.
849
850
   See comment in bytesio.c regarding why only pickle protocols 2 and onward are
851
   supported.
852
*/
853
854
/*[clinic input]
855
@critical_section
856
_io.StringIO.__getstate__
857
858
[clinic start generated code]*/
859
860
static PyObject *
861
_io_StringIO___getstate___impl(stringio *self)
862
/*[clinic end generated code: output=780be4a996410199 input=76f27255ef83bb92]*/
863
0
{
864
0
    PyObject *initvalue = _io_StringIO_getvalue_impl(self);
865
0
    PyObject *dict;
866
0
    PyObject *state;
867
868
0
    if (initvalue == NULL)
869
0
        return NULL;
870
0
    if (self->dict == NULL) {
871
0
        dict = Py_NewRef(Py_None);
872
0
    }
873
0
    else {
874
0
        dict = PyDict_Copy(self->dict);
875
0
        if (dict == NULL) {
876
0
            Py_DECREF(initvalue);
877
0
            return NULL;
878
0
        }
879
0
    }
880
881
0
    state = Py_BuildValue("(OOnN)", initvalue,
882
0
                          self->readnl ? self->readnl : Py_None,
883
0
                          self->pos, dict);
884
0
    Py_DECREF(initvalue);
885
0
    return state;
886
0
}
887
888
/*[clinic input]
889
@critical_section
890
_io.StringIO.__setstate__
891
892
    state: object
893
    /
894
[clinic start generated code]*/
895
896
static PyObject *
897
_io_StringIO___setstate___impl(stringio *self, PyObject *state)
898
/*[clinic end generated code: output=cb3962bc6d5c5609 input=8a27784b11b82e47]*/
899
0
{
900
0
    PyObject *initarg;
901
0
    PyObject *position_obj;
902
0
    PyObject *dict;
903
0
    Py_ssize_t pos;
904
905
0
    assert(state != NULL);
906
0
    CHECK_CLOSED(self);
907
908
    /* We allow the state tuple to be longer than 4, because we may need
909
       someday to extend the object's state without breaking
910
       backward-compatibility. */
911
0
    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
912
0
        PyErr_Format(PyExc_TypeError,
913
0
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
914
0
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
915
0
        return NULL;
916
0
    }
917
918
    /* Initialize the object's state. */
919
0
    initarg = PyTuple_GetSlice(state, 0, 2);
920
0
    if (initarg == NULL)
921
0
        return NULL;
922
0
    if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
923
0
        Py_DECREF(initarg);
924
0
        return NULL;
925
0
    }
926
0
    Py_DECREF(initarg);
927
928
    /* Restore the buffer state. Even if __init__ did initialize the buffer,
929
       we have to initialize it again since __init__ may translate the
930
       newlines in the initial_value string. We clearly do not want that
931
       because the string value in the state tuple has already been translated
932
       once by __init__. So we do not take any chance and replace object's
933
       buffer completely. */
934
0
    {
935
0
        PyObject *item = PyTuple_GET_ITEM(state, 0);
936
0
        if (PyUnicode_Check(item)) {
937
0
            Py_UCS4 *buf = PyUnicode_AsUCS4Copy(item);
938
0
            if (buf == NULL)
939
0
                return NULL;
940
0
            Py_ssize_t bufsize = PyUnicode_GET_LENGTH(item);
941
942
0
            if (resize_buffer(self, bufsize) < 0) {
943
0
                PyMem_Free(buf);
944
0
                return NULL;
945
0
            }
946
0
            memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
947
0
            PyMem_Free(buf);
948
0
            self->string_size = bufsize;
949
0
        }
950
0
        else {
951
0
            assert(item == Py_None);
952
0
            self->string_size = 0;
953
0
        }
954
0
    }
955
956
    /* Set carefully the position value. Alternatively, we could use the seek
957
       method instead of modifying self->pos directly to better protect the
958
       object internal state against erroneous (or malicious) inputs. */
959
0
    position_obj = PyTuple_GET_ITEM(state, 2);
960
0
    if (!PyLong_Check(position_obj)) {
961
0
        PyErr_Format(PyExc_TypeError,
962
0
                     "third item of state must be an integer, got %.200s",
963
0
                     Py_TYPE(position_obj)->tp_name);
964
0
        return NULL;
965
0
    }
966
0
    pos = PyLong_AsSsize_t(position_obj);
967
0
    if (pos == -1 && PyErr_Occurred())
968
0
        return NULL;
969
0
    if (pos < 0) {
970
0
        PyErr_SetString(PyExc_ValueError,
971
0
                        "position value cannot be negative");
972
0
        return NULL;
973
0
    }
974
0
    self->pos = pos;
975
976
    /* Set the dictionary of the instance variables. */
977
0
    dict = PyTuple_GET_ITEM(state, 3);
978
0
    if (dict != Py_None) {
979
0
        if (!PyDict_Check(dict)) {
980
0
            PyErr_Format(PyExc_TypeError,
981
0
                         "fourth item of state should be a dict, got a %.200s",
982
0
                         Py_TYPE(dict)->tp_name);
983
0
            return NULL;
984
0
        }
985
0
        if (self->dict) {
986
            /* Alternatively, we could replace the internal dictionary
987
               completely. However, it seems more practical to just update it. */
988
0
            if (PyDict_Update(self->dict, dict) < 0)
989
0
                return NULL;
990
0
        }
991
0
        else {
992
0
            self->dict = Py_NewRef(dict);
993
0
        }
994
0
    }
995
996
0
    Py_RETURN_NONE;
997
0
}
998
999
/*[clinic input]
1000
@critical_section
1001
@getter
1002
_io.StringIO.closed
1003
[clinic start generated code]*/
1004
1005
static PyObject *
1006
_io_StringIO_closed_get_impl(stringio *self)
1007
/*[clinic end generated code: output=531ddca7954331d6 input=178d2ef24395fd49]*/
1008
43.5k
{
1009
43.5k
    CHECK_INITIALIZED(self);
1010
43.5k
    return PyBool_FromLong(self->closed);
1011
43.5k
}
1012
1013
/*[clinic input]
1014
@critical_section
1015
@getter
1016
_io.StringIO.line_buffering
1017
[clinic start generated code]*/
1018
1019
static PyObject *
1020
_io_StringIO_line_buffering_get_impl(stringio *self)
1021
/*[clinic end generated code: output=360710e0112966ae input=6a7634e7f890745e]*/
1022
0
{
1023
0
    CHECK_INITIALIZED(self);
1024
0
    CHECK_CLOSED(self);
1025
0
    Py_RETURN_FALSE;
1026
0
}
1027
1028
/*[clinic input]
1029
@critical_section
1030
@getter
1031
_io.StringIO.newlines
1032
[clinic start generated code]*/
1033
1034
static PyObject *
1035
_io_StringIO_newlines_get_impl(stringio *self)
1036
/*[clinic end generated code: output=35d7c0b66d7e0160 input=092a14586718244b]*/
1037
0
{
1038
0
    CHECK_INITIALIZED(self);
1039
0
    CHECK_CLOSED(self);
1040
0
    if (self->decoder == NULL) {
1041
0
        Py_RETURN_NONE;
1042
0
    }
1043
0
    return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
1044
0
}
1045
1046
static struct PyMethodDef stringio_methods[] = {
1047
    _IO_STRINGIO_CLOSE_METHODDEF
1048
    _IO_STRINGIO_GETVALUE_METHODDEF
1049
    _IO_STRINGIO_READ_METHODDEF
1050
    _IO_STRINGIO_READLINE_METHODDEF
1051
    _IO_STRINGIO_TELL_METHODDEF
1052
    _IO_STRINGIO_TRUNCATE_METHODDEF
1053
    _IO_STRINGIO_SEEK_METHODDEF
1054
    _IO_STRINGIO_WRITE_METHODDEF
1055
1056
    _IO_STRINGIO_SEEKABLE_METHODDEF
1057
    _IO_STRINGIO_READABLE_METHODDEF
1058
    _IO_STRINGIO_WRITABLE_METHODDEF
1059
1060
    _IO_STRINGIO___GETSTATE___METHODDEF
1061
    _IO_STRINGIO___SETSTATE___METHODDEF
1062
    {NULL, NULL}        /* sentinel */
1063
};
1064
1065
static PyGetSetDef stringio_getset[] = {
1066
    _IO_STRINGIO_CLOSED_GETSETDEF
1067
    _IO_STRINGIO_NEWLINES_GETSETDEF
1068
    /*  (following comments straight off of the original Python wrapper:)
1069
        XXX Cruft to support the TextIOWrapper API. This would only
1070
        be meaningful if StringIO supported the buffer attribute.
1071
        Hopefully, a better solution, than adding these pseudo-attributes,
1072
        will be found.
1073
    */
1074
    _IO_STRINGIO_LINE_BUFFERING_GETSETDEF
1075
    {NULL}
1076
};
1077
1078
static struct PyMemberDef stringio_members[] = {
1079
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(stringio, weakreflist), Py_READONLY},
1080
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(stringio, dict), Py_READONLY},
1081
    {NULL},
1082
};
1083
1084
static PyType_Slot stringio_slots[] = {
1085
    {Py_tp_dealloc, stringio_dealloc},
1086
    {Py_tp_doc, (void *)_io_StringIO___init____doc__},
1087
    {Py_tp_traverse, stringio_traverse},
1088
    {Py_tp_clear, stringio_clear},
1089
    {Py_tp_iternext, stringio_iternext},
1090
    {Py_tp_methods, stringio_methods},
1091
    {Py_tp_members, stringio_members},
1092
    {Py_tp_getset, stringio_getset},
1093
    {Py_tp_init, _io_StringIO___init__},
1094
    {Py_tp_new, stringio_new},
1095
    {0, NULL},
1096
};
1097
1098
PyType_Spec _Py_stringio_spec = {
1099
    .name = "_io.StringIO",
1100
    .basicsize = sizeof(stringio),
1101
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1102
              Py_TPFLAGS_IMMUTABLETYPE),
1103
    .slots = stringio_slots,
1104
};