Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Modules/_io/stringio.c
Line
Count
Source
1
#include "Python.h"
2
#include <stddef.h>               // offsetof()
3
#include "pycore_object.h"
4
#include "pycore_weakref.h"       // FT_CLEAR_WEAKREFS()
5
#include "_iomodule.h"
6
7
/* Implementation note: the buffer is always at least one character longer
8
   than the enclosed string, for proper functioning of _PyIO_find_line_ending.
9
*/
10
11
22.4M
#define STATE_REALIZED 1
12
213k
#define STATE_ACCUMULATING 2
13
14
/*[clinic input]
15
module _io
16
class _io.StringIO "stringio *" "clinic_state()->PyStringIO_Type"
17
[clinic start generated code]*/
18
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2693eada0658d470]*/
19
20
/* Instance layout of _io.StringIO objects. */
typedef struct {
    PyObject_HEAD
    Py_UCS4 *buf;            /* character buffer, meaningful in realized state */
    Py_ssize_t pos;          /* current stream position, in characters */
    Py_ssize_t string_size;  /* length of the contents, in characters */
    size_t buf_size;         /* allocated capacity of buf, in Py_UCS4 units */

    /* The stringio object can be in two states: accumulating or realized.
       In accumulating state, the internal buffer contains nothing and
       the contents are held by the PyUnicodeWriter pointed to by `writer`.
       In realized state, the internal buffer is meaningful and the
       writer has been consumed (`writer` is NULL).
    */
    int state;               /* STATE_ACCUMULATING or STATE_REALIZED */
    PyUnicodeWriter *writer;

    char ok; /* initialized? */
    char closed;             /* set by close(); checked by CHECK_CLOSED() */
    char readuniversal;      /* true when newline is None or "" */
    char readtranslate;      /* true when newline is None */
    PyObject *decoder;       /* newline decoder; only set if readuniversal */
    PyObject *readnl;        /* newline as a str; NULL when newline is None */
    PyObject *writenl;       /* only set when newline starts with '\r' */

    PyObject *dict;
    PyObject *weakreflist;
    _PyIO_State *module_state;   /* cached by __init__ */
} stringio;
48
49
22.3M
#define stringio_CAST(op)   ((stringio *)(op))
50
51
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
52
#include "clinic/stringio.c.h"
53
#undef clinic_state
54
55
static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
56
57
/* Guard macros used at the top of most methods.  On failure each one makes
   the *calling* function return NULL, so they may only be used in functions
   that return a pointer.

   Every macro is wrapped in do { ... } while (0) so that it expands to a
   single statement: a bare `if` would capture a following `else` when the
   macro is used as the body of an unbraced if/else. */

/* Raise ValueError unless __init__ has completed successfully. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Raise ValueError if close() has been called on the object. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

/* Switch the object to realized state; the exception set by realize() is
   propagated on failure. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
75
76
77
/* Internal routine for changing the size, in terms of characters, of the
78
   buffer of StringIO objects.  The caller should ensure that the 'size'
79
   argument is non-negative.  Returns 0 on success, -1 otherwise. */
80
static int
81
resize_buffer(stringio *self, size_t size)
82
151k
{
83
    /* Here, unsigned types are used to avoid dealing with signed integer
84
       overflow, which is undefined in C. */
85
151k
    size_t alloc = self->buf_size;
86
151k
    Py_UCS4 *new_buf = NULL;
87
88
151k
    assert(self->buf != NULL);
89
90
    /* Reserve one more char for line ending detection. */
91
151k
    size = size + 1;
92
    /* For simplicity, stay in the range of the signed type. Anyway, Python
93
       doesn't allow strings to be longer than this. */
94
151k
    if (size > PY_SSIZE_T_MAX)
95
0
        goto overflow;
96
97
151k
    if (size < alloc / 2) {
98
        /* Major downsize; resize down to exact size. */
99
36.6k
        alloc = size + 1;
100
36.6k
    }
101
114k
    else if (size < alloc) {
102
        /* Within allocated size; quick exit */
103
1.57k
        return 0;
104
1.57k
    }
105
113k
    else if (size <= alloc * 1.125) {
106
        /* Moderate upsize; overallocate similar to list_resize() */
107
4.58k
        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
108
4.58k
    }
109
108k
    else {
110
        /* Major upsize; resize up to exact size */
111
108k
        alloc = size + 1;
112
108k
    }
113
114
149k
    if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
115
0
        goto overflow;
116
149k
    new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
117
149k
    if (new_buf == NULL) {
118
0
        PyErr_NoMemory();
119
0
        return -1;
120
0
    }
121
149k
    self->buf_size = alloc;
122
149k
    self->buf = new_buf;
123
124
149k
    return 0;
125
126
0
  overflow:
127
0
    PyErr_SetString(PyExc_OverflowError,
128
0
                    "new buffer size too large");
129
0
    return -1;
130
149k
}
131
132
static PyObject *
133
make_intermediate(stringio *self)
134
26
{
135
26
    PyObject *intermediate = PyUnicodeWriter_Finish(self->writer);
136
26
    self->writer = NULL;
137
26
    self->state = STATE_REALIZED;
138
26
    if (intermediate == NULL)
139
0
        return NULL;
140
141
26
    self->writer = PyUnicodeWriter_Create(0);
142
26
    if (self->writer == NULL) {
143
0
        Py_DECREF(intermediate);
144
0
        return NULL;
145
0
    }
146
26
    if (PyUnicodeWriter_WriteStr(self->writer, intermediate)) {
147
0
        Py_DECREF(intermediate);
148
0
        return NULL;
149
0
    }
150
26
    self->state = STATE_ACCUMULATING;
151
26
    return intermediate;
152
26
}
153
154
static int
155
realize(stringio *self)
156
22.3M
{
157
22.3M
    Py_ssize_t len;
158
22.3M
    PyObject *intermediate;
159
160
22.3M
    if (self->state == STATE_REALIZED)
161
22.3M
        return 0;
162
22.3M
    assert(self->state == STATE_ACCUMULATING);
163
16.1k
    self->state = STATE_REALIZED;
164
165
16.1k
    intermediate = PyUnicodeWriter_Finish(self->writer);
166
16.1k
    self->writer = NULL;
167
16.1k
    if (intermediate == NULL)
168
0
        return -1;
169
170
    /* Append the intermediate string to the internal buffer.
171
       The length should be equal to the current cursor position.
172
     */
173
16.1k
    len = PyUnicode_GET_LENGTH(intermediate);
174
16.1k
    if (resize_buffer(self, len) < 0) {
175
0
        Py_DECREF(intermediate);
176
0
        return -1;
177
0
    }
178
16.1k
    if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
179
0
        Py_DECREF(intermediate);
180
0
        return -1;
181
0
    }
182
183
16.1k
    Py_DECREF(intermediate);
184
16.1k
    return 0;
185
16.1k
}
186
187
/* Internal routine for writing a whole PyUnicode object to the buffer of a
188
   StringIO object. Returns 0 on success, or -1 on error. */
189
static Py_ssize_t
190
write_str(stringio *self, PyObject *obj)
191
83.3k
{
192
83.3k
    Py_ssize_t len;
193
83.3k
    PyObject *decoded = NULL;
194
195
83.3k
    assert(self->buf != NULL);
196
83.3k
    assert(self->pos >= 0);
197
198
83.3k
    if (self->decoder != NULL) {
199
61.1k
        decoded = _PyIncrementalNewlineDecoder_decode(
200
61.1k
            self->decoder, obj, 1 /* always final */);
201
61.1k
    }
202
22.1k
    else {
203
22.1k
        decoded = Py_NewRef(obj);
204
22.1k
    }
205
83.3k
    if (self->writenl) {
206
0
        PyObject *translated = PyUnicode_Replace(
207
0
            decoded, _Py_LATIN1_CHR('\n'), self->writenl, -1);
208
0
        Py_SETREF(decoded, translated);
209
0
    }
210
83.3k
    if (decoded == NULL)
211
0
        return -1;
212
213
83.3k
    assert(PyUnicode_Check(decoded));
214
83.3k
    len = PyUnicode_GET_LENGTH(decoded);
215
83.3k
    assert(len >= 0);
216
217
    /* This overflow check is not strictly necessary. However, it avoids us to
218
       deal with funky things like comparing an unsigned and a signed
219
       integer. */
220
83.3k
    if (self->pos > PY_SSIZE_T_MAX - len) {
221
0
        PyErr_SetString(PyExc_OverflowError,
222
0
                        "new position too large");
223
0
        goto fail;
224
0
    }
225
226
83.3k
    if (self->state == STATE_ACCUMULATING) {
227
23.0k
        if (self->string_size == self->pos) {
228
23.0k
            if (PyUnicodeWriter_WriteStr(self->writer, decoded))
229
0
                goto fail;
230
23.0k
            goto success;
231
23.0k
        }
232
0
        if (realize(self))
233
0
            goto fail;
234
0
    }
235
236
60.2k
    if (self->pos + len > self->string_size) {
237
60.2k
        if (resize_buffer(self, self->pos + len) < 0)
238
0
            goto fail;
239
60.2k
    }
240
241
60.2k
    if (self->pos > self->string_size) {
242
        /* In case of overseek, pad with null bytes the buffer region between
243
           the end of stream and the current position.
244
245
          0   lo      string_size                           hi
246
          |   |<---used--->|<----------available----------->|
247
          |   |            <--to pad-->|<---to write--->    |
248
          0   buf                   position
249
250
        */
251
0
        memset(self->buf + self->string_size, '\0',
252
0
               (self->pos - self->string_size) * sizeof(Py_UCS4));
253
0
    }
254
255
    /* Copy the data to the internal buffer, overwriting some of the
256
       existing data if self->pos < self->string_size. */
257
60.2k
    if (!PyUnicode_AsUCS4(decoded,
258
60.2k
                          self->buf + self->pos,
259
60.2k
                          self->buf_size - self->pos,
260
60.2k
                          0))
261
0
        goto fail;
262
263
83.3k
success:
264
    /* Set the new length of the internal string if it has changed. */
265
83.3k
    self->pos += len;
266
83.3k
    if (self->string_size < self->pos)
267
83.3k
        self->string_size = self->pos;
268
269
83.3k
    Py_DECREF(decoded);
270
83.3k
    return 0;
271
272
0
fail:
273
0
    Py_XDECREF(decoded);
274
0
    return -1;
275
60.2k
}
276
277
/*[clinic input]
278
@critical_section
279
_io.StringIO.getvalue
280
281
Retrieve the entire contents of the object.
282
[clinic start generated code]*/
283
284
static PyObject *
285
_io_StringIO_getvalue_impl(stringio *self)
286
/*[clinic end generated code: output=27b6a7bfeaebce01 input=fb5dee06b8d467f3]*/
287
0
{
288
0
    CHECK_INITIALIZED(self);
289
0
    CHECK_CLOSED(self);
290
0
    if (self->state == STATE_ACCUMULATING)
291
0
        return make_intermediate(self);
292
0
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
293
0
                                     self->string_size);
294
0
}
295
296
/*[clinic input]
297
@critical_section
298
_io.StringIO.tell
299
300
Tell the current file position.
301
[clinic start generated code]*/
302
303
static PyObject *
304
_io_StringIO_tell_impl(stringio *self)
305
/*[clinic end generated code: output=2e87ac67b116c77b input=98a08f3e2dae3550]*/
306
0
{
307
0
    CHECK_INITIALIZED(self);
308
0
    CHECK_CLOSED(self);
309
0
    return PyLong_FromSsize_t(self->pos);
310
0
}
311
312
/*[clinic input]
313
@critical_section
314
_io.StringIO.read
315
    size: Py_ssize_t(accept={int, NoneType}) = -1
316
    /
317
318
Read at most size characters, returned as a string.
319
320
If the argument is negative or omitted, read until EOF
321
is reached. Return an empty string at EOF.
322
[clinic start generated code]*/
323
324
static PyObject *
325
_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
326
/*[clinic end generated code: output=ae8cf6002f71626c input=9fbef45d8aece8e7]*/
327
56.9k
{
328
56.9k
    Py_ssize_t n;
329
56.9k
    Py_UCS4 *output;
330
331
56.9k
    CHECK_INITIALIZED(self);
332
56.9k
    CHECK_CLOSED(self);
333
334
    /* adjust invalid sizes */
335
56.9k
    n = self->string_size - self->pos;
336
56.9k
    if (size < 0 || size > n) {
337
31.9k
        size = n;
338
31.9k
        if (size < 0)
339
0
            size = 0;
340
31.9k
    }
341
342
    /* Optimization for seek(0); read() */
343
56.9k
    if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
344
26
        PyObject *result = make_intermediate(self);
345
26
        self->pos = self->string_size;
346
26
        return result;
347
26
    }
348
349
56.9k
    ENSURE_REALIZED(self);
350
56.9k
    output = self->buf + self->pos;
351
56.9k
    self->pos += size;
352
56.9k
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
353
56.9k
}
354
355
/* Internal helper, used by stringio_readline and stringio_iternext */
356
static PyObject *
357
_stringio_readline(stringio *self, Py_ssize_t limit)
358
22.2M
{
359
22.2M
    Py_UCS4 *start, *end, old_char;
360
22.2M
    Py_ssize_t len, consumed;
361
362
    /* In case of overseek, return the empty string */
363
22.2M
    if (self->pos >= self->string_size)
364
40.9k
        return Py_GetConstant(Py_CONSTANT_EMPTY_STR);
365
366
22.2M
    start = self->buf + self->pos;
367
22.2M
    if (limit < 0 || limit > self->string_size - self->pos)
368
22.2M
        limit = self->string_size - self->pos;
369
370
22.2M
    end = start + limit;
371
22.2M
    old_char = *end;
372
22.2M
    *end = '\0';
373
22.2M
    len = _PyIO_find_line_ending(
374
22.2M
        self->readtranslate, self->readuniversal, self->readnl,
375
22.2M
        PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
376
22.2M
    *end = old_char;
377
    /* If we haven't found any line ending, we just return everything
378
       (`consumed` is ignored). */
379
22.2M
    if (len < 0)
380
36.3k
        len = limit;
381
22.2M
    self->pos += len;
382
22.2M
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
383
22.2M
}
384
385
/*[clinic input]
386
@critical_section
387
_io.StringIO.readline
388
    size: Py_ssize_t(accept={int, NoneType}) = -1
389
    /
390
391
Read until newline or EOF.
392
393
Returns an empty string if EOF is hit immediately.
394
[clinic start generated code]*/
395
396
static PyObject *
397
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
398
/*[clinic end generated code: output=cabd6452f1b7e85d input=4d14b8495dea1d98]*/
399
34
{
400
34
    CHECK_INITIALIZED(self);
401
34
    CHECK_CLOSED(self);
402
34
    ENSURE_REALIZED(self);
403
404
34
    return _stringio_readline(self, size);
405
34
}
406
407
/* tp_iternext: return the next line, or NULL without an exception set
   once an empty line signals EOF. */
static PyObject *
stringio_iternext(PyObject *op)
{
    stringio *self = stringio_CAST(op);
    PyObject *line;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    ENSURE_REALIZED(self);

    if (!Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
        /* XXX is subclassing StringIO really supported? */
        line = PyObject_CallMethodNoArgs(op, &_Py_ID(readline));
        if (line == NULL) {
            return NULL;
        }
        if (!PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        /* Exact type: skip method call overhead for speed. */
        line = _stringio_readline(self, -1);
        if (line == NULL) {
            return NULL;
        }
    }

    if (PyUnicode_GET_LENGTH(line) != 0) {
        return line;
    }

    /* Reached EOF */
    Py_DECREF(line);
    return NULL;
}
444
445
/*[clinic input]
446
@critical_section
447
_io.StringIO.truncate
448
    pos: object = None
449
    /
450
451
Truncate size to pos.
452
453
The pos argument defaults to the current file position, as
454
returned by tell().  The current file position is unchanged.
455
Returns the new absolute position.
456
[clinic start generated code]*/
457
458
static PyObject *
459
_io_StringIO_truncate_impl(stringio *self, PyObject *pos)
460
/*[clinic end generated code: output=c76c43b5ecfaf4e2 input=d59fd2ee49757ae6]*/
461
37.0k
{
462
37.0k
    CHECK_INITIALIZED(self);
463
37.0k
    CHECK_CLOSED(self);
464
465
37.0k
    Py_ssize_t size;
466
37.0k
    if (pos == Py_None) {
467
37.0k
        size = self->pos;
468
37.0k
    }
469
0
    else {
470
0
        size = PyLong_AsLong(pos);
471
0
        if (size == -1 && PyErr_Occurred()) {
472
0
            return NULL;
473
0
        }
474
0
        if (size < 0) {
475
0
            PyErr_Format(PyExc_ValueError,
476
0
                         "negative pos value %zd", size);
477
0
            return NULL;
478
0
        }
479
0
    }
480
481
37.0k
    if (size < self->string_size) {
482
36.6k
        ENSURE_REALIZED(self);
483
36.6k
        if (resize_buffer(self, size) < 0)
484
0
            return NULL;
485
36.6k
        self->string_size = size;
486
36.6k
    }
487
488
37.0k
    return PyLong_FromSsize_t(size);
489
37.0k
}
490
491
/*[clinic input]
492
@critical_section
493
_io.StringIO.seek
494
    pos: Py_ssize_t
495
    whence: int = 0
496
    /
497
498
Change stream position.
499
500
Seek to character offset pos relative to position indicated by whence:
501
    0  Start of stream (the default).  pos should be >= 0;
502
    1  Current position - pos must be 0;
503
    2  End of stream - pos must be 0.
504
Returns the new absolute position.
505
[clinic start generated code]*/
506
507
static PyObject *
508
_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
509
/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=c75ced09343a00d7]*/
510
74.1k
{
511
74.1k
    CHECK_INITIALIZED(self);
512
74.1k
    CHECK_CLOSED(self);
513
514
74.1k
    if (whence != 0 && whence != 1 && whence != 2) {
515
0
        PyErr_Format(PyExc_ValueError,
516
0
                     "Invalid whence (%i, should be 0, 1 or 2)", whence);
517
0
        return NULL;
518
0
    }
519
74.1k
    else if (pos < 0 && whence == 0) {
520
0
        PyErr_Format(PyExc_ValueError,
521
0
                     "Negative seek position %zd", pos);
522
0
        return NULL;
523
0
    }
524
74.1k
    else if (whence != 0 && pos != 0) {
525
0
        PyErr_SetString(PyExc_OSError,
526
0
                        "Can't do nonzero cur-relative seeks");
527
0
        return NULL;
528
0
    }
529
530
    /* whence = 0: offset relative to beginning of the string.
531
       whence = 1: no change to current position.
532
       whence = 2: change position to end of file. */
533
74.1k
    if (whence == 1) {
534
0
        pos = self->pos;
535
0
    }
536
74.1k
    else if (whence == 2) {
537
0
        pos = self->string_size;
538
0
    }
539
540
74.1k
    self->pos = pos;
541
542
74.1k
    return PyLong_FromSsize_t(self->pos);
543
74.1k
}
544
545
/*[clinic input]
546
@critical_section
547
_io.StringIO.write
548
    s as obj: object
549
    /
550
551
Write string to file.
552
553
Returns the number of characters written, which is always equal to
554
the length of the string.
555
[clinic start generated code]*/
556
557
static PyObject *
558
_io_StringIO_write_impl(stringio *self, PyObject *obj)
559
/*[clinic end generated code: output=d53b1d841d7db288 input=1561272c0da4651f]*/
560
61.1k
{
561
61.1k
    Py_ssize_t size;
562
563
61.1k
    CHECK_INITIALIZED(self);
564
61.1k
    if (!PyUnicode_Check(obj)) {
565
0
        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
566
0
                     Py_TYPE(obj)->tp_name);
567
0
        return NULL;
568
0
    }
569
61.1k
    CHECK_CLOSED(self);
570
61.1k
    size = PyUnicode_GET_LENGTH(obj);
571
572
61.1k
    if (size > 0 && write_str(self, obj) < 0)
573
0
        return NULL;
574
575
61.1k
    return PyLong_FromSsize_t(size);
576
61.1k
}
577
578
/*[clinic input]
579
@critical_section
580
_io.StringIO.close
581
582
Close the IO object.
583
584
Attempting any further operation after the object is closed
585
will raise a ValueError.
586
587
This method has no effect if the file is already closed.
588
[clinic start generated code]*/
589
590
static PyObject *
591
_io_StringIO_close_impl(stringio *self)
592
/*[clinic end generated code: output=04399355cbe518f1 input=305d19aa29cc40b9]*/
593
0
{
594
0
    self->closed = 1;
595
    /* Free up some memory */
596
0
    if (resize_buffer(self, 0) < 0)
597
0
        return NULL;
598
0
    PyUnicodeWriter_Discard(self->writer);
599
0
    self->writer = NULL;
600
0
    Py_CLEAR(self->readnl);
601
0
    Py_CLEAR(self->writenl);
602
0
    Py_CLEAR(self->decoder);
603
0
    Py_RETURN_NONE;
604
0
}
605
606
static int
607
stringio_traverse(PyObject *op, visitproc visit, void *arg)
608
5.49k
{
609
5.49k
    stringio *self = stringio_CAST(op);
610
5.49k
    Py_VISIT(Py_TYPE(self));
611
5.49k
    Py_VISIT(self->readnl);
612
5.49k
    Py_VISIT(self->writenl);
613
5.49k
    Py_VISIT(self->decoder);
614
5.49k
    Py_VISIT(self->dict);
615
5.49k
    return 0;
616
5.49k
}
617
618
static int
619
stringio_clear(PyObject *op)
620
38.3k
{
621
38.3k
    stringio *self = stringio_CAST(op);
622
38.3k
    Py_CLEAR(self->readnl);
623
38.3k
    Py_CLEAR(self->writenl);
624
38.3k
    Py_CLEAR(self->decoder);
625
38.3k
    Py_CLEAR(self->dict);
626
38.3k
    return 0;
627
38.3k
}
628
629
static void
630
stringio_dealloc(PyObject *op)
631
38.3k
{
632
38.3k
    stringio *self = stringio_CAST(op);
633
38.3k
    PyTypeObject *tp = Py_TYPE(self);
634
38.3k
    _PyObject_GC_UNTRACK(self);
635
38.3k
    self->ok = 0;
636
38.3k
    if (self->buf) {
637
38.3k
        PyMem_Free(self->buf);
638
38.3k
        self->buf = NULL;
639
38.3k
    }
640
38.3k
    PyUnicodeWriter_Discard(self->writer);
641
38.3k
    (void)stringio_clear(op);
642
38.3k
    FT_CLEAR_WEAKREFS(op, self->weakreflist);
643
38.3k
    tp->tp_free(self);
644
38.3k
    Py_DECREF(tp);
645
38.3k
}
646
647
static PyObject *
648
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
649
38.3k
{
650
38.3k
    stringio *self;
651
652
38.3k
    assert(type != NULL && type->tp_alloc != NULL);
653
38.3k
    self = (stringio *)type->tp_alloc(type, 0);
654
38.3k
    if (self == NULL)
655
0
        return NULL;
656
657
    /* tp_alloc initializes all the fields to zero. So we don't have to
658
       initialize them here. */
659
660
38.3k
    self->buf = (Py_UCS4 *)PyMem_Malloc(0);
661
38.3k
    if (self->buf == NULL) {
662
0
        Py_DECREF(self);
663
0
        return PyErr_NoMemory();
664
0
    }
665
666
38.3k
    return (PyObject *)self;
667
38.3k
}
668
669
/*[clinic input]
670
_io.StringIO.__init__
671
    initial_value as value: object(c_default="NULL") = ''
672
    newline as newline_obj: object(c_default="NULL") = '\n'
673
674
Text I/O implementation using an in-memory buffer.
675
676
The initial_value argument sets the value of object.  The newline
677
argument is like the one of TextIOWrapper's constructor.
678
[clinic start generated code]*/
679
680
static int
681
_io_StringIO___init___impl(stringio *self, PyObject *value,
682
                           PyObject *newline_obj)
683
/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
684
38.3k
{
685
38.3k
    const char *newline = "\n";
686
38.3k
    Py_ssize_t value_len;
687
688
    /* Parse the newline argument. We only want to allow unicode objects or
689
       None. */
690
38.3k
    if (newline_obj == Py_None) {
691
0
        newline = NULL;
692
0
    }
693
38.3k
    else if (newline_obj) {
694
16.1k
        if (!PyUnicode_Check(newline_obj)) {
695
0
            PyErr_Format(PyExc_TypeError,
696
0
                         "newline must be str or None, not %.200s",
697
0
                         Py_TYPE(newline_obj)->tp_name);
698
0
            return -1;
699
0
        }
700
16.1k
        newline = PyUnicode_AsUTF8(newline_obj);
701
16.1k
        if (newline == NULL)
702
0
            return -1;
703
16.1k
    }
704
705
38.3k
    if (newline && newline[0] != '\0'
706
22.1k
        && !(newline[0] == '\n' && newline[1] == '\0')
707
0
        && !(newline[0] == '\r' && newline[1] == '\0')
708
0
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
709
0
        PyErr_Format(PyExc_ValueError,
710
0
                     "illegal newline value: %R", newline_obj);
711
0
        return -1;
712
0
    }
713
38.3k
    if (value && value != Py_None && !PyUnicode_Check(value)) {
714
0
        PyErr_Format(PyExc_TypeError,
715
0
                     "initial_value must be str or None, not %.200s",
716
0
                     Py_TYPE(value)->tp_name);
717
0
        return -1;
718
0
    }
719
720
38.3k
    self->ok = 0;
721
722
38.3k
    PyUnicodeWriter_Discard(self->writer);
723
38.3k
    self->writer = NULL;
724
38.3k
    Py_CLEAR(self->readnl);
725
38.3k
    Py_CLEAR(self->writenl);
726
38.3k
    Py_CLEAR(self->decoder);
727
728
38.3k
    assert((newline != NULL && newline_obj != Py_None) ||
729
38.3k
           (newline == NULL && newline_obj == Py_None));
730
731
38.3k
    if (newline) {
732
38.3k
        self->readnl = PyUnicode_FromString(newline);
733
38.3k
        if (self->readnl == NULL)
734
0
            return -1;
735
38.3k
    }
736
38.3k
    self->readuniversal = (newline == NULL || newline[0] == '\0');
737
38.3k
    self->readtranslate = (newline == NULL);
738
    /* If newline == "", we don't translate anything.
739
       If newline == "\n" or newline == None, we translate to "\n", which is
740
       a no-op.
741
       (for newline == None, TextIOWrapper translates to os.linesep, but it
742
       is pointless for StringIO)
743
    */
744
38.3k
    if (newline != NULL && newline[0] == '\r') {
745
0
        self->writenl = Py_NewRef(self->readnl);
746
0
    }
747
748
38.3k
    _PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
749
38.3k
    if (self->readuniversal) {
750
16.1k
        self->decoder = PyObject_CallFunctionObjArgs(
751
16.1k
            (PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
752
16.1k
            Py_None, self->readtranslate ? Py_True : Py_False, NULL);
753
16.1k
        if (self->decoder == NULL)
754
0
            return -1;
755
16.1k
    }
756
757
    /* Now everything is set up, resize buffer to size of initial value,
758
       and copy it */
759
38.3k
    self->string_size = 0;
760
38.3k
    if (value && value != Py_None)
761
22.1k
        value_len = PyUnicode_GetLength(value);
762
16.1k
    else
763
16.1k
        value_len = 0;
764
38.3k
    if (value_len > 0) {
765
        /* This is a heuristic, for newline translation might change
766
           the string length. */
767
22.1k
        if (resize_buffer(self, 0) < 0)
768
0
            return -1;
769
22.1k
        self->state = STATE_REALIZED;
770
22.1k
        self->pos = 0;
771
22.1k
        if (write_str(self, value) < 0)
772
0
            return -1;
773
22.1k
    }
774
16.1k
    else {
775
        /* Empty stringio object, we can start by accumulating */
776
16.1k
        if (resize_buffer(self, 0) < 0)
777
0
            return -1;
778
16.1k
        self->writer = PyUnicodeWriter_Create(0);
779
16.1k
        if (self->writer == NULL) {
780
0
            return -1;
781
0
        }
782
16.1k
        self->state = STATE_ACCUMULATING;
783
16.1k
    }
784
38.3k
    self->pos = 0;
785
38.3k
    self->module_state = module_state;
786
38.3k
    self->closed = 0;
787
38.3k
    self->ok = 1;
788
38.3k
    return 0;
789
38.3k
}
790
791
/* Properties and pseudo-properties */
792
793
/*[clinic input]
794
@critical_section
795
_io.StringIO.readable
796
797
Returns True if the IO object can be read.
798
[clinic start generated code]*/
799
800
static PyObject *
801
_io_StringIO_readable_impl(stringio *self)
802
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=6cd2ffd65a8e8763]*/
803
0
{
804
0
    CHECK_INITIALIZED(self);
805
0
    CHECK_CLOSED(self);
806
0
    Py_RETURN_TRUE;
807
0
}
808
809
/*[clinic input]
810
@critical_section
811
_io.StringIO.writable
812
813
Returns True if the IO object can be written.
814
[clinic start generated code]*/
815
816
static PyObject *
817
_io_StringIO_writable_impl(stringio *self)
818
/*[clinic end generated code: output=13e4dd77187074ca input=1b3c63dbaa761c69]*/
819
0
{
820
0
    CHECK_INITIALIZED(self);
821
0
    CHECK_CLOSED(self);
822
0
    Py_RETURN_TRUE;
823
0
}
824
825
/*[clinic input]
826
@critical_section
827
_io.StringIO.seekable
828
829
Returns True if the IO object can be seeked.
830
[clinic start generated code]*/
831
832
static PyObject *
833
_io_StringIO_seekable_impl(stringio *self)
834
/*[clinic end generated code: output=4d20b4641c756879 input=a820fad2cf085fc3]*/
835
0
{
836
0
    CHECK_INITIALIZED(self);
837
0
    CHECK_CLOSED(self);
838
0
    Py_RETURN_TRUE;
839
0
}
840
841
/* Pickling support.
842
843
   The implementation of __getstate__ is similar to the one for BytesIO,
844
   except that we also save the newline parameter. For __setstate__ and unlike
845
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
846
   to avoid decoding the complex newline state while keeping the object
847
   representation compact.
848
849
   See comment in bytesio.c regarding why only pickle protocol 2 and onward are
850
   supported.
851
*/
852
853
/*[clinic input]
854
@critical_section
855
_io.StringIO.__getstate__
856
857
[clinic start generated code]*/
858
859
static PyObject *
860
_io_StringIO___getstate___impl(stringio *self)
861
/*[clinic end generated code: output=780be4a996410199 input=76f27255ef83bb92]*/
862
0
{
863
0
    PyObject *initvalue = _io_StringIO_getvalue_impl(self);
864
0
    PyObject *dict;
865
0
    PyObject *state;
866
867
0
    if (initvalue == NULL)
868
0
        return NULL;
869
0
    if (self->dict == NULL) {
870
0
        dict = Py_NewRef(Py_None);
871
0
    }
872
0
    else {
873
0
        dict = PyDict_Copy(self->dict);
874
0
        if (dict == NULL) {
875
0
            Py_DECREF(initvalue);
876
0
            return NULL;
877
0
        }
878
0
    }
879
880
0
    state = Py_BuildValue("(OOnN)", initvalue,
881
0
                          self->readnl ? self->readnl : Py_None,
882
0
                          self->pos, dict);
883
0
    Py_DECREF(initvalue);
884
0
    return state;
885
0
}
886
887
/*[clinic input]
888
@critical_section
889
_io.StringIO.__setstate__
890
891
    state: object
892
    /
893
[clinic start generated code]*/
894
895
static PyObject *
896
_io_StringIO___setstate___impl(stringio *self, PyObject *state)
897
/*[clinic end generated code: output=cb3962bc6d5c5609 input=8a27784b11b82e47]*/
898
0
{
    /* Implementation of StringIO.__setstate__(state).

       state must be a tuple with at least four items:
           [0] str or None -- buffer contents
           [1] newline argument forwarded to __init__
           [2] int         -- stream position, must be >= 0
           [3] dict or None -- instance attributes
       Returns None on success, or NULL with an exception set (TypeError for
       a malformed tuple/item, ValueError for a negative position, or
       whatever a callee raised).

       NOTE(review): every failure after the __init__ call below returns
       with the object already re-initialized, i.e. partially restored. */
899
0
    PyObject *initarg;
900
0
    PyObject *position_obj;
901
0
    PyObject *dict;
902
0
    Py_ssize_t pos;
903
904
0
    assert(state != NULL);
905
0
    CHECK_CLOSED(self);
906
907
    /* We allow the state tuple to be longer than 4, because we may need
908
       someday to extend the object's state without breaking
909
       backward-compatibility. */
910
0
    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
911
0
        PyErr_Format(PyExc_TypeError,
912
0
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
913
0
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
914
0
        return NULL;
915
0
    }
916
917
    /* Initialize the object's state. */
    /* Items 0 and 1 of the state tuple become the positional arguments of
       __init__; no keyword arguments are passed. */
918
0
    initarg = PyTuple_GetSlice(state, 0, 2);
919
0
    if (initarg == NULL)
920
0
        return NULL;
921
0
    if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
922
0
        Py_DECREF(initarg);
923
0
        return NULL;
924
0
    }
925
0
    Py_DECREF(initarg);
926
927
    /* Restore the buffer state. Even if __init__ did initialize the buffer,
928
       we have to initialize it again since __init__ may translate the
929
       newlines in the initial_value string. We clearly do not want that
930
       because the string value in the state tuple has already been translated
931
       once by __init__. So we do not take any chance and replace object's
932
       buffer completely. */
933
0
    {
934
0
        PyObject *item = PyTuple_GET_ITEM(state, 0);
935
0
        if (PyUnicode_Check(item)) {
            /* buf is a temporary UCS4 copy of the string; it is freed with
               PyMem_Free on both the failure and the success path below. */
936
0
            Py_UCS4 *buf = PyUnicode_AsUCS4Copy(item);
937
0
            if (buf == NULL)
938
0
                return NULL;
939
0
            Py_ssize_t bufsize = PyUnicode_GET_LENGTH(item);
940
941
0
            if (resize_buffer(self, bufsize) < 0) {
942
0
                PyMem_Free(buf);
943
0
                return NULL;
944
0
            }
            /* Copy exactly bufsize code points into the object's buffer. */
945
0
            memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
946
0
            PyMem_Free(buf);
947
0
            self->string_size = bufsize;
948
0
        }
949
0
        else {
            /* Only checked in debug builds: a non-str item is expected to
               be None and yields an empty buffer. */
950
0
            assert(item == Py_None);
951
0
            self->string_size = 0;
952
0
        }
953
0
    }
954
955
    /* Set carefully the position value. Alternatively, we could use the seek
956
       method instead of modifying self->pos directly to better protect the
957
       object internal state against erroneous (or malicious) inputs. */
958
0
    position_obj = PyTuple_GET_ITEM(state, 2);
959
0
    if (!PyLong_Check(position_obj)) {
960
0
        PyErr_Format(PyExc_TypeError,
961
0
                     "third item of state must be an integer, got %.200s",
962
0
                     Py_TYPE(position_obj)->tp_name);
963
0
        return NULL;
964
0
    }
965
0
    pos = PyLong_AsSsize_t(position_obj);
    /* -1 is also a legal conversion result, so the error indicator decides
       whether the conversion actually failed. */
966
0
    if (pos == -1 && PyErr_Occurred())
967
0
        return NULL;
968
0
    if (pos < 0) {
969
0
        PyErr_SetString(PyExc_ValueError,
970
0
                        "position value cannot be negative");
971
0
        return NULL;
972
0
    }
    /* Only the lower bound is validated; pos is not compared against
       string_size here. */
973
0
    self->pos = pos;
974
975
    /* Set the dictionary of the instance variables. */
976
0
    dict = PyTuple_GET_ITEM(state, 3);
977
0
    if (dict != Py_None) {
978
0
        if (!PyDict_Check(dict)) {
979
0
            PyErr_Format(PyExc_TypeError,
980
0
                         "fourth item of state should be a dict, got a %.200s",
981
0
                         Py_TYPE(dict)->tp_name);
982
0
            return NULL;
983
0
        }
984
0
        if (self->dict) {
985
            /* Alternatively, we could replace the internal dictionary
986
               completely. However, it seems more practical to just update it. */
987
0
            if (PyDict_Update(self->dict, dict) < 0)
988
0
                return NULL;
989
0
        }
990
0
        else {
            /* No instance dict yet: adopt the one from the state tuple
               (a new strong reference to the same dict object, not a copy). */
991
0
            self->dict = Py_NewRef(dict);
992
0
        }
993
0
    }
994
995
0
    Py_RETURN_NONE;
996
0
}
997
998
/*[clinic input]
999
@critical_section
1000
@getter
1001
_io.StringIO.closed
1002
[clinic start generated code]*/
1003
1004
static PyObject *
1005
_io_StringIO_closed_get_impl(stringio *self)
1006
/*[clinic end generated code: output=531ddca7954331d6 input=178d2ef24395fd49]*/
1007
43.1k
{
    /* Getter for StringIO.closed: returns a new bool built from the
       self->closed flag. Only the initialization guard is applied here;
       CHECK_CLOSED is not used, so the flag can be read in either state. */
1008
43.1k
    CHECK_INITIALIZED(self);
1009
43.1k
    return PyBool_FromLong(self->closed);
1010
43.1k
}
1011
1012
/*[clinic input]
1013
@critical_section
1014
@getter
1015
_io.StringIO.line_buffering
1016
[clinic start generated code]*/
1017
1018
static PyObject *
1019
_io_StringIO_line_buffering_get_impl(stringio *self)
1020
/*[clinic end generated code: output=360710e0112966ae input=6a7634e7f890745e]*/
1021
0
{
    /* Getter for StringIO.line_buffering: a constant False once the
       initialization and closed guards have let execution continue. */
1022
0
    CHECK_INITIALIZED(self);
1023
0
    CHECK_CLOSED(self);
1024
0
    Py_RETURN_FALSE;
1025
0
}
1026
1027
/*[clinic input]
1028
@critical_section
1029
@getter
1030
_io.StringIO.newlines
1031
[clinic start generated code]*/
1032
1033
static PyObject *
1034
_io_StringIO_newlines_get_impl(stringio *self)
1035
/*[clinic end generated code: output=35d7c0b66d7e0160 input=092a14586718244b]*/
1036
0
{
    /* Getter for StringIO.newlines: returns None when the object has no
       decoder, otherwise whatever the decoder's "newlines" attribute
       yields (or NULL if that attribute lookup fails). */
1037
0
    CHECK_INITIALIZED(self);
1038
0
    CHECK_CLOSED(self);
1039
0
    if (self->decoder == NULL) {
1040
0
        Py_RETURN_NONE;
1041
0
    }
1042
0
    return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
1043
0
}
1044
1045
/* Method table of the StringIO type. Each entry is a *_METHODDEF macro that
   is not defined in this file -- presumably supplied by the included
   clinic/stringio.c.h header. The table ends with a NULL sentinel. */
static struct PyMethodDef stringio_methods[] = {
1046
    _IO_STRINGIO_CLOSE_METHODDEF
1047
    _IO_STRINGIO_GETVALUE_METHODDEF
1048
    _IO_STRINGIO_READ_METHODDEF
1049
    _IO_STRINGIO_READLINE_METHODDEF
1050
    _IO_STRINGIO_TELL_METHODDEF
1051
    _IO_STRINGIO_TRUNCATE_METHODDEF
1052
    _IO_STRINGIO_SEEK_METHODDEF
1053
    _IO_STRINGIO_WRITE_METHODDEF
1054
1055
    /* Capability queries. */
    _IO_STRINGIO_SEEKABLE_METHODDEF
1056
    _IO_STRINGIO_READABLE_METHODDEF
1057
    _IO_STRINGIO_WRITABLE_METHODDEF
1058
1059
    /* Pickling support. */
    _IO_STRINGIO___GETSTATE___METHODDEF
1060
    _IO_STRINGIO___SETSTATE___METHODDEF
1061
    {NULL, NULL}        /* sentinel */
1062
};
1063
1064
/* Computed-attribute table of the StringIO type (closed, newlines,
   line_buffering). The *_GETSETDEF macros are not defined in this file --
   presumably supplied by the included clinic/stringio.c.h header. The table
   ends with a NULL sentinel. */
static PyGetSetDef stringio_getset[] = {
1065
    _IO_STRINGIO_CLOSED_GETSETDEF
1066
    _IO_STRINGIO_NEWLINES_GETSETDEF
1067
    /*  (following comments straight off of the original Python wrapper:)
1068
        XXX Cruft to support the TextIOWrapper API. This would only
1069
        be meaningful if StringIO supported the buffer attribute.
1070
        Hopefully, a better solution, than adding these pseudo-attributes,
1071
        will be found.
1072
    */
1073
    _IO_STRINGIO_LINE_BUFFERING_GETSETDEF
1074
    {NULL}
1075
};
1076
1077
/* Read-only special members that publish where the weak-reference list and
   the instance dict pointers live inside the stringio struct (the
   `weakreflist` and `dict` fields). */
static struct PyMemberDef stringio_members[] = {
1078
    {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(stringio, weakreflist), Py_READONLY},
1079
    {"__dictoffset__", Py_T_PYSSIZET, offsetof(stringio, dict), Py_READONLY},
1080
    {NULL},
1081
};
1082
1083
/* Slot table for the StringIO type spec: it wires the lifecycle hooks
   (dealloc/traverse/clear, init/new), the iterator "next" hook and the
   method/member/getset tables into the type. The class docstring reuses the
   __init__ doc string. Terminated by a {0, NULL} sentinel. */
static PyType_Slot stringio_slots[] = {
1084
    {Py_tp_dealloc, stringio_dealloc},
1085
    {Py_tp_doc, (void *)_io_StringIO___init____doc__},
1086
    {Py_tp_traverse, stringio_traverse},
1087
    {Py_tp_clear, stringio_clear},
1088
    {Py_tp_iternext, stringio_iternext},
1089
    {Py_tp_methods, stringio_methods},
1090
    {Py_tp_members, stringio_members},
1091
    {Py_tp_getset, stringio_getset},
1092
    {Py_tp_init, _io_StringIO___init__},
1093
    {Py_tp_new, stringio_new},
1094
    {0, NULL},
1095
};
1096
1097
/* Type specification for _io.StringIO. It has external linkage (no
   `static`), so it is meant to be consumed outside this file. The flags
   request a subclassable (BASETYPE), GC-participating (HAVE_GC) type whose
   attributes cannot be changed after creation (IMMUTABLETYPE). */
PyType_Spec _Py_stringio_spec = {
1098
    .name = "_io.StringIO",
1099
    .basicsize = sizeof(stringio),
1100
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1101
              Py_TPFLAGS_IMMUTABLETYPE),
1102
    .slots = stringio_slots,
1103
};