Coverage Report

Created: 2025-07-11 06:59

/src/Python-3.8.3/Modules/_io/stringio.c
Line
Count
Source (jump to first uncovered line)
1
#define PY_SSIZE_T_CLEAN
2
#include "Python.h"
3
#include "structmember.h"
4
#include "pycore_accu.h"
5
#include "pycore_object.h"
6
#include "_iomodule.h"
7
8
/* Implementation note: the buffer is always at least one character longer
9
   than the enclosed string, for proper functioning of _PyIO_find_line_ending.
10
*/
11
12
14
#define STATE_REALIZED 1
13
98
#define STATE_ACCUMULATING 2
14
15
/*[clinic input]
16
module _io
17
class _io.StringIO "stringio *" "&PyStringIO_Type"
18
[clinic start generated code]*/
19
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/
20
21
typedef struct {
22
    PyObject_HEAD
23
    Py_UCS4 *buf;
24
    Py_ssize_t pos;
25
    Py_ssize_t string_size;
26
    size_t buf_size;
27
28
    /* The stringio object can be in two states: accumulating or realized.
29
       In accumulating state, the internal buffer contains nothing and
30
       the contents are given by the embedded _PyAccu structure.
31
       In realized state, the internal buffer is meaningful and the
32
       _PyAccu is destroyed.
33
    */
34
    int state;
35
    _PyAccu accu;
36
37
    char ok; /* initialized? */
38
    char closed;
39
    char readuniversal;
40
    char readtranslate;
41
    PyObject *decoder;
42
    PyObject *readnl;
43
    PyObject *writenl;
44
45
    PyObject *dict;
46
    PyObject *weakreflist;
47
} stringio;
48
49
static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
50
51
/* Raise ValueError and return NULL from the calling function when the object
   has not been successfully initialized (self->ok is not positive).
   Wrapped in do { } while (0) so that the macro expands to exactly one
   statement and stays safe inside an unbraced if/else. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
57
58
/* Raise ValueError and return NULL from the calling function when the object
   has been closed.  Wrapped in do { } while (0) so that the macro expands to
   exactly one statement and stays safe inside an unbraced if/else. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
64
65
/* Switch the object to the realized state, returning NULL from the calling
   function if realize() fails.  Wrapped in do { } while (0) so that the macro
   expands to exactly one statement and stays safe inside an unbraced
   if/else. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
69
70
71
/* Internal routine for changing the size, in terms of characters, of the
72
   buffer of StringIO objects.  The caller should ensure that the 'size'
73
   argument is non-negative.  Returns 0 on success, -1 otherwise. */
74
static int
75
resize_buffer(stringio *self, size_t size)
76
28
{
77
    /* Here, unsigned types are used to avoid dealing with signed integer
78
       overflow, which is undefined in C. */
79
28
    size_t alloc = self->buf_size;
80
28
    Py_UCS4 *new_buf = NULL;
81
82
28
    assert(self->buf != NULL);
83
84
    /* Reserve one more char for line ending detection. */
85
28
    size = size + 1;
86
    /* For simplicity, stay in the range of the signed type. Anyway, Python
87
       doesn't allow strings to be longer than this. */
88
28
    if (size > PY_SSIZE_T_MAX)
89
0
        goto overflow;
90
91
28
    if (size < alloc / 2) {
92
        /* Major downsize; resize down to exact size. */
93
0
        alloc = size + 1;
94
0
    }
95
28
    else if (size < alloc) {
96
        /* Within allocated size; quick exit */
97
14
        return 0;
98
14
    }
99
14
    else if (size <= alloc * 1.125) {
100
        /* Moderate upsize; overallocate similar to list_resize() */
101
0
        alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
102
0
    }
103
14
    else {
104
        /* Major upsize; resize up to exact size */
105
14
        alloc = size + 1;
106
14
    }
107
108
14
    if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4))
109
0
        goto overflow;
110
14
    new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4));
111
14
    if (new_buf == NULL) {
112
0
        PyErr_NoMemory();
113
0
        return -1;
114
0
    }
115
14
    self->buf_size = alloc;
116
14
    self->buf = new_buf;
117
118
14
    return 0;
119
120
0
  overflow:
121
0
    PyErr_SetString(PyExc_OverflowError,
122
0
                    "new buffer size too large");
123
0
    return -1;
124
14
}
125
126
static PyObject *
127
make_intermediate(stringio *self)
128
14
{
129
14
    PyObject *intermediate = _PyAccu_Finish(&self->accu);
130
14
    self->state = STATE_REALIZED;
131
14
    if (intermediate == NULL)
132
0
        return NULL;
133
14
    if (_PyAccu_Init(&self->accu) ||
134
14
        _PyAccu_Accumulate(&self->accu, intermediate)) {
135
0
        Py_DECREF(intermediate);
136
0
        return NULL;
137
0
    }
138
14
    self->state = STATE_ACCUMULATING;
139
14
    return intermediate;
140
14
}
141
142
static int
143
realize(stringio *self)
144
0
{
145
0
    Py_ssize_t len;
146
0
    PyObject *intermediate;
147
148
0
    if (self->state == STATE_REALIZED)
149
0
        return 0;
150
0
    assert(self->state == STATE_ACCUMULATING);
151
0
    self->state = STATE_REALIZED;
152
153
0
    intermediate = _PyAccu_Finish(&self->accu);
154
0
    if (intermediate == NULL)
155
0
        return -1;
156
157
    /* Append the intermediate string to the internal buffer.
158
       The length should be equal to the current cursor position.
159
     */
160
0
    len = PyUnicode_GET_LENGTH(intermediate);
161
0
    if (resize_buffer(self, len) < 0) {
162
0
        Py_DECREF(intermediate);
163
0
        return -1;
164
0
    }
165
0
    if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) {
166
0
        Py_DECREF(intermediate);
167
0
        return -1;
168
0
    }
169
170
0
    Py_DECREF(intermediate);
171
0
    return 0;
172
0
}
173
174
/* Internal routine for writing a whole PyUnicode object to the buffer of a
175
   StringIO object. Returns 0 on success, or -1 on error. */
176
static Py_ssize_t
177
write_str(stringio *self, PyObject *obj)
178
56
{
179
56
    Py_ssize_t len;
180
56
    PyObject *decoded = NULL;
181
182
56
    assert(self->buf != NULL);
183
56
    assert(self->pos >= 0);
184
185
56
    if (self->decoder != NULL) {
186
0
        decoded = _PyIncrementalNewlineDecoder_decode(
187
0
            self->decoder, obj, 1 /* always final */);
188
0
    }
189
56
    else {
190
56
        decoded = obj;
191
56
        Py_INCREF(decoded);
192
56
    }
193
56
    if (self->writenl) {
194
0
        PyObject *translated = PyUnicode_Replace(
195
0
            decoded, _PyIO_str_nl, self->writenl, -1);
196
0
        Py_DECREF(decoded);
197
0
        decoded = translated;
198
0
    }
199
56
    if (decoded == NULL)
200
0
        return -1;
201
202
56
    assert(PyUnicode_Check(decoded));
203
56
    if (PyUnicode_READY(decoded)) {
204
0
        Py_DECREF(decoded);
205
0
        return -1;
206
0
    }
207
56
    len = PyUnicode_GET_LENGTH(decoded);
208
56
    assert(len >= 0);
209
210
    /* This overflow check is not strictly necessary. However, it avoids us to
211
       deal with funky things like comparing an unsigned and a signed
212
       integer. */
213
56
    if (self->pos > PY_SSIZE_T_MAX - len) {
214
0
        PyErr_SetString(PyExc_OverflowError,
215
0
                        "new position too large");
216
0
        goto fail;
217
0
    }
218
219
56
    if (self->state == STATE_ACCUMULATING) {
220
56
        if (self->string_size == self->pos) {
221
56
            if (_PyAccu_Accumulate(&self->accu, decoded))
222
0
                goto fail;
223
56
            goto success;
224
56
        }
225
0
        if (realize(self))
226
0
            goto fail;
227
0
    }
228
229
0
    if (self->pos + len > self->string_size) {
230
0
        if (resize_buffer(self, self->pos + len) < 0)
231
0
            goto fail;
232
0
    }
233
234
0
    if (self->pos > self->string_size) {
235
        /* In case of overseek, pad with null bytes the buffer region between
236
           the end of stream and the current position.
237
238
          0   lo      string_size                           hi
239
          |   |<---used--->|<----------available----------->|
240
          |   |            <--to pad-->|<---to write--->    |
241
          0   buf                   position
242
243
        */
244
0
        memset(self->buf + self->string_size, '\0',
245
0
               (self->pos - self->string_size) * sizeof(Py_UCS4));
246
0
    }
247
248
    /* Copy the data to the internal buffer, overwriting some of the
249
       existing data if self->pos < self->string_size. */
250
0
    if (!PyUnicode_AsUCS4(decoded,
251
0
                          self->buf + self->pos,
252
0
                          self->buf_size - self->pos,
253
0
                          0))
254
0
        goto fail;
255
256
56
success:
257
    /* Set the new length of the internal string if it has changed. */
258
56
    self->pos += len;
259
56
    if (self->string_size < self->pos)
260
56
        self->string_size = self->pos;
261
262
56
    Py_DECREF(decoded);
263
56
    return 0;
264
265
0
fail:
266
0
    Py_XDECREF(decoded);
267
0
    return -1;
268
0
}
269
270
/*[clinic input]
271
_io.StringIO.getvalue
272
273
Retrieve the entire contents of the object.
274
[clinic start generated code]*/
275
276
static PyObject *
277
_io_StringIO_getvalue_impl(stringio *self)
278
/*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/
279
14
{
280
14
    CHECK_INITIALIZED(self);
281
14
    CHECK_CLOSED(self);
282
14
    if (self->state == STATE_ACCUMULATING)
283
14
        return make_intermediate(self);
284
0
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
285
0
                                     self->string_size);
286
14
}
287
288
/*[clinic input]
289
_io.StringIO.tell
290
291
Tell the current file position.
292
[clinic start generated code]*/
293
294
static PyObject *
295
_io_StringIO_tell_impl(stringio *self)
296
/*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/
297
0
{
298
0
    CHECK_INITIALIZED(self);
299
0
    CHECK_CLOSED(self);
300
0
    return PyLong_FromSsize_t(self->pos);
301
0
}
302
303
/*[clinic input]
304
_io.StringIO.read
305
    size: Py_ssize_t(accept={int, NoneType}) = -1
306
    /
307
308
Read at most size characters, returned as a string.
309
310
If the argument is negative or omitted, read until EOF
311
is reached. Return an empty string at EOF.
312
[clinic start generated code]*/
313
314
static PyObject *
315
_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
316
/*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/
317
0
{
318
0
    Py_ssize_t n;
319
0
    Py_UCS4 *output;
320
321
0
    CHECK_INITIALIZED(self);
322
0
    CHECK_CLOSED(self);
323
324
    /* adjust invalid sizes */
325
0
    n = self->string_size - self->pos;
326
0
    if (size < 0 || size > n) {
327
0
        size = n;
328
0
        if (size < 0)
329
0
            size = 0;
330
0
    }
331
332
    /* Optimization for seek(0); read() */
333
0
    if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) {
334
0
        PyObject *result = make_intermediate(self);
335
0
        self->pos = self->string_size;
336
0
        return result;
337
0
    }
338
339
0
    ENSURE_REALIZED(self);
340
0
    output = self->buf + self->pos;
341
0
    self->pos += size;
342
0
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size);
343
0
}
344
345
/* Internal helper, used by stringio_readline and stringio_iternext */
346
static PyObject *
347
_stringio_readline(stringio *self, Py_ssize_t limit)
348
0
{
349
0
    Py_UCS4 *start, *end, old_char;
350
0
    Py_ssize_t len, consumed;
351
352
    /* In case of overseek, return the empty string */
353
0
    if (self->pos >= self->string_size)
354
0
        return PyUnicode_New(0, 0);
355
356
0
    start = self->buf + self->pos;
357
0
    if (limit < 0 || limit > self->string_size - self->pos)
358
0
        limit = self->string_size - self->pos;
359
360
0
    end = start + limit;
361
0
    old_char = *end;
362
0
    *end = '\0';
363
0
    len = _PyIO_find_line_ending(
364
0
        self->readtranslate, self->readuniversal, self->readnl,
365
0
        PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed);
366
0
    *end = old_char;
367
    /* If we haven't found any line ending, we just return everything
368
       (`consumed` is ignored). */
369
0
    if (len < 0)
370
0
        len = limit;
371
0
    self->pos += len;
372
0
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len);
373
0
}
374
375
/*[clinic input]
376
_io.StringIO.readline
377
    size: Py_ssize_t(accept={int, NoneType}) = -1
378
    /
379
380
Read until newline or EOF.
381
382
Returns an empty string if EOF is hit immediately.
383
[clinic start generated code]*/
384
385
static PyObject *
386
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
387
/*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/
388
0
{
389
0
    CHECK_INITIALIZED(self);
390
0
    CHECK_CLOSED(self);
391
0
    ENSURE_REALIZED(self);
392
393
0
    return _stringio_readline(self, size);
394
0
}
395
396
static PyObject *
397
stringio_iternext(stringio *self)
398
0
{
399
0
    PyObject *line;
400
401
0
    CHECK_INITIALIZED(self);
402
0
    CHECK_CLOSED(self);
403
0
    ENSURE_REALIZED(self);
404
405
0
    if (Py_TYPE(self) == &PyStringIO_Type) {
406
        /* Skip method call overhead for speed */
407
0
        line = _stringio_readline(self, -1);
408
0
    }
409
0
    else {
410
        /* XXX is subclassing StringIO really supported? */
411
0
        line = PyObject_CallMethodObjArgs((PyObject *)self,
412
0
                                           _PyIO_str_readline, NULL);
413
0
        if (line && !PyUnicode_Check(line)) {
414
0
            PyErr_Format(PyExc_OSError,
415
0
                         "readline() should have returned a str object, "
416
0
                         "not '%.200s'", Py_TYPE(line)->tp_name);
417
0
            Py_DECREF(line);
418
0
            return NULL;
419
0
        }
420
0
    }
421
422
0
    if (line == NULL)
423
0
        return NULL;
424
425
0
    if (PyUnicode_GET_LENGTH(line) == 0) {
426
        /* Reached EOF */
427
0
        Py_DECREF(line);
428
0
        return NULL;
429
0
    }
430
431
0
    return line;
432
0
}
433
434
/*[clinic input]
435
_io.StringIO.truncate
436
    pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None
437
    /
438
439
Truncate size to pos.
440
441
The pos argument defaults to the current file position, as
442
returned by tell().  The current file position is unchanged.
443
Returns the new absolute position.
444
[clinic start generated code]*/
445
446
static PyObject *
447
_io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
448
/*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/
449
0
{
450
0
    CHECK_INITIALIZED(self);
451
0
    CHECK_CLOSED(self);
452
453
0
    if (size < 0) {
454
0
        PyErr_Format(PyExc_ValueError,
455
0
                     "Negative size value %zd", size);
456
0
        return NULL;
457
0
    }
458
459
0
    if (size < self->string_size) {
460
0
        ENSURE_REALIZED(self);
461
0
        if (resize_buffer(self, size) < 0)
462
0
            return NULL;
463
0
        self->string_size = size;
464
0
    }
465
466
0
    return PyLong_FromSsize_t(size);
467
0
}
468
469
/*[clinic input]
470
_io.StringIO.seek
471
    pos: Py_ssize_t
472
    whence: int = 0
473
    /
474
475
Change stream position.
476
477
Seek to character offset pos relative to position indicated by whence:
478
    0  Start of stream (the default).  pos should be >= 0;
479
    1  Current position - pos must be 0;
480
    2  End of stream - pos must be 0.
481
Returns the new absolute position.
482
[clinic start generated code]*/
483
484
static PyObject *
485
_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
486
/*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/
487
0
{
488
0
    CHECK_INITIALIZED(self);
489
0
    CHECK_CLOSED(self);
490
491
0
    if (whence != 0 && whence != 1 && whence != 2) {
492
0
        PyErr_Format(PyExc_ValueError,
493
0
                     "Invalid whence (%i, should be 0, 1 or 2)", whence);
494
0
        return NULL;
495
0
    }
496
0
    else if (pos < 0 && whence == 0) {
497
0
        PyErr_Format(PyExc_ValueError,
498
0
                     "Negative seek position %zd", pos);
499
0
        return NULL;
500
0
    }
501
0
    else if (whence != 0 && pos != 0) {
502
0
        PyErr_SetString(PyExc_OSError,
503
0
                        "Can't do nonzero cur-relative seeks");
504
0
        return NULL;
505
0
    }
506
507
    /* whence = 0: offset relative to beginning of the string.
508
       whence = 1: no change to current position.
509
       whence = 2: change position to end of file. */
510
0
    if (whence == 1) {
511
0
        pos = self->pos;
512
0
    }
513
0
    else if (whence == 2) {
514
0
        pos = self->string_size;
515
0
    }
516
517
0
    self->pos = pos;
518
519
0
    return PyLong_FromSsize_t(self->pos);
520
0
}
521
522
/*[clinic input]
523
_io.StringIO.write
524
    s as obj: object
525
    /
526
527
Write string to file.
528
529
Returns the number of characters written, which is always equal to
530
the length of the string.
531
[clinic start generated code]*/
532
533
static PyObject *
534
_io_StringIO_write(stringio *self, PyObject *obj)
535
/*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/
536
112
{
537
112
    Py_ssize_t size;
538
539
112
    CHECK_INITIALIZED(self);
540
112
    if (!PyUnicode_Check(obj)) {
541
0
        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
542
0
                     Py_TYPE(obj)->tp_name);
543
0
        return NULL;
544
0
    }
545
112
    if (PyUnicode_READY(obj))
546
0
        return NULL;
547
112
    CHECK_CLOSED(self);
548
112
    size = PyUnicode_GET_LENGTH(obj);
549
550
112
    if (size > 0 && write_str(self, obj) < 0)
551
0
        return NULL;
552
553
112
    return PyLong_FromSsize_t(size);
554
112
}
555
556
/*[clinic input]
557
_io.StringIO.close
558
559
Close the IO object.
560
561
Attempting any further operation after the object is closed
562
will raise a ValueError.
563
564
This method has no effect if the file is already closed.
565
[clinic start generated code]*/
566
567
static PyObject *
568
_io_StringIO_close_impl(stringio *self)
569
/*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/
570
14
{
571
14
    self->closed = 1;
572
    /* Free up some memory */
573
14
    if (resize_buffer(self, 0) < 0)
574
0
        return NULL;
575
14
    _PyAccu_Destroy(&self->accu);
576
14
    Py_CLEAR(self->readnl);
577
14
    Py_CLEAR(self->writenl);
578
14
    Py_CLEAR(self->decoder);
579
14
    Py_RETURN_NONE;
580
14
}
581
582
static int
583
stringio_traverse(stringio *self, visitproc visit, void *arg)
584
0
{
585
0
    Py_VISIT(self->dict);
586
0
    return 0;
587
0
}
588
589
static int
590
stringio_clear(stringio *self)
591
0
{
592
0
    Py_CLEAR(self->dict);
593
0
    return 0;
594
0
}
595
596
static void
597
stringio_dealloc(stringio *self)
598
14
{
599
14
    _PyObject_GC_UNTRACK(self);
600
14
    self->ok = 0;
601
14
    if (self->buf) {
602
14
        PyMem_Free(self->buf);
603
14
        self->buf = NULL;
604
14
    }
605
14
    _PyAccu_Destroy(&self->accu);
606
14
    Py_CLEAR(self->readnl);
607
14
    Py_CLEAR(self->writenl);
608
14
    Py_CLEAR(self->decoder);
609
14
    Py_CLEAR(self->dict);
610
14
    if (self->weakreflist != NULL)
611
0
        PyObject_ClearWeakRefs((PyObject *) self);
612
14
    Py_TYPE(self)->tp_free(self);
613
14
}
614
615
static PyObject *
616
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
617
14
{
618
14
    stringio *self;
619
620
14
    assert(type != NULL && type->tp_alloc != NULL);
621
14
    self = (stringio *)type->tp_alloc(type, 0);
622
14
    if (self == NULL)
623
0
        return NULL;
624
625
    /* tp_alloc initializes all the fields to zero. So we don't have to
626
       initialize them here. */
627
628
14
    self->buf = (Py_UCS4 *)PyMem_Malloc(0);
629
14
    if (self->buf == NULL) {
630
0
        Py_DECREF(self);
631
0
        return PyErr_NoMemory();
632
0
    }
633
634
14
    return (PyObject *)self;
635
14
}
636
637
/*[clinic input]
638
_io.StringIO.__init__
639
    initial_value as value: object(c_default="NULL") = ''
640
    newline as newline_obj: object(c_default="NULL") = '\n'
641
642
Text I/O implementation using an in-memory buffer.
643
644
The initial_value argument sets the value of object.  The newline
645
argument is like the one of TextIOWrapper's constructor.
646
[clinic start generated code]*/
647
648
static int
649
_io_StringIO___init___impl(stringio *self, PyObject *value,
650
                           PyObject *newline_obj)
651
/*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/
652
14
{
653
14
    const char *newline = "\n";
654
14
    Py_ssize_t value_len;
655
656
    /* Parse the newline argument. We only want to allow unicode objects or
657
       None. */
658
14
    if (newline_obj == Py_None) {
659
0
        newline = NULL;
660
0
    }
661
14
    else if (newline_obj) {
662
0
        if (!PyUnicode_Check(newline_obj)) {
663
0
            PyErr_Format(PyExc_TypeError,
664
0
                         "newline must be str or None, not %.200s",
665
0
                         Py_TYPE(newline_obj)->tp_name);
666
0
            return -1;
667
0
        }
668
0
        newline = PyUnicode_AsUTF8(newline_obj);
669
0
        if (newline == NULL)
670
0
            return -1;
671
0
    }
672
673
14
    if (newline && newline[0] != '\0'
674
14
        && !(newline[0] == '\n' && newline[1] == '\0')
675
14
        && !(newline[0] == '\r' && newline[1] == '\0')
676
14
        && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
677
0
        PyErr_Format(PyExc_ValueError,
678
0
                     "illegal newline value: %R", newline_obj);
679
0
        return -1;
680
0
    }
681
14
    if (value && value != Py_None && !PyUnicode_Check(value)) {
682
0
        PyErr_Format(PyExc_TypeError,
683
0
                     "initial_value must be str or None, not %.200s",
684
0
                     Py_TYPE(value)->tp_name);
685
0
        return -1;
686
0
    }
687
688
14
    self->ok = 0;
689
690
14
    _PyAccu_Destroy(&self->accu);
691
14
    Py_CLEAR(self->readnl);
692
14
    Py_CLEAR(self->writenl);
693
14
    Py_CLEAR(self->decoder);
694
695
14
    assert((newline != NULL && newline_obj != Py_None) ||
696
14
           (newline == NULL && newline_obj == Py_None));
697
698
14
    if (newline) {
699
14
        self->readnl = PyUnicode_FromString(newline);
700
14
        if (self->readnl == NULL)
701
0
            return -1;
702
14
    }
703
14
    self->readuniversal = (newline == NULL || newline[0] == '\0');
704
14
    self->readtranslate = (newline == NULL);
705
    /* If newline == "", we don't translate anything.
706
       If newline == "\n" or newline == None, we translate to "\n", which is
707
       a no-op.
708
       (for newline == None, TextIOWrapper translates to os.linesep, but it
709
       is pointless for StringIO)
710
    */
711
14
    if (newline != NULL && newline[0] == '\r') {
712
0
        self->writenl = self->readnl;
713
0
        Py_INCREF(self->writenl);
714
0
    }
715
716
14
    if (self->readuniversal) {
717
0
        self->decoder = PyObject_CallFunction(
718
0
            (PyObject *)&PyIncrementalNewlineDecoder_Type,
719
0
            "Oi", Py_None, (int) self->readtranslate);
720
0
        if (self->decoder == NULL)
721
0
            return -1;
722
0
    }
723
724
    /* Now everything is set up, resize buffer to size of initial value,
725
       and copy it */
726
14
    self->string_size = 0;
727
14
    if (value && value != Py_None)
728
0
        value_len = PyUnicode_GetLength(value);
729
14
    else
730
14
        value_len = 0;
731
14
    if (value_len > 0) {
732
        /* This is a heuristic, for newline translation might change
733
           the string length. */
734
0
        if (resize_buffer(self, 0) < 0)
735
0
            return -1;
736
0
        self->state = STATE_REALIZED;
737
0
        self->pos = 0;
738
0
        if (write_str(self, value) < 0)
739
0
            return -1;
740
0
    }
741
14
    else {
742
        /* Empty stringio object, we can start by accumulating */
743
14
        if (resize_buffer(self, 0) < 0)
744
0
            return -1;
745
14
        if (_PyAccu_Init(&self->accu))
746
0
            return -1;
747
14
        self->state = STATE_ACCUMULATING;
748
14
    }
749
14
    self->pos = 0;
750
751
14
    self->closed = 0;
752
14
    self->ok = 1;
753
14
    return 0;
754
14
}
755
756
/* Properties and pseudo-properties */
757
758
/*[clinic input]
759
_io.StringIO.readable
760
761
Returns True if the IO object can be read.
762
[clinic start generated code]*/
763
764
static PyObject *
765
_io_StringIO_readable_impl(stringio *self)
766
/*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/
767
0
{
768
0
    CHECK_INITIALIZED(self);
769
0
    CHECK_CLOSED(self);
770
0
    Py_RETURN_TRUE;
771
0
}
772
773
/*[clinic input]
774
_io.StringIO.writable
775
776
Returns True if the IO object can be written.
777
[clinic start generated code]*/
778
779
static PyObject *
780
_io_StringIO_writable_impl(stringio *self)
781
/*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/
782
0
{
783
0
    CHECK_INITIALIZED(self);
784
0
    CHECK_CLOSED(self);
785
0
    Py_RETURN_TRUE;
786
0
}
787
788
/*[clinic input]
789
_io.StringIO.seekable
790
791
Returns True if the IO object can be seeked.
792
[clinic start generated code]*/
793
794
static PyObject *
795
_io_StringIO_seekable_impl(stringio *self)
796
/*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/
797
0
{
798
0
    CHECK_INITIALIZED(self);
799
0
    CHECK_CLOSED(self);
800
0
    Py_RETURN_TRUE;
801
0
}
802
803
/* Pickling support.
804
805
   The implementation of __getstate__ is similar to the one for BytesIO,
806
   except that we also save the newline parameter. For __setstate__ and unlike
807
   BytesIO, we call __init__ to restore the object's state. Doing so allows us
808
   to avoid decoding the complex newline state while keeping the object
809
   representation compact.
810
811
   See comment in bytesio.c regarding why only pickle protocols 2 and onward are
812
   supported.
813
*/
814
815
static PyObject *
816
stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored))
817
0
{
818
0
    PyObject *initvalue = _io_StringIO_getvalue_impl(self);
819
0
    PyObject *dict;
820
0
    PyObject *state;
821
822
0
    if (initvalue == NULL)
823
0
        return NULL;
824
0
    if (self->dict == NULL) {
825
0
        Py_INCREF(Py_None);
826
0
        dict = Py_None;
827
0
    }
828
0
    else {
829
0
        dict = PyDict_Copy(self->dict);
830
0
        if (dict == NULL) {
831
0
            Py_DECREF(initvalue);
832
0
            return NULL;
833
0
        }
834
0
    }
835
836
0
    state = Py_BuildValue("(OOnN)", initvalue,
837
0
                          self->readnl ? self->readnl : Py_None,
838
0
                          self->pos, dict);
839
0
    Py_DECREF(initvalue);
840
0
    return state;
841
0
}
842
843
static PyObject *
844
stringio_setstate(stringio *self, PyObject *state)
845
0
{
846
0
    PyObject *initarg;
847
0
    PyObject *position_obj;
848
0
    PyObject *dict;
849
0
    Py_ssize_t pos;
850
851
0
    assert(state != NULL);
852
0
    CHECK_CLOSED(self);
853
854
    /* We allow the state tuple to be longer than 4, because we may need
855
       someday to extend the object's state without breaking
856
       backward-compatibility. */
857
0
    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
858
0
        PyErr_Format(PyExc_TypeError,
859
0
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
860
0
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
861
0
        return NULL;
862
0
    }
863
864
    /* Initialize the object's state. */
865
0
    initarg = PyTuple_GetSlice(state, 0, 2);
866
0
    if (initarg == NULL)
867
0
        return NULL;
868
0
    if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) {
869
0
        Py_DECREF(initarg);
870
0
        return NULL;
871
0
    }
872
0
    Py_DECREF(initarg);
873
874
    /* Restore the buffer state. Even if __init__ did initialize the buffer,
875
       we have to initialize it again since __init__ may translate the
876
       newlines in the initial_value string. We clearly do not want that
877
       because the string value in the state tuple has already been translated
878
       once by __init__. So we do not take any chance and replace object's
879
       buffer completely. */
880
0
    {
881
0
        PyObject *item;
882
0
        Py_UCS4 *buf;
883
0
        Py_ssize_t bufsize;
884
885
0
        item = PyTuple_GET_ITEM(state, 0);
886
0
        buf = PyUnicode_AsUCS4Copy(item);
887
0
        if (buf == NULL)
888
0
            return NULL;
889
0
        bufsize = PyUnicode_GET_LENGTH(item);
890
891
0
        if (resize_buffer(self, bufsize) < 0) {
892
0
            PyMem_Free(buf);
893
0
            return NULL;
894
0
        }
895
0
        memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4));
896
0
        PyMem_Free(buf);
897
0
        self->string_size = bufsize;
898
0
    }
899
900
    /* Set carefully the position value. Alternatively, we could use the seek
901
       method instead of modifying self->pos directly to better protect the
902
       object internal state against erroneous (or malicious) inputs. */
903
0
    position_obj = PyTuple_GET_ITEM(state, 2);
904
0
    if (!PyLong_Check(position_obj)) {
905
0
        PyErr_Format(PyExc_TypeError,
906
0
                     "third item of state must be an integer, got %.200s",
907
0
                     Py_TYPE(position_obj)->tp_name);
908
0
        return NULL;
909
0
    }
910
0
    pos = PyLong_AsSsize_t(position_obj);
911
0
    if (pos == -1 && PyErr_Occurred())
912
0
        return NULL;
913
0
    if (pos < 0) {
914
0
        PyErr_SetString(PyExc_ValueError,
915
0
                        "position value cannot be negative");
916
0
        return NULL;
917
0
    }
918
0
    self->pos = pos;
919
920
    /* Set the dictionary of the instance variables. */
921
0
    dict = PyTuple_GET_ITEM(state, 3);
922
0
    if (dict != Py_None) {
923
0
        if (!PyDict_Check(dict)) {
924
0
            PyErr_Format(PyExc_TypeError,
925
0
                         "fourth item of state should be a dict, got a %.200s",
926
0
                         Py_TYPE(dict)->tp_name);
927
0
            return NULL;
928
0
        }
929
0
        if (self->dict) {
930
            /* Alternatively, we could replace the internal dictionary
931
               completely. However, it seems more practical to just update it. */
932
0
            if (PyDict_Update(self->dict, dict) < 0)
933
0
                return NULL;
934
0
        }
935
0
        else {
936
0
            Py_INCREF(dict);
937
0
            self->dict = dict;
938
0
        }
939
0
    }
940
941
0
    Py_RETURN_NONE;
942
0
}
943
944
945
static PyObject *
946
stringio_closed(stringio *self, void *context)
947
0
{
948
0
    CHECK_INITIALIZED(self);
949
0
    return PyBool_FromLong(self->closed);
950
0
}
951
952
static PyObject *
953
stringio_line_buffering(stringio *self, void *context)
954
0
{
955
0
    CHECK_INITIALIZED(self);
956
0
    CHECK_CLOSED(self);
957
0
    Py_RETURN_FALSE;
958
0
}
959
960
static PyObject *
961
stringio_newlines(stringio *self, void *context)
962
0
{
963
0
    CHECK_INITIALIZED(self);
964
0
    CHECK_CLOSED(self);
965
0
    if (self->decoder == NULL)
966
0
        Py_RETURN_NONE;
967
0
    return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
968
0
}
969
970
#include "clinic/stringio.c.h"
971
972
static struct PyMethodDef stringio_methods[] = {
973
    _IO_STRINGIO_CLOSE_METHODDEF
974
    _IO_STRINGIO_GETVALUE_METHODDEF
975
    _IO_STRINGIO_READ_METHODDEF
976
    _IO_STRINGIO_READLINE_METHODDEF
977
    _IO_STRINGIO_TELL_METHODDEF
978
    _IO_STRINGIO_TRUNCATE_METHODDEF
979
    _IO_STRINGIO_SEEK_METHODDEF
980
    _IO_STRINGIO_WRITE_METHODDEF
981
982
    _IO_STRINGIO_SEEKABLE_METHODDEF
983
    _IO_STRINGIO_READABLE_METHODDEF
984
    _IO_STRINGIO_WRITABLE_METHODDEF
985
986
    {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
987
    {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
988
    {NULL, NULL}        /* sentinel */
989
};
990
991
static PyGetSetDef stringio_getset[] = {
992
    {"closed",         (getter)stringio_closed,         NULL, NULL},
993
    {"newlines",       (getter)stringio_newlines,       NULL, NULL},
994
    /*  (following comments straight off of the original Python wrapper:)
995
        XXX Cruft to support the TextIOWrapper API. This would only
996
        be meaningful if StringIO supported the buffer attribute.
997
        Hopefully, a better solution, than adding these pseudo-attributes,
998
        will be found.
999
    */
1000
    {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
1001
    {NULL}
1002
};
1003
1004
PyTypeObject PyStringIO_Type = {
1005
    PyVarObject_HEAD_INIT(NULL, 0)
1006
    "_io.StringIO",                            /*tp_name*/
1007
    sizeof(stringio),                    /*tp_basicsize*/
1008
    0,                                         /*tp_itemsize*/
1009
    (destructor)stringio_dealloc,              /*tp_dealloc*/
1010
    0,                                         /*tp_vectorcall_offset*/
1011
    0,                                         /*tp_getattr*/
1012
    0,                                         /*tp_setattr*/
1013
    0,                                         /*tp_as_async*/
1014
    0,                                         /*tp_repr*/
1015
    0,                                         /*tp_as_number*/
1016
    0,                                         /*tp_as_sequence*/
1017
    0,                                         /*tp_as_mapping*/
1018
    0,                                         /*tp_hash*/
1019
    0,                                         /*tp_call*/
1020
    0,                                         /*tp_str*/
1021
    0,                                         /*tp_getattro*/
1022
    0,                                         /*tp_setattro*/
1023
    0,                                         /*tp_as_buffer*/
1024
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1025
                       | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
1026
    _io_StringIO___init____doc__,              /*tp_doc*/
1027
    (traverseproc)stringio_traverse,           /*tp_traverse*/
1028
    (inquiry)stringio_clear,                   /*tp_clear*/
1029
    0,                                         /*tp_richcompare*/
1030
    offsetof(stringio, weakreflist),            /*tp_weaklistoffset*/
1031
    0,                                         /*tp_iter*/
1032
    (iternextfunc)stringio_iternext,           /*tp_iternext*/
1033
    stringio_methods,                          /*tp_methods*/
1034
    0,                                         /*tp_members*/
1035
    stringio_getset,                           /*tp_getset*/
1036
    0,                                         /*tp_base*/
1037
    0,                                         /*tp_dict*/
1038
    0,                                         /*tp_descr_get*/
1039
    0,                                         /*tp_descr_set*/
1040
    offsetof(stringio, dict),                  /*tp_dictoffset*/
1041
    _io_StringIO___init__,                     /*tp_init*/
1042
    0,                                         /*tp_alloc*/
1043
    stringio_new,                              /*tp_new*/
1044
};