/src/Python-3.8.3/Modules/_io/stringio.c
Line | Count | Source (jump to first uncovered line) |
1 | | #define PY_SSIZE_T_CLEAN |
2 | | #include "Python.h" |
3 | | #include "structmember.h" |
4 | | #include "pycore_accu.h" |
5 | | #include "pycore_object.h" |
6 | | #include "_iomodule.h" |
7 | | |
8 | | /* Implementation note: the buffer is always at least one character longer |
9 | | than the enclosed string, for proper functioning of _PyIO_find_line_ending. |
10 | | */ |
11 | | |
12 | 14 | #define STATE_REALIZED 1 /* contents are held in the internal UCS4 buffer */ |
13 | 98 | #define STATE_ACCUMULATING 2 /* contents are held in the embedded _PyAccu */ |
14 | | |
15 | | /*[clinic input] |
16 | | module _io |
17 | | class _io.StringIO "stringio *" "&PyStringIO_Type" |
18 | | [clinic start generated code]*/ |
19 | | /*[clinic end generated code: output=da39a3ee5e6b4b0d input=c17bc0f42165cd7d]*/ |
20 | | |
21 | | typedef struct { |
22 | | PyObject_HEAD |
23 | | Py_UCS4 *buf; /* character buffer; allocated in tp_new, freed in dealloc */ |
24 | | Py_ssize_t pos; /* current stream position, in characters */ |
25 | | Py_ssize_t string_size; /* length of the stream contents, in characters */ |
26 | | size_t buf_size; /* allocated capacity of buf, in characters */ |
27 | | |
28 | | /* The stringio object can be in two states: accumulating or realized. |
29 | | In accumulating state, the internal buffer contains nothing and |
30 | | the contents are given by the embedded _PyAccu structure. |
31 | | In realized state, the internal buffer is meaningful and the |
32 | | _PyAccu is destroyed. |
33 | | */ |
34 | | int state; /* STATE_REALIZED or STATE_ACCUMULATING */ |
35 | | _PyAccu accu; |
36 | | |
37 | | char ok; /* initialized? set to 1 at the end of a successful __init__ */ |
38 | | char closed; /* set to 1 by close(), reset to 0 by __init__ */ |
39 | | char readuniversal; /* true when newline is None or '' */ |
40 | | char readtranslate; /* true when newline is None */ |
41 | | PyObject *decoder; /* IncrementalNewlineDecoder, or NULL unless readuniversal */ |
42 | | PyObject *readnl; /* str built from the newline argument, or NULL for None */ |
43 | | PyObject *writenl; /* same object as readnl when newline starts with '\r', else NULL */ |
44 | | |
45 | | PyObject *dict; /* instance attribute dict, may be NULL */ |
46 | | PyObject *weakreflist; /* cleared with PyObject_ClearWeakRefs() in dealloc */ |
47 | | } stringio; |
48 | | |
49 | | static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs); |
50 | | |
/* Guard macros for method entry points.

   Each one makes the *enclosing function* return NULL when its condition
   fails, so they may only be used in functions returning a pointer.  They
   are wrapped in do { ... } while (0) so that `MACRO(self);` is a single
   statement and cannot capture a following `else`. */

/* Raise ValueError and return NULL if __init__ has not completed. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Raise ValueError and return NULL if the object has been closed. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)

/* Switch to realized state; return NULL (exception set by realize()) on
   failure. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
69 | | |
70 | | |
71 | | /* Internal routine for changing the size, in terms of characters, of the |
72 | | buffer of StringIO objects. The caller should ensure that the 'size' |
73 | | argument is non-negative. Returns 0 on success, -1 otherwise. */ |
74 | | static int |
75 | | resize_buffer(stringio *self, size_t size) |
76 | 28 | { |
77 | | /* Here, unsigned types are used to avoid dealing with signed integer |
78 | | overflow, which is undefined in C. */ |
79 | 28 | size_t alloc = self->buf_size; |
80 | 28 | Py_UCS4 *new_buf = NULL; |
81 | | |
82 | 28 | assert(self->buf != NULL); |
83 | | |
84 | | /* Reserve one more char for line ending detection. */ |
85 | 28 | size = size + 1; |
86 | | /* For simplicity, stay in the range of the signed type. Anyway, Python |
87 | | doesn't allow strings to be longer than this. */ |
88 | 28 | if (size > PY_SSIZE_T_MAX) |
89 | 0 | goto overflow; |
90 | | |
91 | 28 | if (size < alloc / 2) { |
92 | | /* Major downsize; resize down to exact size. */ |
93 | 0 | alloc = size + 1; |
94 | 0 | } |
95 | 28 | else if (size < alloc) { |
96 | | /* Within allocated size; quick exit */ |
97 | 14 | return 0; |
98 | 14 | } |
99 | 14 | else if (size <= alloc * 1.125) { |
100 | | /* Moderate upsize; overallocate similar to list_resize() */ |
101 | 0 | alloc = size + (size >> 3) + (size < 9 ? 3 : 6); |
102 | 0 | } |
103 | 14 | else { |
104 | | /* Major upsize; resize up to exact size */ |
105 | 14 | alloc = size + 1; |
106 | 14 | } |
107 | | |
108 | 14 | if (alloc > PY_SIZE_MAX / sizeof(Py_UCS4)) |
109 | 0 | goto overflow; |
110 | 14 | new_buf = (Py_UCS4 *)PyMem_Realloc(self->buf, alloc * sizeof(Py_UCS4)); |
111 | 14 | if (new_buf == NULL) { |
112 | 0 | PyErr_NoMemory(); |
113 | 0 | return -1; |
114 | 0 | } |
115 | 14 | self->buf_size = alloc; |
116 | 14 | self->buf = new_buf; |
117 | | |
118 | 14 | return 0; |
119 | | |
120 | 0 | overflow: |
121 | 0 | PyErr_SetString(PyExc_OverflowError, |
122 | 0 | "new buffer size too large"); |
123 | 0 | return -1; |
124 | 14 | } |
125 | | |
126 | | static PyObject * |
127 | | make_intermediate(stringio *self) |
128 | 14 | { |
129 | 14 | PyObject *intermediate = _PyAccu_Finish(&self->accu); |
130 | 14 | self->state = STATE_REALIZED; |
131 | 14 | if (intermediate == NULL) |
132 | 0 | return NULL; |
133 | 14 | if (_PyAccu_Init(&self->accu) || |
134 | 14 | _PyAccu_Accumulate(&self->accu, intermediate)) { |
135 | 0 | Py_DECREF(intermediate); |
136 | 0 | return NULL; |
137 | 0 | } |
138 | 14 | self->state = STATE_ACCUMULATING; |
139 | 14 | return intermediate; |
140 | 14 | } |
141 | | |
142 | | static int |
143 | | realize(stringio *self) |
144 | 0 | { |
145 | 0 | Py_ssize_t len; |
146 | 0 | PyObject *intermediate; |
147 | |
|
148 | 0 | if (self->state == STATE_REALIZED) |
149 | 0 | return 0; |
150 | 0 | assert(self->state == STATE_ACCUMULATING); |
151 | 0 | self->state = STATE_REALIZED; |
152 | |
|
153 | 0 | intermediate = _PyAccu_Finish(&self->accu); |
154 | 0 | if (intermediate == NULL) |
155 | 0 | return -1; |
156 | | |
157 | | /* Append the intermediate string to the internal buffer. |
158 | | The length should be equal to the current cursor position. |
159 | | */ |
160 | 0 | len = PyUnicode_GET_LENGTH(intermediate); |
161 | 0 | if (resize_buffer(self, len) < 0) { |
162 | 0 | Py_DECREF(intermediate); |
163 | 0 | return -1; |
164 | 0 | } |
165 | 0 | if (!PyUnicode_AsUCS4(intermediate, self->buf, len, 0)) { |
166 | 0 | Py_DECREF(intermediate); |
167 | 0 | return -1; |
168 | 0 | } |
169 | | |
170 | 0 | Py_DECREF(intermediate); |
171 | 0 | return 0; |
172 | 0 | } |
173 | | |
174 | | /* Internal routine for writing a whole PyUnicode object to the buffer of a |
175 | | StringIO object. Returns 0 on success, or -1 on error. */ |
176 | | static Py_ssize_t |
177 | | write_str(stringio *self, PyObject *obj) |
178 | 56 | { |
179 | 56 | Py_ssize_t len; |
180 | 56 | PyObject *decoded = NULL; |
181 | | |
182 | 56 | assert(self->buf != NULL); |
183 | 56 | assert(self->pos >= 0); |
184 | | |
185 | 56 | if (self->decoder != NULL) { |
186 | 0 | decoded = _PyIncrementalNewlineDecoder_decode( |
187 | 0 | self->decoder, obj, 1 /* always final */); |
188 | 0 | } |
189 | 56 | else { |
190 | 56 | decoded = obj; |
191 | 56 | Py_INCREF(decoded); |
192 | 56 | } |
193 | 56 | if (self->writenl) { |
194 | 0 | PyObject *translated = PyUnicode_Replace( |
195 | 0 | decoded, _PyIO_str_nl, self->writenl, -1); |
196 | 0 | Py_DECREF(decoded); |
197 | 0 | decoded = translated; |
198 | 0 | } |
199 | 56 | if (decoded == NULL) |
200 | 0 | return -1; |
201 | | |
202 | 56 | assert(PyUnicode_Check(decoded)); |
203 | 56 | if (PyUnicode_READY(decoded)) { |
204 | 0 | Py_DECREF(decoded); |
205 | 0 | return -1; |
206 | 0 | } |
207 | 56 | len = PyUnicode_GET_LENGTH(decoded); |
208 | 56 | assert(len >= 0); |
209 | | |
210 | | /* This overflow check is not strictly necessary. However, it avoids us to |
211 | | deal with funky things like comparing an unsigned and a signed |
212 | | integer. */ |
213 | 56 | if (self->pos > PY_SSIZE_T_MAX - len) { |
214 | 0 | PyErr_SetString(PyExc_OverflowError, |
215 | 0 | "new position too large"); |
216 | 0 | goto fail; |
217 | 0 | } |
218 | | |
219 | 56 | if (self->state == STATE_ACCUMULATING) { |
220 | 56 | if (self->string_size == self->pos) { |
221 | 56 | if (_PyAccu_Accumulate(&self->accu, decoded)) |
222 | 0 | goto fail; |
223 | 56 | goto success; |
224 | 56 | } |
225 | 0 | if (realize(self)) |
226 | 0 | goto fail; |
227 | 0 | } |
228 | | |
229 | 0 | if (self->pos + len > self->string_size) { |
230 | 0 | if (resize_buffer(self, self->pos + len) < 0) |
231 | 0 | goto fail; |
232 | 0 | } |
233 | | |
234 | 0 | if (self->pos > self->string_size) { |
235 | | /* In case of overseek, pad with null bytes the buffer region between |
236 | | the end of stream and the current position. |
237 | | |
238 | | 0 lo string_size hi |
239 | | | |<---used--->|<----------available----------->| |
240 | | | | <--to pad-->|<---to write---> | |
241 | | 0 buf position |
242 | | |
243 | | */ |
244 | 0 | memset(self->buf + self->string_size, '\0', |
245 | 0 | (self->pos - self->string_size) * sizeof(Py_UCS4)); |
246 | 0 | } |
247 | | |
248 | | /* Copy the data to the internal buffer, overwriting some of the |
249 | | existing data if self->pos < self->string_size. */ |
250 | 0 | if (!PyUnicode_AsUCS4(decoded, |
251 | 0 | self->buf + self->pos, |
252 | 0 | self->buf_size - self->pos, |
253 | 0 | 0)) |
254 | 0 | goto fail; |
255 | | |
256 | 56 | success: |
257 | | /* Set the new length of the internal string if it has changed. */ |
258 | 56 | self->pos += len; |
259 | 56 | if (self->string_size < self->pos) |
260 | 56 | self->string_size = self->pos; |
261 | | |
262 | 56 | Py_DECREF(decoded); |
263 | 56 | return 0; |
264 | | |
265 | 0 | fail: |
266 | 0 | Py_XDECREF(decoded); |
267 | 0 | return -1; |
268 | 0 | } |
269 | | |
270 | | /*[clinic input] |
271 | | _io.StringIO.getvalue |
272 | | |
273 | | Retrieve the entire contents of the object. |
274 | | [clinic start generated code]*/ |
275 | | |
276 | | static PyObject * |
277 | | _io_StringIO_getvalue_impl(stringio *self) |
278 | | /*[clinic end generated code: output=27b6a7bfeaebce01 input=d23cb81d6791cf88]*/ |
279 | 14 | { |
280 | 14 | CHECK_INITIALIZED(self); |
281 | 14 | CHECK_CLOSED(self); |
282 | 14 | if (self->state == STATE_ACCUMULATING) |
283 | 14 | return make_intermediate(self); |
284 | 0 | return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf, |
285 | 0 | self->string_size); |
286 | 14 | } |
287 | | |
288 | | /*[clinic input] |
289 | | _io.StringIO.tell |
290 | | |
291 | | Tell the current file position. |
292 | | [clinic start generated code]*/ |
293 | | |
294 | | static PyObject * |
295 | | _io_StringIO_tell_impl(stringio *self) |
296 | | /*[clinic end generated code: output=2e87ac67b116c77b input=ec866ebaff02f405]*/ |
297 | 0 | { |
298 | 0 | CHECK_INITIALIZED(self); |
299 | 0 | CHECK_CLOSED(self); |
300 | 0 | return PyLong_FromSsize_t(self->pos); |
301 | 0 | } |
302 | | |
303 | | /*[clinic input] |
304 | | _io.StringIO.read |
305 | | size: Py_ssize_t(accept={int, NoneType}) = -1 |
306 | | / |
307 | | |
308 | | Read at most size characters, returned as a string. |
309 | | |
310 | | If the argument is negative or omitted, read until EOF |
311 | | is reached. Return an empty string at EOF. |
312 | | [clinic start generated code]*/ |
313 | | |
314 | | static PyObject * |
315 | | _io_StringIO_read_impl(stringio *self, Py_ssize_t size) |
316 | | /*[clinic end generated code: output=ae8cf6002f71626c input=0921093383dfb92d]*/ |
317 | 0 | { |
318 | 0 | Py_ssize_t n; |
319 | 0 | Py_UCS4 *output; |
320 | |
|
321 | 0 | CHECK_INITIALIZED(self); |
322 | 0 | CHECK_CLOSED(self); |
323 | | |
324 | | /* adjust invalid sizes */ |
325 | 0 | n = self->string_size - self->pos; |
326 | 0 | if (size < 0 || size > n) { |
327 | 0 | size = n; |
328 | 0 | if (size < 0) |
329 | 0 | size = 0; |
330 | 0 | } |
331 | | |
332 | | /* Optimization for seek(0); read() */ |
333 | 0 | if (self->state == STATE_ACCUMULATING && self->pos == 0 && size == n) { |
334 | 0 | PyObject *result = make_intermediate(self); |
335 | 0 | self->pos = self->string_size; |
336 | 0 | return result; |
337 | 0 | } |
338 | | |
339 | 0 | ENSURE_REALIZED(self); |
340 | 0 | output = self->buf + self->pos; |
341 | 0 | self->pos += size; |
342 | 0 | return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, size); |
343 | 0 | } |
344 | | |
345 | | /* Internal helper, used by stringio_readline and stringio_iternext */ |
346 | | static PyObject * |
347 | | _stringio_readline(stringio *self, Py_ssize_t limit) |
348 | 0 | { |
349 | 0 | Py_UCS4 *start, *end, old_char; |
350 | 0 | Py_ssize_t len, consumed; |
351 | | |
352 | | /* In case of overseek, return the empty string */ |
353 | 0 | if (self->pos >= self->string_size) |
354 | 0 | return PyUnicode_New(0, 0); |
355 | | |
356 | 0 | start = self->buf + self->pos; |
357 | 0 | if (limit < 0 || limit > self->string_size - self->pos) |
358 | 0 | limit = self->string_size - self->pos; |
359 | |
|
360 | 0 | end = start + limit; |
361 | 0 | old_char = *end; |
362 | 0 | *end = '\0'; |
363 | 0 | len = _PyIO_find_line_ending( |
364 | 0 | self->readtranslate, self->readuniversal, self->readnl, |
365 | 0 | PyUnicode_4BYTE_KIND, (char*)start, (char*)end, &consumed); |
366 | 0 | *end = old_char; |
367 | | /* If we haven't found any line ending, we just return everything |
368 | | (`consumed` is ignored). */ |
369 | 0 | if (len < 0) |
370 | 0 | len = limit; |
371 | 0 | self->pos += len; |
372 | 0 | return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, start, len); |
373 | 0 | } |
374 | | |
375 | | /*[clinic input] |
376 | | _io.StringIO.readline |
377 | | size: Py_ssize_t(accept={int, NoneType}) = -1 |
378 | | / |
379 | | |
380 | | Read until newline or EOF. |
381 | | |
382 | | Returns an empty string if EOF is hit immediately. |
383 | | [clinic start generated code]*/ |
384 | | |
385 | | static PyObject * |
386 | | _io_StringIO_readline_impl(stringio *self, Py_ssize_t size) |
387 | | /*[clinic end generated code: output=cabd6452f1b7e85d input=a5bd70bf682aa276]*/ |
388 | 0 | { |
389 | 0 | CHECK_INITIALIZED(self); |
390 | 0 | CHECK_CLOSED(self); |
391 | 0 | ENSURE_REALIZED(self); |
392 | |
|
393 | 0 | return _stringio_readline(self, size); |
394 | 0 | } |
395 | | |
396 | | static PyObject * |
397 | | stringio_iternext(stringio *self) |
398 | 0 | { |
399 | 0 | PyObject *line; |
400 | |
|
401 | 0 | CHECK_INITIALIZED(self); |
402 | 0 | CHECK_CLOSED(self); |
403 | 0 | ENSURE_REALIZED(self); |
404 | |
|
405 | 0 | if (Py_TYPE(self) == &PyStringIO_Type) { |
406 | | /* Skip method call overhead for speed */ |
407 | 0 | line = _stringio_readline(self, -1); |
408 | 0 | } |
409 | 0 | else { |
410 | | /* XXX is subclassing StringIO really supported? */ |
411 | 0 | line = PyObject_CallMethodObjArgs((PyObject *)self, |
412 | 0 | _PyIO_str_readline, NULL); |
413 | 0 | if (line && !PyUnicode_Check(line)) { |
414 | 0 | PyErr_Format(PyExc_OSError, |
415 | 0 | "readline() should have returned a str object, " |
416 | 0 | "not '%.200s'", Py_TYPE(line)->tp_name); |
417 | 0 | Py_DECREF(line); |
418 | 0 | return NULL; |
419 | 0 | } |
420 | 0 | } |
421 | | |
422 | 0 | if (line == NULL) |
423 | 0 | return NULL; |
424 | | |
425 | 0 | if (PyUnicode_GET_LENGTH(line) == 0) { |
426 | | /* Reached EOF */ |
427 | 0 | Py_DECREF(line); |
428 | 0 | return NULL; |
429 | 0 | } |
430 | | |
431 | 0 | return line; |
432 | 0 | } |
433 | | |
434 | | /*[clinic input] |
435 | | _io.StringIO.truncate |
436 | | pos as size: Py_ssize_t(accept={int, NoneType}, c_default="self->pos") = None |
437 | | / |
438 | | |
439 | | Truncate size to pos. |
440 | | |
441 | | The pos argument defaults to the current file position, as |
442 | | returned by tell(). The current file position is unchanged. |
443 | | Returns the new absolute position. |
444 | | [clinic start generated code]*/ |
445 | | |
446 | | static PyObject * |
447 | | _io_StringIO_truncate_impl(stringio *self, Py_ssize_t size) |
448 | | /*[clinic end generated code: output=eb3aef8e06701365 input=5505cff90ca48b96]*/ |
449 | 0 | { |
450 | 0 | CHECK_INITIALIZED(self); |
451 | 0 | CHECK_CLOSED(self); |
452 | |
|
453 | 0 | if (size < 0) { |
454 | 0 | PyErr_Format(PyExc_ValueError, |
455 | 0 | "Negative size value %zd", size); |
456 | 0 | return NULL; |
457 | 0 | } |
458 | | |
459 | 0 | if (size < self->string_size) { |
460 | 0 | ENSURE_REALIZED(self); |
461 | 0 | if (resize_buffer(self, size) < 0) |
462 | 0 | return NULL; |
463 | 0 | self->string_size = size; |
464 | 0 | } |
465 | | |
466 | 0 | return PyLong_FromSsize_t(size); |
467 | 0 | } |
468 | | |
469 | | /*[clinic input] |
470 | | _io.StringIO.seek |
471 | | pos: Py_ssize_t |
472 | | whence: int = 0 |
473 | | / |
474 | | |
475 | | Change stream position. |
476 | | |
477 | | Seek to character offset pos relative to position indicated by whence: |
478 | | 0 Start of stream (the default). pos should be >= 0; |
479 | | 1 Current position - pos must be 0; |
480 | | 2 End of stream - pos must be 0. |
481 | | Returns the new absolute position. |
482 | | [clinic start generated code]*/ |
483 | | |
484 | | static PyObject * |
485 | | _io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence) |
486 | | /*[clinic end generated code: output=e9e0ac9a8ae71c25 input=e3855b24e7cae06a]*/ |
487 | 0 | { |
488 | 0 | CHECK_INITIALIZED(self); |
489 | 0 | CHECK_CLOSED(self); |
490 | |
|
491 | 0 | if (whence != 0 && whence != 1 && whence != 2) { |
492 | 0 | PyErr_Format(PyExc_ValueError, |
493 | 0 | "Invalid whence (%i, should be 0, 1 or 2)", whence); |
494 | 0 | return NULL; |
495 | 0 | } |
496 | 0 | else if (pos < 0 && whence == 0) { |
497 | 0 | PyErr_Format(PyExc_ValueError, |
498 | 0 | "Negative seek position %zd", pos); |
499 | 0 | return NULL; |
500 | 0 | } |
501 | 0 | else if (whence != 0 && pos != 0) { |
502 | 0 | PyErr_SetString(PyExc_OSError, |
503 | 0 | "Can't do nonzero cur-relative seeks"); |
504 | 0 | return NULL; |
505 | 0 | } |
506 | | |
507 | | /* whence = 0: offset relative to beginning of the string. |
508 | | whence = 1: no change to current position. |
509 | | whence = 2: change position to end of file. */ |
510 | 0 | if (whence == 1) { |
511 | 0 | pos = self->pos; |
512 | 0 | } |
513 | 0 | else if (whence == 2) { |
514 | 0 | pos = self->string_size; |
515 | 0 | } |
516 | |
|
517 | 0 | self->pos = pos; |
518 | |
|
519 | 0 | return PyLong_FromSsize_t(self->pos); |
520 | 0 | } |
521 | | |
522 | | /*[clinic input] |
523 | | _io.StringIO.write |
524 | | s as obj: object |
525 | | / |
526 | | |
527 | | Write string to file. |
528 | | |
529 | | Returns the number of characters written, which is always equal to |
530 | | the length of the string. |
531 | | [clinic start generated code]*/ |
532 | | |
533 | | static PyObject * |
534 | | _io_StringIO_write(stringio *self, PyObject *obj) |
535 | | /*[clinic end generated code: output=0deaba91a15b94da input=cf96f3b16586e669]*/ |
536 | 112 | { |
537 | 112 | Py_ssize_t size; |
538 | | |
539 | 112 | CHECK_INITIALIZED(self); |
540 | 112 | if (!PyUnicode_Check(obj)) { |
541 | 0 | PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'", |
542 | 0 | Py_TYPE(obj)->tp_name); |
543 | 0 | return NULL; |
544 | 0 | } |
545 | 112 | if (PyUnicode_READY(obj)) |
546 | 0 | return NULL; |
547 | 112 | CHECK_CLOSED(self); |
548 | 112 | size = PyUnicode_GET_LENGTH(obj); |
549 | | |
550 | 112 | if (size > 0 && write_str(self, obj) < 0) |
551 | 0 | return NULL; |
552 | | |
553 | 112 | return PyLong_FromSsize_t(size); |
554 | 112 | } |
555 | | |
556 | | /*[clinic input] |
557 | | _io.StringIO.close |
558 | | |
559 | | Close the IO object. |
560 | | |
561 | | Attempting any further operation after the object is closed |
562 | | will raise a ValueError. |
563 | | |
564 | | This method has no effect if the file is already closed. |
565 | | [clinic start generated code]*/ |
566 | | |
567 | | static PyObject * |
568 | | _io_StringIO_close_impl(stringio *self) |
569 | | /*[clinic end generated code: output=04399355cbe518f1 input=cbc10b45f35d6d46]*/ |
570 | 14 | { |
571 | 14 | self->closed = 1; |
572 | | /* Free up some memory */ |
573 | 14 | if (resize_buffer(self, 0) < 0) |
574 | 0 | return NULL; |
575 | 14 | _PyAccu_Destroy(&self->accu); |
576 | 14 | Py_CLEAR(self->readnl); |
577 | 14 | Py_CLEAR(self->writenl); |
578 | 14 | Py_CLEAR(self->decoder); |
579 | 14 | Py_RETURN_NONE; |
580 | 14 | } |
581 | | |
582 | | static int |
583 | | stringio_traverse(stringio *self, visitproc visit, void *arg) |
584 | 0 | { |
585 | 0 | Py_VISIT(self->dict); |
586 | 0 | return 0; |
587 | 0 | } |
588 | | |
589 | | static int |
590 | | stringio_clear(stringio *self) |
591 | 0 | { |
592 | 0 | Py_CLEAR(self->dict); |
593 | 0 | return 0; |
594 | 0 | } |
595 | | |
596 | | static void |
597 | | stringio_dealloc(stringio *self) |
598 | 14 | { |
599 | 14 | _PyObject_GC_UNTRACK(self); |
600 | 14 | self->ok = 0; |
601 | 14 | if (self->buf) { |
602 | 14 | PyMem_Free(self->buf); |
603 | 14 | self->buf = NULL; |
604 | 14 | } |
605 | 14 | _PyAccu_Destroy(&self->accu); |
606 | 14 | Py_CLEAR(self->readnl); |
607 | 14 | Py_CLEAR(self->writenl); |
608 | 14 | Py_CLEAR(self->decoder); |
609 | 14 | Py_CLEAR(self->dict); |
610 | 14 | if (self->weakreflist != NULL) |
611 | 0 | PyObject_ClearWeakRefs((PyObject *) self); |
612 | 14 | Py_TYPE(self)->tp_free(self); |
613 | 14 | } |
614 | | |
615 | | static PyObject * |
616 | | stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds) |
617 | 14 | { |
618 | 14 | stringio *self; |
619 | | |
620 | 14 | assert(type != NULL && type->tp_alloc != NULL); |
621 | 14 | self = (stringio *)type->tp_alloc(type, 0); |
622 | 14 | if (self == NULL) |
623 | 0 | return NULL; |
624 | | |
625 | | /* tp_alloc initializes all the fields to zero. So we don't have to |
626 | | initialize them here. */ |
627 | | |
628 | 14 | self->buf = (Py_UCS4 *)PyMem_Malloc(0); |
629 | 14 | if (self->buf == NULL) { |
630 | 0 | Py_DECREF(self); |
631 | 0 | return PyErr_NoMemory(); |
632 | 0 | } |
633 | | |
634 | 14 | return (PyObject *)self; |
635 | 14 | } |
636 | | |
637 | | /*[clinic input] |
638 | | _io.StringIO.__init__ |
639 | | initial_value as value: object(c_default="NULL") = '' |
640 | | newline as newline_obj: object(c_default="NULL") = '\n' |
641 | | |
642 | | Text I/O implementation using an in-memory buffer. |
643 | | |
644 | | The initial_value argument sets the value of object. The newline |
645 | | argument is like the one of TextIOWrapper's constructor. |
646 | | [clinic start generated code]*/ |
647 | | |
648 | | static int |
649 | | _io_StringIO___init___impl(stringio *self, PyObject *value, |
650 | | PyObject *newline_obj) |
651 | | /*[clinic end generated code: output=a421ea023b22ef4e input=cee2d9181b2577a3]*/ |
652 | 14 | { |
653 | 14 | const char *newline = "\n"; |
654 | 14 | Py_ssize_t value_len; |
655 | | |
656 | | /* Parse the newline argument. We only want to allow unicode objects or |
657 | | None. */ |
658 | 14 | if (newline_obj == Py_None) { |
659 | 0 | newline = NULL; |
660 | 0 | } |
661 | 14 | else if (newline_obj) { |
662 | 0 | if (!PyUnicode_Check(newline_obj)) { |
663 | 0 | PyErr_Format(PyExc_TypeError, |
664 | 0 | "newline must be str or None, not %.200s", |
665 | 0 | Py_TYPE(newline_obj)->tp_name); |
666 | 0 | return -1; |
667 | 0 | } |
668 | 0 | newline = PyUnicode_AsUTF8(newline_obj); |
669 | 0 | if (newline == NULL) |
670 | 0 | return -1; |
671 | 0 | } |
672 | | |
673 | 14 | if (newline && newline[0] != '\0' |
674 | 14 | && !(newline[0] == '\n' && newline[1] == '\0') |
675 | 14 | && !(newline[0] == '\r' && newline[1] == '\0') |
676 | 14 | && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) { |
677 | 0 | PyErr_Format(PyExc_ValueError, |
678 | 0 | "illegal newline value: %R", newline_obj); |
679 | 0 | return -1; |
680 | 0 | } |
681 | 14 | if (value && value != Py_None && !PyUnicode_Check(value)) { |
682 | 0 | PyErr_Format(PyExc_TypeError, |
683 | 0 | "initial_value must be str or None, not %.200s", |
684 | 0 | Py_TYPE(value)->tp_name); |
685 | 0 | return -1; |
686 | 0 | } |
687 | | |
688 | 14 | self->ok = 0; |
689 | | |
690 | 14 | _PyAccu_Destroy(&self->accu); |
691 | 14 | Py_CLEAR(self->readnl); |
692 | 14 | Py_CLEAR(self->writenl); |
693 | 14 | Py_CLEAR(self->decoder); |
694 | | |
695 | 14 | assert((newline != NULL && newline_obj != Py_None) || |
696 | 14 | (newline == NULL && newline_obj == Py_None)); |
697 | | |
698 | 14 | if (newline) { |
699 | 14 | self->readnl = PyUnicode_FromString(newline); |
700 | 14 | if (self->readnl == NULL) |
701 | 0 | return -1; |
702 | 14 | } |
703 | 14 | self->readuniversal = (newline == NULL || newline[0] == '\0'); |
704 | 14 | self->readtranslate = (newline == NULL); |
705 | | /* If newline == "", we don't translate anything. |
706 | | If newline == "\n" or newline == None, we translate to "\n", which is |
707 | | a no-op. |
708 | | (for newline == None, TextIOWrapper translates to os.linesep, but it |
709 | | is pointless for StringIO) |
710 | | */ |
711 | 14 | if (newline != NULL && newline[0] == '\r') { |
712 | 0 | self->writenl = self->readnl; |
713 | 0 | Py_INCREF(self->writenl); |
714 | 0 | } |
715 | | |
716 | 14 | if (self->readuniversal) { |
717 | 0 | self->decoder = PyObject_CallFunction( |
718 | 0 | (PyObject *)&PyIncrementalNewlineDecoder_Type, |
719 | 0 | "Oi", Py_None, (int) self->readtranslate); |
720 | 0 | if (self->decoder == NULL) |
721 | 0 | return -1; |
722 | 0 | } |
723 | | |
724 | | /* Now everything is set up, resize buffer to size of initial value, |
725 | | and copy it */ |
726 | 14 | self->string_size = 0; |
727 | 14 | if (value && value != Py_None) |
728 | 0 | value_len = PyUnicode_GetLength(value); |
729 | 14 | else |
730 | 14 | value_len = 0; |
731 | 14 | if (value_len > 0) { |
732 | | /* This is a heuristic, for newline translation might change |
733 | | the string length. */ |
734 | 0 | if (resize_buffer(self, 0) < 0) |
735 | 0 | return -1; |
736 | 0 | self->state = STATE_REALIZED; |
737 | 0 | self->pos = 0; |
738 | 0 | if (write_str(self, value) < 0) |
739 | 0 | return -1; |
740 | 0 | } |
741 | 14 | else { |
742 | | /* Empty stringio object, we can start by accumulating */ |
743 | 14 | if (resize_buffer(self, 0) < 0) |
744 | 0 | return -1; |
745 | 14 | if (_PyAccu_Init(&self->accu)) |
746 | 0 | return -1; |
747 | 14 | self->state = STATE_ACCUMULATING; |
748 | 14 | } |
749 | 14 | self->pos = 0; |
750 | | |
751 | 14 | self->closed = 0; |
752 | 14 | self->ok = 1; |
753 | 14 | return 0; |
754 | 14 | } |
755 | | |
756 | | /* Properties and pseudo-properties */ |
757 | | |
758 | | /*[clinic input] |
759 | | _io.StringIO.readable |
760 | | |
761 | | Returns True if the IO object can be read. |
762 | | [clinic start generated code]*/ |
763 | | |
764 | | static PyObject * |
765 | | _io_StringIO_readable_impl(stringio *self) |
766 | | /*[clinic end generated code: output=b19d44dd8b1ceb99 input=39ce068b224c21ad]*/ |
767 | 0 | { |
768 | 0 | CHECK_INITIALIZED(self); |
769 | 0 | CHECK_CLOSED(self); |
770 | 0 | Py_RETURN_TRUE; |
771 | 0 | } |
772 | | |
773 | | /*[clinic input] |
774 | | _io.StringIO.writable |
775 | | |
776 | | Returns True if the IO object can be written. |
777 | | [clinic start generated code]*/ |
778 | | |
779 | | static PyObject * |
780 | | _io_StringIO_writable_impl(stringio *self) |
781 | | /*[clinic end generated code: output=13e4dd77187074ca input=7a691353aac38835]*/ |
782 | 0 | { |
783 | 0 | CHECK_INITIALIZED(self); |
784 | 0 | CHECK_CLOSED(self); |
785 | 0 | Py_RETURN_TRUE; |
786 | 0 | } |
787 | | |
788 | | /*[clinic input] |
789 | | _io.StringIO.seekable |
790 | | |
791 | | Returns True if the IO object can be seeked. |
792 | | [clinic start generated code]*/ |
793 | | |
794 | | static PyObject * |
795 | | _io_StringIO_seekable_impl(stringio *self) |
796 | | /*[clinic end generated code: output=4d20b4641c756879 input=4c606d05b32952e6]*/ |
797 | 0 | { |
798 | 0 | CHECK_INITIALIZED(self); |
799 | 0 | CHECK_CLOSED(self); |
800 | 0 | Py_RETURN_TRUE; |
801 | 0 | } |
802 | | |
803 | | /* Pickling support. |
804 | | |
805 | | The implementation of __getstate__ is similar to the one for BytesIO, |
806 | | except that we also save the newline parameter. For __setstate__ and unlike |
807 | | BytesIO, we call __init__ to restore the object's state. Doing so allows us |
808 | | to avoid decoding the complex newline state while keeping the object |
809 | | representation compact. |
810 | | |
811 | | See comment in bytesio.c regarding why only pickle protocol 2 and onward are |
812 | | supported. |
813 | | */ |
814 | | |
815 | | static PyObject * |
816 | | stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored)) |
817 | 0 | { |
818 | 0 | PyObject *initvalue = _io_StringIO_getvalue_impl(self); |
819 | 0 | PyObject *dict; |
820 | 0 | PyObject *state; |
821 | |
|
822 | 0 | if (initvalue == NULL) |
823 | 0 | return NULL; |
824 | 0 | if (self->dict == NULL) { |
825 | 0 | Py_INCREF(Py_None); |
826 | 0 | dict = Py_None; |
827 | 0 | } |
828 | 0 | else { |
829 | 0 | dict = PyDict_Copy(self->dict); |
830 | 0 | if (dict == NULL) { |
831 | 0 | Py_DECREF(initvalue); |
832 | 0 | return NULL; |
833 | 0 | } |
834 | 0 | } |
835 | | |
836 | 0 | state = Py_BuildValue("(OOnN)", initvalue, |
837 | 0 | self->readnl ? self->readnl : Py_None, |
838 | 0 | self->pos, dict); |
839 | 0 | Py_DECREF(initvalue); |
840 | 0 | return state; |
841 | 0 | } |
842 | | |
843 | | static PyObject * |
844 | | stringio_setstate(stringio *self, PyObject *state) |
845 | 0 | { |
846 | 0 | PyObject *initarg; |
847 | 0 | PyObject *position_obj; |
848 | 0 | PyObject *dict; |
849 | 0 | Py_ssize_t pos; |
850 | |
|
851 | 0 | assert(state != NULL); |
852 | 0 | CHECK_CLOSED(self); |
853 | | |
854 | | /* We allow the state tuple to be longer than 4, because we may need |
855 | | someday to extend the object's state without breaking |
856 | | backward-compatibility. */ |
857 | 0 | if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) { |
858 | 0 | PyErr_Format(PyExc_TypeError, |
859 | 0 | "%.200s.__setstate__ argument should be 4-tuple, got %.200s", |
860 | 0 | Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); |
861 | 0 | return NULL; |
862 | 0 | } |
863 | | |
864 | | /* Initialize the object's state. */ |
865 | 0 | initarg = PyTuple_GetSlice(state, 0, 2); |
866 | 0 | if (initarg == NULL) |
867 | 0 | return NULL; |
868 | 0 | if (_io_StringIO___init__((PyObject *)self, initarg, NULL) < 0) { |
869 | 0 | Py_DECREF(initarg); |
870 | 0 | return NULL; |
871 | 0 | } |
872 | 0 | Py_DECREF(initarg); |
873 | | |
874 | | /* Restore the buffer state. Even if __init__ did initialize the buffer, |
875 | | we have to initialize it again since __init__ may translate the |
876 | | newlines in the initial_value string. We clearly do not want that |
877 | | because the string value in the state tuple has already been translated |
878 | | once by __init__. So we do not take any chance and replace object's |
879 | | buffer completely. */ |
880 | 0 | { |
881 | 0 | PyObject *item; |
882 | 0 | Py_UCS4 *buf; |
883 | 0 | Py_ssize_t bufsize; |
884 | |
|
885 | 0 | item = PyTuple_GET_ITEM(state, 0); |
886 | 0 | buf = PyUnicode_AsUCS4Copy(item); |
887 | 0 | if (buf == NULL) |
888 | 0 | return NULL; |
889 | 0 | bufsize = PyUnicode_GET_LENGTH(item); |
890 | |
|
891 | 0 | if (resize_buffer(self, bufsize) < 0) { |
892 | 0 | PyMem_Free(buf); |
893 | 0 | return NULL; |
894 | 0 | } |
895 | 0 | memcpy(self->buf, buf, bufsize * sizeof(Py_UCS4)); |
896 | 0 | PyMem_Free(buf); |
897 | 0 | self->string_size = bufsize; |
898 | 0 | } |
899 | | |
900 | | /* Set carefully the position value. Alternatively, we could use the seek |
901 | | method instead of modifying self->pos directly to better protect the |
902 | | object internal state against erroneous (or malicious) inputs. */ |
903 | 0 | position_obj = PyTuple_GET_ITEM(state, 2); |
904 | 0 | if (!PyLong_Check(position_obj)) { |
905 | 0 | PyErr_Format(PyExc_TypeError, |
906 | 0 | "third item of state must be an integer, got %.200s", |
907 | 0 | Py_TYPE(position_obj)->tp_name); |
908 | 0 | return NULL; |
909 | 0 | } |
910 | 0 | pos = PyLong_AsSsize_t(position_obj); |
911 | 0 | if (pos == -1 && PyErr_Occurred()) |
912 | 0 | return NULL; |
913 | 0 | if (pos < 0) { |
914 | 0 | PyErr_SetString(PyExc_ValueError, |
915 | 0 | "position value cannot be negative"); |
916 | 0 | return NULL; |
917 | 0 | } |
918 | 0 | self->pos = pos; |
919 | | |
920 | | /* Set the dictionary of the instance variables. */ |
921 | 0 | dict = PyTuple_GET_ITEM(state, 3); |
922 | 0 | if (dict != Py_None) { |
923 | 0 | if (!PyDict_Check(dict)) { |
924 | 0 | PyErr_Format(PyExc_TypeError, |
925 | 0 | "fourth item of state should be a dict, got a %.200s", |
926 | 0 | Py_TYPE(dict)->tp_name); |
927 | 0 | return NULL; |
928 | 0 | } |
929 | 0 | if (self->dict) { |
930 | | /* Alternatively, we could replace the internal dictionary |
931 | | completely. However, it seems more practical to just update it. */ |
932 | 0 | if (PyDict_Update(self->dict, dict) < 0) |
933 | 0 | return NULL; |
934 | 0 | } |
935 | 0 | else { |
936 | 0 | Py_INCREF(dict); |
937 | 0 | self->dict = dict; |
938 | 0 | } |
939 | 0 | } |
940 | | |
941 | 0 | Py_RETURN_NONE; |
942 | 0 | } |
943 | | |
944 | | |
/* Getter for the `closed` attribute (wired up in stringio_getset).
   CHECK_INITIALIZED sets ValueError and returns NULL when self->ok <= 0;
   otherwise a bool object built from self->closed is returned.
   `context` is not used.  This getter does not invoke CHECK_CLOSED. */
945 | | static PyObject *
946 | | stringio_closed(stringio *self, void *context)
947 | 0 | {
948 | 0 | CHECK_INITIALIZED(self);
949 | 0 | return PyBool_FromLong(self->closed); /* self->closed is a char flag */
950 | 0 | }
951 | | |
/* Getter for the `line_buffering` pseudo-attribute (wired up in
   stringio_getset).  Once both guard macros have passed it always yields
   False.  `context` is not used.
   CHECK_INITIALIZED sets ValueError and returns NULL when self->ok <= 0.
   NOTE(review): CHECK_CLOSED is defined earlier in this file; presumably it
   raises and returns NULL when the object is closed -- confirm against the
   macro body. */
952 | | static PyObject *
953 | | stringio_line_buffering(stringio *self, void *context)
954 | 0 | {
955 | 0 | CHECK_INITIALIZED(self);
956 | 0 | CHECK_CLOSED(self);
957 | 0 | Py_RETURN_FALSE; /* constant result: no state is consulted beyond the guards */
958 | 0 | }
959 | | |
/* Getter for the `newlines` attribute (wired up in stringio_getset).
   After the CHECK_INITIALIZED / CHECK_CLOSED guards it returns None when no
   decoder is attached (self->decoder == NULL); otherwise it returns whatever
   the attribute lookup on the decoder produces.  `context` is not used.
   NOTE(review): _PyIO_str_newlines is declared outside this view; presumably
   it is the string object "newlines" -- confirm in _iomodule. */
960 | | static PyObject *
961 | | stringio_newlines(stringio *self, void *context)
962 | 0 | {
963 | 0 | CHECK_INITIALIZED(self);
964 | 0 | CHECK_CLOSED(self);
965 | 0 | if (self->decoder == NULL)
966 | 0 | Py_RETURN_NONE;
967 | 0 | return PyObject_GetAttr(self->decoder, _PyIO_str_newlines); /* lookup result is returned to the caller unchanged */
968 | 0 | }
969 | | |
970 | | #include "clinic/stringio.c.h" |
971 | | |
/* Method table for the StringIO type; referenced from the tp_methods slot of
   PyStringIO_Type.  The _IO_STRINGIO_*_METHODDEF entries are macros that are
   not defined in this view (presumably supplied by the "clinic/stringio.c.h"
   include just above -- confirm).  The two pickling hooks are listed by hand:
   __getstate__ takes no arguments (METH_NOARGS) and __setstate__ takes exactly
   one object (METH_O).  The table ends with a {NULL, NULL} sentinel. */
972 | | static struct PyMethodDef stringio_methods[] = {
973 | | _IO_STRINGIO_CLOSE_METHODDEF
974 | | _IO_STRINGIO_GETVALUE_METHODDEF
975 | | _IO_STRINGIO_READ_METHODDEF
976 | | _IO_STRINGIO_READLINE_METHODDEF
977 | | _IO_STRINGIO_TELL_METHODDEF
978 | | _IO_STRINGIO_TRUNCATE_METHODDEF
979 | | _IO_STRINGIO_SEEK_METHODDEF
980 | | _IO_STRINGIO_WRITE_METHODDEF
981 | |
982 | | _IO_STRINGIO_SEEKABLE_METHODDEF
983 | | _IO_STRINGIO_READABLE_METHODDEF
984 | | _IO_STRINGIO_WRITABLE_METHODDEF
985 | |
986 | | {"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
987 | | {"__setstate__", (PyCFunction)stringio_setstate, METH_O},
988 | | {NULL, NULL} /* sentinel */
989 | | };
990 | | |
/* Computed-attribute table for the StringIO type; referenced from the
   tp_getset slot of PyStringIO_Type.  Each entry supplies only a getter
   (the setter and doc fields are NULL).  The table ends with a {NULL}
   sentinel. */
991 | | static PyGetSetDef stringio_getset[] = {
992 | | {"closed", (getter)stringio_closed, NULL, NULL},
993 | | {"newlines", (getter)stringio_newlines, NULL, NULL},
994 | | /* (following comments straight off of the original Python wrapper:)
995 | | XXX Cruft to support the TextIOWrapper API. This would only
996 | | be meaningful if StringIO supported the buffer attribute.
997 | | Hopefully, a better solution, than adding these pseudo-attributes,
998 | | will be found.
999 | | */
1000 | | {"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
1001 | | {NULL}
1002 | | };
1003 | | |
/* Static type object for "_io.StringIO".  The initializer is positional, so
   the order of the entries below must match the PyTypeObject slot order; the
   trailing comment on each row names the slot being filled.
   Slots populated here: instance size (sizeof(stringio)), dealloc,
   flags (default | subclassable | GC-tracked), doc string, GC traverse/clear,
   weak-reference list offset, iternext, the method and getset tables,
   instance-dict offset, init and new.  Every other slot is left as 0.
   NOTE(review): tp_iter and tp_base are 0 here even though tp_iternext is
   set; presumably both are provided when the module initialises the type --
   confirm in the module init code. */
1004 | | PyTypeObject PyStringIO_Type = {
1005 | | PyVarObject_HEAD_INIT(NULL, 0)
1006 | | "_io.StringIO", /*tp_name*/
1007 | | sizeof(stringio), /*tp_basicsize*/
1008 | | 0, /*tp_itemsize*/
1009 | | (destructor)stringio_dealloc, /*tp_dealloc*/
1010 | | 0, /*tp_vectorcall_offset*/
1011 | | 0, /*tp_getattr*/
1012 | | 0, /*tp_setattr*/
1013 | | 0, /*tp_as_async*/
1014 | | 0, /*tp_repr*/
1015 | | 0, /*tp_as_number*/
1016 | | 0, /*tp_as_sequence*/
1017 | | 0, /*tp_as_mapping*/
1018 | | 0, /*tp_hash*/
1019 | | 0, /*tp_call*/
1020 | | 0, /*tp_str*/
1021 | | 0, /*tp_getattro*/
1022 | | 0, /*tp_setattro*/
1023 | | 0, /*tp_as_buffer*/
1024 | | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
1025 | | | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1026 | | _io_StringIO___init____doc__, /*tp_doc*/
1027 | | (traverseproc)stringio_traverse, /*tp_traverse*/
1028 | | (inquiry)stringio_clear, /*tp_clear*/
1029 | | 0, /*tp_richcompare*/
1030 | | offsetof(stringio, weakreflist), /*tp_weaklistoffset*/
1031 | | 0, /*tp_iter*/
1032 | | (iternextfunc)stringio_iternext, /*tp_iternext*/
1033 | | stringio_methods, /*tp_methods*/
1034 | | 0, /*tp_members*/
1035 | | stringio_getset, /*tp_getset*/
1036 | | 0, /*tp_base*/
1037 | | 0, /*tp_dict*/
1038 | | 0, /*tp_descr_get*/
1039 | | 0, /*tp_descr_set*/
1040 | | offsetof(stringio, dict), /*tp_dictoffset*/
1041 | | _io_StringIO___init__, /*tp_init*/
1042 | | 0, /*tp_alloc*/
1043 | | stringio_new, /*tp_new*/
1044 | | };