Coverage Report

Created: 2026-04-20 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_writer.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
#include "Python.h"
42
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
43
#include "pycore_long.h"          // _PyLong_FormatWriter()
44
#include "pycore_unicodeobject.h" // _PyUnicode_Result()
45
46
47
#ifdef MS_WINDOWS
48
   /* On Windows, overallocating by 50% is the best factor */
49
#  define OVERALLOCATE_FACTOR 2
50
#else
51
   /* On Linux, overallocating by 25% is the best factor */
52
81.4M
#  define OVERALLOCATE_FACTOR 4
53
#endif
54
55
56
/* Compilation of templated routines */
57
58
#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty()
59
60
#include "stringlib/ucs1lib.h"
61
#include "stringlib/find_max_char.h"
62
#include "stringlib/undef.h"
63
64
65
/* Copy an ASCII or latin1 char* string into a Python Unicode string.
66
67
   WARNING: The function doesn't copy the terminating null character and
68
   doesn't check the maximum character (may write a latin1 character in an
69
   ASCII string). */
70
static void
71
unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
72
                   const char *str, Py_ssize_t len)
73
0
{
74
0
    int kind = PyUnicode_KIND(unicode);
75
0
    const void *data = PyUnicode_DATA(unicode);
76
0
    const char *end = str + len;
77
78
0
    assert(index + len <= PyUnicode_GET_LENGTH(unicode));
79
0
    switch (kind) {
80
0
    case PyUnicode_1BYTE_KIND: {
81
#ifdef Py_DEBUG
82
        if (PyUnicode_IS_ASCII(unicode)) {
83
            Py_UCS4 maxchar = ucs1lib_find_max_char(
84
                (const Py_UCS1*)str,
85
                (const Py_UCS1*)str + len);
86
            assert(maxchar < 128);
87
        }
88
#endif
89
0
        memcpy((char *) data + index, str, len);
90
0
        break;
91
0
    }
92
0
    case PyUnicode_2BYTE_KIND: {
93
0
        Py_UCS2 *start = (Py_UCS2 *)data + index;
94
0
        Py_UCS2 *ucs2 = start;
95
96
0
        for (; str < end; ++ucs2, ++str)
97
0
            *ucs2 = (Py_UCS2)*str;
98
99
0
        assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
100
0
        break;
101
0
    }
102
0
    case PyUnicode_4BYTE_KIND: {
103
0
        Py_UCS4 *start = (Py_UCS4 *)data + index;
104
0
        Py_UCS4 *ucs4 = start;
105
106
0
        for (; str < end; ++ucs4, ++str)
107
0
            *ucs4 = (Py_UCS4)*str;
108
109
0
        assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
110
0
        break;
111
0
    }
112
0
    default:
113
0
        Py_UNREACHABLE();
114
0
    }
115
0
}
116
117
118
static inline void
119
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
120
88.9M
{
121
88.9M
    writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
122
88.9M
    writer->data = PyUnicode_DATA(writer->buffer);
123
124
88.9M
    if (!writer->readonly) {
125
88.8M
        writer->kind = PyUnicode_KIND(writer->buffer);
126
88.8M
        writer->size = PyUnicode_GET_LENGTH(writer->buffer);
127
88.8M
    }
128
91.6k
    else {
129
        /* use a value smaller than PyUnicode_1BYTE_KIND() so
130
           _PyUnicodeWriter_PrepareKind() will copy the buffer. */
131
91.6k
        writer->kind = 0;
132
91.6k
        assert(writer->kind <= PyUnicode_1BYTE_KIND);
133
134
        /* Copy-on-write mode: set buffer size to 0 so
135
         * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
136
         * next write. */
137
91.6k
        writer->size = 0;
138
91.6k
    }
139
88.9M
}
140
141
142
void
143
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
144
42.4M
{
145
42.4M
    memset(writer, 0, sizeof(*writer));
146
147
    /* ASCII is the bare minimum */
148
42.4M
    writer->min_char = 127;
149
150
    /* use a kind value smaller than PyUnicode_1BYTE_KIND so
151
       _PyUnicodeWriter_PrepareKind() will copy the buffer. */
152
42.4M
    assert(writer->kind == 0);
153
42.4M
    assert(writer->kind < PyUnicode_1BYTE_KIND);
154
42.4M
}
155
156
157
PyUnicodeWriter*
158
PyUnicodeWriter_Create(Py_ssize_t length)
159
4.01M
{
160
4.01M
    if (length < 0) {
161
0
        PyErr_SetString(PyExc_ValueError,
162
0
                        "length must be positive");
163
0
        return NULL;
164
0
    }
165
166
4.01M
    const size_t size = sizeof(_PyUnicodeWriter);
167
4.01M
    PyUnicodeWriter *pub_writer;
168
4.01M
    pub_writer = _Py_FREELIST_POP_MEM(unicode_writers);
169
4.01M
    if (pub_writer == NULL) {
170
2.09M
        pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
171
2.09M
        if (pub_writer == NULL) {
172
0
            return (PyUnicodeWriter *)PyErr_NoMemory();
173
0
        }
174
2.09M
    }
175
4.01M
    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
176
177
4.01M
    _PyUnicodeWriter_Init(writer);
178
4.01M
    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
179
0
        PyUnicodeWriter_Discard(pub_writer);
180
0
        return NULL;
181
0
    }
182
4.01M
    writer->overallocate = 1;
183
184
4.01M
    return pub_writer;
185
4.01M
}
186
187
188
/* Release a writer without producing a string: drop its buffer and hand
   the writer memory to the unicode_writers freelist (or PyMem_Free).
   A NULL writer is accepted and ignored. */
void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
    if (writer != NULL) {
        _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
        _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
    }
}
196
197
198
// Initialize _PyUnicodeWriter with initial buffer
199
void
200
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
201
19.5M
{
202
19.5M
    memset(writer, 0, sizeof(*writer));
203
19.5M
    writer->buffer = buffer;
204
19.5M
    _PyUnicodeWriter_Update(writer);
205
19.5M
    writer->min_length = writer->size;
206
19.5M
}
207
208
209
int
210
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
211
                                 Py_ssize_t length, Py_UCS4 maxchar)
212
69.2M
{
213
69.2M
    Py_ssize_t newlen;
214
69.2M
    PyObject *newbuffer;
215
216
69.2M
    assert(length >= 0);
217
69.2M
    assert(maxchar <= _Py_MAX_UNICODE);
218
219
    /* ensure that the _PyUnicodeWriter_Prepare macro was used */
220
69.2M
    assert((maxchar > writer->maxchar && length >= 0)
221
69.2M
           || length > 0);
222
223
69.2M
    if (length > PY_SSIZE_T_MAX - writer->pos) {
224
0
        PyErr_NoMemory();
225
0
        return -1;
226
0
    }
227
69.2M
    newlen = writer->pos + length;
228
229
69.2M
    maxchar = Py_MAX(maxchar, writer->min_char);
230
231
69.2M
    if (writer->buffer == NULL) {
232
41.4M
        assert(!writer->readonly);
233
41.4M
        if (writer->overallocate
234
32.0M
            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
235
            /* overallocate to limit the number of realloc() */
236
32.0M
            newlen += newlen / OVERALLOCATE_FACTOR;
237
32.0M
        }
238
41.4M
        if (newlen < writer->min_length)
239
32.9M
            newlen = writer->min_length;
240
241
41.4M
        writer->buffer = PyUnicode_New(newlen, maxchar);
242
41.4M
        if (writer->buffer == NULL)
243
0
            return -1;
244
41.4M
    }
245
27.8M
    else if (newlen > writer->size) {
246
8.98M
        if (writer->overallocate
247
8.65M
            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
248
            /* overallocate to limit the number of realloc() */
249
8.65M
            newlen += newlen / OVERALLOCATE_FACTOR;
250
8.65M
        }
251
8.98M
        if (newlen < writer->min_length)
252
1.29k
            newlen = writer->min_length;
253
254
8.98M
        if (maxchar > writer->maxchar || writer->readonly) {
255
            /* resize + widen */
256
2.52M
            maxchar = Py_MAX(maxchar, writer->maxchar);
257
2.52M
            newbuffer = PyUnicode_New(newlen, maxchar);
258
2.52M
            if (newbuffer == NULL)
259
0
                return -1;
260
2.52M
            _PyUnicode_FastCopyCharacters(newbuffer, 0,
261
2.52M
                                          writer->buffer, 0, writer->pos);
262
2.52M
            Py_DECREF(writer->buffer);
263
2.52M
            writer->readonly = 0;
264
2.52M
        }
265
6.46M
        else {
266
6.46M
            newbuffer = _PyUnicode_ResizeCompact(writer->buffer, newlen);
267
6.46M
            if (newbuffer == NULL)
268
0
                return -1;
269
6.46M
        }
270
8.98M
        writer->buffer = newbuffer;
271
8.98M
    }
272
18.8M
    else if (maxchar > writer->maxchar) {
273
18.8M
        assert(!writer->readonly);
274
18.8M
        newbuffer = PyUnicode_New(writer->size, maxchar);
275
18.8M
        if (newbuffer == NULL)
276
0
            return -1;
277
18.8M
        _PyUnicode_FastCopyCharacters(newbuffer, 0,
278
18.8M
                                      writer->buffer, 0, writer->pos);
279
18.8M
        Py_SETREF(writer->buffer, newbuffer);
280
18.8M
    }
281
69.2M
    _PyUnicodeWriter_Update(writer);
282
69.2M
    return 0;
283
284
69.2M
#undef OVERALLOCATE_FACTOR
285
69.2M
}
286
287
int
288
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
289
                                     int kind)
290
217k
{
291
217k
    Py_UCS4 maxchar;
292
293
    /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
294
217k
    assert(writer->kind < kind);
295
296
217k
    switch (kind)
297
217k
    {
298
0
    case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
299
217k
    case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
300
0
    case PyUnicode_4BYTE_KIND: maxchar = _Py_MAX_UNICODE; break;
301
0
    default:
302
0
        Py_UNREACHABLE();
303
217k
    }
304
305
217k
    return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
306
217k
}
307
308
309
int
310
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
311
117M
{
312
117M
    return _PyUnicodeWriter_WriteCharInline(writer, ch);
313
117M
}
314
315
316
int
317
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
318
73.4M
{
319
73.4M
    if (ch > _Py_MAX_UNICODE) {
320
0
        PyErr_SetString(PyExc_ValueError,
321
0
                        "character must be in range(0x110000)");
322
0
        return -1;
323
0
    }
324
325
73.4M
    return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
326
73.4M
}
327
328
329
int
330
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
331
57.7M
{
332
57.7M
    assert(PyUnicode_Check(str));
333
334
57.7M
    Py_UCS4 maxchar;
335
57.7M
    Py_ssize_t len;
336
337
57.7M
    len = PyUnicode_GET_LENGTH(str);
338
57.7M
    if (len == 0)
339
4.63M
        return 0;
340
53.1M
    maxchar = PyUnicode_MAX_CHAR_VALUE(str);
341
53.1M
    if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
342
17.2M
        if (writer->buffer == NULL && !writer->overallocate) {
343
5.54k
            assert(_PyUnicode_CheckConsistency(str, 1));
344
5.54k
            writer->readonly = 1;
345
5.54k
            writer->buffer = Py_NewRef(str);
346
5.54k
            _PyUnicodeWriter_Update(writer);
347
5.54k
            writer->pos += len;
348
5.54k
            return 0;
349
5.54k
        }
350
17.2M
        if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
351
0
            return -1;
352
17.2M
    }
353
53.1M
    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
354
53.1M
                                  str, 0, len);
355
53.1M
    writer->pos += len;
356
53.1M
    return 0;
357
53.1M
}
358
359
360
int
361
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
362
4.75M
{
363
4.75M
    PyTypeObject *type = Py_TYPE(obj);
364
4.75M
    if (type == &PyUnicode_Type) {
365
4.75M
        return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
366
4.75M
    }
367
368
0
    if (type == &PyLong_Type) {
369
0
        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
370
0
    }
371
372
0
    PyObject *str = PyObject_Str(obj);
373
0
    if (str == NULL) {
374
0
        return -1;
375
0
    }
376
377
0
    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
378
0
    Py_DECREF(str);
379
0
    return res;
380
0
}
381
382
383
int
384
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
385
8.29M
{
386
8.29M
    if (obj == NULL) {
387
0
        return _PyUnicodeWriter_WriteASCIIString((_PyUnicodeWriter*)writer, "<NULL>", 6);
388
0
    }
389
390
8.29M
    if (Py_TYPE(obj) == &PyLong_Type) {
391
967k
        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
392
967k
    }
393
394
7.33M
    PyObject *repr = PyObject_Repr(obj);
395
7.33M
    if (repr == NULL) {
396
0
        return -1;
397
0
    }
398
399
7.33M
    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
400
7.33M
    Py_DECREF(repr);
401
7.33M
    return res;
402
7.33M
}
403
404
405
int
406
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
407
                                Py_ssize_t start, Py_ssize_t end)
408
42.4M
{
409
42.4M
    assert(0 <= start);
410
42.4M
    assert(end <= PyUnicode_GET_LENGTH(str));
411
42.4M
    assert(start <= end);
412
413
42.4M
    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
414
90
        return _PyUnicodeWriter_WriteStr(writer, str);
415
416
42.4M
    Py_ssize_t len = end - start;
417
42.4M
    if (len == 0) {
418
192
        return 0;
419
192
    }
420
421
42.4M
    Py_UCS4 maxchar;
422
42.4M
    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) {
423
11.4M
        maxchar = _PyUnicode_FindMaxChar(str, start, end);
424
11.4M
    }
425
30.9M
    else {
426
30.9M
        maxchar = writer->maxchar;
427
30.9M
    }
428
42.4M
    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) {
429
0
        return -1;
430
0
    }
431
432
42.4M
    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
433
42.4M
                                  str, start, len);
434
42.4M
    writer->pos += len;
435
42.4M
    return 0;
436
42.4M
}
437
438
439
int
440
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
441
                               Py_ssize_t start, Py_ssize_t end)
442
604k
{
443
604k
    if (!PyUnicode_Check(str)) {
444
0
        PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
445
0
        return -1;
446
0
    }
447
604k
    if (start < 0 || start > end) {
448
0
        PyErr_Format(PyExc_ValueError, "invalid start argument");
449
0
        return -1;
450
0
    }
451
604k
    if (end > PyUnicode_GET_LENGTH(str)) {
452
0
        PyErr_Format(PyExc_ValueError, "invalid end argument");
453
0
        return -1;
454
0
    }
455
456
604k
    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
457
604k
                                           start, end);
458
604k
}
459
460
461
int
462
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
463
                                  const char *ascii, Py_ssize_t len)
464
43.2M
{
465
43.2M
    if (len == -1)
466
0
        len = strlen(ascii);
467
468
43.2M
    if (len == 0) {
469
187k
        return 0;
470
187k
    }
471
472
43.2M
    assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
473
474
43.1M
    if (writer->buffer == NULL && !writer->overallocate) {
475
86.0k
        PyObject *str;
476
477
86.0k
        str = _PyUnicode_FromASCII(ascii, len);
478
86.0k
        if (str == NULL)
479
0
            return -1;
480
481
86.0k
        writer->readonly = 1;
482
86.0k
        writer->buffer = str;
483
86.0k
        _PyUnicodeWriter_Update(writer);
484
86.0k
        writer->pos += len;
485
86.0k
        return 0;
486
86.0k
    }
487
488
43.0M
    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
489
0
        return -1;
490
491
43.0M
    switch (writer->kind)
492
43.0M
    {
493
42.9M
    case PyUnicode_1BYTE_KIND:
494
42.9M
    {
495
42.9M
        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
496
42.9M
        Py_UCS1 *data = writer->data;
497
498
42.9M
        memcpy(data + writer->pos, str, len);
499
42.9M
        break;
500
0
    }
501
10.1k
    case PyUnicode_2BYTE_KIND:
502
10.1k
    {
503
10.1k
        _PyUnicode_CONVERT_BYTES(
504
10.1k
            Py_UCS1, Py_UCS2,
505
10.1k
            ascii, ascii + len,
506
10.1k
            (Py_UCS2 *)writer->data + writer->pos);
507
10.1k
        break;
508
0
    }
509
7.59k
    case PyUnicode_4BYTE_KIND:
510
7.59k
    {
511
7.59k
        _PyUnicode_CONVERT_BYTES(
512
7.59k
            Py_UCS1, Py_UCS4,
513
7.59k
            ascii, ascii + len,
514
7.59k
            (Py_UCS4 *)writer->data + writer->pos);
515
7.59k
        break;
516
0
    }
517
0
    default:
518
0
        Py_UNREACHABLE();
519
43.0M
    }
520
521
43.0M
    writer->pos += len;
522
43.0M
    return 0;
523
43.0M
}
524
525
526
int
527
PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
528
                           const char *str,
529
                           Py_ssize_t size)
530
1.31M
{
531
1.31M
    assert(writer != NULL);
532
1.31M
    _Py_AssertHoldsTstate();
533
534
1.31M
    _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
535
1.31M
    return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
536
1.31M
}
537
538
539
int
540
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
541
                          const char *str,
542
                          Py_ssize_t size)
543
0
{
544
0
    if (size < 0) {
545
0
        size = strlen(str);
546
0
    }
547
548
0
    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
549
0
    Py_ssize_t old_pos = _writer->pos;
550
0
    int res = _PyUnicode_DecodeUTF8Writer(_writer, str, size,
551
0
                                          _Py_ERROR_STRICT, NULL, NULL);
552
0
    if (res < 0) {
553
0
        _writer->pos = old_pos;
554
0
    }
555
0
    return res;
556
0
}
557
558
559
int
560
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
561
                                   const char *string,
562
                                   Py_ssize_t length,
563
                                   const char *errors,
564
                                   Py_ssize_t *consumed)
565
0
{
566
0
    if (length < 0) {
567
0
        length = strlen(string);
568
0
    }
569
570
0
    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
571
0
    Py_ssize_t old_pos = _writer->pos;
572
0
    int res = _PyUnicode_DecodeUTF8Writer(_writer, string, length,
573
0
                                          _Py_ERROR_UNKNOWN, errors,
574
0
                                          consumed);
575
0
    if (res < 0) {
576
0
        _writer->pos = old_pos;
577
0
        if (consumed) {
578
0
            *consumed = 0;
579
0
        }
580
0
    }
581
0
    return res;
582
0
}
583
584
585
int
586
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
587
                                   const char *str, Py_ssize_t len)
588
0
{
589
0
    Py_UCS4 maxchar;
590
591
0
    maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
592
0
    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
593
0
        return -1;
594
0
    unicode_write_cstr(writer->buffer, writer->pos, str, len);
595
0
    writer->pos += len;
596
0
    return 0;
597
0
}
598
599
600
PyObject *
601
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
602
58.6M
{
603
58.6M
    PyObject *str;
604
605
58.6M
    if (writer->pos == 0) {
606
1.27k
        Py_CLEAR(writer->buffer);
607
1.27k
        return _PyUnicode_GetEmpty();
608
1.27k
    }
609
610
58.6M
    str = writer->buffer;
611
58.6M
    writer->buffer = NULL;
612
613
58.6M
    if (writer->readonly) {
614
90.3k
        assert(PyUnicode_GET_LENGTH(str) == writer->pos);
615
90.3k
        return str;
616
90.3k
    }
617
618
58.5M
    if (PyUnicode_GET_LENGTH(str) != writer->pos) {
619
52.9M
        PyObject *str2;
620
52.9M
        str2 = _PyUnicode_ResizeCompact(str, writer->pos);
621
52.9M
        if (str2 == NULL) {
622
0
            Py_DECREF(str);
623
0
            return NULL;
624
0
        }
625
52.9M
        str = str2;
626
52.9M
    }
627
628
58.5M
    assert(_PyUnicode_CheckConsistency(str, 1));
629
58.5M
    return _PyUnicode_Result(str);
630
58.5M
}
631
632
633
PyObject*
634
PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
635
4.01M
{
636
4.01M
    PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
637
4.01M
    assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
638
4.01M
    _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
639
4.01M
    return str;
640
4.01M
}
641
642
643
void
644
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
645
3.30M
{
646
    Py_CLEAR(writer->buffer);
647
3.30M
}