Coverage Report

Created: 2026-02-09 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Objects/unicode_writer.c
Line
Count
Source
1
/*
2
3
Unicode implementation based on original code by Fredrik Lundh,
4
modified by Marc-Andre Lemburg <mal@lemburg.com>.
5
6
Major speed upgrades to the method implementations at the Reykjavik
7
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
8
9
Copyright (c) Corporation for National Research Initiatives.
10
11
--------------------------------------------------------------------
12
The original string type implementation is:
13
14
  Copyright (c) 1999 by Secret Labs AB
15
  Copyright (c) 1999 by Fredrik Lundh
16
17
By obtaining, using, and/or copying this software and/or its
18
associated documentation, you agree that you have read, understood,
19
and will comply with the following terms and conditions:
20
21
Permission to use, copy, modify, and distribute this software and its
22
associated documentation for any purpose and without fee is hereby
23
granted, provided that the above copyright notice appears in all
24
copies, and that both that copyright notice and this permission notice
25
appear in supporting documentation, and that the name of Secret Labs
26
AB or the author not be used in advertising or publicity pertaining to
27
distribution of the software without specific, written prior
28
permission.
29
30
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
31
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
32
FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
33
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
34
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
35
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
36
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
37
--------------------------------------------------------------------
38
39
*/
40
41
#include "Python.h"
42
#include "pycore_freelist.h"      // _Py_FREELIST_FREE()
43
#include "pycore_long.h"          // _PyLong_FormatWriter()
44
#include "pycore_unicodeobject.h" // _PyUnicode_Result()
45
46
47
#ifdef MS_WINDOWS
48
   /* On Windows, overallocating by 50% is the best factor */
49
#  define OVERALLOCATE_FACTOR 2
50
#else
51
   /* On Linux, overallocating by 25% is the best factor */
52
80.0M
#  define OVERALLOCATE_FACTOR 4
53
#endif
54
55
56
/* Compilation of templated routines */
57
58
#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty()
59
60
#include "stringlib/ucs1lib.h"
61
#include "stringlib/find_max_char.h"
62
#include "stringlib/undef.h"
63
64
65
/* Copy an ASCII or latin1 char* string into a Python Unicode string.
66
67
   WARNING: The function doesn't copy the terminating null character and
68
   doesn't check the maximum character (may write a latin1 character in an
69
   ASCII string). */
70
static void
71
unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
72
                   const char *str, Py_ssize_t len)
73
0
{
74
0
    int kind = PyUnicode_KIND(unicode);
75
0
    const void *data = PyUnicode_DATA(unicode);
76
0
    const char *end = str + len;
77
78
0
    assert(index + len <= PyUnicode_GET_LENGTH(unicode));
79
0
    switch (kind) {
80
0
    case PyUnicode_1BYTE_KIND: {
81
#ifdef Py_DEBUG
82
        if (PyUnicode_IS_ASCII(unicode)) {
83
            Py_UCS4 maxchar = ucs1lib_find_max_char(
84
                (const Py_UCS1*)str,
85
                (const Py_UCS1*)str + len);
86
            assert(maxchar < 128);
87
        }
88
#endif
89
0
        memcpy((char *) data + index, str, len);
90
0
        break;
91
0
    }
92
0
    case PyUnicode_2BYTE_KIND: {
93
0
        Py_UCS2 *start = (Py_UCS2 *)data + index;
94
0
        Py_UCS2 *ucs2 = start;
95
96
0
        for (; str < end; ++ucs2, ++str)
97
0
            *ucs2 = (Py_UCS2)*str;
98
99
0
        assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
100
0
        break;
101
0
    }
102
0
    case PyUnicode_4BYTE_KIND: {
103
0
        Py_UCS4 *start = (Py_UCS4 *)data + index;
104
0
        Py_UCS4 *ucs4 = start;
105
106
0
        for (; str < end; ++ucs4, ++str)
107
0
            *ucs4 = (Py_UCS4)*str;
108
109
0
        assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
110
0
        break;
111
0
    }
112
0
    default:
113
0
        Py_UNREACHABLE();
114
0
    }
115
0
}
116
117
118
static inline void
119
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
120
60.2M
{
121
60.2M
    writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
122
60.2M
    writer->data = PyUnicode_DATA(writer->buffer);
123
124
60.2M
    if (!writer->readonly) {
125
60.2M
        writer->kind = PyUnicode_KIND(writer->buffer);
126
60.2M
        writer->size = PyUnicode_GET_LENGTH(writer->buffer);
127
60.2M
    }
128
12.4k
    else {
129
        /* use a value smaller than PyUnicode_1BYTE_KIND() so
130
           _PyUnicodeWriter_PrepareKind() will copy the buffer. */
131
12.4k
        writer->kind = 0;
132
12.4k
        assert(writer->kind <= PyUnicode_1BYTE_KIND);
133
134
        /* Copy-on-write mode: set buffer size to 0 so
135
         * _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
136
         * next write. */
137
12.4k
        writer->size = 0;
138
12.4k
    }
139
60.2M
}
140
141
142
void
143
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
144
40.4M
{
145
40.4M
    memset(writer, 0, sizeof(*writer));
146
147
    /* ASCII is the bare minimum */
148
40.4M
    writer->min_char = 127;
149
150
    /* use a kind value smaller than PyUnicode_1BYTE_KIND so
151
       _PyUnicodeWriter_PrepareKind() will copy the buffer. */
152
40.4M
    assert(writer->kind == 0);
153
40.4M
    assert(writer->kind < PyUnicode_1BYTE_KIND);
154
40.4M
}
155
156
157
PyUnicodeWriter*
158
PyUnicodeWriter_Create(Py_ssize_t length)
159
3.12M
{
160
3.12M
    if (length < 0) {
161
0
        PyErr_SetString(PyExc_ValueError,
162
0
                        "length must be positive");
163
0
        return NULL;
164
0
    }
165
166
3.12M
    const size_t size = sizeof(_PyUnicodeWriter);
167
3.12M
    PyUnicodeWriter *pub_writer;
168
3.12M
    pub_writer = _Py_FREELIST_POP_MEM(unicode_writers);
169
3.12M
    if (pub_writer == NULL) {
170
1.75M
        pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
171
1.75M
        if (pub_writer == NULL) {
172
0
            return (PyUnicodeWriter *)PyErr_NoMemory();
173
0
        }
174
1.75M
    }
175
3.12M
    _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
176
177
3.12M
    _PyUnicodeWriter_Init(writer);
178
3.12M
    if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
179
0
        PyUnicodeWriter_Discard(pub_writer);
180
0
        return NULL;
181
0
    }
182
3.12M
    writer->overallocate = 1;
183
184
3.12M
    return pub_writer;
185
3.12M
}
186
187
188
/* Destroy a writer without producing a string.

   Releases the writer's buffer, then hands the writer memory back to the
   'unicode_writers' freelist (with PyMem_Free as the fallback deallocator).
   Passing NULL is allowed and does nothing. */
void
PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
{
    if (writer != NULL) {
        _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
        _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
    }
}
196
197
198
// Initialize _PyUnicodeWriter with initial buffer
199
void
200
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
201
6.34M
{
202
6.34M
    memset(writer, 0, sizeof(*writer));
203
6.34M
    writer->buffer = buffer;
204
6.34M
    _PyUnicodeWriter_Update(writer);
205
6.34M
    writer->min_length = writer->size;
206
6.34M
}
207
208
209
int
210
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
211
                                 Py_ssize_t length, Py_UCS4 maxchar)
212
53.8M
{
213
53.8M
    Py_ssize_t newlen;
214
53.8M
    PyObject *newbuffer;
215
216
53.8M
    assert(length >= 0);
217
53.8M
    assert(maxchar <= _Py_MAX_UNICODE);
218
219
    /* ensure that the _PyUnicodeWriter_Prepare macro was used */
220
53.8M
    assert((maxchar > writer->maxchar && length >= 0)
221
53.8M
           || length > 0);
222
223
53.8M
    if (length > PY_SSIZE_T_MAX - writer->pos) {
224
0
        PyErr_NoMemory();
225
0
        return -1;
226
0
    }
227
53.8M
    newlen = writer->pos + length;
228
229
53.8M
    maxchar = Py_MAX(maxchar, writer->min_char);
230
231
53.8M
    if (writer->buffer == NULL) {
232
38.1M
        assert(!writer->readonly);
233
38.1M
        if (writer->overallocate
234
30.8M
            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
235
            /* overallocate to limit the number of realloc() */
236
30.8M
            newlen += newlen / OVERALLOCATE_FACTOR;
237
30.8M
        }
238
38.1M
        if (newlen < writer->min_length)
239
34.4M
            newlen = writer->min_length;
240
241
38.1M
        writer->buffer = PyUnicode_New(newlen, maxchar);
242
38.1M
        if (writer->buffer == NULL)
243
0
            return -1;
244
38.1M
    }
245
15.7M
    else if (newlen > writer->size) {
246
9.49M
        if (writer->overallocate
247
9.20M
            && newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
248
            /* overallocate to limit the number of realloc() */
249
9.20M
            newlen += newlen / OVERALLOCATE_FACTOR;
250
9.20M
        }
251
9.49M
        if (newlen < writer->min_length)
252
1.29k
            newlen = writer->min_length;
253
254
9.49M
        if (maxchar > writer->maxchar || writer->readonly) {
255
            /* resize + widen */
256
2.75M
            maxchar = Py_MAX(maxchar, writer->maxchar);
257
2.75M
            newbuffer = PyUnicode_New(newlen, maxchar);
258
2.75M
            if (newbuffer == NULL)
259
0
                return -1;
260
2.75M
            _PyUnicode_FastCopyCharacters(newbuffer, 0,
261
2.75M
                                          writer->buffer, 0, writer->pos);
262
2.75M
            Py_DECREF(writer->buffer);
263
2.75M
            writer->readonly = 0;
264
2.75M
        }
265
6.73M
        else {
266
6.73M
            newbuffer = _PyUnicode_ResizeCompact(writer->buffer, newlen);
267
6.73M
            if (newbuffer == NULL)
268
0
                return -1;
269
6.73M
        }
270
9.49M
        writer->buffer = newbuffer;
271
9.49M
    }
272
6.24M
    else if (maxchar > writer->maxchar) {
273
6.24M
        assert(!writer->readonly);
274
6.24M
        newbuffer = PyUnicode_New(writer->size, maxchar);
275
6.24M
        if (newbuffer == NULL)
276
0
            return -1;
277
6.24M
        _PyUnicode_FastCopyCharacters(newbuffer, 0,
278
6.24M
                                      writer->buffer, 0, writer->pos);
279
6.24M
        Py_SETREF(writer->buffer, newbuffer);
280
6.24M
    }
281
53.8M
    _PyUnicodeWriter_Update(writer);
282
53.8M
    return 0;
283
284
53.8M
#undef OVERALLOCATE_FACTOR
285
53.8M
}
286
287
int
288
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
289
                                     int kind)
290
169k
{
291
169k
    Py_UCS4 maxchar;
292
293
    /* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
294
169k
    assert(writer->kind < kind);
295
296
169k
    switch (kind)
297
169k
    {
298
0
    case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
299
169k
    case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
300
0
    case PyUnicode_4BYTE_KIND: maxchar = _Py_MAX_UNICODE; break;
301
0
    default:
302
0
        Py_UNREACHABLE();
303
169k
    }
304
305
169k
    return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
306
169k
}
307
308
309
int
310
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
311
89.5M
{
312
89.5M
    return _PyUnicodeWriter_WriteCharInline(writer, ch);
313
89.5M
}
314
315
316
int
317
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
318
62.4M
{
319
62.4M
    if (ch > _Py_MAX_UNICODE) {
320
0
        PyErr_SetString(PyExc_ValueError,
321
0
                        "character must be in range(0x110000)");
322
0
        return -1;
323
0
    }
324
325
62.4M
    return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
326
62.4M
}
327
328
329
int
330
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
331
61.0M
{
332
61.0M
    assert(PyUnicode_Check(str));
333
334
61.0M
    Py_UCS4 maxchar;
335
61.0M
    Py_ssize_t len;
336
337
61.0M
    len = PyUnicode_GET_LENGTH(str);
338
61.0M
    if (len == 0)
339
10.6M
        return 0;
340
50.3M
    maxchar = PyUnicode_MAX_CHAR_VALUE(str);
341
50.3M
    if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
342
16.5M
        if (writer->buffer == NULL && !writer->overallocate) {
343
5.49k
            assert(_PyUnicode_CheckConsistency(str, 1));
344
5.49k
            writer->readonly = 1;
345
5.49k
            writer->buffer = Py_NewRef(str);
346
5.49k
            _PyUnicodeWriter_Update(writer);
347
5.49k
            writer->pos += len;
348
5.49k
            return 0;
349
5.49k
        }
350
16.5M
        if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
351
0
            return -1;
352
16.5M
    }
353
50.3M
    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
354
50.3M
                                  str, 0, len);
355
50.3M
    writer->pos += len;
356
50.3M
    return 0;
357
50.3M
}
358
359
360
int
361
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
362
3.51M
{
363
3.51M
    PyTypeObject *type = Py_TYPE(obj);
364
3.51M
    if (type == &PyUnicode_Type) {
365
3.51M
        return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
366
3.51M
    }
367
368
0
    if (type == &PyLong_Type) {
369
0
        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
370
0
    }
371
372
0
    PyObject *str = PyObject_Str(obj);
373
0
    if (str == NULL) {
374
0
        return -1;
375
0
    }
376
377
0
    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
378
0
    Py_DECREF(str);
379
0
    return res;
380
0
}
381
382
383
int
384
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
385
5.98M
{
386
5.98M
    if (Py_TYPE(obj) == &PyLong_Type) {
387
402k
        return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
388
402k
    }
389
390
5.58M
    PyObject *repr = PyObject_Repr(obj);
391
5.58M
    if (repr == NULL) {
392
0
        return -1;
393
0
    }
394
395
5.58M
    int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
396
5.58M
    Py_DECREF(repr);
397
5.58M
    return res;
398
5.58M
}
399
400
401
int
402
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
403
                                Py_ssize_t start, Py_ssize_t end)
404
51.8M
{
405
51.8M
    assert(0 <= start);
406
51.8M
    assert(end <= PyUnicode_GET_LENGTH(str));
407
51.8M
    assert(start <= end);
408
409
51.8M
    if (start == 0 && end == PyUnicode_GET_LENGTH(str))
410
86
        return _PyUnicodeWriter_WriteStr(writer, str);
411
412
51.8M
    Py_ssize_t len = end - start;
413
51.8M
    if (len == 0) {
414
96
        return 0;
415
96
    }
416
417
51.8M
    Py_UCS4 maxchar;
418
51.8M
    if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) {
419
14.0M
        maxchar = _PyUnicode_FindMaxChar(str, start, end);
420
14.0M
    }
421
37.8M
    else {
422
37.8M
        maxchar = writer->maxchar;
423
37.8M
    }
424
51.8M
    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) {
425
0
        return -1;
426
0
    }
427
428
51.8M
    _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
429
51.8M
                                  str, start, len);
430
51.8M
    writer->pos += len;
431
51.8M
    return 0;
432
51.8M
}
433
434
435
int
436
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
437
                               Py_ssize_t start, Py_ssize_t end)
438
438k
{
439
438k
    if (!PyUnicode_Check(str)) {
440
0
        PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
441
0
        return -1;
442
0
    }
443
438k
    if (start < 0 || start > end) {
444
0
        PyErr_Format(PyExc_ValueError, "invalid start argument");
445
0
        return -1;
446
0
    }
447
438k
    if (end > PyUnicode_GET_LENGTH(str)) {
448
0
        PyErr_Format(PyExc_ValueError, "invalid end argument");
449
0
        return -1;
450
0
    }
451
452
438k
    return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
453
438k
                                           start, end);
454
438k
}
455
456
457
int
458
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
459
                                  const char *ascii, Py_ssize_t len)
460
37.4M
{
461
37.4M
    if (len == -1)
462
0
        len = strlen(ascii);
463
464
37.4M
    assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
465
466
37.4M
    if (writer->buffer == NULL && !writer->overallocate) {
467
6.91k
        PyObject *str;
468
469
6.91k
        str = _PyUnicode_FromASCII(ascii, len);
470
6.91k
        if (str == NULL)
471
0
            return -1;
472
473
6.91k
        writer->readonly = 1;
474
6.91k
        writer->buffer = str;
475
6.91k
        _PyUnicodeWriter_Update(writer);
476
6.91k
        writer->pos += len;
477
6.91k
        return 0;
478
6.91k
    }
479
480
37.4M
    if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
481
0
        return -1;
482
483
37.4M
    switch (writer->kind)
484
37.4M
    {
485
37.4M
    case PyUnicode_1BYTE_KIND:
486
37.4M
    {
487
37.4M
        const Py_UCS1 *str = (const Py_UCS1 *)ascii;
488
37.4M
        Py_UCS1 *data = writer->data;
489
490
37.4M
        memcpy(data + writer->pos, str, len);
491
37.4M
        break;
492
0
    }
493
12.0k
    case PyUnicode_2BYTE_KIND:
494
12.0k
    {
495
12.0k
        _PyUnicode_CONVERT_BYTES(
496
12.0k
            Py_UCS1, Py_UCS2,
497
12.0k
            ascii, ascii + len,
498
12.0k
            (Py_UCS2 *)writer->data + writer->pos);
499
12.0k
        break;
500
0
    }
501
3.80k
    case PyUnicode_4BYTE_KIND:
502
3.80k
    {
503
3.80k
        _PyUnicode_CONVERT_BYTES(
504
3.80k
            Py_UCS1, Py_UCS4,
505
3.80k
            ascii, ascii + len,
506
3.80k
            (Py_UCS4 *)writer->data + writer->pos);
507
3.80k
        break;
508
0
    }
509
0
    default:
510
0
        Py_UNREACHABLE();
511
37.4M
    }
512
513
37.4M
    writer->pos += len;
514
37.4M
    return 0;
515
37.4M
}
516
517
518
int
519
PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
520
                           const char *str,
521
                           Py_ssize_t size)
522
904k
{
523
904k
    assert(writer != NULL);
524
904k
    _Py_AssertHoldsTstate();
525
526
904k
    _PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
527
904k
    return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
528
904k
}
529
530
531
int
532
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
533
                          const char *str,
534
                          Py_ssize_t size)
535
0
{
536
0
    if (size < 0) {
537
0
        size = strlen(str);
538
0
    }
539
540
0
    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
541
0
    Py_ssize_t old_pos = _writer->pos;
542
0
    int res = _PyUnicode_DecodeUTF8Writer(_writer, str, size,
543
0
                                          _Py_ERROR_STRICT, NULL, NULL);
544
0
    if (res < 0) {
545
0
        _writer->pos = old_pos;
546
0
    }
547
0
    return res;
548
0
}
549
550
551
int
552
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
553
                                   const char *string,
554
                                   Py_ssize_t length,
555
                                   const char *errors,
556
                                   Py_ssize_t *consumed)
557
0
{
558
0
    if (length < 0) {
559
0
        length = strlen(string);
560
0
    }
561
562
0
    _PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
563
0
    Py_ssize_t old_pos = _writer->pos;
564
0
    int res = _PyUnicode_DecodeUTF8Writer(_writer, string, length,
565
0
                                          _Py_ERROR_UNKNOWN, errors,
566
0
                                          consumed);
567
0
    if (res < 0) {
568
0
        _writer->pos = old_pos;
569
0
        if (consumed) {
570
0
            *consumed = 0;
571
0
        }
572
0
    }
573
0
    return res;
574
0
}
575
576
577
int
578
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
579
                                   const char *str, Py_ssize_t len)
580
0
{
581
0
    Py_UCS4 maxchar;
582
583
0
    maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
584
0
    if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
585
0
        return -1;
586
0
    unicode_write_cstr(writer->buffer, writer->pos, str, len);
587
0
    writer->pos += len;
588
0
    return 0;
589
0
}
590
591
592
PyObject *
593
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
594
42.6M
{
595
42.6M
    PyObject *str;
596
597
42.6M
    if (writer->pos == 0) {
598
661
        Py_CLEAR(writer->buffer);
599
661
        return _PyUnicode_GetEmpty();
600
661
    }
601
602
42.6M
    str = writer->buffer;
603
42.6M
    writer->buffer = NULL;
604
605
42.6M
    if (writer->readonly) {
606
11.1k
        assert(PyUnicode_GET_LENGTH(str) == writer->pos);
607
11.1k
        return str;
608
11.1k
    }
609
610
42.6M
    if (PyUnicode_GET_LENGTH(str) != writer->pos) {
611
41.1M
        PyObject *str2;
612
41.1M
        str2 = _PyUnicode_ResizeCompact(str, writer->pos);
613
41.1M
        if (str2 == NULL) {
614
0
            Py_DECREF(str);
615
0
            return NULL;
616
0
        }
617
41.1M
        str = str2;
618
41.1M
    }
619
620
42.6M
    assert(_PyUnicode_CheckConsistency(str, 1));
621
42.6M
    return _PyUnicode_Result(str);
622
42.6M
}
623
624
625
PyObject*
626
PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
627
3.12M
{
628
3.12M
    PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
629
3.12M
    assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
630
3.12M
    _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
631
3.12M
    return str;
632
3.12M
}
633
634
635
void
636
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
637
4.05M
{
638
    Py_CLEAR(writer->buffer);
639
4.05M
}