/src/cpython/Objects/unicode_format.c
Line | Count | Source |
1 | | /* |
2 | | |
3 | | Unicode implementation based on original code by Fredrik Lundh, |
4 | | modified by Marc-Andre Lemburg <mal@lemburg.com>. |
5 | | |
6 | | Major speed upgrades to the method implementations at the Reykjavik |
7 | | NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. |
8 | | |
9 | | Copyright (c) Corporation for National Research Initiatives. |
10 | | |
11 | | -------------------------------------------------------------------- |
12 | | The original string type implementation is: |
13 | | |
14 | | Copyright (c) 1999 by Secret Labs AB |
15 | | Copyright (c) 1999 by Fredrik Lundh |
16 | | |
17 | | By obtaining, using, and/or copying this software and/or its |
18 | | associated documentation, you agree that you have read, understood, |
19 | | and will comply with the following terms and conditions: |
20 | | |
21 | | Permission to use, copy, modify, and distribute this software and its |
22 | | associated documentation for any purpose and without fee is hereby |
23 | | granted, provided that the above copyright notice appears in all |
24 | | copies, and that both that copyright notice and this permission notice |
25 | | appear in supporting documentation, and that the name of Secret Labs |
26 | | AB or the author not be used in advertising or publicity pertaining to |
27 | | distribution of the software without specific, written prior |
28 | | permission. |
29 | | |
30 | | SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO |
31 | | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
32 | | FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR |
33 | | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
34 | | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
35 | | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
36 | | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
37 | | -------------------------------------------------------------------- |
38 | | |
39 | | */ |
40 | | |
41 | | // PyUnicode_Format() implementation |
42 | | |
43 | | #include "Python.h" |
44 | | #include "pycore_abstract.h" // _PyIndex_Check() |
45 | | #include "pycore_format.h" // F_ALT |
46 | | #include "pycore_long.h" // _PyLong_FormatWriter() |
47 | | #include "pycore_object.h" // _PyObject_IsUniquelyReferenced() |
48 | | #include "pycore_unicodeobject.h" // _Py_MAX_UNICODE |
49 | | |
50 | | |
/* File-local short names for internal helpers.
   MAX_UNICODE is the largest code point accepted by the "%c" conversion
   (see formatchar()); ensure_unicode is an alias for
   _PyUnicode_EnsureUnicode(). */
#define MAX_UNICODE _Py_MAX_UNICODE
#define ensure_unicode _PyUnicode_EnsureUnicode
53 | | |
54 | | struct unicode_formatter_t { |
55 | | PyObject *args; |
56 | | int args_owned; |
57 | | Py_ssize_t arglen, argidx; |
58 | | PyObject *dict; |
59 | | |
60 | | int fmtkind; |
61 | | Py_ssize_t fmtcnt, fmtpos; |
62 | | const void *fmtdata; |
63 | | PyObject *fmtstr; |
64 | | |
65 | | _PyUnicodeWriter writer; |
66 | | }; |
67 | | |
68 | | |
/* Description of a single "%..." conversion specifier while it is being
   parsed and formatted. */
struct unicode_format_arg_t {
    /* Most recently read format character; once parsing succeeds this is
       the conversion type ('s', 'd', 'x', ...). */
    Py_UCS4 ch;
    /* Bitmask of F_LJUST, F_SIGN, F_BLANK, F_ALT and F_ZERO. */
    int flags;
    /* Minimum field width; compared against -1 to detect "not specified". */
    Py_ssize_t width;
    /* Precision; compared against -1 to detect "not specified". */
    int prec;
    /* Set to 1 by unicode_format_arg_format() for numeric conversions;
       cleared by unicode_format_arg_output() when no sign character has
       to be written. */
    int sign;
    /* Position in the format string reported in error messages. */
    Py_ssize_t fmtstart;
    /* Mapping key of a "%(key)" specifier, or NULL when there is none. */
    PyObject *key;
};
78 | | |
79 | | |
// Raise exception EXC with a message that identifies the offending argument.
//
// The message is prefixed with "format argument <key>: " when arg->key is
// set, with "format argument <index>: " when ctx->argidx is non-negative,
// and with "format argument: " otherwise.
//
// The macro reads the local variables `arg` and `ctx`, so both must be in
// scope wherever it is expanded.  FMT must be a string literal because it is
// concatenated with the prefix, and at least one variadic argument is
// required: use FORMAT_ERROR(EXC, "...%s", "") when there are no arguments.
#define FORMAT_ERROR(EXC, FMT, ...) do {                                    \
    if (arg->key != NULL) {                                                 \
        PyErr_Format((EXC), "format argument %R: " FMT,                     \
                     arg->key, __VA_ARGS__);                                \
    }                                                                       \
    else if (ctx->argidx >= 0) {                                            \
        PyErr_Format((EXC), "format argument %zd: " FMT,                    \
                     ctx->argidx, __VA_ARGS__);                             \
    }                                                                       \
    else {                                                                  \
        PyErr_Format((EXC), "format argument: " FMT, __VA_ARGS__);          \
    }                                                                       \
} while (0)
94 | | |
95 | | |
96 | | static PyObject * |
97 | | unicode_format_getnextarg(struct unicode_formatter_t *ctx, int allowone) |
98 | 69.1M | { |
99 | 69.1M | Py_ssize_t argidx = ctx->argidx; |
100 | | |
101 | 69.1M | if (argidx < ctx->arglen && (allowone || ctx->arglen >= 0)) { |
102 | 69.1M | ctx->argidx++; |
103 | 69.1M | if (ctx->arglen >= 0) { |
104 | 53.7M | return PyTuple_GetItem(ctx->args, argidx); |
105 | 53.7M | } |
106 | 15.3M | else if (allowone) { |
107 | 15.3M | return ctx->args; |
108 | 15.3M | } |
109 | 69.1M | } |
110 | 0 | PyErr_Format(PyExc_TypeError, |
111 | 0 | "not enough arguments for format string (got %zd)", |
112 | 0 | ctx->arglen < 0 ? 1 : ctx->arglen); |
113 | 0 | return NULL; |
114 | 69.1M | } |
115 | | |
116 | | |
117 | | /* Returns a new reference to a PyUnicode object, or NULL on failure. */ |
118 | | |
119 | | /* Format a float into the writer if the writer is not NULL, or into *p_output |
120 | | otherwise. |
121 | | |
122 | | Return 0 on success, raise an exception and return -1 on error. */ |
123 | | static int |
124 | | formatfloat(PyObject *v, |
125 | | struct unicode_formatter_t *ctx, |
126 | | struct unicode_format_arg_t *arg, |
127 | | PyObject **p_output, |
128 | | _PyUnicodeWriter *writer) |
129 | 106 | { |
130 | 106 | char *p; |
131 | 106 | double x; |
132 | 106 | Py_ssize_t len; |
133 | 106 | int prec; |
134 | 106 | int dtoa_flags = 0; |
135 | | |
136 | 106 | x = PyFloat_AsDouble(v); |
137 | 106 | if (x == -1.0 && PyErr_Occurred()) { |
138 | 0 | if (PyErr_ExceptionMatches(PyExc_TypeError)) { |
139 | 0 | FORMAT_ERROR(PyExc_TypeError, |
140 | 0 | "%%%c requires a real number, not %T", |
141 | 0 | arg->ch, v); |
142 | 0 | } |
143 | 0 | return -1; |
144 | 0 | } |
145 | | |
146 | 106 | prec = arg->prec; |
147 | 106 | if (prec < 0) |
148 | 0 | prec = 6; |
149 | | |
150 | 106 | if (arg->flags & F_ALT) |
151 | 0 | dtoa_flags |= Py_DTSF_ALT; |
152 | 106 | p = PyOS_double_to_string(x, arg->ch, prec, dtoa_flags, NULL); |
153 | 106 | if (p == NULL) |
154 | 0 | return -1; |
155 | 106 | len = strlen(p); |
156 | 106 | if (writer) { |
157 | 0 | if (_PyUnicodeWriter_WriteASCIIString(writer, p, len) < 0) { |
158 | 0 | PyMem_Free(p); |
159 | 0 | return -1; |
160 | 0 | } |
161 | 0 | } |
162 | 106 | else |
163 | 106 | *p_output = _PyUnicode_FromASCII(p, len); |
164 | 106 | PyMem_Free(p); |
165 | 106 | return 0; |
166 | 106 | } |
167 | | |
168 | | |
169 | | /* formatlong() emulates the format codes d, u, o, x and X, and |
170 | | * the F_ALT flag, for Python's long (unbounded) ints. It's not used for |
171 | | * Python's regular ints. |
172 | | * Return value: a new PyUnicodeObject*, or NULL if error. |
173 | | * The output string is of the form |
174 | | * "-"? ("0x" | "0X")? digit+ |
175 | | * "0x"/"0X" are present only for x and X conversions, with F_ALT |
176 | | * set in flags. The case of hex digits will be correct, |
177 | | * There will be at least prec digits, zero-filled on the left if |
178 | | * necessary to get that many. |
179 | | * val object to be converted |
180 | | * flags bitmask of format flags; only F_ALT is looked at |
181 | | * prec minimum number of digits; 0-fill on left if needed |
182 | | * type a character in [duoxX]; u acts the same as d |
183 | | * |
184 | | * CAUTION: o, x and X conversions on regular ints can never |
185 | | * produce a '-' sign, but can for Python's unbounded ints. |
186 | | */ |
187 | | PyObject * |
188 | | _PyUnicode_FormatLong(PyObject *val, int alt, int prec, int type) |
189 | 9.62M | { |
190 | 9.62M | PyObject *result = NULL; |
191 | 9.62M | char *buf; |
192 | 9.62M | Py_ssize_t i; |
193 | 9.62M | int sign; /* 1 if '-', else 0 */ |
194 | 9.62M | int len; /* number of characters */ |
195 | 9.62M | Py_ssize_t llen; |
196 | 9.62M | int numdigits; /* len == numnondigits + numdigits */ |
197 | 9.62M | int numnondigits = 0; |
198 | | |
199 | | /* Avoid exceeding SSIZE_T_MAX */ |
200 | 9.62M | if (prec > INT_MAX-3) { |
201 | 0 | PyErr_SetString(PyExc_OverflowError, |
202 | 0 | "precision too large"); |
203 | 0 | return NULL; |
204 | 0 | } |
205 | | |
206 | 9.62M | assert(PyLong_Check(val)); |
207 | | |
208 | 9.62M | switch (type) { |
209 | 0 | default: |
210 | 0 | Py_UNREACHABLE(); |
211 | 3.66M | case 'd': |
212 | 3.66M | case 'i': |
213 | 3.66M | case 'u': |
214 | | /* int and int subclasses should print numerically when a numeric */ |
215 | | /* format code is used (see issue18780) */ |
216 | 3.66M | result = PyNumber_ToBase(val, 10); |
217 | 3.66M | break; |
218 | 0 | case 'o': |
219 | 0 | numnondigits = 2; |
220 | 0 | result = PyNumber_ToBase(val, 8); |
221 | 0 | break; |
222 | 87 | case 'x': |
223 | 5.96M | case 'X': |
224 | 5.96M | numnondigits = 2; |
225 | 5.96M | result = PyNumber_ToBase(val, 16); |
226 | 5.96M | break; |
227 | 9.62M | } |
228 | 9.62M | if (!result) |
229 | 0 | return NULL; |
230 | | |
231 | 9.62M | assert(_PyUnicode_IsModifiable(result)); |
232 | 9.62M | assert(PyUnicode_IS_ASCII(result)); |
233 | | |
234 | | /* To modify the string in-place, there can only be one reference. */ |
235 | 9.62M | if (!_PyObject_IsUniquelyReferenced(result)) { |
236 | 0 | Py_DECREF(result); |
237 | 0 | PyErr_BadInternalCall(); |
238 | 0 | return NULL; |
239 | 0 | } |
240 | 9.62M | buf = PyUnicode_DATA(result); |
241 | 9.62M | llen = PyUnicode_GET_LENGTH(result); |
242 | 9.62M | if (llen > INT_MAX) { |
243 | 0 | Py_DECREF(result); |
244 | 0 | PyErr_SetString(PyExc_ValueError, |
245 | 0 | "string too large in _PyUnicode_FormatLong"); |
246 | 0 | return NULL; |
247 | 0 | } |
248 | 9.62M | len = (int)llen; |
249 | 9.62M | sign = buf[0] == '-'; |
250 | 9.62M | numnondigits += sign; |
251 | 9.62M | numdigits = len - numnondigits; |
252 | 9.62M | assert(numdigits > 0); |
253 | | |
254 | | /* Get rid of base marker unless F_ALT */ |
255 | 9.62M | if (((alt) == 0 && |
256 | 9.62M | (type == 'o' || type == 'x' || type == 'X'))) { |
257 | 5.96M | assert(buf[sign] == '0'); |
258 | 5.96M | assert(buf[sign+1] == 'x' || buf[sign+1] == 'X' || |
259 | 5.96M | buf[sign+1] == 'o'); |
260 | 5.96M | numnondigits -= 2; |
261 | 5.96M | buf += 2; |
262 | 5.96M | len -= 2; |
263 | 5.96M | if (sign) |
264 | 0 | buf[0] = '-'; |
265 | 5.96M | assert(len == numnondigits + numdigits); |
266 | 5.96M | assert(numdigits > 0); |
267 | 5.96M | } |
268 | | |
269 | | /* Fill with leading zeroes to meet minimum width. */ |
270 | 9.62M | if (prec > numdigits) { |
271 | 0 | PyObject *r1 = PyBytes_FromStringAndSize(NULL, |
272 | 0 | numnondigits + prec); |
273 | 0 | char *b1; |
274 | 0 | if (!r1) { |
275 | 0 | Py_DECREF(result); |
276 | 0 | return NULL; |
277 | 0 | } |
278 | 0 | b1 = PyBytes_AS_STRING(r1); |
279 | 0 | for (i = 0; i < numnondigits; ++i) |
280 | 0 | *b1++ = *buf++; |
281 | 0 | for (i = 0; i < prec - numdigits; i++) |
282 | 0 | *b1++ = '0'; |
283 | 0 | for (i = 0; i < numdigits; i++) |
284 | 0 | *b1++ = *buf++; |
285 | 0 | *b1 = '\0'; |
286 | 0 | Py_SETREF(result, r1); |
287 | 0 | buf = PyBytes_AS_STRING(result); |
288 | 0 | len = numnondigits + prec; |
289 | 0 | } |
290 | | |
291 | | /* Fix up case for hex conversions. */ |
292 | 9.62M | if (type == 'X') { |
293 | | /* Need to convert all lower case letters to upper case. |
294 | | and need to convert 0x to 0X (and -0x to -0X). */ |
295 | 41.2M | for (i = 0; i < len; i++) |
296 | 35.2M | if (buf[i] >= 'a' && buf[i] <= 'x') |
297 | 7.41M | buf[i] -= 'a'-'A'; |
298 | 5.96M | } |
299 | 9.62M | if (!PyUnicode_Check(result) |
300 | 9.62M | || buf != PyUnicode_DATA(result)) { |
301 | 5.96M | PyObject *unicode; |
302 | 5.96M | unicode = _PyUnicode_FromASCII(buf, len); |
303 | 5.96M | Py_SETREF(result, unicode); |
304 | 5.96M | } |
305 | 3.66M | else if (len != PyUnicode_GET_LENGTH(result)) { |
306 | 0 | if (PyUnicode_Resize(&result, len) < 0) |
307 | 0 | Py_CLEAR(result); |
308 | 0 | } |
309 | 9.62M | return result; |
310 | 9.62M | } |
311 | | |
312 | | |
313 | | /* Format an integer or a float as an integer. |
314 | | * Return 1 if the number has been formatted into the writer, |
315 | | * 0 if the number has been formatted into *p_output |
316 | | * -1 and raise an exception on error */ |
317 | | static int |
318 | | mainformatlong(PyObject *v, |
319 | | struct unicode_formatter_t *ctx, |
320 | | struct unicode_format_arg_t *arg, |
321 | | PyObject **p_output, |
322 | | _PyUnicodeWriter *writer) |
323 | 23.7M | { |
324 | 23.7M | PyObject *iobj, *res; |
325 | 23.7M | char type = (char)arg->ch; |
326 | | |
327 | 23.7M | if (!PyNumber_Check(v)) |
328 | 4.77M | goto wrongtype; |
329 | | |
330 | | /* make sure number is a type of integer for o, x, and X */ |
331 | 18.9M | if (!PyLong_Check(v)) { |
332 | 0 | if (type == 'o' || type == 'x' || type == 'X') { |
333 | 0 | iobj = _PyNumber_Index(v); |
334 | 0 | } |
335 | 0 | else { |
336 | 0 | iobj = PyNumber_Long(v); |
337 | 0 | } |
338 | 0 | if (iobj == NULL ) { |
339 | 0 | if (PyErr_ExceptionMatches(PyExc_TypeError)) |
340 | 0 | goto wrongtype; |
341 | 0 | return -1; |
342 | 0 | } |
343 | 0 | assert(PyLong_Check(iobj)); |
344 | 0 | } |
345 | 18.9M | else { |
346 | 18.9M | iobj = Py_NewRef(v); |
347 | 18.9M | } |
348 | | |
349 | 18.9M | if (PyLong_CheckExact(v) |
350 | 18.9M | && arg->width == -1 && arg->prec == -1 |
351 | 15.3M | && !(arg->flags & (F_SIGN | F_BLANK)) |
352 | 15.3M | && type != 'X') |
353 | 9.36M | { |
354 | | /* Fast path */ |
355 | 9.36M | int alternate = arg->flags & F_ALT; |
356 | 9.36M | int base; |
357 | | |
358 | 9.36M | switch(type) |
359 | 9.36M | { |
360 | 0 | default: |
361 | 0 | Py_UNREACHABLE(); |
362 | 9.36M | case 'd': |
363 | 9.36M | case 'i': |
364 | 9.36M | case 'u': |
365 | 9.36M | base = 10; |
366 | 9.36M | break; |
367 | 0 | case 'o': |
368 | 0 | base = 8; |
369 | 0 | break; |
370 | 33 | case 'x': |
371 | 33 | case 'X': |
372 | 33 | base = 16; |
373 | 33 | break; |
374 | 9.36M | } |
375 | | |
376 | 9.36M | if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) { |
377 | 0 | Py_DECREF(iobj); |
378 | 0 | return -1; |
379 | 0 | } |
380 | 9.36M | Py_DECREF(iobj); |
381 | 9.36M | return 1; |
382 | 9.36M | } |
383 | | |
384 | 9.62M | res = _PyUnicode_FormatLong(iobj, arg->flags & F_ALT, arg->prec, type); |
385 | 9.62M | Py_DECREF(iobj); |
386 | 9.62M | if (res == NULL) |
387 | 0 | return -1; |
388 | 9.62M | *p_output = res; |
389 | 9.62M | return 0; |
390 | | |
391 | 4.77M | wrongtype: |
392 | 4.77M | switch(type) |
393 | 4.77M | { |
394 | 0 | case 'o': |
395 | 0 | case 'x': |
396 | 0 | case 'X': |
397 | 0 | FORMAT_ERROR(PyExc_TypeError, |
398 | 0 | "%%%c requires an integer, not %T", |
399 | 0 | arg->ch, v); |
400 | 0 | break; |
401 | 4.77M | default: |
402 | 4.77M | FORMAT_ERROR(PyExc_TypeError, |
403 | 4.77M | "%%%c requires a real number, not %T", |
404 | 4.77M | arg->ch, v); |
405 | 4.77M | break; |
406 | 4.77M | } |
407 | 4.77M | return -1; |
408 | 4.77M | } |
409 | | |
410 | | |
411 | | static Py_UCS4 |
412 | | formatchar(PyObject *v, |
413 | | struct unicode_formatter_t *ctx, |
414 | | struct unicode_format_arg_t *arg) |
415 | 0 | { |
416 | | /* presume that the buffer is at least 3 characters long */ |
417 | 0 | if (PyUnicode_Check(v)) { |
418 | 0 | if (PyUnicode_GET_LENGTH(v) == 1) { |
419 | 0 | return PyUnicode_READ_CHAR(v, 0); |
420 | 0 | } |
421 | 0 | FORMAT_ERROR(PyExc_TypeError, |
422 | 0 | "%%c requires an integer or a unicode character, " |
423 | 0 | "not a string of length %zd", |
424 | 0 | PyUnicode_GET_LENGTH(v)); |
425 | 0 | return (Py_UCS4) -1; |
426 | 0 | } |
427 | 0 | else { |
428 | 0 | int overflow; |
429 | 0 | long x = PyLong_AsLongAndOverflow(v, &overflow); |
430 | 0 | if (x == -1 && PyErr_Occurred()) { |
431 | 0 | if (PyErr_ExceptionMatches(PyExc_TypeError)) { |
432 | 0 | FORMAT_ERROR(PyExc_TypeError, |
433 | 0 | "%%c requires an integer or a unicode character, " |
434 | 0 | "not %T", |
435 | 0 | v); |
436 | 0 | } |
437 | 0 | return (Py_UCS4) -1; |
438 | 0 | } |
439 | | |
440 | 0 | if (x < 0 || x > MAX_UNICODE) { |
441 | | /* this includes an overflow in converting to C long */ |
442 | 0 | FORMAT_ERROR(PyExc_OverflowError, |
443 | 0 | "%%c argument not in range(0x110000)%s", ""); |
444 | 0 | return (Py_UCS4) -1; |
445 | 0 | } |
446 | | |
447 | 0 | return (Py_UCS4) x; |
448 | 0 | } |
449 | 0 | } |
450 | | |
451 | | |
/* Parse the options of one conversion specifier: optional mapping key
   ("%(name)"), flags, width, precision and the ignored "h"/"l"/"L" length
   prefix.

   arg->ch is tested before anything is read, so the caller is expected to
   have stored the character at ctx->fmtpos into it already.  Every later
   read is followed by ctx->fmtpos++, so on success arg->ch is the last
   character read (the conversion type) and ctx->fmtpos is the index right
   after it.

   Results are stored in arg->key, arg->flags, arg->width and arg->prec.
   A "*" width or precision consumes one argument via
   unicode_format_getnextarg().  A mapping key replaces ctx->args with the
   value looked up in ctx->dict.

   Return 0 on success.
   Raise an exception and return -1 on error. */
static int
unicode_format_arg_parse(struct unicode_formatter_t *ctx,
                         struct unicode_format_arg_t *arg)
{
/* Character at the current position of the format string. */
#define FORMAT_READ(ctx) \
        PyUnicode_READ((ctx)->fmtkind, (ctx)->fmtdata, (ctx)->fmtpos)

    PyObject *v;

    if (arg->ch == '(') {
        /* Get argument value from a dictionary. Example: "%(name)s". */
        Py_ssize_t keystart;
        Py_ssize_t keylen;
        int pcount = 1;

        if (ctx->dict == NULL) {
            PyErr_Format(PyExc_TypeError,
                         "format requires a mapping, not %T",
                         ctx->args);
            return -1;
        }
        /* Step over the opening parenthesis. */
        ++ctx->fmtpos;
        --ctx->fmtcnt;
        keystart = ctx->fmtpos;
        /* Skip over balanced parentheses */
        while (pcount > 0 && --ctx->fmtcnt >= 0) {
            arg->ch = FORMAT_READ(ctx);
            if (arg->ch == ')')
                --pcount;
            else if (arg->ch == '(')
                ++pcount;
            ctx->fmtpos++;
        }
        /* fmtpos is now one past the closing ')'; exclude it from the key. */
        keylen = ctx->fmtpos - keystart - 1;
        if (ctx->fmtcnt < 0 || pcount > 0) {
            PyErr_Format(PyExc_ValueError,
                         "stray %% or incomplete format key at position %zd",
                         arg->fmtstart);
            return -1;
        }
        arg->key = PyUnicode_Substring(ctx->fmtstr,
                                       keystart, keystart + keylen);
        if (arg->key == NULL)
            return -1;
        /* Drop the value of a previous "%(key)" lookup.  args_owned is
           cleared first so that a failed lookup below does not leave it
           claiming ownership of a NULL ctx->args. */
        if (ctx->args_owned) {
            ctx->args_owned = 0;
            Py_DECREF(ctx->args);
        }
        ctx->args = PyObject_GetItem(ctx->dict, arg->key);
        if (ctx->args == NULL)
            return -1;
        ctx->args_owned = 1;
        /* Sentinels: arglen < -1 records that a mapping key has been used
           (checked below), and argidx < arglen lets
           unicode_format_getnextarg(ctx, 1) hand out ctx->args exactly
           once. */
        ctx->arglen = -3;
        ctx->argidx = -4;
    }
    else {
        /* Once a mapping key has been used, every specifier needs one. */
        if (ctx->arglen < -1) {
            PyErr_Format(PyExc_ValueError,
                         "format requires a parenthesised mapping key "
                         "at position %zd",
                         arg->fmtstart);
            return -1;
        }
    }

    /* Parse flags. Example: "%+i" => flags=F_SIGN. */
    while (--ctx->fmtcnt >= 0) {
        arg->ch = FORMAT_READ(ctx);
        ctx->fmtpos++;
        switch (arg->ch) {
        case '-': arg->flags |= F_LJUST; continue;
        case '+': arg->flags |= F_SIGN; continue;
        case ' ': arg->flags |= F_BLANK; continue;
        case '#': arg->flags |= F_ALT; continue;
        case '0': arg->flags |= F_ZERO; continue;
        }
        /* Not a flag character: leave it in arg->ch for the next stage. */
        break;
    }

    /* Parse width. Example: "%10s" => width=10 */
    if (arg->ch == '*') {
        if (ctx->arglen < -1) {
            PyErr_Format(PyExc_ValueError,
                         "* cannot be used with a parenthesised mapping key "
                         "at position %zd",
                         arg->fmtstart);
            return -1;
        }
        v = unicode_format_getnextarg(ctx, 0);
        if (v == NULL)
            return -1;
        if (!PyLong_Check(v)) {
            FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
            return -1;
        }
        arg->width = PyLong_AsSsize_t(v);
        if (arg->width == -1 && PyErr_Occurred()) {
            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
                FORMAT_ERROR(PyExc_OverflowError,
                             "too big for width%s", "");
            }
            return -1;
        }
        /* A negative "*" width means left-justify within |width|. */
        if (arg->width < 0) {
            arg->flags |= F_LJUST;
            arg->width = -arg->width;
        }
        if (--ctx->fmtcnt >= 0) {
            arg->ch = FORMAT_READ(ctx);
            ctx->fmtpos++;
        }
    }
    else if (arg->ch >= '0' && arg->ch <= '9') {
        arg->width = arg->ch - '0';
        while (--ctx->fmtcnt >= 0) {
            arg->ch = FORMAT_READ(ctx);
            ctx->fmtpos++;
            if (arg->ch < '0' || arg->ch > '9')
                break;
            /* Since arg->ch is unsigned, the RHS would end up as unsigned,
               mixing signed and unsigned comparison. Since arg->ch is between
               '0' and '9', casting to int is safe. */
            if (arg->width > (PY_SSIZE_T_MAX - ((int)arg->ch - '0')) / 10) {
                PyErr_Format(PyExc_ValueError,
                             "width too big at position %zd",
                             arg->fmtstart);
                return -1;
            }
            arg->width = arg->width*10 + (arg->ch - '0');
        }
    }

    /* Parse precision. Example: "%.3f" => prec=3 */
    if (arg->ch == '.') {
        /* A bare "." means precision 0. */
        arg->prec = 0;
        if (--ctx->fmtcnt >= 0) {
            arg->ch = FORMAT_READ(ctx);
            ctx->fmtpos++;
        }
        if (arg->ch == '*') {
            if (ctx->arglen < -1) {
                PyErr_Format(PyExc_ValueError,
                             "* cannot be used with a parenthesised mapping key "
                             "at position %zd",
                             arg->fmtstart);
                return -1;
            }
            v = unicode_format_getnextarg(ctx, 0);
            if (v == NULL)
                return -1;
            if (!PyLong_Check(v)) {
                FORMAT_ERROR(PyExc_TypeError, "* requires int, not %T", v);
                return -1;
            }
            arg->prec = PyLong_AsInt(v);
            if (arg->prec == -1 && PyErr_Occurred()) {
                if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
                    FORMAT_ERROR(PyExc_OverflowError,
                                 "too big for precision%s", "");
                }
                return -1;
            }
            /* A negative "*" precision is treated as 0. */
            if (arg->prec < 0)
                arg->prec = 0;
            if (--ctx->fmtcnt >= 0) {
                arg->ch = FORMAT_READ(ctx);
                ctx->fmtpos++;
            }
        }
        else if (arg->ch >= '0' && arg->ch <= '9') {
            arg->prec = arg->ch - '0';
            while (--ctx->fmtcnt >= 0) {
                arg->ch = FORMAT_READ(ctx);
                ctx->fmtpos++;
                if (arg->ch < '0' || arg->ch > '9')
                    break;
                if (arg->prec > (INT_MAX - ((int)arg->ch - '0')) / 10) {
                    PyErr_Format(PyExc_ValueError,
                                 "precision too big at position %zd",
                                 arg->fmtstart);
                    return -1;
                }
                arg->prec = arg->prec*10 + (arg->ch - '0');
            }
        }
    }

    /* Ignore "h", "l" and "L" format prefix (ex: "%hi" or "%ls") */
    if (ctx->fmtcnt >= 0) {
        if (arg->ch == 'h' || arg->ch == 'l' || arg->ch == 'L') {
            if (--ctx->fmtcnt >= 0) {
                arg->ch = FORMAT_READ(ctx);
                ctx->fmtpos++;
            }
        }
    }
    /* fmtcnt went negative: the format string ended before a conversion
       type character was read. */
    if (ctx->fmtcnt < 0) {
        PyErr_Format(PyExc_ValueError,
                     "stray %% at position %zd", arg->fmtstart);
        return -1;
    }
    return 0;

#undef FORMAT_READ
}
663 | | |
664 | | |
665 | | /* Format one argument. Supported conversion specifiers: |
666 | | |
667 | | - "s", "r", "a": any type |
668 | | - "i", "d", "u": int or float |
669 | | - "o", "x", "X": int |
670 | | - "e", "E", "f", "F", "g", "G": float |
671 | | - "c": int or str (1 character) |
672 | | |
673 | | When possible, the output is written directly into the Unicode writer |
674 | | (ctx->writer). A string is created when padding is required. |
675 | | |
676 | | Return 0 if the argument has been formatted into *p_str, |
677 | | 1 if the argument has been written into ctx->writer, |
678 | | -1 on error. */ |
679 | | static int |
680 | | unicode_format_arg_format(struct unicode_formatter_t *ctx, |
681 | | struct unicode_format_arg_t *arg, |
682 | | PyObject **p_str) |
683 | 69.1M | { |
684 | 69.1M | PyObject *v; |
685 | 69.1M | _PyUnicodeWriter *writer = &ctx->writer; |
686 | | |
687 | 69.1M | if (ctx->fmtcnt == 0) |
688 | 23.6M | ctx->writer.overallocate = 0; |
689 | | |
690 | 69.1M | v = unicode_format_getnextarg(ctx, 1); |
691 | 69.1M | if (v == NULL) |
692 | 0 | return -1; |
693 | | |
694 | | |
695 | 69.1M | switch (arg->ch) { |
696 | 45.3M | case 's': |
697 | 45.3M | case 'r': |
698 | 45.3M | case 'a': |
699 | 45.3M | if (PyLong_CheckExact(v) && arg->width == -1 && arg->prec == -1) { |
700 | | /* Fast path */ |
701 | 113 | if (_PyLong_FormatWriter(writer, v, 10, arg->flags & F_ALT) == -1) |
702 | 0 | return -1; |
703 | 113 | return 1; |
704 | 113 | } |
705 | | |
706 | 45.3M | if (PyUnicode_CheckExact(v) && arg->ch == 's') { |
707 | 39.3M | *p_str = Py_NewRef(v); |
708 | 39.3M | } |
709 | 5.96M | else { |
710 | 5.96M | if (arg->ch == 's') |
711 | 5.96M | *p_str = PyObject_Str(v); |
712 | 6.48k | else if (arg->ch == 'r') |
713 | 6.48k | *p_str = PyObject_Repr(v); |
714 | 0 | else |
715 | 0 | *p_str = PyObject_ASCII(v); |
716 | 5.96M | } |
717 | 45.3M | break; |
718 | | |
719 | 0 | case 'i': |
720 | 17.8M | case 'd': |
721 | 17.8M | case 'u': |
722 | 17.8M | case 'o': |
723 | 17.8M | case 'x': |
724 | 23.7M | case 'X': |
725 | 23.7M | { |
726 | 23.7M | int ret = mainformatlong(v, ctx, arg, p_str, writer); |
727 | 23.7M | if (ret != 0) |
728 | 14.1M | return ret; |
729 | 9.62M | arg->sign = 1; |
730 | 9.62M | break; |
731 | 23.7M | } |
732 | | |
733 | 0 | case 'e': |
734 | 0 | case 'E': |
735 | 106 | case 'f': |
736 | 106 | case 'F': |
737 | 106 | case 'g': |
738 | 106 | case 'G': |
739 | 106 | if (arg->width == -1 && arg->prec == -1 |
740 | 0 | && !(arg->flags & (F_SIGN | F_BLANK))) |
741 | 0 | { |
742 | | /* Fast path */ |
743 | 0 | if (formatfloat(v, ctx, arg, NULL, writer) == -1) |
744 | 0 | return -1; |
745 | 0 | return 1; |
746 | 0 | } |
747 | | |
748 | 106 | arg->sign = 1; |
749 | 106 | if (formatfloat(v, ctx, arg, p_str, NULL) == -1) |
750 | 0 | return -1; |
751 | 106 | break; |
752 | | |
753 | 106 | case 'c': |
754 | 0 | { |
755 | 0 | Py_UCS4 ch = formatchar(v, ctx, arg); |
756 | 0 | if (ch == (Py_UCS4) -1) |
757 | 0 | return -1; |
758 | 0 | if (arg->width == -1 && arg->prec == -1) { |
759 | | /* Fast path */ |
760 | 0 | if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) |
761 | 0 | return -1; |
762 | 0 | return 1; |
763 | 0 | } |
764 | 0 | *p_str = PyUnicode_FromOrdinal(ch); |
765 | 0 | break; |
766 | 0 | } |
767 | | |
768 | 0 | default: |
769 | 0 | if (arg->ch < 128 && Py_ISALPHA(arg->ch)) { |
770 | 0 | PyErr_Format(PyExc_ValueError, |
771 | 0 | "unsupported format %%%c at position %zd", |
772 | 0 | (int)arg->ch, arg->fmtstart); |
773 | 0 | } |
774 | 0 | else if (arg->ch == '\'') { |
775 | 0 | PyErr_Format(PyExc_ValueError, |
776 | 0 | "stray %% at position %zd or unexpected " |
777 | 0 | "format character \"'\" at position %zd", |
778 | 0 | arg->fmtstart, |
779 | 0 | ctx->fmtpos - 1); |
780 | 0 | } |
781 | 0 | else if (arg->ch >= 32 && arg->ch < 127) { |
782 | 0 | PyErr_Format(PyExc_ValueError, |
783 | 0 | "stray %% at position %zd or unexpected " |
784 | 0 | "format character '%c' at position %zd", |
785 | 0 | arg->fmtstart, |
786 | 0 | (int)arg->ch, ctx->fmtpos - 1); |
787 | 0 | } |
788 | 0 | else if (Py_UNICODE_ISPRINTABLE(arg->ch)) { |
789 | 0 | PyErr_Format(PyExc_ValueError, |
790 | 0 | "stray %% at position %zd or unexpected " |
791 | 0 | "format character '%c' (U+%04X) at position %zd", |
792 | 0 | arg->fmtstart, |
793 | 0 | (int)arg->ch, (int)arg->ch, ctx->fmtpos - 1); |
794 | 0 | } |
795 | 0 | else { |
796 | 0 | PyErr_Format(PyExc_ValueError, |
797 | 0 | "stray %% at position %zd or unexpected " |
798 | 0 | "format character U+%04X at position %zd", |
799 | 0 | arg->fmtstart, (int)arg->ch, ctx->fmtpos - 1); |
800 | 0 | } |
801 | 0 | return -1; |
802 | 69.1M | } |
803 | 54.9M | if (*p_str == NULL) |
804 | 0 | return -1; |
805 | 54.9M | assert (PyUnicode_Check(*p_str)); |
806 | 54.9M | return 0; |
807 | 54.9M | } |
808 | | |
809 | | |
810 | | static int |
811 | | unicode_format_arg_output(struct unicode_formatter_t *ctx, |
812 | | struct unicode_format_arg_t *arg, |
813 | | PyObject *str) |
814 | 54.9M | { |
815 | 54.9M | Py_ssize_t len; |
816 | 54.9M | int kind; |
817 | 54.9M | const void *pbuf; |
818 | 54.9M | Py_ssize_t pindex; |
819 | 54.9M | Py_UCS4 signchar; |
820 | 54.9M | Py_ssize_t buflen; |
821 | 54.9M | Py_UCS4 maxchar; |
822 | 54.9M | Py_ssize_t sublen; |
823 | 54.9M | _PyUnicodeWriter *writer = &ctx->writer; |
824 | 54.9M | Py_UCS4 fill; |
825 | | |
826 | 54.9M | fill = ' '; |
827 | 54.9M | if (arg->sign && arg->flags & F_ZERO) |
828 | 2.16k | fill = '0'; |
829 | | |
830 | 54.9M | len = PyUnicode_GET_LENGTH(str); |
831 | 54.9M | if ((arg->width == -1 || arg->width <= len) |
832 | 54.9M | && (arg->prec == -1 || arg->prec >= len) |
833 | 54.9M | && !(arg->flags & (F_SIGN | F_BLANK))) |
834 | 54.9M | { |
835 | | /* Fast path */ |
836 | 54.9M | if (_PyUnicodeWriter_WriteStr(writer, str) == -1) |
837 | 0 | return -1; |
838 | 54.9M | return 0; |
839 | 54.9M | } |
840 | | |
841 | | /* Truncate the string for "s", "r" and "a" formats |
842 | | if the precision is set */ |
843 | 17.3k | if (arg->ch == 's' || arg->ch == 'r' || arg->ch == 'a') { |
844 | 0 | if (arg->prec >= 0 && len > arg->prec) |
845 | 0 | len = arg->prec; |
846 | 0 | } |
847 | | |
848 | | /* Adjust sign and width */ |
849 | 17.3k | kind = PyUnicode_KIND(str); |
850 | 17.3k | pbuf = PyUnicode_DATA(str); |
851 | 17.3k | pindex = 0; |
852 | 17.3k | signchar = '\0'; |
853 | 17.3k | if (arg->sign) { |
854 | 17.3k | Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); |
855 | 17.3k | if (ch == '-' || ch == '+') { |
856 | 0 | signchar = ch; |
857 | 0 | len--; |
858 | 0 | pindex++; |
859 | 0 | } |
860 | 17.3k | else if (arg->flags & F_SIGN) |
861 | 0 | signchar = '+'; |
862 | 17.3k | else if (arg->flags & F_BLANK) |
863 | 0 | signchar = ' '; |
864 | 17.3k | else |
865 | 17.3k | arg->sign = 0; |
866 | 17.3k | } |
867 | 17.3k | if (arg->width < len) |
868 | 106 | arg->width = len; |
869 | | |
870 | | /* Prepare the writer */ |
871 | 17.3k | maxchar = writer->maxchar; |
872 | 17.3k | if (!(arg->flags & F_LJUST)) { |
873 | 17.3k | if (arg->sign) { |
874 | 0 | if ((arg->width-1) > len) |
875 | 0 | maxchar = Py_MAX(maxchar, fill); |
876 | 0 | } |
877 | 17.3k | else { |
878 | 17.3k | if (arg->width > len) |
879 | 17.2k | maxchar = Py_MAX(maxchar, fill); |
880 | 17.3k | } |
881 | 17.3k | } |
882 | 17.3k | if (PyUnicode_MAX_CHAR_VALUE(str) > maxchar) { |
883 | 0 | Py_UCS4 strmaxchar = _PyUnicode_FindMaxChar(str, 0, pindex+len); |
884 | 0 | maxchar = Py_MAX(maxchar, strmaxchar); |
885 | 0 | } |
886 | | |
887 | 17.3k | buflen = arg->width; |
888 | 17.3k | if (arg->sign && len == arg->width) |
889 | 0 | buflen++; |
890 | 17.3k | if (_PyUnicodeWriter_Prepare(writer, buflen, maxchar) == -1) |
891 | 0 | return -1; |
892 | | |
893 | | /* Write the sign if needed */ |
894 | 17.3k | if (arg->sign) { |
895 | 0 | if (fill != ' ') { |
896 | 0 | PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar); |
897 | 0 | writer->pos += 1; |
898 | 0 | } |
899 | 0 | if (arg->width > len) |
900 | 0 | arg->width--; |
901 | 0 | } |
902 | | |
903 | | /* Write the numeric prefix for "x", "X" and "o" formats |
904 | | if the alternate form is used. |
905 | | For example, write "0x" for the "%#x" format. */ |
906 | 17.3k | if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) { |
907 | 0 | assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); |
908 | 0 | assert(PyUnicode_READ(kind, pbuf, pindex + 1) == arg->ch); |
909 | 0 | if (fill != ' ') { |
910 | 0 | PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0'); |
911 | 0 | PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch); |
912 | 0 | writer->pos += 2; |
913 | 0 | pindex += 2; |
914 | 0 | } |
915 | 0 | arg->width -= 2; |
916 | 0 | if (arg->width < 0) |
917 | 0 | arg->width = 0; |
918 | 0 | len -= 2; |
919 | 0 | } |
920 | | |
921 | | /* Pad left with the fill character if needed */ |
922 | 17.3k | if (arg->width > len && !(arg->flags & F_LJUST)) { |
923 | 17.2k | sublen = arg->width - len; |
924 | 17.2k | _PyUnicode_Fill(writer->kind, writer->data, fill, writer->pos, sublen); |
925 | 17.2k | writer->pos += sublen; |
926 | 17.2k | arg->width = len; |
927 | 17.2k | } |
928 | | |
929 | | /* If padding with spaces: write sign if needed and/or numeric prefix if |
930 | | the alternate form is used */ |
931 | 17.3k | if (fill == ' ') { |
932 | 17.1k | if (arg->sign) { |
933 | 0 | PyUnicode_WRITE(writer->kind, writer->data, writer->pos, signchar); |
934 | 0 | writer->pos += 1; |
935 | 0 | } |
936 | 17.1k | if ((arg->flags & F_ALT) && (arg->ch == 'x' || arg->ch == 'X' || arg->ch == 'o')) { |
937 | 0 | assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); |
938 | 0 | assert(PyUnicode_READ(kind, pbuf, pindex+1) == arg->ch); |
939 | 0 | PyUnicode_WRITE(writer->kind, writer->data, writer->pos, '0'); |
940 | 0 | PyUnicode_WRITE(writer->kind, writer->data, writer->pos+1, arg->ch); |
941 | 0 | writer->pos += 2; |
942 | 0 | pindex += 2; |
943 | 0 | } |
944 | 17.1k | } |
945 | | |
946 | | /* Write characters */ |
947 | 17.3k | if (len) { |
948 | 17.3k | _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, |
949 | 17.3k | str, pindex, len); |
950 | 17.3k | writer->pos += len; |
951 | 17.3k | } |
952 | | |
953 | | /* Pad right with the fill character if needed */ |
954 | 17.3k | if (arg->width > len) { |
955 | 0 | sublen = arg->width - len; |
956 | 0 | _PyUnicode_Fill(writer->kind, writer->data, ' ', writer->pos, sublen); |
957 | 0 | writer->pos += sublen; |
958 | 0 | } |
959 | 17.3k | return 0; |
960 | 17.3k | } |
961 | | |
962 | | |
963 | | /* Helper of PyUnicode_Format(): format one arg. |
964 | | Return 0 on success, raise an exception and return -1 on error. */ |
965 | | static int |
966 | | unicode_format_arg(struct unicode_formatter_t *ctx) |
967 | 69.1M | { |
968 | 69.1M | struct unicode_format_arg_t arg; |
969 | 69.1M | PyObject *str; |
970 | 69.1M | int ret; |
971 | | |
972 | 69.1M | arg.ch = PyUnicode_READ(ctx->fmtkind, ctx->fmtdata, ctx->fmtpos); |
973 | 69.1M | if (arg.ch == '%') { |
974 | 0 | ctx->fmtpos++; |
975 | 0 | ctx->fmtcnt--; |
976 | 0 | if (_PyUnicodeWriter_WriteCharInline(&ctx->writer, '%') < 0) |
977 | 0 | return -1; |
978 | 0 | return 0; |
979 | 0 | } |
980 | 69.1M | arg.flags = 0; |
981 | 69.1M | arg.width = -1; |
982 | 69.1M | arg.prec = -1; |
983 | 69.1M | arg.sign = 0; |
984 | 69.1M | arg.fmtstart = ctx->fmtpos - 1; |
985 | 69.1M | arg.key = NULL; |
986 | 69.1M | str = NULL; |
987 | | |
988 | 69.1M | ret = unicode_format_arg_parse(ctx, &arg); |
989 | 69.1M | if (ret == -1) { |
990 | 0 | goto onError; |
991 | 0 | } |
992 | | |
993 | 69.1M | ret = unicode_format_arg_format(ctx, &arg, &str); |
994 | 69.1M | if (ret == -1) { |
995 | 4.77M | goto onError; |
996 | 4.77M | } |
997 | | |
998 | 64.3M | if (ret != 1) { |
999 | 54.9M | ret = unicode_format_arg_output(ctx, &arg, str); |
1000 | 54.9M | Py_DECREF(str); |
1001 | 54.9M | if (ret == -1) { |
1002 | 0 | goto onError; |
1003 | 0 | } |
1004 | 54.9M | } |
1005 | | |
1006 | 64.3M | if (ctx->dict && (ctx->argidx < ctx->arglen)) { |
1007 | | // XXX: Never happens? |
1008 | 0 | PyErr_SetString(PyExc_TypeError, |
1009 | 0 | "not all arguments converted during string formatting"); |
1010 | 0 | goto onError; |
1011 | 0 | } |
1012 | 64.3M | Py_XDECREF(arg.key); |
1013 | 64.3M | return 0; |
1014 | | |
1015 | 4.77M | onError: |
1016 | 4.77M | Py_XDECREF(arg.key); |
1017 | 4.77M | return -1; |
1018 | 64.3M | } |
1019 | | |
1020 | | |
1021 | | PyObject * |
1022 | | PyUnicode_Format(PyObject *format, PyObject *args) |
1023 | 32.7M | { |
1024 | 32.7M | struct unicode_formatter_t ctx; |
1025 | | |
1026 | 32.7M | if (format == NULL || args == NULL) { |
1027 | 0 | PyErr_BadInternalCall(); |
1028 | 0 | return NULL; |
1029 | 0 | } |
1030 | | |
1031 | 32.7M | if (ensure_unicode(format) < 0) |
1032 | 0 | return NULL; |
1033 | | |
1034 | 32.7M | ctx.fmtstr = format; |
1035 | 32.7M | ctx.fmtdata = PyUnicode_DATA(ctx.fmtstr); |
1036 | 32.7M | ctx.fmtkind = PyUnicode_KIND(ctx.fmtstr); |
1037 | 32.7M | ctx.fmtcnt = PyUnicode_GET_LENGTH(ctx.fmtstr); |
1038 | 32.7M | ctx.fmtpos = 0; |
1039 | | |
1040 | 32.7M | _PyUnicodeWriter_Init(&ctx.writer); |
1041 | 32.7M | ctx.writer.min_length = ctx.fmtcnt + 100; |
1042 | 32.7M | ctx.writer.overallocate = 1; |
1043 | | |
1044 | 32.7M | if (PyTuple_Check(args)) { |
1045 | 17.4M | ctx.arglen = PyTuple_Size(args); |
1046 | 17.4M | ctx.argidx = 0; |
1047 | 17.4M | } |
1048 | 15.3M | else { |
1049 | 15.3M | ctx.arglen = -1; |
1050 | 15.3M | ctx.argidx = -2; |
1051 | 15.3M | } |
1052 | 32.7M | ctx.args_owned = 0; |
1053 | 32.7M | if (PyMapping_Check(args) && !PyTuple_Check(args) && !PyUnicode_Check(args)) |
1054 | 12.1k | ctx.dict = args; |
1055 | 32.7M | else |
1056 | 32.7M | ctx.dict = NULL; |
1057 | 32.7M | ctx.args = args; |
1058 | | |
1059 | 162M | while (--ctx.fmtcnt >= 0) { |
1060 | 134M | if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { |
1061 | 65.6M | Py_ssize_t nonfmtpos; |
1062 | | |
1063 | 65.6M | nonfmtpos = ctx.fmtpos++; |
1064 | 616M | while (ctx.fmtcnt >= 0 && |
1065 | 607M | PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') { |
1066 | 550M | ctx.fmtpos++; |
1067 | 550M | ctx.fmtcnt--; |
1068 | 550M | } |
1069 | 65.6M | if (ctx.fmtcnt < 0) { |
1070 | 9.14M | ctx.fmtpos--; |
1071 | 9.14M | ctx.writer.overallocate = 0; |
1072 | 9.14M | } |
1073 | | |
1074 | 65.6M | if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr, |
1075 | 65.6M | nonfmtpos, ctx.fmtpos) < 0) |
1076 | 0 | goto onError; |
1077 | 65.6M | } |
1078 | 69.1M | else { |
1079 | 69.1M | ctx.fmtpos++; |
1080 | 69.1M | if (unicode_format_arg(&ctx) == -1) |
1081 | 4.77M | goto onError; |
1082 | 69.1M | } |
1083 | 134M | } |
1084 | | |
1085 | 28.0M | if (ctx.argidx < ctx.arglen && !ctx.dict) { |
1086 | 0 | PyErr_Format(PyExc_TypeError, |
1087 | 0 | "not all arguments converted during string formatting " |
1088 | 0 | "(required %zd, got %zd)", |
1089 | 0 | ctx.arglen < 0 ? 0 : ctx.argidx, |
1090 | 0 | ctx.arglen < 0 ? 1 : ctx.arglen); |
1091 | 0 | goto onError; |
1092 | 0 | } |
1093 | | |
1094 | 28.0M | if (ctx.args_owned) { |
1095 | 11.1k | Py_DECREF(ctx.args); |
1096 | 11.1k | } |
1097 | 28.0M | return _PyUnicodeWriter_Finish(&ctx.writer); |
1098 | | |
1099 | 4.77M | onError: |
1100 | 4.77M | _PyUnicodeWriter_Dealloc(&ctx.writer); |
1101 | 4.77M | if (ctx.args_owned) { |
1102 | 0 | Py_DECREF(ctx.args); |
1103 | 0 | } |
1104 | | return NULL; |
1105 | 28.0M | } |