Coverage Report

Created: 2026-01-09 06:26

/src/cpython/Parser/lexer/lexer.c
Line | Count | Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.32k
#define ALTTABSIZE 1
11
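
The indentation scanner later in this file rounds a column up to the next hard tab stop with (col / tok->tabsize + 1) * tok->tabsize, while ALTTABSIZE drives a parallel count of one column per tab; comparing the two resulting columns is how inconsistent tab/space indentation is detected. A minimal standalone sketch of the rounding (not part of lexer.c, no coverage data; assumes the usual tab size of 8):

#include <stdio.h>

#define ALTTABSIZE 1   /* as defined above */

int main(void) {
    int tabsize = 8;   /* assumed default for tok->tabsize */
    for (int col = 0; col <= 9; col += 3) {
        /* advance to the next multiple of each tab size, as the lexer does */
        printf("col %d -> tab stop %d, alt stop %d\n", col,
               (col / tabsize + 1) * tabsize,
               (col / ALTTABSIZE + 1) * ALTTABSIZE);
    }
    return 0;   /* 0 -> 8/1, 3 -> 8/4, 6 -> 8/7, 9 -> 16/10 */
}
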
12
1.73M
#define is_potential_identifier_start(c) (\
13
1.73M
              (c >= 'a' && c <= 'z')\
14
1.73M
               || (c >= 'A' && c <= 'Z')\
15
1.73M
               || c == '_'\
16
1.73M
               || (c >= 128))
17
18
2.71M
#define is_potential_identifier_char(c) (\
19
2.71M
              (c >= 'a' && c <= 'z')\
20
2.71M
               || (c >= 'A' && c <= 'Z')\
21
2.71M
               || (c >= '0' && c <= '9')\
22
2.71M
               || c == '_'\
23
2.71M
               || (c >= 128))
24
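
A standalone sketch exercising the two macros above (macros reproduced in condensed form; not part of lexer.c): '9' may continue an identifier but not start one, and any byte >= 128 is provisionally accepted here, to be validated later by verify_identifier().

#include <stdio.h>

#define is_potential_identifier_start(c) \
    ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= 128))
#define is_potential_identifier_char(c) \
    ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || \
     (c >= '0' && c <= '9') || c == '_' || (c >= 128))

int main(void) {
    int samples[] = {'a', 'Z', '_', '9', 0xC3, '-'};   /* 0xC3: a UTF-8 lead byte */
    for (int i = 0; i < 6; i++) {
        int c = samples[i];
        printf("0x%02X  start=%d  char=%d\n", c,
               is_potential_identifier_start(c),
               is_potential_identifier_char(c));
    }
    return 0;
}
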
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.85M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
17.1k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
37
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.74M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
258k
{
55
258k
    return memchr(str, 0, size) != NULL;
56
258k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.4M
{
62
11.4M
    int rc;
63
11.6M
    for (;;) {
64
11.6M
        if (tok->cur != tok->inp) {
65
11.3M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.3M
            tok->col_offset++;
70
11.3M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.3M
        }
72
305k
        if (tok->done != E_OK) {
73
31.4k
            return EOF;
74
31.4k
        }
75
274k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
274k
        if (!rc) {
84
15.8k
            tok->cur = tok->inp;
85
15.8k
            return EOF;
86
15.8k
        }
87
258k
        tok->line_start = tok->cur;
88
89
258k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
258k
    }
95
11.4M
    Py_UNREACHABLE();
96
11.4M
}
97
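
tok_nextc above serves bytes from the [cur, inp) window on a fast path and calls tok->underflow to refill when the window empties. The same shape in miniature (hypothetical stand-in struct, not the CPython API):

#include <stdio.h>

struct mini_reader {
    const char *cur;                        /* next byte to serve */
    const char *inp;                        /* end of valid data */
    int (*underflow)(struct mini_reader *); /* refill hook */
};

static int mini_nextc(struct mini_reader *r) {
    for (;;) {
        if (r->cur != r->inp) {
            return (unsigned char)*r->cur++;   /* fast path: serve from buffer */
        }
        if (!r->underflow(r)) {
            return EOF;                        /* refill failed: end of input */
        }
    }
}

static int refill_once(struct mini_reader *r) {
    static const char line[] = "pass\n";
    if (r->cur == line + sizeof(line) - 1) {   /* already delivered the line */
        return 0;
    }
    r->cur = line;
    r->inp = line + sizeof(line) - 1;
    return 1;
}

int main(void) {
    struct mini_reader r = {NULL, NULL, refill_once};
    int c;
    while ((c = mini_nextc(&r)) != EOF) {
        putchar(c);
    }
    return 0;
}

The real function additionally guards against col_offset overflow and rejects NUL bytes in the refilled line.
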
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.67M
{
102
3.67M
    if (c != EOF) {
103
3.64M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.64M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.64M
        tok->col_offset--;
110
3.64M
    }
111
3.67M
}
112
113
static int
114
23.5k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
23.5k
    assert(token != NULL);
116
23.5k
    assert(c == '}' || c == ':' || c == '!');
117
23.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
23.5k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
13.9k
        return 0;
121
13.9k
    }
122
9.61k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
9.61k
    int hash_detected = 0;
126
9.61k
    int in_string = 0;
127
9.61k
    char quote_char = 0;
128
129
1.00M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
999k
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
999k
        if (ch == '\\') {
134
17.6k
            i++;
135
17.6k
            continue;
136
17.6k
        }
137
138
        // Handle quotes
139
982k
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
157k
            if (!in_string) {
148
57.4k
                in_string = 1;
149
57.4k
                quote_char = ch;
150
57.4k
            }
151
100k
            else if (ch == quote_char) {
152
56.7k
                in_string = 0;
153
56.7k
            }
154
157k
            continue;
155
157k
        }
156
157
        // Check for # outside strings
158
824k
        if (ch == '#' && !in_string) {
159
876
            hash_detected = 1;
160
876
            break;
161
876
        }
162
824k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
9.61k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
876
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
876
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
876
        Py_ssize_t i = 0;  // Input position
172
876
        Py_ssize_t j = 0;  // Output position
173
876
        in_string = 0;     // Whether we're in a string
174
876
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
58.6k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
57.7k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
57.7k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
8.56k
                if (!in_string) {
184
3.47k
                    in_string = 1;
185
3.47k
                    quote_char = ch;
186
5.08k
                } else if (ch == quote_char) {
187
3.46k
                    in_string = 0;
188
3.46k
                }
189
8.56k
                result[j++] = ch;
190
8.56k
            }
191
            // Skip comments
192
49.2k
            else if (ch == '#' && !in_string) {
193
33.9k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
33.1k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
32.8k
                    i++;
196
32.8k
                }
197
1.07k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
291
                    result[j++] = '\n';
199
291
                }
200
1.07k
            }
201
            // Copy other chars
202
48.1k
            else {
203
48.1k
                result[j++] = ch;
204
48.1k
            }
205
57.7k
            i++;
206
57.7k
        }
207
208
876
        result[j] = '\0';  // Null-terminate the result string
209
876
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
876
        PyMem_Free(result);
211
8.73k
    } else {
212
8.73k
        res = PyUnicode_DecodeUTF8(
213
8.73k
            tok_mode->last_expr_buffer,
214
8.73k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
8.73k
            NULL
216
8.73k
        );
217
8.73k
    }
218
219
9.61k
    if (!res) {
220
0
        return -1;
221
0
    }
222
9.61k
    token->metadata = res;
223
9.61k
    return 0;
224
9.61k
}
225
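
The odd-quote-count trick documented in set_ftstring_expr can be tried in isolation. A simplified standalone sketch (single-character quotes only, unlike the triple quotes the real lexer has already validated): '#' opens a comment only while in_string is 0, and in_string flips on matching unescaped quotes.

#include <stdio.h>

/* Print expr with comments removed, leaving string contents intact. */
static void strip_comments(const char *expr) {
    int in_string = 0;
    char quote_char = 0;
    for (const char *p = expr; *p; p++) {
        if (*p == '\\' && p[1]) {              /* skip escaped characters */
            putchar(p[0]);
            putchar(p[1]);
            p++;
            continue;
        }
        if (*p == '"' || *p == '\'') {         /* toggle on matching quotes */
            if (!in_string) {
                in_string = 1;
                quote_char = *p;
            }
            else if (*p == quote_char) {
                in_string = 0;
            }
        }
        if (*p == '#' && !in_string) {         /* comment: skip to end of line */
            while (*p && *p != '\n') {
                p++;
            }
            if (!*p) {
                break;
            }
        }
        putchar(*p);
    }
    putchar('\n');
}

int main(void) {
    strip_comments("x + '# not a comment'   # a real comment");
    return 0;
}
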
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
62.9k
{
229
62.9k
    assert(tok->cur != NULL);
230
231
62.9k
    Py_ssize_t size = strlen(tok->cur);
232
62.9k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
62.9k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
39.4k
        case '{':
252
39.4k
            if (tok_mode->last_expr_buffer != NULL) {
253
27.9k
                PyMem_Free(tok_mode->last_expr_buffer);
254
27.9k
            }
255
39.4k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
39.4k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
39.4k
            tok_mode->last_expr_size = size;
260
39.4k
            tok_mode->last_expr_end = -1;
261
39.4k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
39.4k
            break;
263
18.4k
        case '}':
264
20.1k
        case '!':
265
20.1k
            tok_mode->last_expr_end = strlen(tok->start);
266
20.1k
            break;
267
3.37k
        case ':':
268
3.37k
            if (tok_mode->last_expr_end == -1) {
269
2.97k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.97k
            }
271
3.37k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
62.9k
    }
275
62.9k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
62.9k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
8.26k
{
284
8.26k
    const char *s = test;
285
8.26k
    int res = 0;
286
22.5k
    while (1) {
287
22.5k
        int c = tok_nextc(tok);
288
22.5k
        if (*s == 0) {
289
8.17k
            res = !is_potential_identifier_char(c);
290
8.17k
        }
291
14.3k
        else if (c == *s) {
292
14.2k
            s++;
293
14.2k
            continue;
294
14.2k
        }
295
296
8.26k
        tok_backup(tok, c);
297
22.5k
        while (s != test) {
298
14.2k
            tok_backup(tok, *--s);
299
14.2k
        }
300
8.26k
        return res;
301
22.5k
    }
302
8.26k
}
303
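
lookahead() consumes bytes while they match the given suffix, then restores the stream one byte at a time with tok_backup, in reverse order. A toy port over a plain string cursor (hypothetical helpers; is_potential_identifier_char replaced by a ctype stand-in):

#include <ctype.h>
#include <stdio.h>

static const char *input = "nd ";   /* stream contents after the 'a' of "and" */
static const char *cur;             /* stream cursor, like tok->cur */

static int nextc(void) { return *cur ? (unsigned char)*cur++ : EOF; }
static void backup(int c) { if (c != EOF) cur--; }   /* like tok_backup */

/* Match test at the cursor, then rewind everything that was consumed. */
static int lookahead(const char *test) {
    const char *s = test;
    int res = 0;
    for (;;) {
        int c = nextc();
        if (*s == 0) {
            res = !(isalnum(c) || c == '_');   /* stand-in for the macro */
        }
        else if (c == *s) {
            s++;
            continue;
        }
        backup(c);
        while (s != test) {
            backup(*--s);
        }
        return res;
    }
}

int main(void) {
    cur = input;
    printf("match=%d, cursor restored=%d\n", lookahead("nd"), cur == input);
    return 0;
}
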
304
static int
305
95.3k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
95.3k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of the keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is", "not" and "or".
314
     * This allows gradually deprecating existing valid code without adding
315
     * a warning before the error in most cases of invalid numeric literal
316
     * (which would be confusing and break existing tests).
317
     * Raise a syntax error with a slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * another keyword or identifier.
320
     */
321
95.3k
    int r = 0;
322
95.3k
    if (c == 'a') {
323
1.09k
        r = lookahead(tok, "nd");
324
1.09k
    }
325
94.2k
    else if (c == 'e') {
326
580
        r = lookahead(tok, "lse");
327
580
    }
328
93.6k
    else if (c == 'f') {
329
3.59k
        r = lookahead(tok, "or");
330
3.59k
    }
331
90.0k
    else if (c == 'i') {
332
1.58k
        int c2 = tok_nextc(tok);
333
1.58k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.56k
            r = 1;
335
1.56k
        }
336
1.58k
        tok_backup(tok, c2);
337
1.58k
    }
338
88.4k
    else if (c == 'o') {
339
2.64k
        r = lookahead(tok, "r");
340
2.64k
    }
341
85.8k
    else if (c == 'n') {
342
348
        r = lookahead(tok, "ot");
343
348
    }
344
95.3k
    if (r) {
345
9.72k
        tok_backup(tok, c);
346
9.72k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
9.72k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
9.72k
        tok_nextc(tok);
352
9.72k
    }
353
85.5k
    else /* In future releases, only the error will remain. */
354
85.5k
    if (c < 128 && is_potential_identifier_char(c)) {
355
212
        tok_backup(tok, c);
356
212
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
212
        return 0;
358
212
    }
359
95.1k
    return 1;
360
95.3k
}
361
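
The policy above, restated as a toy classifier (hypothetical helper; the keyword list is taken from the checks in verify_end_of_number): a known keyword immediately after a number only warns for now, while any other ASCII identifier character is a hard error.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* 0 = fine, 1 = SyntaxWarning (deprecated), 2 = SyntaxError */
static int classify_number_suffix(const char *s) {
    static const char *kw[] = {"and", "else", "for", "if", "in", "is", "or", "not"};
    for (size_t i = 0; i < sizeof(kw) / sizeof(kw[0]); i++) {
        size_t n = strlen(kw[i]);
        if (strncmp(s, kw[i], n) == 0 &&
            !(isalnum((unsigned char)s[n]) || s[n] == '_')) {
            return 1;                  /* e.g. "1if x else 2": warn for now */
        }
    }
    if (isalnum((unsigned char)s[0]) || s[0] == '_') {
        return 2;                      /* e.g. "1z": invalid decimal literal */
    }
    return 0;                          /* e.g. "1 + 2" */
}

int main(void) {
    printf("%d %d %d\n",
           classify_number_suffix("if x else 2"),   /* 1 */
           classify_number_suffix("z"),             /* 2 */
           classify_number_suffix(" + 2"));         /* 0 */
    return 0;
}
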
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.5k
{
366
11.5k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.5k
    PyObject *s;
370
11.5k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.5k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.5k
    if (s == NULL) {
374
2
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
2
            tok->done = E_DECODE;
376
2
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
2
        return 0;
381
2
    }
382
11.5k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.5k
    assert(invalid >= 0);
384
11.5k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.5k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
680
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
680
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
469
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
469
            if (s != NULL) {
391
469
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
469
            }
393
469
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
469
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
469
        }
399
680
        Py_DECREF(s);
400
680
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
345
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
345
        }
403
335
        else {
404
335
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
335
        }
406
680
        return 0;
407
680
    }
408
10.8k
    Py_DECREF(s);
409
10.8k
    return 1;
410
11.5k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
74.2k
{
415
74.2k
    int c;
416
417
74.7k
    while (1) {
418
214k
        do {
419
214k
            c = tok_nextc(tok);
420
214k
        } while (Py_ISDIGIT(c));
421
74.7k
        if (c != '_') {
422
74.2k
            break;
423
74.2k
        }
424
516
        c = tok_nextc(tok);
425
516
        if (!Py_ISDIGIT(c)) {
426
12
            tok_backup(tok, c);
427
12
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
12
            return 0;
429
12
        }
430
516
    }
431
74.2k
    return c;
432
74.2k
}
433
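
tok_decimal_tail enforces PEP 515 underscore grouping: an underscore must be followed by a digit, so 1_000 scans fully while 1__0 and 1_ are rejected. A minimal sketch of the same rule (hypothetical helper, returns the consumed length or -1):

#include <ctype.h>
#include <stdio.h>

static int scan_decimal_tail(const char *s) {
    const char *p = s;
    for (;;) {
        while (isdigit((unsigned char)*p)) {
            p++;
        }
        if (*p != '_') {
            break;
        }
        p++;                                         /* consume '_' */
        if (!isdigit((unsigned char)*p)) {
            return -1;                               /* "invalid decimal literal" */
        }
    }
    return (int)(p - s);
}

int main(void) {
    printf("%d %d %d\n",
           scan_decimal_tail("1_000 "),   /* 5 */
           scan_decimal_tail("1__0 "),    /* -1 */
           scan_decimal_tail("1_ "));     /* -1 */
    return 0;
}
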
434
static inline int
435
1.13k
tok_continuation_line(struct tok_state *tok) {
436
1.13k
    int c = tok_nextc(tok);
437
1.13k
    if (c == '\r') {
438
75
        c = tok_nextc(tok);
439
75
    }
440
1.13k
    if (c != '\n') {
441
74
        tok->done = E_LINECONT;
442
74
        return -1;
443
74
    }
444
1.05k
    c = tok_nextc(tok);
445
1.05k
    if (c == EOF) {
446
55
        tok->done = E_EOF;
447
55
        tok->cur = tok->inp;
448
55
        return -1;
449
1.00k
    } else {
450
1.00k
        tok_backup(tok, c);
451
1.00k
    }
452
1.00k
    return c;
453
1.05k
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.6k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.6k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.6k
    do {                                                                  \
464
7
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
7
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
7
            (int)(tok->cur - tok->line_start),                            \
467
7
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
7
        return -1;                                                        \
469
7
    } while (0)
470
471
21.6k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.6k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.6k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.6k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
21.6k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
21.6k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
21.6k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
21.6k
#undef RETURN_SYNTAX_ERROR
496
497
21.6k
    return 0;
498
21.6k
}
499
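
The seven rejected prefix pairs above collapse to three rules: 'u' combines with nothing, 'b' never combines with 'f' or 't', and 'f' and 't' never combine with each other; only 'r' is freely combinable. A toy predicate mirroring the checks one-to-one (hypothetical, not the CPython helper):

#include <stdio.h>

static int prefixes_ok(int b, int r, int u, int f, int t) {
    if (u && (b || r || f || t)) return 0;   /* ub, ur, uf, ut */
    if (b && (f || t)) return 0;             /* bf, bt */
    if (f && t) return 0;                    /* ft */
    return 1;
}

int main(void) {
    printf("rb:%d rf:%d rt:%d ub:%d ft:%d\n",
           prefixes_ok(1, 1, 0, 0, 0),   /* 1 */
           prefixes_ok(0, 1, 0, 1, 0),   /* 1 */
           prefixes_ok(0, 1, 0, 0, 1),   /* 1 */
           prefixes_ok(1, 0, 1, 0, 0),   /* 0 */
           prefixes_ok(0, 0, 0, 1, 1));  /* 0 */
    return 0;
}
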
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.70M
{
503
1.70M
    int c;
504
1.70M
    int blankline, nonascii;
505
506
1.70M
    const char *p_start = NULL;
507
1.70M
    const char *p_end = NULL;
508
1.80M
  nextline:
509
1.80M
    tok->start = NULL;
510
1.80M
    tok->starting_col_offset = -1;
511
1.80M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.80M
    if (tok->atbol) {
516
253k
        int col = 0;
517
253k
        int altcol = 0;
518
253k
        tok->atbol = 0;
519
253k
        int cont_line_col = 0;
520
1.15M
        for (;;) {
521
1.15M
            c = tok_nextc(tok);
522
1.15M
            if (c == ' ') {
523
900k
                col++, altcol++;
524
900k
            }
525
255k
            else if (c == '\t') {
526
661
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
661
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
661
            }
529
254k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
762
                col = altcol = 0; /* For Emacs users */
531
762
            }
532
254k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
663
                cont_line_col = cont_line_col ? cont_line_col : col;
538
663
                if ((c = tok_continuation_line(tok)) == -1) {
539
44
                    return MAKE_TOKEN(ERRORTOKEN);
540
44
                }
541
663
            }
542
253k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
253k
            else {
546
253k
                break;
547
253k
            }
548
1.15M
        }
549
253k
        tok_backup(tok, c);
550
253k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
64.1k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
64.1k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
64.1k
            else {
566
64.1k
                blankline = 1; /* Ignore completely */
567
64.1k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
64.1k
        }
571
253k
        if (!blankline && tok->level == 0) {
572
145k
            col = cont_line_col ? cont_line_col : col;
573
145k
            altcol = cont_line_col ? cont_line_col : altcol;
574
145k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
96.0k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
96.0k
            }
580
49.0k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
27.4k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
27.4k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
27.4k
                tok->pendin++;
591
27.4k
                tok->indstack[++tok->indent] = col;
592
27.4k
                tok->altindstack[tok->indent] = altcol;
593
27.4k
            }
594
21.5k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
48.3k
                while (tok->indent > 0 &&
597
43.3k
                    col < tok->indstack[tok->indent]) {
598
26.7k
                    tok->pendin--;
599
26.7k
                    tok->indent--;
600
26.7k
                }
601
21.5k
                if (col != tok->indstack[tok->indent]) {
602
7
                    tok->done = E_DEDENT;
603
7
                    tok->cur = tok->inp;
604
7
                    return MAKE_TOKEN(ERRORTOKEN);
605
7
                }
606
21.5k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
21.5k
            }
610
145k
        }
611
253k
    }
612
613
1.80M
    tok->start = tok->cur;
614
1.80M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
1.80M
    if (tok->pendin != 0) {
618
54.1k
        if (tok->pendin < 0) {
619
26.7k
            if (tok->tok_extra_tokens) {
620
0
                p_start = tok->cur;
621
0
                p_end = tok->cur;
622
0
            }
623
26.7k
            tok->pendin++;
624
26.7k
            return MAKE_TOKEN(DEDENT);
625
26.7k
        }
626
27.4k
        else {
627
27.4k
            if (tok->tok_extra_tokens) {
628
0
                p_start = tok->buf;
629
0
                p_end = tok->cur;
630
0
            }
631
27.4k
            tok->pendin--;
632
27.4k
            return MAKE_TOKEN(INDENT);
633
27.4k
        }
634
54.1k
    }
635
636
    /* Peek ahead at the next character */
637
1.75M
    c = tok_nextc(tok);
638
1.75M
    tok_backup(tok, c);
639
640
1.75M
 again:
641
1.75M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.14M
    do {
644
2.14M
        c = tok_nextc(tok);
645
2.14M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.75M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.75M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.75M
    if (c == '#') {
653
654
41.0k
        const char* p = NULL;
655
41.0k
        const char *prefix, *type_start;
656
41.0k
        int current_starting_col_offset;
657
658
1.37M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.33M
            c = tok_nextc(tok);
660
1.33M
        }
661
662
41.0k
        if (tok->tok_extra_tokens) {
663
0
            p = tok->start;
664
0
        }
665
666
41.0k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
41.0k
        if (tok->tok_extra_tokens) {
721
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
0
            p_start = p;
723
0
            p_end = tok->cur;
724
0
            tok->comment_newline = blankline;
725
0
            return MAKE_TOKEN(COMMENT);
726
0
        }
727
41.0k
    }
728
729
1.75M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.75M
    if (c == EOF) {
735
15.7k
        if (tok->level) {
736
4.06k
            return MAKE_TOKEN(ERRORTOKEN);
737
4.06k
        }
738
11.6k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
15.7k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.73M
    nonascii = 0;
743
1.73M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", f"", and t"". */
745
574k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
704k
        while (1) {
747
704k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
21.5k
                saw_b = 1;
749
21.5k
            }
750
            /* Since this is a backwards-compatibility support literal, we don't
751
               want to support it in arbitrary order like byte literals. */
752
682k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
8.45k
                saw_u = 1;
754
8.45k
            }
755
            /* ur"" and ru"" are not supported */
756
674k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
37.8k
                saw_r = 1;
758
37.8k
            }
759
636k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
47.5k
                saw_f = 1;
761
47.5k
            }
762
588k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
35.9k
                saw_t = 1;
764
35.9k
            }
765
552k
            else {
766
552k
                break;
767
552k
            }
768
151k
            c = tok_nextc(tok);
769
151k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
21.6k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
21.6k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
21.6k
                if (status < 0) {
774
7
                    return MAKE_TOKEN(ERRORTOKEN);
775
7
                }
776
777
                // Handle valid f or t string creation:
778
21.6k
                if (saw_f || saw_t) {
779
17.1k
                    goto f_string_quote;
780
17.1k
                }
781
4.52k
                goto letter_quote;
782
21.6k
            }
783
151k
        }
784
2.61M
        while (is_potential_identifier_char(c)) {
785
2.06M
            if (c >= 128) {
786
120k
                nonascii = 1;
787
120k
            }
788
2.06M
            c = tok_nextc(tok);
789
2.06M
        }
790
552k
        tok_backup(tok, c);
791
552k
        if (nonascii && !verify_identifier(tok)) {
792
682
            return MAKE_TOKEN(ERRORTOKEN);
793
682
        }
794
795
552k
        p_start = tok->start;
796
552k
        p_end = tok->cur;
797
798
552k
        return MAKE_TOKEN(NAME);
799
552k
    }
800
801
1.16M
    if (c == '\r') {
802
316
        c = tok_nextc(tok);
803
316
    }
804
805
    /* Newline */
806
1.16M
    if (c == '\n') {
807
234k
        tok->atbol = 1;
808
234k
        if (blankline || tok->level > 0) {
809
108k
            if (tok->tok_extra_tokens) {
810
0
                if (tok->comment_newline) {
811
0
                    tok->comment_newline = 0;
812
0
                }
813
0
                p_start = tok->start;
814
0
                p_end = tok->cur;
815
0
                return MAKE_TOKEN(NL);
816
0
            }
817
108k
            goto nextline;
818
108k
        }
819
126k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
0
            tok->comment_newline = 0;
821
0
            p_start = tok->start;
822
0
            p_end = tok->cur;
823
0
            return MAKE_TOKEN(NL);
824
0
        }
825
126k
        p_start = tok->start;
826
126k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
126k
        tok->cont_line = 0;
828
126k
        return MAKE_TOKEN(NEWLINE);
829
126k
    }
830
831
    /* Period or number starting with period? */
832
929k
    if (c == '.') {
833
40.6k
        c = tok_nextc(tok);
834
40.6k
        if (Py_ISDIGIT(c)) {
835
3.34k
            goto fraction;
836
37.2k
        } else if (c == '.') {
837
1.30k
            c = tok_nextc(tok);
838
1.30k
            if (c == '.') {
839
583
                p_start = tok->start;
840
583
                p_end = tok->cur;
841
583
                return MAKE_TOKEN(ELLIPSIS);
842
583
            }
843
720
            else {
844
720
                tok_backup(tok, c);
845
720
            }
846
720
            tok_backup(tok, '.');
847
720
        }
848
35.9k
        else {
849
35.9k
            tok_backup(tok, c);
850
35.9k
        }
851
36.6k
        p_start = tok->start;
852
36.6k
        p_end = tok->cur;
853
36.6k
        return MAKE_TOKEN(DOT);
854
40.6k
    }
855
856
    /* Number */
857
888k
    if (Py_ISDIGIT(c)) {
858
92.1k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
33.6k
            c = tok_nextc(tok);
861
33.6k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
16.0k
                c = tok_nextc(tok);
864
16.2k
                do {
865
16.2k
                    if (c == '_') {
866
213
                        c = tok_nextc(tok);
867
213
                    }
868
16.2k
                    if (!Py_ISXDIGIT(c)) {
869
18
                        tok_backup(tok, c);
870
18
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
18
                    }
872
76.4k
                    do {
873
76.4k
                        c = tok_nextc(tok);
874
76.4k
                    } while (Py_ISXDIGIT(c));
875
16.2k
                } while (c == '_');
876
16.0k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
16.0k
            }
880
17.5k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
565
                c = tok_nextc(tok);
883
937
                do {
884
937
                    if (c == '_') {
885
376
                        c = tok_nextc(tok);
886
376
                    }
887
937
                    if (c < '0' || c >= '8') {
888
22
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
21
                        else {
893
21
                            tok_backup(tok, c);
894
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
21
                        }
896
22
                    }
897
2.95k
                    do {
898
2.95k
                        c = tok_nextc(tok);
899
2.95k
                    } while ('0' <= c && c < '8');
900
915
                } while (c == '_');
901
543
                if (Py_ISDIGIT(c)) {
902
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
2
                            "invalid digit '%c' in octal literal", c));
904
2
                }
905
541
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
541
            }
909
16.9k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
414
                c = tok_nextc(tok);
912
897
                do {
913
897
                    if (c == '_') {
914
495
                        c = tok_nextc(tok);
915
495
                    }
916
897
                    if (c != '0' && c != '1') {
917
25
                        if (Py_ISDIGIT(c)) {
918
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
2
                        }
920
23
                        else {
921
23
                            tok_backup(tok, c);
922
23
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
23
                        }
924
25
                    }
925
3.87k
                    do {
926
3.87k
                        c = tok_nextc(tok);
927
3.87k
                    } while (c == '0' || c == '1');
928
872
                } while (c == '_');
929
389
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
387
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
387
            }
936
16.5k
            else {
937
16.5k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
17.5k
                while (1) {
941
17.5k
                    if (c == '_') {
942
113
                        c = tok_nextc(tok);
943
113
                        if (!Py_ISDIGIT(c)) {
944
2
                            tok_backup(tok, c);
945
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
2
                        }
947
113
                    }
948
17.5k
                    if (c != '0') {
949
16.5k
                        break;
950
16.5k
                    }
951
947
                    c = tok_nextc(tok);
952
947
                }
953
16.5k
                char* zeros_end = tok->cur;
954
16.5k
                if (Py_ISDIGIT(c)) {
955
360
                    nonzero = 1;
956
360
                    c = tok_decimal_tail(tok);
957
360
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
360
                }
961
16.5k
                if (c == '.') {
962
858
                    c = tok_nextc(tok);
963
858
                    goto fraction;
964
858
                }
965
15.7k
                else if (c == 'e' || c == 'E') {
966
866
                    goto exponent;
967
866
                }
968
14.8k
                else if (c == 'j' || c == 'J') {
969
839
                    goto imaginary;
970
839
                }
971
14.0k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
24
                    tok_backup(tok, c);
974
24
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
24
                            tok, (int)(tok->start + 1 - tok->line_start),
976
24
                            (int)(zeros_end - tok->line_start),
977
24
                            "leading zeros in decimal integer "
978
24
                            "literals are not permitted; "
979
24
                            "use an 0o prefix for octal integers"));
980
24
                }
981
13.9k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
31
                    return MAKE_TOKEN(ERRORTOKEN);
983
31
                }
984
13.9k
            }
985
33.6k
        }
986
58.4k
        else {
987
            /* Decimal */
988
58.4k
            c = tok_decimal_tail(tok);
989
58.4k
            if (c == 0) {
990
8
                return MAKE_TOKEN(ERRORTOKEN);
991
8
            }
992
58.4k
            {
993
                /* Accept floating-point numbers. */
994
58.4k
                if (c == '.') {
995
3.52k
                    c = tok_nextc(tok);
996
7.72k
        fraction:
997
                    /* Fraction */
998
7.72k
                    if (Py_ISDIGIT(c)) {
999
6.11k
                        c = tok_decimal_tail(tok);
1000
6.11k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
6.11k
                    }
1004
7.72k
                }
1005
62.6k
                if (c == 'e' || c == 'E') {
1006
9.05k
                    int e;
1007
9.92k
                  exponent:
1008
9.92k
                    e = c;
1009
                    /* Exponent part */
1010
9.92k
                    c = tok_nextc(tok);
1011
9.92k
                    if (c == '+' || c == '-') {
1012
3.69k
                        c = tok_nextc(tok);
1013
3.69k
                        if (!Py_ISDIGIT(c)) {
1014
12
                            tok_backup(tok, c);
1015
12
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
12
                        }
1017
6.22k
                    } else if (!Py_ISDIGIT(c)) {
1018
583
                        tok_backup(tok, c);
1019
583
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
38
                            return MAKE_TOKEN(ERRORTOKEN);
1021
38
                        }
1022
545
                        tok_backup(tok, e);
1023
545
                        p_start = tok->start;
1024
545
                        p_end = tok->cur;
1025
545
                        return MAKE_TOKEN(NUMBER);
1026
583
                    }
1027
9.32k
                    c = tok_decimal_tail(tok);
1028
9.32k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
9.32k
                }
1032
62.9k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.63k
        imaginary:
1035
3.63k
                    c = tok_nextc(tok);
1036
3.63k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
10
                        return MAKE_TOKEN(ERRORTOKEN);
1038
10
                    }
1039
3.63k
                }
1040
60.1k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
127
                    return MAKE_TOKEN(ERRORTOKEN);
1042
127
                }
1043
62.9k
            }
1044
62.9k
        }
1045
94.5k
        tok_backup(tok, c);
1046
94.5k
        p_start = tok->start;
1047
94.5k
        p_end = tok->cur;
1048
94.5k
        return MAKE_TOKEN(NUMBER);
1049
92.1k
    }
1050
1051
813k
  f_string_quote:
1052
813k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
17.1k
        && (c == '\'' || c == '"'))) {
1054
1055
17.1k
        int quote = c;
1056
17.1k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi-line strings,
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
17.1k
        tok->first_lineno = tok->lineno;
1063
17.1k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
17.1k
        int after_quote = tok_nextc(tok);
1067
17.1k
        if (after_quote == quote) {
1068
2.77k
            int after_after_quote = tok_nextc(tok);
1069
2.77k
            if (after_after_quote == quote) {
1070
889
                quote_size = 3;
1071
889
            }
1072
1.88k
            else {
1073
                // TODO: Check this
1074
1.88k
                tok_backup(tok, after_after_quote);
1075
1.88k
                tok_backup(tok, after_quote);
1076
1.88k
            }
1077
2.77k
        }
1078
17.1k
        if (after_quote != quote) {
1079
14.3k
            tok_backup(tok, after_quote);
1080
14.3k
        }
1081
1082
1083
17.1k
        p_start = tok->start;
1084
17.1k
        p_end = tok->cur;
1085
17.1k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
3
        }
1088
17.1k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
17.1k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
17.1k
        the_current_tok->quote = quote;
1091
17.1k
        the_current_tok->quote_size = quote_size;
1092
17.1k
        the_current_tok->start = tok->start;
1093
17.1k
        the_current_tok->multi_line_start = tok->line_start;
1094
17.1k
        the_current_tok->first_line = tok->lineno;
1095
17.1k
        the_current_tok->start_offset = -1;
1096
17.1k
        the_current_tok->multi_line_start_offset = -1;
1097
17.1k
        the_current_tok->last_expr_buffer = NULL;
1098
17.1k
        the_current_tok->last_expr_size = 0;
1099
17.1k
        the_current_tok->last_expr_end = -1;
1100
17.1k
        the_current_tok->in_format_spec = 0;
1101
17.1k
        the_current_tok->in_debug = 0;
1102
1103
17.1k
        enum string_kind_t string_kind = FSTRING;
1104
17.1k
        switch (*tok->start) {
1105
962
            case 'T':
1106
4.99k
            case 't':
1107
4.99k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.99k
                string_kind = TSTRING;
1109
4.99k
                break;
1110
2.18k
            case 'F':
1111
11.6k
            case 'f':
1112
11.6k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
11.6k
                break;
1114
220
            case 'R':
1115
492
            case 'r':
1116
492
                the_current_tok->raw = 1;
1117
492
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
201
                    string_kind = TSTRING;
1119
201
                }
1120
492
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
17.1k
        }
1124
1125
17.1k
        the_current_tok->string_kind = string_kind;
1126
17.1k
        the_current_tok->curly_bracket_depth = 0;
1127
17.1k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
17.1k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
17.1k
    }
1130
1131
800k
  letter_quote:
1132
    /* String */
1133
800k
    if (c == '\'' || c == '"') {
1134
60.9k
        int quote = c;
1135
60.9k
        int quote_size = 1;             /* 1 or 3 */
1136
60.9k
        int end_quote_size = 0;
1137
60.9k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi-line strings,
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
60.9k
        tok->first_lineno = tok->lineno;
1144
60.9k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
60.9k
        c = tok_nextc(tok);
1148
60.9k
        if (c == quote) {
1149
9.74k
            c = tok_nextc(tok);
1150
9.74k
            if (c == quote) {
1151
2.94k
                quote_size = 3;
1152
2.94k
            }
1153
6.80k
            else {
1154
6.80k
                end_quote_size = 1;     /* empty string found */
1155
6.80k
            }
1156
9.74k
        }
1157
60.9k
        if (c != quote) {
1158
57.9k
            tok_backup(tok, c);
1159
57.9k
        }
1160
1161
        /* Get rest of string */
1162
1.26M
        while (end_quote_size != quote_size) {
1163
1.19M
            c = tok_nextc(tok);
1164
1.19M
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
1.19M
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
1.19M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
309
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of the string, and report the error
1174
                // from the initial quote character
1175
309
                tok->cur = (char *)tok->start;
1176
309
                tok->cur++;
1177
309
                tok->line_start = tok->multi_line_start;
1178
309
                int start = tok->lineno;
1179
309
                tok->lineno = tok->first_lineno;
1180
1181
309
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes;
1185
                     * if it does, this must be a missing '}' token,
1186
                     * so raise the proper error */
1187
39
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
39
                    if (the_current_tok->quote == quote &&
1189
32
                        the_current_tok->quote_size == quote_size) {
1190
22
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
22
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
22
                    }
1193
39
                }
1194
1195
287
                if (quote_size == 3) {
1196
24
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
24
                                     " (detected at line %d)", start);
1198
24
                    if (c != '\n') {
1199
24
                        tok->done = E_EOFS;
1200
24
                    }
1201
24
                    return MAKE_TOKEN(ERRORTOKEN);
1202
24
                }
1203
263
                else {
1204
263
                    if (has_escaped_quote) {
1205
9
                        _PyTokenizer_syntaxerror(
1206
9
                            tok,
1207
9
                            "unterminated string literal (detected at line %d); "
1208
9
                            "perhaps you escaped the end quote?",
1209
9
                            start
1210
9
                        );
1211
254
                    } else {
1212
254
                        _PyTokenizer_syntaxerror(
1213
254
                            tok, "unterminated string literal (detected at line %d)", start
1214
254
                        );
1215
254
                    }
1216
263
                    if (c != '\n') {
1217
15
                        tok->done = E_EOLS;
1218
15
                    }
1219
263
                    return MAKE_TOKEN(ERRORTOKEN);
1220
263
                }
1221
287
            }
1222
1.19M
            if (c == quote) {
1223
61.3k
                end_quote_size += 1;
1224
61.3k
            }
1225
1.13M
            else {
1226
1.13M
                end_quote_size = 0;
1227
1.13M
                if (c == '\\') {
1228
27.7k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
27.7k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
956
                        has_escaped_quote = 1;
1231
956
                    }
1232
27.7k
                    if (c == '\r') {
1233
69
                        c = tok_nextc(tok);
1234
69
                    }
1235
27.7k
                }
1236
1.13M
            }
1237
1.19M
        }
1238
1239
60.6k
        p_start = tok->start;
1240
60.6k
        p_end = tok->cur;
1241
60.6k
        return MAKE_TOKEN(STRING);
1242
60.9k
    }
1243
1244
    /* Line continuation */
1245
740k
    if (c == '\\') {
1246
468
        if ((c = tok_continuation_line(tok)) == -1) {
1247
85
            return MAKE_TOKEN(ERRORTOKEN);
1248
85
        }
1249
383
        tok->cont_line = 1;
1250
383
        goto again; /* Read next line */
1251
468
    }
1252
1253
    /* Punctuation character */
1254
739k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
739k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
55.2k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
55.2k
        int in_format_spec = current_tok->in_format_spec;
1261
55.2k
        int cursor_in_format_with_debug =
1262
55.2k
            cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
55.2k
        int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
55.2k
        if (cursor_valid && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
55.2k
        if (cursor_valid && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
55.2k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.43k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.43k
            current_tok->in_format_spec = 1;
1274
4.43k
            p_start = tok->start;
1275
4.43k
            p_end = tok->cur;
1276
4.43k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.43k
        }
1278
55.2k
    }
1279
1280
    /* Check for two-character token */
1281
735k
    {
1282
735k
        int c2 = tok_nextc(tok);
1283
735k
        int current_token = _PyToken_TwoChars(c, c2);
1284
735k
        if (current_token != OP) {
1285
26.1k
            int c3 = tok_nextc(tok);
1286
26.1k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
26.1k
            if (current_token3 != OP) {
1288
1.72k
                current_token = current_token3;
1289
1.72k
            }
1290
24.4k
            else {
1291
24.4k
                tok_backup(tok, c3);
1292
24.4k
            }
1293
26.1k
            p_start = tok->start;
1294
26.1k
            p_end = tok->cur;
1295
26.1k
            return MAKE_TOKEN(current_token);
1296
26.1k
        }
1297
708k
        tok_backup(tok, c2);
1298
708k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
96.3k
    case '(':
1303
128k
    case '[':
1304
171k
    case '{':
1305
171k
        if (tok->level >= MAXLEVEL) {
1306
10
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
10
        }
1308
171k
        tok->parenstack[tok->level] = c;
1309
171k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
171k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
171k
        tok->level++;
1312
171k
        if (INSIDE_FSTRING(tok)) {
1313
29.3k
            current_tok->curly_bracket_depth++;
1314
29.3k
        }
1315
171k
        break;
1316
69.2k
    case ')':
1317
81.5k
    case ']':
1318
106k
    case '}':
1319
106k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
55
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
55
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
55
        }
1323
106k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
202
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
202
        }
1326
106k
        if (tok->level > 0) {
1327
106k
            tok->level--;
1328
106k
            int opening = tok->parenstack[tok->level];
1329
106k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
37.4k
                                            (opening == '[' && c == ']') ||
1331
25.2k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it, we'll throw an unmatched
1336
                parentheses error. */
1337
47
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
6
                    assert(current_tok->curly_bracket_depth >= 0);
1339
6
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
6
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
4
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
4
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
4
                    }
1344
6
                }
1345
43
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
2
                            "closing parenthesis '%c' does not match "
1348
2
                            "opening parenthesis '%c' on line %d",
1349
2
                            c, opening, tok->parenlinenostack[tok->level]));
1350
2
                }
1351
41
                else {
1352
41
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
41
                            "closing parenthesis '%c' does not match "
1354
41
                            "opening parenthesis '%c'",
1355
41
                            c, opening));
1356
41
                }
1357
43
            }
1358
106k
        }
1359
1360
106k
        if (INSIDE_FSTRING(tok)) {
1361
21.7k
            current_tok->curly_bracket_depth--;
1362
21.7k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
21.7k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
20.5k
                current_tok->curly_bracket_expr_start_depth--;
1368
20.5k
                current_tok->kind = TOK_FSTRING_MODE;
1369
20.5k
                current_tok->in_format_spec = 0;
1370
20.5k
                current_tok->in_debug = 0;
1371
20.5k
            }
1372
21.7k
        }
1373
106k
        break;
1374
430k
    default:
1375
430k
        break;
1376
708k
    }
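The switch above pushes each opener together with its line and column so a mismatched closer can be reported against the exact opener. The same stack discipline as a self-contained toy (fixed depth in the spirit of MAXLEVEL; error reporting reduced to return codes):

#include <string.h>

enum { TOY_MAXLEVEL = 200 };

struct toy_parens {
    char opener[TOY_MAXLEVEL];
    int lineno[TOY_MAXLEVEL];
    int depth;
};

/* 0 = ok, -1 = too deep, -2 = unmatched closer, -3 = wrong pair */
static int toy_paren_event(struct toy_parens *st, char c, int lineno)
{
    static const char openers[] = "([{";
    static const char closers[] = ")]}";
    const char *pos;

    if (c != '\0' && (pos = strchr(openers, c)) != NULL) {
        if (st->depth >= TOY_MAXLEVEL) {
            return -1;                  /* too many nested parentheses */
        }
        st->opener[st->depth] = c;
        st->lineno[st->depth] = lineno; /* kept for the error message */
        st->depth++;
    }
    else if (c != '\0' && (pos = strchr(closers, c)) != NULL) {
        if (st->depth == 0) {
            return -2;                  /* unmatched closing bracket */
        }
        st->depth--;
        if (st->opener[st->depth] != openers[pos - closers]) {
            return -3;                  /* closer does not match opener */
        }
    }
    return 0;
}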
1377
1378
708k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
444
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
444
    }
1381
1382
708k
    if (c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
4.48k
        current_tok->in_debug = 1;
1384
4.48k
    }
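For context on the in_debug flag just set (an illustrative aside; the runtime behavior is the documented f-string debug form):

/* The '=' spotted above is the f-string debug specifier. With
       x = 3
   the literal f"{x=}" evaluates to "x=3": recording in_debug lets the
   raw expression text ("x=") be preserved into the literal part of the
   result. */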
1385
1386
    /* Punctuation character */
1387
708k
    p_start = tok->start;
1388
708k
    p_end = tok->cur;
1389
708k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
708k
}
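Throughout both scanners a token's text is the half-open slice [p_start, p_end) of the input buffer that MAKE_TOKEN forwards to _PyLexer_token_setup. A tiny sketch to make the convention concrete (hypothetical helper, not part of the lexer):

#include <stdio.h>

/* Print the token text delimited by the usual [p_start, p_end) pair. */
static void toy_print_token(const char *p_start, const char *p_end)
{
    printf("%.*s\n", (int)(p_end - p_start), p_start);
}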
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
52.3k
{
1395
52.3k
    const char *p_start = NULL;
1396
52.3k
    const char *p_end = NULL;
1397
52.3k
    int end_quote_size = 0;
1398
52.3k
    int unicode_escape = 0;
1399
1400
52.3k
    tok->start = tok->cur;
1401
52.3k
    tok->first_lineno = tok->lineno;
1402
52.3k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
52.3k
    int start_char = tok_nextc(tok);
1407
52.3k
    if (start_char == '{') {
1408
14.2k
        int peek1 = tok_nextc(tok);
1409
14.2k
        tok_backup(tok, peek1);
1410
14.2k
        tok_backup(tok, start_char);
1411
14.2k
        if (peek1 != '{') {
1412
12.1k
            current_tok->curly_bracket_expr_start_depth++;
1413
12.1k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
2
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
2
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
2
            }
1417
12.1k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
12.1k
            return tok_get_normal_mode(tok, current_tok, token);
1419
12.1k
        }
1420
14.2k
    }
1421
38.1k
    else {
1422
38.1k
        tok_backup(tok, start_char);
1423
38.1k
    }
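The one-character peek above separates a replacement field from an escaped brace. The decision in isolation (a sketch of the predicate only; the mode switch and depth bookkeeping are omitted):

/* At the start of f-string middle text: a lone '{' opens an expression
   (the lexer switches back to regular mode), while "{{" is a literal
   brace that stays in f-string mode. */
static int toy_opens_expression(const char *s)
{
    return s[0] == '{' && s[1] != '{';
}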
1424
1425
    // Check if we are at the end of the string
1426
57.9k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
45.3k
        int quote = tok_nextc(tok);
1428
45.3k
        if (quote != current_tok->quote) {
1429
27.5k
            tok_backup(tok, quote);
1430
27.5k
            goto f_string_middle;
1431
27.5k
        }
1432
45.3k
    }
1433
1434
12.6k
    if (current_tok->last_expr_buffer != NULL) {
1435
7.19k
        PyMem_Free(current_tok->last_expr_buffer);
1436
7.19k
        current_tok->last_expr_buffer = NULL;
1437
7.19k
        current_tok->last_expr_size = 0;
1438
7.19k
        current_tok->last_expr_end = -1;
1439
7.19k
    }
1440
1441
12.6k
    p_start = tok->start;
1442
12.6k
    p_end = tok->cur;
1443
12.6k
    tok->tok_mode_stack_index--;
1444
12.6k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
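The quote loop above consumes quote_size copies of the quote character (1 for f"...", 3 for triple-quoted forms) and bails out to f_string_middle on the first mismatch. The same check as a self-contained function:

/* Does s begin with `quote` repeated quote_size times? A '\0' in s fails
   the comparison, so a short buffer simply reports "not at the end". */
static int toy_at_string_end(const char *s, char quote, int quote_size)
{
    for (int i = 0; i < quote_size; i++) {
        if (s[i] != quote) {
            return 0;
        }
    }
    return 1;
}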
1445
1446
27.5k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
27.5k
    tok->multi_line_start = tok->line_start;
1451
164k
    while (end_quote_size != current_tok->quote_size) {
1452
158k
        int c = tok_nextc(tok);
1453
158k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
158k
        int in_format_spec = (
1457
158k
                current_tok->in_format_spec
1458
11.7k
                &&
1459
11.7k
                INSIDE_FSTRING_EXPR(current_tok)
1460
158k
        );
1461
1462
158k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
441
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
441
            if (in_format_spec && c == '\n') {
1471
59
                if (current_tok->quote_size == 1) {
1472
59
                    return MAKE_TOKEN(
1473
59
                        _PyTokenizer_syntaxerror(
1474
59
                            tok,
1475
59
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
59
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
59
                        )
1478
59
                    );
1479
59
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
59
            }
1487
1488
441
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
1492
382
            tok->cur = (char *)current_tok->start;
1493
382
            tok->cur++;
1494
382
            tok->line_start = current_tok->multi_line_start;
1495
382
            int start = tok->lineno;
1496
1497
382
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
382
            tok->lineno = the_current_tok->first_line;
1499
1500
382
            if (current_tok->quote_size == 3) {
1501
37
                _PyTokenizer_syntaxerror(tok,
1502
37
                                    "unterminated triple-quoted %c-string literal"
1503
37
                                    " (detected at line %d)",
1504
37
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
37
                if (c != '\n') {
1506
37
                    tok->done = E_EOFS;
1507
37
                }
1508
37
                return MAKE_TOKEN(ERRORTOKEN);
1509
37
            }
1510
345
            else {
1511
345
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
345
                                    "unterminated %c-string literal (detected at"
1513
345
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
345
            }
1515
382
        }
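Inputs that reach the error paths just handled (illustrative data only; messages abbreviated):

/*  f"abc<EOF>      -> unterminated f-string literal (single-quoted; '\n' or EOF)
    f"""abc<EOF>    -> unterminated triple-quoted f-string literal
    f"{x:<newline>  -> newlines are not allowed in format specifiers
                       for single quoted f-strings                          */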
1516
1517
158k
        if (c == current_tok->quote) {
1518
10.2k
            end_quote_size += 1;
1519
10.2k
            continue;
1520
147k
        } else {
1521
147k
            end_quote_size = 0;
1522
147k
        }
1523
1524
147k
        if (c == '{') {
1525
16.1k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
16.1k
            int peek = tok_nextc(tok);
1529
16.1k
            if (peek != '{' || in_format_spec) {
1530
13.6k
                tok_backup(tok, peek);
1531
13.6k
                tok_backup(tok, c);
1532
13.6k
                current_tok->curly_bracket_expr_start_depth++;
1533
13.6k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
4
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
4
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
4
                }
1537
13.6k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
13.6k
                current_tok->in_format_spec = 0;
1539
13.6k
                p_start = tok->start;
1540
13.6k
                p_end = tok->cur;
1541
13.6k
            } else {
1542
2.49k
                p_start = tok->start;
1543
2.49k
                p_end = tok->cur - 1;
1544
2.49k
            }
1545
16.1k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
131k
        } else if (c == '}') {
1547
4.86k
            if (unicode_escape) {
1548
208
                p_start = tok->start;
1549
208
                p_end = tok->cur;
1550
208
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
208
            }
1552
4.65k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec once expression
1555
            // scanning has completed (the expression end is set) and we are
1556
            // below the top level of the bracket stack (-1 is the top level).
1557
            // Format specifiers cannot legally contain doubled brackets, so we skip that case here.
1558
4.65k
            int cursor = current_tok->curly_bracket_depth;
1559
4.65k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.43k
                p_start = tok->start;
1561
1.43k
                p_end = tok->cur - 1;
1562
3.22k
            } else {
1563
3.22k
                tok_backup(tok, peek);
1564
3.22k
                tok_backup(tok, c);
1565
3.22k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.22k
                current_tok->in_format_spec = 0;
1567
3.22k
                p_start = tok->start;
1568
3.22k
                p_end = tok->cur;
1569
3.22k
            }
1570
4.65k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
126k
        } else if (c == '\\') {
1572
6.10k
            int peek = tok_nextc(tok);
1573
6.10k
            if (peek == '\r') {
1574
18
                peek = tok_nextc(tok);
1575
18
            }
1576
            // Special case: the backslash is immediately before a curly
1577
            // brace. Restore the character and return control to the loop
1578
            // for the next iteration.
1579
6.10k
            if (peek == '{' || peek == '}') {
1580
1.29k
                if (!current_tok->raw) {
1581
1.10k
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
1
                        return MAKE_TOKEN(ERRORTOKEN);
1583
1
                    }
1584
1.10k
                }
1585
1.29k
                tok_backup(tok, peek);
1586
1.29k
                continue;
1587
1.29k
            }
1588
1589
4.80k
            if (!current_tok->raw) {
1590
4.15k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
310
                    peek = tok_nextc(tok);
1593
310
                    if (peek == '{') {
1594
231
                        unicode_escape = 1;
1595
231
                    } else {
1596
79
                        tok_backup(tok, peek);
1597
79
                    }
1598
310
                }
1599
4.15k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
4.80k
        }
1603
147k
    }
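One detail of the loop above worth making concrete: for doubled braces both characters are consumed, but p_end is cut one character short, so each "{{" or "}}" contributes a single brace to the FSTRING_MIDDLE text. Roughly, at the tokenize level (a hedged illustration):

/* Input (as data):   f"a{{b}}c"
   Emitted, roughly:  FSTRING_START 'f"'
                      FSTRING_MIDDLE 'a{'   (second '{' dropped via p_end - 1)
                      FSTRING_MIDDLE 'b}'   (second '}' dropped the same way)
                      FSTRING_MIDDLE 'c'
                      FSTRING_END '"'                                       */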
1604
1605
    // Back up over the f-string quotes so we emit a final FSTRING_MIDDLE and
1606
    // attach the quotes to the FSTRING_END in the next tokenizer iteration.
1607
13.6k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
7.54k
        tok_backup(tok, current_tok->quote);
1609
7.54k
    }
1610
6.14k
    p_start = tok->start;
1611
6.14k
    p_end = tok->cur;
1612
6.14k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
27.5k
}
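Putting the two modes together, a hedged end-to-end example of how a replacement field alternates between them (approximate token stream in the PEP 701 tokenize-level view):

/* Input (as data):  f"a{b:>10}c"
   Roughly:          FSTRING_START 'f"'   (f-string mode)
                     FSTRING_MIDDLE 'a'
                     OP '{'               (switch to regular mode)
                     NAME 'b'
                     OP ':'               (in_format_spec set; f-string mode)
                     FSTRING_MIDDLE '>10'
                     OP '}'               (format spec closed)
                     FSTRING_MIDDLE 'c'
                     FSTRING_END '"'                                        */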
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.74M
{
1618
1.74M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.74M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.68M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.68M
    } else {
1622
52.3k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
52.3k
    }
1624
1.74M
}
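tok_get is a thin dispatcher over the mode stack: nested f-strings push TOK_FSTRING_MODE entries and FSTRING_END pops them (the tok_mode_stack_index-- above). The dispatch in isolation (toy types; function pointers stand in for the two scanners):

enum toy_kind { TOY_REGULAR_MODE, TOY_FSTRING_MODE };

struct toy_mode { enum toy_kind kind; };

/* Forward to whichever scanner matches the mode on top of the stack,
   exactly as tok_get does with TOK_GET_MODE(tok)->kind. */
static int toy_dispatch(const struct toy_mode *stack, int top,
                        int (*normal)(void), int (*fstring)(void))
{
    if (stack[top].kind == TOY_REGULAR_MODE) {
        return normal();
    }
    return fstring();
}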
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.74M
{
1629
1.74M
    int result = tok_get(tok, token);
1630
1.74M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.74M
    return result;
1635
1.74M
}
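_PyTokenizer_Get is the public entry point; beyond dispatching, it normalizes one cross-cutting failure so callers never see a stale token type after a decode error. A hypothetical driver loop showing the intended usage (sketch only; the real consumer is the PEG parser):

/* Pull tokens until the stream ends or errors; token ownership and
   cleanup are elided in this sketch. */
static void toy_drive(struct tok_state *tok)
{
    struct token t;
    for (;;) {
        int type = _PyTokenizer_Get(tok, &t);
        if (type == ERRORTOKEN || type == ENDMARKER) {
            break;          /* failures are reported via tok->done */
        }
        /* ... hand the token to the parser ... */
    }
}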