Coverage Report

Created: 2026-06-09 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.30k
#define ALTTABSIZE 1
11
12
2.15M
/* True when `c` may begin an identifier candidate: an ASCII letter, '_', or
   any value >= 128.  The argument is parenthesized so that any expression can
   be passed, but it is evaluated several times and must therefore be free of
   side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))
17
18
3.26M
/* True when `c` may continue an identifier candidate: an ASCII letter or
   digit, '_', or any value >= 128.  The argument is parenthesized so that any
   expression can be passed, but it is evaluated several times and must
   therefore be free of side effects. */
#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))
24
25
/* Accessors for the tokenizer-mode stack stored in `tok`.
   TOK_GET_MODE yields a pointer to the entry at the current stack index.
   TOK_NEXT_MODE increments the stack index first and yields a pointer to the
   entry at the new index.
   Py_DEBUG builds use functions that assert the index stays inside
   [0, MAXFSTRINGLEVEL); other builds use unchecked macros. */
#ifdef Py_DEBUG
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
/* The argument is parenthesized so that any pointer expression (for example
   `&state`) can be passed.  It is expanded more than once, so it must not
   have side effects. */
#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
#endif
40
41
/* Pick the MIDDLE / END token type for a tokenizer mode: a mode whose
   string_kind is TSTRING gets the TSTRING_* token, any other kind gets the
   FSTRING_* token.  `tok_mode` is parenthesized so any pointer expression can
   be passed. */
#define FTSTRING_MIDDLE(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
#define FTSTRING_END(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
/* Prefix letter for the current mode of `tok`: 't' for TSTRING, 'f' otherwise. */
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
/* Token-construction shortcuts.  Both macros expand to calls that use `tok`,
   `token`, `p_start` and `p_end` from the scope in which the macro appears,
   so those names must be visible at every use site. */
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
/* Return non-zero when the first `size` bytes at `str` contain a NUL byte.
   An empty range never contains one; `str` is not inspected in that case, so
   memchr() is never handed a pointer it may not accept. */
static inline int
contains_null_bytes(const char* str, size_t size)
{
    if (size == 0) {
        return 0;
    }
    return memchr(str, 0, size) != NULL;
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.8M
{
62
11.8M
    int rc;
63
12.1M
    for (;;) {
64
12.1M
        if (tok->cur != tok->inp) {
65
11.5M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.5M
            tok->col_offset++;
70
11.5M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.5M
        }
72
581k
        if (tok->done != E_OK) {
73
196k
            return EOF;
74
196k
        }
75
385k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
385k
        if (!rc) {
84
98.0k
            tok->cur = tok->inp;
85
98.0k
            return EOF;
86
98.0k
        }
87
287k
        tok->line_start = tok->cur;
88
89
287k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
287k
    }
95
11.8M
    Py_UNREACHABLE();
96
11.8M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.69M
{
102
4.69M
    if (c != EOF) {
103
4.49M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.49M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.49M
        tok->col_offset--;
110
4.49M
    }
111
4.69M
}
112
113
static int
114
27.4k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
27.4k
    assert(token != NULL);
116
27.4k
    assert(c == '}' || c == ':' || c == '!');
117
27.4k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
27.4k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
14.5k
        return 0;
121
14.5k
    }
122
12.8k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
12.8k
    int hash_detected = 0;
126
12.8k
    int in_string = 0;
127
12.8k
    char quote_char = 0;
128
129
1.91M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.90M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.90M
        if (ch == '\\') {
134
32.1k
            i++;
135
32.1k
            continue;
136
32.1k
        }
137
138
        // Handle quotes
139
1.87M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works becase there is an off number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
336k
            if (!in_string) {
148
125k
                in_string = 1;
149
125k
                quote_char = ch;
150
125k
            }
151
211k
            else if (ch == quote_char) {
152
123k
                in_string = 0;
153
123k
            }
154
336k
            continue;
155
336k
        }
156
157
        // Check for # outside strings
158
1.53M
        if (ch == '#' && !in_string) {
159
1.03k
            hash_detected = 1;
160
1.03k
            break;
161
1.03k
        }
162
1.53M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
12.8k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
1.03k
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
1.03k
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
1.03k
        Py_ssize_t i = 0;  // Input position
172
1.03k
        Py_ssize_t j = 0;  // Output position
173
1.03k
        in_string = 0;     // Whether we're in a string
174
1.03k
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
190k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
189k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
189k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
22.6k
                if (!in_string) {
184
7.97k
                    in_string = 1;
185
7.97k
                    quote_char = ch;
186
14.6k
                } else if (ch == quote_char) {
187
7.95k
                    in_string = 0;
188
7.95k
                }
189
22.6k
                result[j++] = ch;
190
22.6k
            }
191
            // Skip comments
192
166k
            else if (ch == '#' && !in_string) {
193
219k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
218k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
218k
                    i++;
196
218k
                }
197
1.21k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
243
                    result[j++] = '\n';
199
243
                }
200
1.21k
            }
201
            // Copy other chars
202
165k
            else {
203
165k
                result[j++] = ch;
204
165k
            }
205
189k
            i++;
206
189k
        }
207
208
1.03k
        result[j] = '\0';  // Null-terminate the result string
209
1.03k
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
1.03k
        PyMem_Free(result);
211
11.8k
    } else {
212
11.8k
        res = PyUnicode_DecodeUTF8(
213
11.8k
            tok_mode->last_expr_buffer,
214
11.8k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
11.8k
            NULL
216
11.8k
        );
217
11.8k
    }
218
219
12.8k
    if (!res) {
220
0
        return -1;
221
0
    }
222
12.8k
    token->metadata = res;
223
12.8k
    return 0;
224
12.8k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
70.7k
{
229
70.7k
    assert(tok->cur != NULL);
230
231
70.7k
    Py_ssize_t size = strlen(tok->cur);
232
70.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
70.7k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
43.3k
        case '{':
252
43.3k
            if (tok_mode->last_expr_buffer != NULL) {
253
30.0k
                PyMem_Free(tok_mode->last_expr_buffer);
254
30.0k
            }
255
43.3k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
43.3k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
43.3k
            tok_mode->last_expr_size = size;
260
43.3k
            tok_mode->last_expr_end = -1;
261
43.3k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
43.3k
            break;
263
22.0k
        case '}':
264
24.0k
        case '!':
265
24.0k
            tok_mode->last_expr_end = strlen(tok->start);
266
24.0k
            break;
267
3.33k
        case ':':
268
3.33k
            if (tok_mode->last_expr_end == -1) {
269
3.20k
               tok_mode->last_expr_end = strlen(tok->start);
270
3.20k
            }
271
3.33k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
70.7k
    }
275
70.7k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
70.7k
}
280
281
/* Check, without consuming input, whether the upcoming characters spell
   `test` and are followed by something that cannot continue an identifier.
   Every character read is pushed back before returning.
   Returns 1 on such a match, 0 otherwise. */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *cursor = test;
    int c = tok_nextc(tok);

    /* Consume input for as long as it keeps matching `test`. */
    while (*cursor != 0 && c == *cursor) {
        cursor++;
        c = tok_nextc(tok);
    }

    /* A full match additionally requires a non-identifier character next. */
    int matched = 0;
    if (*cursor == 0) {
        matched = !is_potential_identifier_char(c);
    }

    /* Undo every read, most recent first. */
    tok_backup(tok, c);
    while (cursor != test) {
        tok_backup(tok, *--cursor);
    }
    return matched;
}
303
304
static int
305
94.0k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
94.0k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
93.9k
    int r = 0;
322
93.9k
    if (c == 'a') {
323
1.13k
        r = lookahead(tok, "nd");
324
1.13k
    }
325
92.8k
    else if (c == 'e') {
326
698
        r = lookahead(tok, "lse");
327
698
    }
328
92.1k
    else if (c == 'f') {
329
2.70k
        r = lookahead(tok, "or");
330
2.70k
    }
331
89.4k
    else if (c == 'i') {
332
1.69k
        int c2 = tok_nextc(tok);
333
1.69k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.68k
            r = 1;
335
1.68k
        }
336
1.69k
        tok_backup(tok, c2);
337
1.69k
    }
338
87.7k
    else if (c == 'o') {
339
4.81k
        r = lookahead(tok, "r");
340
4.81k
    }
341
82.9k
    else if (c == 'n') {
342
307
        r = lookahead(tok, "ot");
343
307
    }
344
93.9k
    if (r) {
345
11.2k
        tok_backup(tok, c);
346
11.2k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
11.2k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
11.2k
        tok_nextc(tok);
352
11.2k
    }
353
82.7k
    else /* In future releases, only error will remain. */
354
82.7k
    if (c < 128 && is_potential_identifier_char(c)) {
355
263
        tok_backup(tok, c);
356
263
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
263
        return 0;
358
263
    }
359
93.7k
    return 1;
360
93.9k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
10.1k
{
366
10.1k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
10.1k
    PyObject *s;
370
10.1k
    if (tok->decoding_erred)
371
0
        return 0;
372
10.1k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
10.1k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
10.1k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
10.1k
    assert(invalid >= 0);
384
10.1k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
10.1k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
512
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
512
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
329
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
329
            if (s != NULL) {
391
329
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
329
            }
393
329
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
329
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
329
        }
399
512
        Py_DECREF(s);
400
512
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
275
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
275
        }
403
237
        else {
404
237
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
237
        }
406
512
        return 0;
407
512
    }
408
9.59k
    Py_DECREF(s);
409
9.59k
    return 1;
410
10.1k
}
411
412
/* Consume the rest of a run of decimal digits in which single underscores may
   separate digit groups.
   Returns the first character after the run (already consumed by
   tok_nextc).  If an underscore is not followed by a digit, that character is
   pushed back, an "invalid decimal literal" syntax error is raised and 0 is
   returned. */
static int
tok_decimal_tail(struct tok_state *tok)
{
    int ch = tok_nextc(tok);

    for (;;) {
        /* Swallow a group of digits. */
        while (Py_ISDIGIT(ch)) {
            ch = tok_nextc(tok);
        }
        if (ch != '_') {
            return ch;
        }
        /* An underscore must be followed by another digit. */
        ch = tok_nextc(tok);
        if (!Py_ISDIGIT(ch)) {
            tok_backup(tok, ch);
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
            return 0;
        }
    }
}
433
434
static inline int
435
1.05k
tok_continuation_line(struct tok_state *tok) {
436
1.05k
    int c = tok_nextc(tok);
437
1.05k
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
1.05k
    if (c != '\n') {
441
73
        tok->done = E_LINECONT;
442
73
        return -1;
443
73
    }
444
981
    c = tok_nextc(tok);
445
981
    if (c == EOF) {
446
45
        tok->done = E_EOF;
447
45
        tok->cur = tok->inp;
448
45
        return -1;
449
936
    } else {
450
936
        tok_backup(tok, c);
451
936
    }
452
936
    return c;
453
981
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.6k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.6k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.6k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
21.6k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.6k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.6k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.6k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
21.6k
    if (saw_b && saw_f) {
485
2
        RETURN_SYNTAX_ERROR("b", "f");
486
2
    }
487
21.6k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
21.6k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
21.6k
#undef RETURN_SYNTAX_ERROR
496
497
21.6k
    return 0;
498
21.6k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.17M
{
503
2.17M
    int c;
504
2.17M
    int blankline, nonascii;
505
506
2.17M
    const char *p_start = NULL;
507
2.17M
    const char *p_end = NULL;
508
2.28M
  nextline:
509
2.28M
    tok->start = NULL;
510
2.28M
    tok->starting_col_offset = -1;
511
2.28M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.28M
    if (tok->atbol) {
516
375k
        int col = 0;
517
375k
        int altcol = 0;
518
375k
        tok->atbol = 0;
519
375k
        int cont_line_col = 0;
520
734k
        for (;;) {
521
734k
            c = tok_nextc(tok);
522
734k
            if (c == ' ') {
523
356k
                col++, altcol++;
524
356k
            }
525
377k
            else if (c == '\t') {
526
650
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
650
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
650
            }
529
377k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
807
                col = altcol = 0; /* For Emacs users */
531
807
            }
532
376k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
648
                cont_line_col = cont_line_col ? cont_line_col : col;
538
648
                if ((c = tok_continuation_line(tok)) == -1) {
539
21
                    return MAKE_TOKEN(ERRORTOKEN);
540
21
                }
541
648
            }
542
375k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
375k
            else {
546
375k
                break;
547
375k
            }
548
734k
        }
549
375k
        tok_backup(tok, c);
550
375k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
72.5k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
72.5k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
72.5k
            else {
566
72.5k
                blankline = 1; /* Ignore completely */
567
72.5k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
72.5k
        }
571
375k
        if (!blankline && tok->level == 0) {
572
268k
            col = cont_line_col ? cont_line_col : col;
573
268k
            altcol = cont_line_col ? cont_line_col : altcol;
574
268k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
246k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
246k
            }
580
22.2k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.4k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.4k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.4k
                tok->pendin++;
591
12.4k
                tok->indstack[++tok->indent] = col;
592
12.4k
                tok->altindstack[tok->indent] = altcol;
593
12.4k
            }
594
9.86k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
21.7k
                while (tok->indent > 0 &&
597
18.2k
                    col < tok->indstack[tok->indent]) {
598
11.8k
                    tok->pendin--;
599
11.8k
                    tok->indent--;
600
11.8k
                }
601
9.86k
                if (col != tok->indstack[tok->indent]) {
602
10
                    tok->done = E_DEDENT;
603
10
                    tok->cur = tok->inp;
604
10
                    return MAKE_TOKEN(ERRORTOKEN);
605
10
                }
606
9.85k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.85k
            }
610
268k
        }
611
375k
    }
612
613
2.28M
    tok->start = tok->cur;
614
2.28M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.28M
    if (tok->pendin != 0) {
618
24.2k
        if (tok->pendin < 0) {
619
11.8k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
11.8k
            tok->pendin++;
624
11.8k
            return MAKE_TOKEN(DEDENT);
625
11.8k
        }
626
12.4k
        else {
627
12.4k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.4k
            tok->pendin--;
632
12.4k
            return MAKE_TOKEN(INDENT);
633
12.4k
        }
634
24.2k
    }
635
636
    /* Peek ahead at the next character */
637
2.25M
    c = tok_nextc(tok);
638
2.25M
    tok_backup(tok, c);
639
640
2.25M
 again:
641
2.25M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.69M
    do {
644
2.69M
        c = tok_nextc(tok);
645
2.69M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.25M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.25M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.25M
    if (c == '#') {
653
654
32.4k
        const char* p = NULL;
655
32.4k
        const char *prefix, *type_start;
656
32.4k
        int current_starting_col_offset;
657
658
1.03M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.00M
            c = tok_nextc(tok);
660
1.00M
        }
661
662
32.4k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
32.4k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
32.4k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
32.4k
    }
728
729
2.25M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.25M
    if (c == EOF) {
735
98.0k
        if (tok->level) {
736
4.11k
            return MAKE_TOKEN(ERRORTOKEN);
737
4.11k
        }
738
93.8k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
98.0k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.15M
    nonascii = 0;
743
2.15M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", f"", and t"". */
745
707k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
897k
        while (1) {
747
897k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
19.2k
                saw_b = 1;
749
19.2k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
877k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
87.6k
                saw_u = 1;
754
87.6k
            }
755
            /* ur"" and ru"" are not supported */
756
790k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
26.5k
                saw_r = 1;
758
26.5k
            }
759
763k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
44.6k
                saw_f = 1;
761
44.6k
            }
762
719k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
33.3k
                saw_t = 1;
764
33.3k
            }
765
685k
            else {
766
685k
                break;
767
685k
            }
768
211k
            c = tok_nextc(tok);
769
211k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
21.6k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
21.6k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
21.6k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
21.6k
                if (saw_f || saw_t) {
779
18.5k
                    goto f_string_quote;
780
18.5k
                }
781
3.16k
                goto letter_quote;
782
21.6k
            }
783
211k
        }
784
3.17M
        while (is_potential_identifier_char(c)) {
785
2.49M
            if (c >= 128) {
786
131k
                nonascii = 1;
787
131k
            }
788
2.49M
            c = tok_nextc(tok);
789
2.49M
        }
790
685k
        tok_backup(tok, c);
791
685k
        if (nonascii && !verify_identifier(tok)) {
792
512
            return MAKE_TOKEN(ERRORTOKEN);
793
512
        }
794
795
685k
        p_start = tok->start;
796
685k
        p_end = tok->cur;
797
798
685k
        return MAKE_TOKEN(NAME);
799
685k
    }
800
801
1.45M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.45M
    if (c == '\n') {
807
275k
        tok->atbol = 1;
808
275k
        if (blankline || tok->level > 0) {
809
107k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
107k
            goto nextline;
818
107k
        }
819
167k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
167k
        p_start = tok->start;
826
167k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
167k
        tok->cont_line = 0;
828
167k
        return MAKE_TOKEN(NEWLINE);
829
167k
    }
830
831
    /* Period or number starting with period? */
832
1.17M
    if (c == '.') {
833
32.7k
        c = tok_nextc(tok);
834
32.7k
        if (Py_ISDIGIT(c)) {
835
3.30k
            goto fraction;
836
29.4k
        } else if (c == '.') {
837
2.14k
            c = tok_nextc(tok);
838
2.14k
            if (c == '.') {
839
1.33k
                p_start = tok->start;
840
1.33k
                p_end = tok->cur;
841
1.33k
                return MAKE_TOKEN(ELLIPSIS);
842
1.33k
            }
843
810
            else {
844
810
                tok_backup(tok, c);
845
810
            }
846
810
            tok_backup(tok, '.');
847
810
        }
848
27.2k
        else {
849
27.2k
            tok_backup(tok, c);
850
27.2k
        }
851
28.1k
        p_start = tok->start;
852
28.1k
        p_end = tok->cur;
853
28.1k
        return MAKE_TOKEN(DOT);
854
32.7k
    }
855
856
    /* Number */
857
1.14M
    if (Py_ISDIGIT(c)) {
858
90.8k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
31.1k
            c = tok_nextc(tok);
861
31.1k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
13.9k
                c = tok_nextc(tok);
864
14.1k
                do {
865
14.1k
                    if (c == '_') {
866
221
                        c = tok_nextc(tok);
867
221
                    }
868
14.1k
                    if (!Py_ISXDIGIT(c)) {
869
18
                        tok_backup(tok, c);
870
18
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
18
                    }
872
72.7k
                    do {
873
72.7k
                        c = tok_nextc(tok);
874
72.7k
                    } while (Py_ISXDIGIT(c));
875
14.1k
                } while (c == '_');
876
13.9k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
13.9k
            }
880
17.2k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
637
                c = tok_nextc(tok);
883
914
                do {
884
914
                    if (c == '_') {
885
279
                        c = tok_nextc(tok);
886
279
                    }
887
914
                    if (c < '0' || c >= '8') {
888
20
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
19
                        else {
893
19
                            tok_backup(tok, c);
894
19
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
19
                        }
896
20
                    }
897
4.46k
                    do {
898
4.46k
                        c = tok_nextc(tok);
899
4.46k
                    } while ('0' <= c && c < '8');
900
894
                } while (c == '_');
901
617
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
616
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
616
            }
909
16.5k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
982
                c = tok_nextc(tok);
912
1.28k
                do {
913
1.28k
                    if (c == '_') {
914
304
                        c = tok_nextc(tok);
915
304
                    }
916
1.28k
                    if (c != '0' && c != '1') {
917
21
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
20
                        else {
921
20
                            tok_backup(tok, c);
922
20
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
20
                        }
924
21
                    }
925
3.72k
                    do {
926
3.72k
                        c = tok_nextc(tok);
927
3.72k
                    } while (c == '0' || c == '1');
928
1.25k
                } while (c == '_');
929
961
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
959
                if (!verify_end_of_number(tok, c, "binary")) {
933
5
                    return MAKE_TOKEN(ERRORTOKEN);
934
5
                }
935
959
            }
936
15.6k
            else {
937
15.6k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
17.7k
                while (1) {
941
17.7k
                    if (c == '_') {
942
307
                        c = tok_nextc(tok);
943
307
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
307
                    }
948
17.7k
                    if (c != '0') {
949
15.6k
                        break;
950
15.6k
                    }
951
2.15k
                    c = tok_nextc(tok);
952
2.15k
                }
953
15.6k
                char* zeros_end = tok->cur;
954
15.6k
                if (Py_ISDIGIT(c)) {
955
486
                    nonzero = 1;
956
486
                    c = tok_decimal_tail(tok);
957
486
                    if (c == 0) {
958
1
                        return MAKE_TOKEN(ERRORTOKEN);
959
1
                    }
960
486
                }
961
15.5k
                if (c == '.') {
962
964
                    c = tok_nextc(tok);
963
964
                    goto fraction;
964
964
                }
965
14.6k
                else if (c == 'e' || c == 'E') {
966
777
                    goto exponent;
967
777
                }
968
13.8k
                else if (c == 'j' || c == 'J') {
969
708
                    goto imaginary;
970
708
                }
971
13.1k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
38
                    tok_backup(tok, c);
974
38
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
38
                            tok, (int)(tok->start + 1 - tok->line_start),
976
38
                            (int)(zeros_end - tok->line_start),
977
38
                            "leading zeros in decimal integer "
978
38
                            "literals are not permitted; "
979
38
                            "use an 0o prefix for octal integers"));
980
38
                }
981
13.1k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
26
                    return MAKE_TOKEN(ERRORTOKEN);
983
26
                }
984
13.1k
            }
985
31.1k
        }
986
59.7k
        else {
987
            /* Decimal */
988
59.7k
            c = tok_decimal_tail(tok);
989
59.7k
            if (c == 0) {
990
17
                return MAKE_TOKEN(ERRORTOKEN);
991
17
            }
992
59.6k
            {
993
                /* Accept floating-point numbers. */
994
59.6k
                if (c == '.') {
995
3.86k
                    c = tok_nextc(tok);
996
8.12k
        fraction:
997
                    /* Fraction */
998
8.12k
                    if (Py_ISDIGIT(c)) {
999
6.19k
                        c = tok_decimal_tail(tok);
1000
6.19k
                        if (c == 0) {
1001
2
                            return MAKE_TOKEN(ERRORTOKEN);
1002
2
                        }
1003
6.19k
                    }
1004
8.12k
                }
1005
63.9k
                if (c == 'e' || c == 'E') {
1006
8.86k
                    int e;
1007
9.64k
                  exponent:
1008
9.64k
                    e = c;
1009
                    /* Exponent part */
1010
9.64k
                    c = tok_nextc(tok);
1011
9.64k
                    if (c == '+' || c == '-') {
1012
3.89k
                        c = tok_nextc(tok);
1013
3.89k
                        if (!Py_ISDIGIT(c)) {
1014
14
                            tok_backup(tok, c);
1015
14
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
14
                        }
1017
5.75k
                    } else if (!Py_ISDIGIT(c)) {
1018
686
                        tok_backup(tok, c);
1019
686
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
51
                            return MAKE_TOKEN(ERRORTOKEN);
1021
51
                        }
1022
635
                        tok_backup(tok, e);
1023
635
                        p_start = tok->start;
1024
635
                        p_end = tok->cur;
1025
635
                        return MAKE_TOKEN(NUMBER);
1026
686
                    }
1027
8.94k
                    c = tok_decimal_tail(tok);
1028
8.94k
                    if (c == 0) {
1029
4
                        return MAKE_TOKEN(ERRORTOKEN);
1030
4
                    }
1031
8.94k
                }
1032
64.0k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
5.07k
        imaginary:
1035
5.07k
                    c = tok_nextc(tok);
1036
5.07k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
14
                        return MAKE_TOKEN(ERRORTOKEN);
1038
14
                    }
1039
5.07k
                }
1040
59.6k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
162
                    return MAKE_TOKEN(ERRORTOKEN);
1042
162
                }
1043
64.0k
            }
1044
64.0k
        }
1045
93.1k
        tok_backup(tok, c);
1046
93.1k
        p_start = tok->start;
1047
93.1k
        p_end = tok->cur;
1048
93.1k
        return MAKE_TOKEN(NUMBER);
1049
90.8k
    }
1050
1051
1.07M
  f_string_quote:
1052
1.07M
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
18.5k
        && (c == '\'' || c == '"'))) {
1054
1055
18.5k
        int quote = c;
1056
18.5k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
18.5k
        tok->first_lineno = tok->lineno;
1063
18.5k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
18.5k
        int after_quote = tok_nextc(tok);
1067
18.5k
        if (after_quote == quote) {
1068
2.77k
            int after_after_quote = tok_nextc(tok);
1069
2.77k
            if (after_after_quote == quote) {
1070
794
                quote_size = 3;
1071
794
            }
1072
1.97k
            else {
1073
                // TODO: Check this
1074
1.97k
                tok_backup(tok, after_after_quote);
1075
1.97k
                tok_backup(tok, after_quote);
1076
1.97k
            }
1077
2.77k
        }
1078
18.5k
        if (after_quote != quote) {
1079
15.7k
            tok_backup(tok, after_quote);
1080
15.7k
        }
1081
1082
1083
18.5k
        p_start = tok->start;
1084
18.5k
        p_end = tok->cur;
1085
18.5k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
18.5k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
18.5k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
18.5k
        the_current_tok->quote = quote;
1091
18.5k
        the_current_tok->quote_size = quote_size;
1092
18.5k
        the_current_tok->start = tok->start;
1093
18.5k
        the_current_tok->multi_line_start = tok->line_start;
1094
18.5k
        the_current_tok->first_line = tok->lineno;
1095
18.5k
        the_current_tok->start_offset = -1;
1096
18.5k
        the_current_tok->multi_line_start_offset = -1;
1097
18.5k
        the_current_tok->last_expr_buffer = NULL;
1098
18.5k
        the_current_tok->last_expr_size = 0;
1099
18.5k
        the_current_tok->last_expr_end = -1;
1100
18.5k
        the_current_tok->in_format_spec = 0;
1101
18.5k
        the_current_tok->in_debug = 0;
1102
1103
18.5k
        enum string_kind_t string_kind = FSTRING;
1104
18.5k
        switch (*tok->start) {
1105
943
            case 'T':
1106
5.02k
            case 't':
1107
5.02k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
5.02k
                string_kind = TSTRING;
1109
5.02k
                break;
1110
1.72k
            case 'F':
1111
12.9k
            case 'f':
1112
12.9k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
12.9k
                break;
1114
196
            case 'R':
1115
593
            case 'r':
1116
593
                the_current_tok->raw = 1;
1117
593
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
190
                    string_kind = TSTRING;
1119
190
                }
1120
593
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
18.5k
        }
1124
1125
18.5k
        the_current_tok->string_kind = string_kind;
1126
18.5k
        the_current_tok->curly_bracket_depth = 0;
1127
18.5k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
18.5k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
18.5k
    }
1130
1131
1.05M
  letter_quote:
1132
    /* String */
1133
1.05M
    if (c == '\'' || c == '"') {
1134
39.1k
        int quote = c;
1135
39.1k
        int quote_size = 1;             /* 1 or 3 */
1136
39.1k
        int end_quote_size = 0;
1137
39.1k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
39.1k
        tok->first_lineno = tok->lineno;
1144
39.1k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
39.1k
        c = tok_nextc(tok);
1148
39.1k
        if (c == quote) {
1149
6.99k
            c = tok_nextc(tok);
1150
6.99k
            if (c == quote) {
1151
1.40k
                quote_size = 3;
1152
1.40k
            }
1153
5.59k
            else {
1154
5.59k
                end_quote_size = 1;     /* empty string found */
1155
5.59k
            }
1156
6.99k
        }
1157
39.1k
        if (c != quote) {
1158
37.7k
            tok_backup(tok, c);
1159
37.7k
        }
1160
1161
        /* Get rest of string */
1162
559k
        while (end_quote_size != quote_size) {
1163
521k
            c = tok_nextc(tok);
1164
521k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
521k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
521k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
403
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
403
                tok->cur = (char *)tok->start;
1176
403
                tok->cur++;
1177
403
                tok->line_start = tok->multi_line_start;
1178
403
                int start = tok->lineno;
1179
403
                tok->lineno = tok->first_lineno;
1180
1181
403
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes
1185
                     * and if it does, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
49
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
49
                    if (the_current_tok->quote == quote &&
1189
35
                        the_current_tok->quote_size == quote_size) {
1190
33
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
33
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
33
                    }
1193
49
                }
1194
1195
370
                if (quote_size == 3) {
1196
33
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
33
                                     " (detected at line %d)", start);
1198
33
                    if (c != '\n') {
1199
33
                        tok->done = E_EOFS;
1200
33
                    }
1201
33
                    return MAKE_TOKEN(ERRORTOKEN);
1202
33
                }
1203
337
                else {
1204
337
                    if (has_escaped_quote) {
1205
9
                        _PyTokenizer_syntaxerror(
1206
9
                            tok,
1207
9
                            "unterminated string literal (detected at line %d); "
1208
9
                            "perhaps you escaped the end quote?",
1209
9
                            start
1210
9
                        );
1211
328
                    } else {
1212
328
                        _PyTokenizer_syntaxerror(
1213
328
                            tok, "unterminated string literal (detected at line %d)", start
1214
328
                        );
1215
328
                    }
1216
337
                    if (c != '\n') {
1217
7
                        tok->done = E_EOLS;
1218
7
                    }
1219
337
                    return MAKE_TOKEN(ERRORTOKEN);
1220
337
                }
1221
370
            }
1222
520k
            if (c == quote) {
1223
36.6k
                end_quote_size += 1;
1224
36.6k
            }
1225
483k
            else {
1226
483k
                end_quote_size = 0;
1227
483k
                if (c == '\\') {
1228
22.7k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
22.7k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
708
                        has_escaped_quote = 1;
1231
708
                    }
1232
22.7k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
22.7k
                }
1236
483k
            }
1237
520k
        }
1238
1239
38.7k
        p_start = tok->start;
1240
38.7k
        p_end = tok->cur;
1241
38.7k
        return MAKE_TOKEN(STRING);
1242
39.1k
    }
1243
1244
    /* Line continuation */
1245
1.01M
    if (c == '\\') {
1246
406
        if ((c = tok_continuation_line(tok)) == -1) {
1247
97
            return MAKE_TOKEN(ERRORTOKEN);
1248
97
        }
1249
309
        tok->cont_line = 1;
1250
309
        goto again; /* Read next line */
1251
406
    }
1252
1253
    /* Punctuation character */
1254
1.01M
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
1.01M
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so to ensure that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
62.8k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
62.8k
        int in_format_spec = current_tok->in_format_spec;
1261
62.8k
         int cursor_in_format_with_debug =
1262
62.8k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
62.8k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
62.8k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
62.8k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
62.8k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
5.03k
            current_tok->kind = TOK_FSTRING_MODE;
1273
5.03k
            current_tok->in_format_spec = 1;
1274
5.03k
            p_start = tok->start;
1275
5.03k
            p_end = tok->cur;
1276
5.03k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
5.03k
        }
1278
62.8k
    }
1279
1280
    /* Check for two-character token */
1281
1.01M
    {
1282
1.01M
        int c2 = tok_nextc(tok);
1283
1.01M
        int current_token = _PyToken_TwoChars(c, c2);
1284
1.01M
        if (current_token != OP) {
1285
25.6k
            int c3 = tok_nextc(tok);
1286
25.6k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
25.6k
            if (current_token3 != OP) {
1288
1.56k
                current_token = current_token3;
1289
1.56k
            }
1290
24.0k
            else {
1291
24.0k
                tok_backup(tok, c3);
1292
24.0k
            }
1293
25.6k
            p_start = tok->start;
1294
25.6k
            p_end = tok->cur;
1295
25.6k
            return MAKE_TOKEN(current_token);
1296
25.6k
        }
1297
986k
        tok_backup(tok, c2);
1298
986k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
73.6k
    case '(':
1303
115k
    case '[':
1304
163k
    case '{':
1305
163k
        if (tok->level >= MAXLEVEL) {
1306
16
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
16
        }
1308
163k
        tok->parenstack[tok->level] = c;
1309
163k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
163k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
163k
        tok->level++;
1312
163k
        if (INSIDE_FSTRING(tok)) {
1313
34.9k
            current_tok->curly_bracket_depth++;
1314
34.9k
        }
1315
163k
        break;
1316
46.9k
    case ')':
1317
53.3k
    case ']':
1318
82.1k
    case '}':
1319
82.1k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
50
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
50
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
50
        }
1323
82.0k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
221
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
221
        }
1326
81.8k
        if (tok->level > 0) {
1327
81.8k
            tok->level--;
1328
81.8k
            int opening = tok->parenstack[tok->level];
1329
81.8k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
35.0k
                                            (opening == '[' && c == ']') ||
1331
28.6k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it, we'll throw an unmatched
1336
                parentheses error. */
1337
48
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
5
                    assert(current_tok->curly_bracket_depth >= 0);
1339
5
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
5
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
3
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
3
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
3
                    }
1344
5
                }
1345
45
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
5
                            "closing parenthesis '%c' does not match "
1348
5
                            "opening parenthesis '%c' on line %d",
1349
5
                            c, opening, tok->parenlinenostack[tok->level]));
1350
5
                }
1351
40
                else {
1352
40
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
40
                            "closing parenthesis '%c' does not match "
1354
40
                            "opening parenthesis '%c'",
1355
40
                            c, opening));
1356
40
                }
1357
45
            }
1358
81.8k
        }
1359
1360
81.7k
        if (INSIDE_FSTRING(tok)) {
1361
26.3k
            current_tok->curly_bracket_depth--;
1362
26.3k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
26.3k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
24.2k
                current_tok->curly_bracket_expr_start_depth--;
1368
24.2k
                current_tok->kind = TOK_FSTRING_MODE;
1369
24.2k
                current_tok->in_format_spec = 0;
1370
24.2k
                current_tok->in_debug = 0;
1371
24.2k
            }
1372
26.3k
        }
1373
81.7k
        break;
1374
740k
    default:
1375
740k
        break;
1376
986k
    }
1377
1378
985k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
459
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
459
    }
1381
1382
985k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
6.61k
        current_tok->in_debug = 1;
1384
6.61k
    }
1385
1386
    /* Punctuation character */
1387
985k
    p_start = tok->start;
1388
985k
    p_end = tok->cur;
1389
985k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
985k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
56.7k
{
1395
56.7k
    const char *p_start = NULL;
1396
56.7k
    const char *p_end = NULL;
1397
56.7k
    int end_quote_size = 0;
1398
56.7k
    int unicode_escape = 0;
1399
1400
56.7k
    tok->start = tok->cur;
1401
56.7k
    tok->first_lineno = tok->lineno;
1402
56.7k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
56.7k
    int start_char = tok_nextc(tok);
1407
56.7k
    if (start_char == '{') {
1408
15.8k
        int peek1 = tok_nextc(tok);
1409
15.8k
        tok_backup(tok, peek1);
1410
15.8k
        tok_backup(tok, start_char);
1411
15.8k
        if (peek1 != '{') {
1412
14.5k
            current_tok->curly_bracket_expr_start_depth++;
1413
14.5k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
2
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
2
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
2
            }
1417
14.5k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
14.5k
            return tok_get_normal_mode(tok, current_tok, token);
1419
14.5k
        }
1420
15.8k
    }
1421
40.8k
    else {
1422
40.8k
        tok_backup(tok, start_char);
1423
40.8k
    }
1424
1425
    // Check if we are at the end of the string
1426
59.9k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
45.6k
        int quote = tok_nextc(tok);
1428
45.6k
        if (quote != current_tok->quote) {
1429
27.9k
            tok_backup(tok, quote);
1430
27.9k
            goto f_string_middle;
1431
27.9k
        }
1432
45.6k
    }
1433
1434
14.3k
    if (current_tok->last_expr_buffer != NULL) {
1435
9.34k
        PyMem_Free(current_tok->last_expr_buffer);
1436
9.34k
        current_tok->last_expr_buffer = NULL;
1437
9.34k
        current_tok->last_expr_size = 0;
1438
9.34k
        current_tok->last_expr_end = -1;
1439
9.34k
    }
1440
1441
14.3k
    p_start = tok->start;
1442
14.3k
    p_end = tok->cur;
1443
14.3k
    tok->tok_mode_stack_index--;
1444
14.3k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
1446
27.9k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
27.9k
    tok->multi_line_start = tok->line_start;
1451
222k
    while (end_quote_size != current_tok->quote_size) {
1452
216k
        int c = tok_nextc(tok);
1453
216k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
216k
        int in_format_spec = (
1457
216k
                current_tok->in_format_spec
1458
12.1k
                &&
1459
12.1k
                INSIDE_FSTRING_EXPR(current_tok)
1460
216k
        );
1461
1462
216k
       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
376
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
376
            if (in_format_spec && c == '\n') {
1471
43
                if (current_tok->quote_size == 1) {
1472
43
                    return MAKE_TOKEN(
1473
43
                        _PyTokenizer_syntaxerror(
1474
43
                            tok,
1475
43
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
43
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
43
                        )
1478
43
                    );
1479
43
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
43
            }
1487
1488
376
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
1492
333
            tok->cur = (char *)current_tok->start;
1493
333
            tok->cur++;
1494
333
            tok->line_start = current_tok->multi_line_start;
1495
333
            int start = tok->lineno;
1496
1497
333
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
333
            tok->lineno = the_current_tok->first_line;
1499
1500
333
            if (current_tok->quote_size == 3) {
1501
24
                _PyTokenizer_syntaxerror(tok,
1502
24
                                    "unterminated triple-quoted %c-string literal"
1503
24
                                    " (detected at line %d)",
1504
24
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
24
                if (c != '\n') {
1506
24
                    tok->done = E_EOFS;
1507
24
                }
1508
24
                return MAKE_TOKEN(ERRORTOKEN);
1509
24
            }
1510
309
            else {
1511
309
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
309
                                    "unterminated %c-string literal (detected at"
1513
309
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
309
            }
1515
333
        }
1516
1517
216k
        if (c == current_tok->quote) {
1518
9.35k
            end_quote_size += 1;
1519
9.35k
            continue;
1520
207k
        } else {
1521
207k
            end_quote_size = 0;
1522
207k
        }
1523
1524
207k
        if (c == '{') {
1525
16.7k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
16.7k
            int peek = tok_nextc(tok);
1529
16.7k
            if (peek != '{' || in_format_spec) {
1530
14.6k
                tok_backup(tok, peek);
1531
14.6k
                tok_backup(tok, c);
1532
14.6k
                current_tok->curly_bracket_expr_start_depth++;
1533
14.6k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
6
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
6
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
6
                }
1537
14.6k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
14.6k
                current_tok->in_format_spec = 0;
1539
14.6k
                p_start = tok->start;
1540
14.6k
                p_end = tok->cur;
1541
14.6k
            } else {
1542
2.09k
                p_start = tok->start;
1543
2.09k
                p_end = tok->cur - 1;
1544
2.09k
            }
1545
16.7k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
190k
        } else if (c == '}') {
1547
5.17k
            if (unicode_escape) {
1548
399
                p_start = tok->start;
1549
399
                p_end = tok->cur;
1550
399
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
399
            }
1552
4.77k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass it here.
1558
4.77k
            int cursor = current_tok->curly_bracket_depth;
1559
4.77k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
894
                p_start = tok->start;
1561
894
                p_end = tok->cur - 1;
1562
3.88k
            } else {
1563
3.88k
                tok_backup(tok, peek);
1564
3.88k
                tok_backup(tok, c);
1565
3.88k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.88k
                current_tok->in_format_spec = 0;
1567
3.88k
                p_start = tok->start;
1568
3.88k
                p_end = tok->cur;
1569
3.88k
            }
1570
4.77k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
185k
        } else if (c == '\\') {
1572
6.05k
            int peek = tok_nextc(tok);
1573
6.05k
            if (peek == '\r') {
1574
0
                peek = tok_nextc(tok);
1575
0
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore and return the control back
1578
            // to the loop for the next iteration.
1579
6.05k
            if (peek == '{' || peek == '}') {
1580
690
                if (!current_tok->raw) {
1581
624
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
0
                        return MAKE_TOKEN(ERRORTOKEN);
1583
0
                    }
1584
624
                }
1585
690
                tok_backup(tok, peek);
1586
690
                continue;
1587
690
            }
1588
1589
5.36k
            if (!current_tok->raw) {
1590
5.14k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
631
                    peek = tok_nextc(tok);
1593
631
                    if (peek == '{') {
1594
417
                        unicode_escape = 1;
1595
417
                    } else {
1596
214
                        tok_backup(tok, peek);
1597
214
                    }
1598
631
                }
1599
5.14k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
5.36k
        }
1603
207k
    }
1604
1605
    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1607
12.2k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.69k
        tok_backup(tok, current_tok->quote);
1609
6.69k
    }
1610
5.57k
    p_start = tok->start;
1611
5.57k
    p_end = tok->cur;
1612
5.57k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
27.9k
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.21M
{
1618
2.21M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.21M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.15M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.15M
    } else {
1622
56.7k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
56.7k
    }
1624
2.21M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.21M
{
1629
2.21M
    int result = tok_get(tok, token);
1630
2.21M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.21M
    return result;
1635
2.21M
}