Coverage Report

Created: 2025-11-11 06:44

/src/cpython/Parser/lexer/lexer.c
 Line|   Count|Source
    1|        |#include "Python.h"
    2|        |#include "pycore_token.h"
    3|        |#include "pycore_unicodeobject.h"
    4|        |#include "errcode.h"
    5|        |
    6|        |#include "state.h"
    7|        |#include "../tokenizer/helpers.h"
    8|        |
    9|        |/* Alternate tab spacing */
   10|   1.25k|#define ALTTABSIZE 1
   11|        |
   12|   1.71M|#define is_potential_identifier_start(c) (\
   13|   1.71M|              (c >= 'a' && c <= 'z')\
   14|   1.71M|               || (c >= 'A' && c <= 'Z')\
   15|   1.71M|               || c == '_'\
   16|   1.71M|               || (c >= 128))
   17|        |
   18|   2.59M|#define is_potential_identifier_char(c) (\
   19|   2.59M|              (c >= 'a' && c <= 'z')\
   20|   2.59M|               || (c >= 'A' && c <= 'Z')\
   21|   2.59M|               || (c >= '0' && c <= '9')\
   22|   2.59M|               || c == '_'\
   23|   2.59M|               || (c >= 128))
   24|        |
   25|        |#ifdef Py_DEBUG
   26|        |static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
   27|        |    assert(tok->tok_mode_stack_index >= 0);
   28|        |    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
   29|        |    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
   30|        |}
   31|        |static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
   32|        |    assert(tok->tok_mode_stack_index >= 0);
   33|        |    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
   34|        |    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
   35|        |}
   36|        |#else
   37|   1.83M|#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
   38|   16.5k|#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
   39|        |#endif
   40|        |
   41|        |#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
   42|        |#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
   43|      42|#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
   44|   1.71M|#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
   45|       0|#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
   46|       0|                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
   47|        |
   48|        |/* Spaces in this constant are treated as "zero or more spaces or tabs" when
   49|        |   tokenizing. */
   50|        |static const char* type_comment_prefix = "# type: ";
   51|        |
   52|        |static inline int
   53|        |contains_null_bytes(const char* str, size_t size)
   54|    249k|{
   55|    249k|    return memchr(str, 0, size) != NULL;
   56|    249k|}
   57|        |
   58|        |/* Get next char, updating state; error code goes into tok->done */
   59|        |static int
   60|        |tok_nextc(struct tok_state *tok)
   61|   11.0M|{
   62|   11.0M|    int rc;
   63|   11.3M|    for (;;) {
   64|   11.3M|        if (tok->cur != tok->inp) {
   65|   11.0M|            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
   66|       0|                tok->done = E_COLUMNOVERFLOW;
   67|       0|                return EOF;
   68|       0|            }
   69|   11.0M|            tok->col_offset++;
   70|   11.0M|            return Py_CHARMASK(*tok->cur++); /* Fast path */
   71|   11.0M|        }
   72|    298k|        if (tok->done != E_OK) {
   73|   32.4k|            return EOF;
   74|   32.4k|        }
   75|    265k|        rc = tok->underflow(tok);
   76|        |#if defined(Py_DEBUG)
   77|        |        if (tok->debug) {
   78|        |            fprintf(stderr, "line[%d] = ", tok->lineno);
   79|        |            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
   80|        |            fprintf(stderr, "  tok->done = %d\n", tok->done);
   81|        |        }
   82|        |#endif
   83|    265k|        if (!rc) {
   84|   16.4k|            tok->cur = tok->inp;
   85|   16.4k|            return EOF;
   86|   16.4k|        }
   87|    249k|        tok->line_start = tok->cur;
   88|        |
   89|    249k|        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
   90|       0|            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
   91|       0|            tok->cur = tok->inp;
   92|       0|            return EOF;
   93|       0|        }
   94|    249k|    }
   95|   11.0M|    Py_UNREACHABLE();
   96|   11.0M|}
   97|        |
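Editorial sketch (Python, not part of the measured source): the contains_null_bytes()/tok_nextc() pair above is what rejects NUL bytes in source text. Assuming CPython 3.12+, the rejection surfaces through compile() with the message hard-coded at line 90:

    try:
        compile("x = 1\x00", "<test>", "exec")
    except (SyntaxError, ValueError) as e:  # SyntaxError on 3.12+, ValueError on older versions
        print(e)  # ... cannot contain null bytes
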
   98|        |/* Back-up one character */
   99|        |static void
  100|        |tok_backup(struct tok_state *tok, int c)
  101|   3.62M|{
  102|   3.62M|    if (c != EOF) {
  103|   3.59M|        if (--tok->cur < tok->buf) {
  104|       0|            Py_FatalError("tokenizer beginning of buffer");
  105|       0|        }
  106|   3.59M|        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
  107|       0|            Py_FatalError("tok_backup: wrong character");
  108|       0|        }
  109|   3.59M|        tok->col_offset--;
  110|   3.59M|    }
  111|   3.62M|}
  112|        |
  113|        |static int
  114|   24.0k|set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
  115|   24.0k|    assert(token != NULL);
  116|   24.0k|    assert(c == '}' || c == ':' || c == '!');
  117|   24.0k|    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
  118|        |
  119|   24.0k|    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
  120|   13.3k|        return 0;
  121|   13.3k|    }
  122|   10.7k|    PyObject *res = NULL;
  123|        |
  124|        |    // Look for a # character outside of string literals
  125|   10.7k|    int hash_detected = 0;
  126|   10.7k|    int in_string = 0;
  127|   10.7k|    char quote_char = 0;
  128|        |
  129|   1.08M|    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
  130|   1.07M|        char ch = tok_mode->last_expr_buffer[i];
  131|        |
  132|        |        // Skip escaped characters
  133|   1.07M|        if (ch == '\\') {
  134|   18.5k|            i++;
  135|   18.5k|            continue;
  136|   18.5k|        }
  137|        |
  138|        |        // Handle quotes
  139|   1.05M|        if (ch == '"' || ch == '\'') {
  140|        |            // The following if/else block works because there is an odd number
  141|        |            // of quotes in STRING tokens and the lexer only ever reaches this
  142|        |            // function with valid STRING tokens.
  143|        |            // For example: """hello"""
  144|        |            // First quote: in_string = 1
  145|        |            // Second quote: in_string = 0
  146|        |            // Third quote: in_string = 1
  147|    194k|            if (!in_string) {
  148|   72.8k|                in_string = 1;
  149|   72.8k|                quote_char = ch;
  150|   72.8k|            }
  151|    122k|            else if (ch == quote_char) {
  152|   72.0k|                in_string = 0;
  153|   72.0k|            }
  154|    194k|            continue;
  155|    194k|        }
  156|        |
  157|        |        // Check for # outside strings
  158|    858k|        if (ch == '#' && !in_string) {
  159|     901|            hash_detected = 1;
  160|     901|            break;
  161|     901|        }
  162|    858k|    }
  163|        |    // If we found a # character in the expression, we need to handle comments
  164|   10.7k|    if (hash_detected) {
  165|        |        // Allocate buffer for processed result
  166|     901|        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
  167|     901|        if (!result) {
  168|       0|            return -1;
  169|       0|        }
  170|        |
  171|     901|        Py_ssize_t i = 0;  // Input position
  172|     901|        Py_ssize_t j = 0;  // Output position
  173|     901|        in_string = 0;     // Whether we're in a string
  174|     901|        quote_char = 0;    // Current string quote char
  175|        |
  176|        |        // Process each character
  177|   63.5k|        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
  178|   62.6k|            char ch = tok_mode->last_expr_buffer[i];
  179|        |
  180|        |            // Handle string quotes
  181|   62.6k|            if (ch == '"' || ch == '\'') {
  182|        |                // See comment above to understand this part
  183|   9.74k|                if (!in_string) {
  184|   3.85k|                    in_string = 1;
  185|   3.85k|                    quote_char = ch;
  186|   5.89k|                } else if (ch == quote_char) {
  187|   3.84k|                    in_string = 0;
  188|   3.84k|                }
  189|   9.74k|                result[j++] = ch;
  190|   9.74k|            }
  191|        |            // Skip comments
  192|   52.9k|            else if (ch == '#' && !in_string) {
  193|   47.3k|                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
  194|   46.6k|                       tok_mode->last_expr_buffer[i] != '\n') {
  195|   46.2k|                    i++;
  196|   46.2k|                }
  197|   1.11k|                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
  198|     345|                    result[j++] = '\n';
  199|     345|                }
  200|   1.11k|            }
  201|        |            // Copy other chars
  202|   51.7k|            else {
  203|   51.7k|                result[j++] = ch;
  204|   51.7k|            }
  205|   62.6k|            i++;
  206|   62.6k|        }
  207|        |
  208|     901|        result[j] = '\0';  // Null-terminate the result string
  209|     901|        res = PyUnicode_DecodeUTF8(result, j, NULL);
  210|     901|        PyMem_Free(result);
  211|   9.83k|    } else {
  212|   9.83k|        res = PyUnicode_DecodeUTF8(
  213|   9.83k|            tok_mode->last_expr_buffer,
  214|   9.83k|            tok_mode->last_expr_size - tok_mode->last_expr_end,
  215|   9.83k|            NULL
  216|   9.83k|        );
  217|   9.83k|    }
  218|        |
  219|   10.7k|    if (!res) {
  220|       0|        return -1;
  221|       0|    }
  222|   10.7k|    token->metadata = res;
  223|   10.7k|    return 0;
  224|   10.7k|}
  225|        |
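Editorial sketch (Python, not part of the measured source): the token->metadata built above carries the verbatim expression text that powers the f-string '=' debug form:

    x = 41
    print(f"{x + 1 = }")  # prints: x + 1 = 42  (the "x + 1 " text comes from the captured expression buffer)
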
  226|        |int
  227|        |_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
  228|   64.6k|{
  229|   64.6k|    assert(tok->cur != NULL);
  230|        |
  231|   64.6k|    Py_ssize_t size = strlen(tok->cur);
  232|   64.6k|    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
  233|        |
  234|   64.6k|    switch (cur) {
  235|       0|        case 0:
  236|       0|            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
  237|       0|                return 1;
  238|       0|            }
  239|       0|            char *new_buffer = PyMem_Realloc(
  240|       0|                tok_mode->last_expr_buffer,
  241|       0|                tok_mode->last_expr_size + size
  242|       0|            );
  243|       0|            if (new_buffer == NULL) {
  244|       0|                PyMem_Free(tok_mode->last_expr_buffer);
  245|       0|                goto error;
  246|       0|            }
  247|       0|            tok_mode->last_expr_buffer = new_buffer;
  248|       0|            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
  249|       0|            tok_mode->last_expr_size += size;
  250|       0|            break;
  251|   40.5k|        case '{':
  252|   40.5k|            if (tok_mode->last_expr_buffer != NULL) {
  253|   29.1k|                PyMem_Free(tok_mode->last_expr_buffer);
  254|   29.1k|            }
  255|   40.5k|            tok_mode->last_expr_buffer = PyMem_Malloc(size);
  256|   40.5k|            if (tok_mode->last_expr_buffer == NULL) {
  257|       0|                goto error;
  258|       0|            }
  259|   40.5k|            tok_mode->last_expr_size = size;
  260|   40.5k|            tok_mode->last_expr_end = -1;
  261|   40.5k|            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
  262|   40.5k|            break;
  263|   18.8k|        case '}':
  264|   20.7k|        case '!':
  265|   20.7k|            tok_mode->last_expr_end = strlen(tok->start);
  266|   20.7k|            break;
  267|   3.35k|        case ':':
  268|   3.35k|            if (tok_mode->last_expr_end == -1) {
  269|   2.98k|                tok_mode->last_expr_end = strlen(tok->start);
  270|   2.98k|            }
  271|   3.35k|            break;
  272|       0|        default:
  273|       0|            Py_UNREACHABLE();
  274|   64.6k|    }
  275|   64.6k|    return 1;
  276|       0|error:
  277|       0|    tok->done = E_NOMEM;
  278|       0|    return 0;
  279|   64.6k|}
  280|        |
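Editorial sketch (Python, not part of the measured source): the per-mode '{' / ':' / '}' bookkeeping above is what lets replacement fields nest inside a format spec:

    value, width, prec = 12.34567, 10, 4
    print(f"{value:{width}.{prec}}")  # '     12.35'
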
  281|        |static int
  282|        |lookahead(struct tok_state *tok, const char *test)
  283|   9.26k|{
  284|   9.26k|    const char *s = test;
  285|   9.26k|    int res = 0;
  286|   24.3k|    while (1) {
  287|   24.3k|        int c = tok_nextc(tok);
  288|   24.3k|        if (*s == 0) {
  289|   9.17k|            res = !is_potential_identifier_char(c);
  290|   9.17k|        }
  291|   15.1k|        else if (c == *s) {
  292|   15.0k|            s++;
  293|   15.0k|            continue;
  294|   15.0k|        }
  295|        |
  296|   9.26k|        tok_backup(tok, c);
  297|   24.3k|        while (s != test) {
  298|   15.0k|            tok_backup(tok, *--s);
  299|   15.0k|        }
  300|   9.26k|        return res;
  301|   24.3k|    }
  302|   9.26k|}
  303|        |
  304|        |static int
  305|   97.6k|verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
  306|   97.6k|    if (tok->tok_extra_tokens) {
  307|        |        // When we are parsing extra tokens, we don't want to emit warnings
  308|        |        // about invalid literals, because we want to be a bit more liberal.
  309|       0|        return 1;
  310|       0|    }
  311|        |    /* Emit a deprecation warning only if the numeric literal is immediately
  312|        |     * followed by one of the keywords which can occur after a numeric literal
  313|        |     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
  314|        |     * This allows existing valid code to be deprecated gradually, without
  315|        |     * emitting a warning before the error in most cases of invalid numeric
  316|        |     * literals (which would be confusing and break existing tests).
  317|        |     * Raise a syntax error with a slightly better message than plain
  318|        |     * "invalid syntax" if the numeric literal is immediately followed by
  319|        |     * another keyword or identifier.
  320|        |     */
  321|   97.6k|    int r = 0;
  322|   97.6k|    if (c == 'a') {
  323|   1.16k|        r = lookahead(tok, "nd");
  324|   1.16k|    }
  325|   96.5k|    else if (c == 'e') {
  326|     525|        r = lookahead(tok, "lse");
  327|     525|    }
  328|   95.9k|    else if (c == 'f') {
  329|   3.44k|        r = lookahead(tok, "or");
  330|   3.44k|    }
  331|   92.5k|    else if (c == 'i') {
  332|   1.49k|        int c2 = tok_nextc(tok);
  333|   1.49k|        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
  334|   1.47k|            r = 1;
  335|   1.47k|        }
  336|   1.49k|        tok_backup(tok, c2);
  337|   1.49k|    }
  338|   91.0k|    else if (c == 'o') {
  339|   3.83k|        r = lookahead(tok, "r");
  340|   3.83k|    }
  341|   87.2k|    else if (c == 'n') {
  342|     304|        r = lookahead(tok, "ot");
  343|     304|    }
  344|   97.6k|    if (r) {
  345|   10.6k|        tok_backup(tok, c);
  346|   10.6k|        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
  347|   10.6k|                "invalid %s literal", kind))
  348|       0|        {
  349|       0|            return 0;
  350|       0|        }
  351|   10.6k|        tok_nextc(tok);
  352|   10.6k|    }
  353|   87.0k|    else /* In future releases, only the error will remain. */
  354|   87.0k|    if (c < 128 && is_potential_identifier_char(c)) {
  355|     212|        tok_backup(tok, c);
  356|     212|        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
  357|     212|        return 0;
  358|     212|    }
  359|   97.4k|    return 1;
  360|   97.6k|}
  361|        |
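Editorial sketch (Python, not part of the measured source): a numeric literal glued to one of the listed keywords only warns for now, while any other trailing identifier character is an immediate error:

    import warnings
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        compile("1 if 0in[0] else 2", "<test>", "eval")
        print(w[0].category.__name__, w[0].message)  # SyntaxWarning invalid decimal literal
    try:
        compile("0xfg", "<test>", "eval")
    except SyntaxError as e:
        print(e.msg)  # invalid hexadecimal literal
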
  362|        |/* Verify that the identifier follows PEP 3131. */
  363|        |static int
  364|        |verify_identifier(struct tok_state *tok)
  365|   12.1k|{
  366|   12.1k|    if (tok->tok_extra_tokens) {
  367|       0|        return 1;
  368|       0|    }
  369|   12.1k|    PyObject *s;
  370|   12.1k|    if (tok->decoding_erred)
  371|       0|        return 0;
  372|   12.1k|    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
  373|   12.1k|    if (s == NULL) {
  374|       1|        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
  375|       1|            tok->done = E_DECODE;
  376|       1|        }
  377|       0|        else {
  378|       0|            tok->done = E_ERROR;
  379|       0|        }
  380|       1|        return 0;
  381|       1|    }
  382|   12.1k|    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
  383|   12.1k|    assert(invalid >= 0);
  384|   12.1k|    assert(PyUnicode_GET_LENGTH(s) > 0);
  385|   12.1k|    if (invalid < PyUnicode_GET_LENGTH(s)) {
  386|     689|        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
  387|     689|        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
  388|        |            /* Determine the offset in UTF-8 encoded input */
  389|     466|            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
  390|     466|            if (s != NULL) {
  391|     466|                Py_SETREF(s, PyUnicode_AsUTF8String(s));
  392|     466|            }
  393|     466|            if (s == NULL) {
  394|       0|                tok->done = E_ERROR;
  395|       0|                return 0;
  396|       0|            }
  397|     466|            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
  398|     466|        }
  399|     689|        Py_DECREF(s);
  400|     689|        if (Py_UNICODE_ISPRINTABLE(ch)) {
  401|     365|            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
  402|     365|        }
  403|     324|        else {
  404|     324|            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
  405|     324|        }
  406|     689|        return 0;
  407|     689|    }
  408|   11.4k|    Py_DECREF(s);
  409|   11.4k|    return 1;
  410|   12.1k|}
  411|        |
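Editorial sketch (Python, not part of the measured source): the PEP 3131 scan above agrees with str.isidentifier(); a failing character is reported with the message at line 401:

    print("π".isidentifier())  # True -> lexed as a NAME token
    try:
        compile("a\u00b2 = 1", "<test>", "exec")  # '²' is not a valid identifier character
    except SyntaxError as e:
        print(e.msg)  # invalid character '²' (U+00B2)
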
  412|        |static int
  413|        |tok_decimal_tail(struct tok_state *tok)
  414|   77.9k|{
  415|   77.9k|    int c;
  416|        |
  417|   78.4k|    while (1) {
  418|    220k|        do {
  419|    220k|            c = tok_nextc(tok);
  420|    220k|        } while (Py_ISDIGIT(c));
  421|   78.4k|        if (c != '_') {
  422|   77.9k|            break;
  423|   77.9k|        }
  424|     521|        c = tok_nextc(tok);
  425|     521|        if (!Py_ISDIGIT(c)) {
  426|      12|            tok_backup(tok, c);
  427|      12|            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
  428|      12|            return 0;
  429|      12|        }
  430|     521|    }
  431|   77.9k|    return c;
  432|   77.9k|}
  433|        |
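Editorial sketch (Python, not part of the measured source): in the decimal tail an underscore must be followed by a digit, which is exactly the error path at line 427:

    for src in ["1_000", "1_", "1__0"]:
        try:
            compile(src, "<test>", "eval")
            print(src, "-> ok")
        except SyntaxError as e:
            print(src, "->", e.msg)  # invalid decimal literal
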
  434|        |static inline int
  435|   1.11k|tok_continuation_line(struct tok_state *tok) {
  436|   1.11k|    int c = tok_nextc(tok);
  437|   1.11k|    if (c == '\r') {
  438|      70|        c = tok_nextc(tok);
  439|      70|    }
  440|   1.11k|    if (c != '\n') {
  441|      64|        tok->done = E_LINECONT;
  442|      64|        return -1;
  443|      64|    }
  444|   1.04k|    c = tok_nextc(tok);
  445|   1.04k|    if (c == EOF) {
  446|      53|        tok->done = E_EOF;
  447|      53|        tok->cur = tok->inp;
  448|      53|        return -1;
  449|     996|    } else {
  450|     996|        tok_backup(tok, c);
  451|     996|    }
  452|     996|    return c;
  453|   1.04k|}
  454|        |
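Editorial sketch (Python, not part of the measured source): the E_LINECONT state set above produces the familiar message:

    try:
        compile("x = 1 + \\ 2", "<test>", "exec")  # backslash followed by a space
    except SyntaxError as e:
        print(e.msg)  # unexpected character after line continuation character
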
  455|        |static int
  456|        |maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
  457|        |                                             int saw_b, int saw_r, int saw_u,
  458|   20.8k|                                             int saw_f, int saw_t) {
  459|        |    // Supported: rb, rf, rt (in any order)
  460|        |    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
  461|        |
  462|   20.8k|#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
  463|   20.8k|    do {                                                                  \
  464|       7|        (void)_PyTokenizer_syntaxerror_known_range(                       \
  465|       7|            tok, (int)(tok->start + 1 - tok->line_start),                 \
  466|       7|            (int)(tok->cur - tok->line_start),                            \
  467|       7|            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
  468|       7|        return -1;                                                        \
  469|       7|    } while (0)
  470|        |
  471|   20.8k|    if (saw_u && saw_b) {
  472|       1|        RETURN_SYNTAX_ERROR("u", "b");
  473|       1|    }
  474|   20.8k|    if (saw_u && saw_r) {
  475|       1|        RETURN_SYNTAX_ERROR("u", "r");
  476|       1|    }
  477|   20.8k|    if (saw_u && saw_f) {
  478|       1|        RETURN_SYNTAX_ERROR("u", "f");
  479|       1|    }
  480|   20.8k|    if (saw_u && saw_t) {
  481|       1|        RETURN_SYNTAX_ERROR("u", "t");
  482|       1|    }
  483|        |
  484|   20.8k|    if (saw_b && saw_f) {
  485|       1|        RETURN_SYNTAX_ERROR("b", "f");
  486|       1|    }
  487|   20.8k|    if (saw_b && saw_t) {
  488|       1|        RETURN_SYNTAX_ERROR("b", "t");
  489|       1|    }
  490|        |
  491|   20.8k|    if (saw_f && saw_t) {
  492|       1|        RETURN_SYNTAX_ERROR("f", "t");
  493|       1|    }
  494|        |
  495|   20.8k|#undef RETURN_SYNTAX_ERROR
  496|        |
  497|   20.8k|    return 0;
  498|   20.8k|}
  499|        |
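Editorial sketch (Python, not part of the measured source; 't' prefixes assume CPython 3.14+, where template strings exist): accepted vs. rejected prefix pairs checked above:

    for src in ["rb''", "Rf''", "rt''", "ub''", "ft''"]:
        try:
            compile(src, "<test>", "eval")
            print(src, "-> ok")
        except SyntaxError as e:
            print(src, "->", e.msg)  # e.g. 'u' and 'b' prefixes are incompatible
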
  500|        |static int
  501|        |tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
  502|   1.67M|{
  503|   1.67M|    int c;
  504|   1.67M|    int blankline, nonascii;
  505|        |
  506|   1.67M|    const char *p_start = NULL;
  507|   1.67M|    const char *p_end = NULL;
  508|   1.77M|  nextline:
  509|   1.77M|    tok->start = NULL;
  510|   1.77M|    tok->starting_col_offset = -1;
  511|   1.77M|    blankline = 0;
  512|        |
  513|        |
  514|        |    /* Get indentation level */
  515|   1.77M|    if (tok->atbol) {
  516|    244k|        int col = 0;
  517|    244k|        int altcol = 0;
  518|    244k|        tok->atbol = 0;
  519|    244k|        int cont_line_col = 0;
  520|   1.09M|        for (;;) {
  521|   1.09M|            c = tok_nextc(tok);
  522|   1.09M|            if (c == ' ') {
  523|    845k|                col++, altcol++;
  524|    845k|            }
  525|    246k|            else if (c == '\t') {
  526|     625|                col = (col / tok->tabsize + 1) * tok->tabsize;
  527|     625|                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
  528|     625|            }
  529|    246k|            else if (c == '\014')  {/* Control-L (formfeed) */
  530|   1.13k|                col = altcol = 0; /* For Emacs users */
  531|   1.13k|            }
  532|    244k|            else if (c == '\\') {
  533|        |                // Indentation cannot be split over multiple physical lines
  534|        |                // using backslashes. This means that if we found a backslash
  535|        |                // preceded by whitespace, **the first one we find** determines
  536|        |                // the level of indentation of whatever comes next.
  537|     658|                cont_line_col = cont_line_col ? cont_line_col : col;
  538|     658|                if ((c = tok_continuation_line(tok)) == -1) {
  539|      45|                    return MAKE_TOKEN(ERRORTOKEN);
  540|      45|                }
  541|     658|            }
  542|    244k|            else if (c == EOF && PyErr_Occurred()) {
  543|       0|                return MAKE_TOKEN(ERRORTOKEN);
  544|       0|            }
  545|    244k|            else {
  546|    244k|                break;
  547|    244k|            }
  548|   1.09M|        }
  549|    244k|        tok_backup(tok, c);
  550|    244k|        if (c == '#' || c == '\n' || c == '\r') {
  551|        |            /* Lines with only whitespace and/or comments
  552|        |               shouldn't affect the indentation and are
  553|        |               not passed to the parser as NEWLINE tokens,
  554|        |               except *totally* empty lines in interactive
  555|        |               mode, which signal the end of a command group. */
  556|   58.2k|            if (col == 0 && c == '\n' && tok->prompt != NULL) {
  557|       0|                blankline = 0; /* Let it through */
  558|       0|            }
  559|   58.2k|            else if (tok->prompt != NULL && tok->lineno == 1) {
  560|        |                /* In interactive mode, if the first line contains
  561|        |                   only spaces and/or a comment, let it through. */
  562|       0|                blankline = 0;
  563|       0|                col = altcol = 0;
  564|       0|            }
  565|   58.2k|            else {
  566|   58.2k|                blankline = 1; /* Ignore completely */
  567|   58.2k|            }
  568|        |            /* We can't jump back right here since we still
  569|        |               may need to skip to the end of a comment */
  570|   58.2k|        }
  571|    244k|        if (!blankline && tok->level == 0) {
  572|    144k|            col = cont_line_col ? cont_line_col : col;
  573|    144k|            altcol = cont_line_col ? cont_line_col : altcol;
  574|    144k|            if (col == tok->indstack[tok->indent]) {
  575|        |                /* No change */
  576|   97.2k|                if (altcol != tok->altindstack[tok->indent]) {
  577|       1|                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
  578|       1|                }
  579|   97.2k|            }
  580|   46.7k|            else if (col > tok->indstack[tok->indent]) {
  581|        |                /* Indent -- always one */
  582|   26.1k|                if (tok->indent+1 >= MAXINDENT) {
  583|       0|                    tok->done = E_TOODEEP;
  584|       0|                    tok->cur = tok->inp;
  585|       0|                    return MAKE_TOKEN(ERRORTOKEN);
  586|       0|                }
  587|   26.1k|                if (altcol <= tok->altindstack[tok->indent]) {
  588|       2|                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
  589|       2|                }
  590|   26.1k|                tok->pendin++;
  591|   26.1k|                tok->indstack[++tok->indent] = col;
  592|   26.1k|                tok->altindstack[tok->indent] = altcol;
  593|   26.1k|            }
  594|   20.6k|            else /* col < tok->indstack[tok->indent] */ {
  595|        |                /* Dedent -- any number, must be consistent */
  596|   46.0k|                while (tok->indent > 0 &&
  597|   41.0k|                    col < tok->indstack[tok->indent]) {
  598|   25.4k|                    tok->pendin--;
  599|   25.4k|                    tok->indent--;
  600|   25.4k|                }
  601|   20.6k|                if (col != tok->indstack[tok->indent]) {
  602|       5|                    tok->done = E_DEDENT;
  603|       5|                    tok->cur = tok->inp;
  604|       5|                    return MAKE_TOKEN(ERRORTOKEN);
  605|       5|                }
  606|   20.6k|                if (altcol != tok->altindstack[tok->indent]) {
  607|       1|                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
  608|       1|                }
  609|   20.6k|            }
  610|    144k|        }
  611|    244k|    }
  612|        |
  613|   1.77M|    tok->start = tok->cur;
  614|   1.77M|    tok->starting_col_offset = tok->col_offset;
  615|        |
  616|        |    /* Return pending indents/dedents */
  617|   1.77M|    if (tok->pendin != 0) {
  618|   51.6k|        if (tok->pendin < 0) {
  619|   25.4k|            if (tok->tok_extra_tokens) {
  620|       0|                p_start = tok->cur;
  621|       0|                p_end = tok->cur;
  622|       0|            }
  623|   25.4k|            tok->pendin++;
  624|   25.4k|            return MAKE_TOKEN(DEDENT);
  625|   25.4k|        }
  626|   26.1k|        else {
  627|   26.1k|            if (tok->tok_extra_tokens) {
  628|       0|                p_start = tok->buf;
  629|       0|                p_end = tok->cur;
  630|       0|            }
  631|   26.1k|            tok->pendin--;
  632|   26.1k|            return MAKE_TOKEN(INDENT);
  633|   26.1k|        }
  634|   51.6k|    }
  635|        |
  636|        |    /* Peek ahead at the next character */
  637|   1.72M|    c = tok_nextc(tok);
  638|   1.72M|    tok_backup(tok, c);
  639|        |
  640|   1.72M| again:
  641|   1.72M|    tok->start = NULL;
  642|        |    /* Skip spaces */
  643|   2.09M|    do {
  644|   2.09M|        c = tok_nextc(tok);
  645|   2.09M|    } while (c == ' ' || c == '\t' || c == '\014');
  646|        |
  647|        |    /* Set start of current token */
  648|   1.72M|    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
  649|   1.72M|    tok->starting_col_offset = tok->col_offset - 1;
  650|        |
  651|        |    /* Skip comment, unless it's a type comment */
  652|   1.72M|    if (c == '#') {
  653|        |
  654|   40.3k|        const char* p = NULL;
  655|   40.3k|        const char *prefix, *type_start;
  656|   40.3k|        int current_starting_col_offset;
  657|        |
  658|   1.33M|        while (c != EOF && c != '\n' && c != '\r') {
  659|   1.29M|            c = tok_nextc(tok);
  660|   1.29M|        }
  661|        |
  662|   40.3k|        if (tok->tok_extra_tokens) {
  663|       0|            p = tok->start;
  664|       0|        }
  665|        |
  666|   40.3k|        if (tok->type_comments) {
  667|       0|            p = tok->start;
  668|       0|            current_starting_col_offset = tok->starting_col_offset;
  669|       0|            prefix = type_comment_prefix;
  670|       0|            while (*prefix && p < tok->cur) {
  671|       0|                if (*prefix == ' ') {
  672|       0|                    while (*p == ' ' || *p == '\t') {
  673|       0|                        p++;
  674|       0|                        current_starting_col_offset++;
  675|       0|                    }
  676|       0|                } else if (*prefix == *p) {
  677|       0|                    p++;
  678|       0|                    current_starting_col_offset++;
  679|       0|                } else {
  680|       0|                    break;
  681|       0|                }
  682|        |
  683|       0|                prefix++;
  684|       0|            }
  685|        |
  686|        |            /* This is a type comment if we matched all of type_comment_prefix. */
  687|       0|            if (!*prefix) {
  688|       0|                int is_type_ignore = 1;
  689|        |                // +6 in order to skip the word 'ignore'
  690|       0|                const char *ignore_end = p + 6;
  691|       0|                const int ignore_end_col_offset = current_starting_col_offset + 6;
  692|       0|                tok_backup(tok, c);  /* don't eat the newline or EOF */
  693|        |
  694|       0|                type_start = p;
  695|        |
  696|        |                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
  697|        |                 * or anything ASCII and non-alphanumeric. */
  698|       0|                is_type_ignore = (
  699|       0|                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
  700|       0|                    && !(tok->cur > ignore_end
  701|       0|                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
  702|        |
  703|       0|                if (is_type_ignore) {
  704|       0|                    p_start = ignore_end;
  705|       0|                    p_end = tok->cur;
  706|        |
  707|        |                    /* If this type ignore is the only thing on the line, consume the newline also. */
  708|       0|                    if (blankline) {
  709|       0|                        tok_nextc(tok);
  710|       0|                        tok->atbol = 1;
  711|       0|                    }
  712|       0|                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
  713|       0|                } else {
  714|       0|                    p_start = type_start;
  715|       0|                    p_end = tok->cur;
  716|       0|                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
  717|       0|                }
  718|       0|            }
  719|       0|        }
  720|   40.3k|        if (tok->tok_extra_tokens) {
  721|       0|            tok_backup(tok, c);  /* don't eat the newline or EOF */
  722|       0|            p_start = p;
  723|       0|            p_end = tok->cur;
  724|       0|            tok->comment_newline = blankline;
  725|       0|            return MAKE_TOKEN(COMMENT);
  726|       0|        }
  727|   40.3k|    }
  728|        |
  729|   1.72M|    if (tok->done == E_INTERACT_STOP) {
  730|       0|        return MAKE_TOKEN(ENDMARKER);
  731|       0|    }
  732|        |
  733|        |    /* Check for EOF and errors now */
  734|   1.72M|    if (c == EOF) {
  735|   16.2k|        if (tok->level) {
  736|   4.16k|            return MAKE_TOKEN(ERRORTOKEN);
  737|   4.16k|        }
  738|   12.0k|        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
  739|   16.2k|    }
  740|        |
  741|        |    /* Identifier (most frequent token!) */
  742|   1.71M|    nonascii = 0;
  743|   1.71M|    if (is_potential_identifier_start(c)) {
  744|        |        /* Process the various legal combinations of b"", r"", u"", and f"". */
  745|    559k|        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
  746|    687k|        while (1) {
  747|    687k|            if (!saw_b && (c == 'b' || c == 'B')) {
  748|   21.1k|                saw_b = 1;
  749|   21.1k|            }
  750|        |            /* Since this is a backwards-compatibility support literal, we don't
  751|        |               want to support it in arbitrary order like byte literals. */
  752|    666k|            else if (!saw_u && (c == 'u'|| c == 'U')) {
  753|   7.39k|                saw_u = 1;
  754|   7.39k|            }
  755|        |            /* ur"" and ru"" are not supported */
  756|    658k|            else if (!saw_r && (c == 'r' || c == 'R')) {
  757|   38.7k|                saw_r = 1;
  758|   38.7k|            }
  759|    620k|            else if (!saw_f && (c == 'f' || c == 'F')) {
  760|   47.1k|                saw_f = 1;
  761|   47.1k|            }
  762|    572k|            else if (!saw_t && (c == 't' || c == 'T')) {
  763|   34.6k|                saw_t = 1;
  764|   34.6k|            }
  765|    538k|            else {
  766|    538k|                break;
  767|    538k|            }
  768|    149k|            c = tok_nextc(tok);
  769|    149k|            if (c == '"' || c == '\'') {
  770|        |                // Raise error on incompatible string prefixes:
  771|   20.8k|                int status = maybe_raise_syntax_error_for_string_prefixes(
  772|   20.8k|                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
  773|   20.8k|                if (status < 0) {
  774|       7|                    return MAKE_TOKEN(ERRORTOKEN);
  775|       7|                }
  776|        |
  777|        |                // Handle valid f or t string creation:
  778|   20.8k|                if (saw_f || saw_t) {
  779|   16.5k|                    goto f_string_quote;
  780|   16.5k|                }
  781|   4.30k|                goto letter_quote;
  782|   20.8k|            }
  783|    149k|        }
  784|   2.49M|        while (is_potential_identifier_char(c)) {
  785|   1.95M|            if (c >= 128) {
  786|    111k|                nonascii = 1;
  787|    111k|            }
  788|   1.95M|            c = tok_nextc(tok);
  789|   1.95M|        }
  790|    538k|        tok_backup(tok, c);
  791|    538k|        if (nonascii && !verify_identifier(tok)) {
  792|     690|            return MAKE_TOKEN(ERRORTOKEN);
  793|     690|        }
  794|        |
  795|    537k|        p_start = tok->start;
  796|    537k|        p_end = tok->cur;
  797|        |
  798|    537k|        return MAKE_TOKEN(NAME);
  799|    538k|    }
  800|        |
  801|   1.15M|    if (c == '\r') {
  802|     413|        c = tok_nextc(tok);
  803|     413|    }
  804|        |
  805|        |    /* Newline */
  806|   1.15M|    if (c == '\n') {
  807|    225k|        tok->atbol = 1;
  808|    225k|        if (blankline || tok->level > 0) {
  809|    100k|            if (tok->tok_extra_tokens) {
  810|       0|                if (tok->comment_newline) {
  811|       0|                    tok->comment_newline = 0;
  812|       0|                }
  813|       0|                p_start = tok->start;
  814|       0|                p_end = tok->cur;
  815|       0|                return MAKE_TOKEN(NL);
  816|       0|            }
  817|    100k|            goto nextline;
  818|    100k|        }
  819|    125k|        if (tok->comment_newline && tok->tok_extra_tokens) {
  820|       0|            tok->comment_newline = 0;
  821|       0|            p_start = tok->start;
  822|       0|            p_end = tok->cur;
  823|       0|            return MAKE_TOKEN(NL);
  824|       0|        }
  825|    125k|        p_start = tok->start;
  826|    125k|        p_end = tok->cur - 1; /* Leave '\n' out of the string */
  827|    125k|        tok->cont_line = 0;
  828|    125k|        return MAKE_TOKEN(NEWLINE);
  829|    125k|    }
  830|        |
  831|        |    /* Period or number starting with period? */
  832|    926k|    if (c == '.') {
  833|   39.3k|        c = tok_nextc(tok);
  834|   39.3k|        if (Py_ISDIGIT(c)) {
  835|   3.43k|            goto fraction;
  836|   35.9k|        } else if (c == '.') {
  837|   1.40k|            c = tok_nextc(tok);
  838|   1.40k|            if (c == '.') {
  839|     738|                p_start = tok->start;
  840|     738|                p_end = tok->cur;
  841|     738|                return MAKE_TOKEN(ELLIPSIS);
  842|     738|            }
  843|     670|            else {
  844|     670|                tok_backup(tok, c);
  845|     670|            }
  846|     670|            tok_backup(tok, '.');
  847|     670|        }
  848|   34.5k|        else {
  849|   34.5k|            tok_backup(tok, c);
  850|   34.5k|        }
  851|   35.2k|        p_start = tok->start;
  852|   35.2k|        p_end = tok->cur;
  853|   35.2k|        return MAKE_TOKEN(DOT);
  854|   39.3k|    }
  855|        |
  856|        |    /* Number */
  857|    886k|    if (Py_ISDIGIT(c)) {
  858|   94.3k|        if (c == '0') {
  859|        |            /* Hex, octal or binary -- maybe. */
  860|   33.0k|            c = tok_nextc(tok);
  861|   33.0k|            if (c == 'x' || c == 'X') {
  862|        |                /* Hex */
  863|   15.8k|                c = tok_nextc(tok);
  864|   16.0k|                do {
  865|   16.0k|                    if (c == '_') {
  866|     217|                        c = tok_nextc(tok);
  867|     217|                    }
  868|   16.0k|                    if (!Py_ISXDIGIT(c)) {
  869|      20|                        tok_backup(tok, c);
  870|      20|                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
  871|      20|                    }
  872|   77.8k|                    do {
  873|   77.8k|                        c = tok_nextc(tok);
  874|   77.8k|                    } while (Py_ISXDIGIT(c));
  875|   16.0k|                } while (c == '_');
  876|   15.8k|                if (!verify_end_of_number(tok, c, "hexadecimal")) {
  877|       2|                    return MAKE_TOKEN(ERRORTOKEN);
  878|       2|                }
  879|   15.8k|            }
  880|   17.2k|            else if (c == 'o' || c == 'O') {
  881|        |                /* Octal */
  882|     605|                c = tok_nextc(tok);
  883|     948|                do {
  884|     948|                    if (c == '_') {
  885|     349|                        c = tok_nextc(tok);
  886|     349|                    }
  887|     948|                    if (c < '0' || c >= '8') {
  888|      23|                        if (Py_ISDIGIT(c)) {
  889|       1|                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
  890|       1|                                    "invalid digit '%c' in octal literal", c));
  891|       1|                        }
  892|      22|                        else {
  893|      22|                            tok_backup(tok, c);
  894|      22|                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
  895|      22|                        }
  896|      23|                    }
  897|   2.40k|                    do {
  898|   2.40k|                        c = tok_nextc(tok);
  899|   2.40k|                    } while ('0' <= c && c < '8');
  900|     925|                } while (c == '_');
  901|     582|                if (Py_ISDIGIT(c)) {
  902|       1|                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
  903|       1|                            "invalid digit '%c' in octal literal", c));
  904|       1|                }
  905|     581|                if (!verify_end_of_number(tok, c, "octal")) {
  906|       5|                    return MAKE_TOKEN(ERRORTOKEN);
  907|       5|                }
  908|     581|            }
  909|   16.6k|            else if (c == 'b' || c == 'B') {
  910|        |                /* Binary */
  911|     559|                c = tok_nextc(tok);
  912|   1.07k|                do {
  913|   1.07k|                    if (c == '_') {
  914|     530|                        c = tok_nextc(tok);
  915|     530|                    }
  916|   1.07k|                    if (c != '0' && c != '1') {
  917|      17|                        if (Py_ISDIGIT(c)) {
  918|       1|                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
  919|       1|                        }
  920|      16|                        else {
  921|      16|                            tok_backup(tok, c);
  922|      16|                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
  923|      16|                        }
  924|      17|                    }
  925|   4.10k|                    do {
  926|   4.10k|                        c = tok_nextc(tok);
  927|   4.10k|                    } while (c == '0' || c == '1');
  928|   1.06k|                } while (c == '_');
  929|     542|                if (Py_ISDIGIT(c)) {
  930|       2|                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
  931|       2|                }
  932|     540|                if (!verify_end_of_number(tok, c, "binary")) {
  933|       1|                    return MAKE_TOKEN(ERRORTOKEN);
  934|       1|                }
  935|     540|            }
  936|   16.0k|            else {
  937|   16.0k|                int nonzero = 0;
  938|        |                /* maybe old-style octal; c is first char of it */
  939|        |                /* in any case, allow '0' as a literal */
  940|   17.4k|                while (1) {
  941|   17.4k|                    if (c == '_') {
  942|      90|                        c = tok_nextc(tok);
  943|      90|                        if (!Py_ISDIGIT(c)) {
  944|       3|                            tok_backup(tok, c);
  945|       3|                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
  946|       3|                        }
  947|      90|                    }
  948|   17.4k|                    if (c != '0') {
  949|   16.0k|                        break;
  950|   16.0k|                    }
  951|   1.39k|                    c = tok_nextc(tok);
  952|   1.39k|                }
  953|   16.0k|                char* zeros_end = tok->cur;
  954|   16.0k|                if (Py_ISDIGIT(c)) {
  955|     392|                    nonzero = 1;
  956|     392|                    c = tok_decimal_tail(tok);
  957|     392|                    if (c == 0) {
  958|       1|                        return MAKE_TOKEN(ERRORTOKEN);
  959|       1|                    }
  960|     392|                }
  961|   16.0k|                if (c == '.') {
  962|     893|                    c = tok_nextc(tok);
  963|     893|                    goto fraction;
  964|     893|                }
  965|   15.1k|                else if (c == 'e' || c == 'E') {
  966|     853|                    goto exponent;
  967|     853|                }
  968|   14.3k|                else if (c == 'j' || c == 'J') {
  969|     786|                    goto imaginary;
  970|     786|                }
  971|   13.5k|                else if (nonzero && !tok->tok_extra_tokens) {
  972|        |                    /* Old-style octal: now disallowed. */
  973|      22|                    tok_backup(tok, c);
  974|      22|                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
  975|      22|                            tok, (int)(tok->start + 1 - tok->line_start),
  976|      22|                            (int)(zeros_end - tok->line_start),
  977|      22|                            "leading zeros in decimal integer "
  978|      22|                            "literals are not permitted; "
  979|      22|                            "use an 0o prefix for octal integers"));
  980|      22|                }
  981|   13.5k|                if (!verify_end_of_number(tok, c, "decimal")) {
  982|      24|                    return MAKE_TOKEN(ERRORTOKEN);
  983|      24|                }
  984|   13.5k|            }
  985|   33.0k|        }
  986|   61.2k|        else {
  987|        |            /* Decimal */
  988|   61.2k|            c = tok_decimal_tail(tok);
  989|   61.2k|            if (c == 0) {
  990|       9|                return MAKE_TOKEN(ERRORTOKEN);
  991|       9|            }
  992|   61.2k|            {
  993|        |                /* Accept floating-point numbers. */
  994|   61.2k|                if (c == '.') {
  995|   3.92k|                    c = tok_nextc(tok);
  996|   8.24k|        fraction:
  997|        |                    /* Fraction */
  998|   8.24k|                    if (Py_ISDIGIT(c)) {
  999|   6.14k|                        c = tok_decimal_tail(tok);
 1000|   6.14k|                        if (c == 0) {
 1001|       1|                            return MAKE_TOKEN(ERRORTOKEN);
 1002|       1|                        }
 1003|   6.14k|                    }
 1004|   8.24k|                }
 1005|   65.6k|                if (c == 'e' || c == 'E') {
 1006|   9.84k|                    int e;
 1007|   10.6k|                  exponent:
 1008|   10.6k|                    e = c;
 1009|        |                    /* Exponent part */
 1010|   10.6k|                    c = tok_nextc(tok);
 1011|   10.6k|                    if (c == '+' || c == '-') {
 1012|   3.78k|                        c = tok_nextc(tok);
 1013|   3.78k|                        if (!Py_ISDIGIT(c)) {
 1014|      10|                            tok_backup(tok, c);
 1015|      10|                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
 1016|      10|                        }
 1017|   6.91k|                    } else if (!Py_ISDIGIT(c)) {
 1018|     527|                        tok_backup(tok, c);
 1019|     527|                        if (!verify_end_of_number(tok, e, "decimal")) {
 1020|      35|                            return MAKE_TOKEN(ERRORTOKEN);
 1021|      35|                        }
 1022|     492|                        tok_backup(tok, e);
 1023|     492|                        p_start = tok->start;
 1024|     492|                        p_end = tok->cur;
 1025|     492|                        return MAKE_TOKEN(NUMBER);
 1026|     527|                    }
 1027|   10.1k|                    c = tok_decimal_tail(tok);
 1028|   10.1k|                    if (c == 0) {
 1029|       1|                        return MAKE_TOKEN(ERRORTOKEN);
 1030|       1|                    }
 1031|   10.1k|                }
 1032|   65.9k|                if (c == 'j' || c == 'J') {
 1033|        |                    /* Imaginary part */
 1034|   4.12k|        imaginary:
 1035|   4.12k|                    c = tok_nextc(tok);
 1036|   4.12k|                    if (!verify_end_of_number(tok, c, "imaginary")) {
 1037|      13|                        return MAKE_TOKEN(ERRORTOKEN);
 1038|      13|                    }
 1039|   4.12k|                }
 1040|   62.5k|                else if (!verify_end_of_number(tok, c, "decimal")) {
 1041|     132|                    return MAKE_TOKEN(ERRORTOKEN);
 1042|     132|                }
 1043|   65.9k|            }
 1044|   65.9k|        }
 1045|   96.9k|        tok_backup(tok, c);
 1046|   96.9k|        p_start = tok->start;
 1047|   96.9k|        p_end = tok->cur;
 1048|   96.9k|        return MAKE_TOKEN(NUMBER);
 1049|   94.3k|    }
 1050|        |
 1051|    808k|  f_string_quote:
 1052|    808k|    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
 1053|   16.5k|        && (c == '\'' || c == '"'))) {
 1054|        |
 1055|   16.5k|        int quote = c;
 1056|   16.5k|        int quote_size = 1;             /* 1 or 3 */
 1057|        |
 1058|        |        /* Nodes of type STRING, especially multi-line strings,
 1059|        |           must be handled differently in order to get both
 1060|        |           the starting line number and the column offset right.
 1061|        |           (cf. issue 16806) */
 1062|   16.5k|        tok->first_lineno = tok->lineno;
 1063|   16.5k|        tok->multi_line_start = tok->line_start;
 1064|        |
 1065|        |        /* Find the quote size and start of string */
 1066|   16.5k|        int after_quote = tok_nextc(tok);
 1067|   16.5k|        if (after_quote == quote) {
 1068|   2.65k|            int after_after_quote = tok_nextc(tok);
 1069|   2.65k|            if (after_after_quote == quote) {
 1070|     884|                quote_size = 3;
 1071|     884|            }
 1072|   1.76k|            else {
 1073|        |                // TODO: Check this
 1074|   1.76k|                tok_backup(tok, after_after_quote);
 1075|   1.76k|                tok_backup(tok, after_quote);
 1076|   1.76k|            }
 1077|   2.65k|        }
 1078|   16.5k|        if (after_quote != quote) {
 1079|   13.8k|            tok_backup(tok, after_quote);
 1080|   13.8k|        }
 1081|        |
 1082|        |
 1083|   16.5k|        p_start = tok->start;
 1084|   16.5k|        p_end = tok->cur;
 1085|   16.5k|        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
 1086|       2|            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
 1087|       2|        }
 1088|   16.5k|        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
 1089|   16.5k|        the_current_tok->kind = TOK_FSTRING_MODE;
 1090|   16.5k|        the_current_tok->quote = quote;
 1091|   16.5k|        the_current_tok->quote_size = quote_size;
 1092|   16.5k|        the_current_tok->start = tok->start;
 1093|   16.5k|        the_current_tok->multi_line_start = tok->line_start;
 1094|   16.5k|        the_current_tok->first_line = tok->lineno;
 1095|   16.5k|        the_current_tok->start_offset = -1;
 1096|   16.5k|        the_current_tok->multi_line_start_offset = -1;
 1097|   16.5k|        the_current_tok->last_expr_buffer = NULL;
 1098|   16.5k|        the_current_tok->last_expr_size = 0;
 1099|   16.5k|        the_current_tok->last_expr_end = -1;
 1100|   16.5k|        the_current_tok->in_format_spec = 0;
 1101|   16.5k|        the_current_tok->in_debug = 0;
 1102|        |
 1103|   16.5k|        enum string_kind_t string_kind = FSTRING;
 1104|   16.5k|        switch (*tok->start) {
 1105|     600|            case 'T':
 1106|   4.37k|            case 't':
 1107|   4.37k|                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
 1108|   4.37k|                string_kind = TSTRING;
 1109|   4.37k|                break;
 1110|   1.69k|            case 'F':
 1111|   11.6k|            case 'f':
 1112|   11.6k|                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
 1113|   11.6k|                break;
 1114|     196|            case 'R':
 1115|     506|            case 'r':
 1116|     506|                the_current_tok->raw = 1;
 1117|     506|                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
 1118|      98|                    string_kind = TSTRING;
 1119|      98|                }
 1120|     506|                break;
 1121|       0|            default:
 1122|       0|                Py_UNREACHABLE();
 1123|   16.5k|        }
 1124|        |
 1125|   16.5k|        the_current_tok->string_kind = string_kind;
 1126|   16.5k|        the_current_tok->curly_bracket_depth = 0;
 1127|   16.5k|        the_current_tok->curly_bracket_expr_start_depth = -1;
 1128|   16.5k|        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
 1129|   16.5k|    }
 1130|        |
 1131|    796k|  letter_quote:
 1132|        |    /* String */
 1133|    796k|    if (c == '\'' || c == '"') {
 1134|   57.5k|        int quote = c;
 1135|   57.5k|        int quote_size = 1;             /* 1 or 3 */
 1136|   57.5k|        int end_quote_size = 0;
 1137|   57.5k|        int has_escaped_quote = 0;
 1138|        |
 1139|        |        /* Nodes of type STRING, especially multi-line strings,
 1140|        |           must be handled differently in order to get both
 1141|        |           the starting line number and the column offset right.
 1142|        |           (cf. issue 16806) */
 1143|   57.5k|        tok->first_lineno = tok->lineno;
 1144|   57.5k|        tok->multi_line_start = tok->line_start;
 1145|        |
 1146|        |        /* Find the quote size and start of string */
 1147|   57.5k|        c = tok_nextc(tok);
 1148|   57.5k|        if (c == quote) {
 1149|   9.72k|            c = tok_nextc(tok);
 1150|   9.72k|            if (c == quote) {
 1151|   3.30k|                quote_size = 3;
 1152|   3.30k|            }
 1153|   6.42k|            else {
 1154|   6.42k|                end_quote_size = 1;     /* empty string found */
 1155|   6.42k|            }
 1156|   9.72k|        }
 1157|   57.5k|        if (c != quote) {
 1158|   54.2k|            tok_backup(tok, c);
 1159|   54.2k|        }
 1160|        |
 1161|        |        /* Get rest of string */
 1162|   1.20M|        while (end_quote_size != quote_size) {
 1163|   1.14M|            c = tok_nextc(tok);
 1164|   1.14M|            if (tok->done == E_ERROR) {
 1165|       0|                return MAKE_TOKEN(ERRORTOKEN);
 1166|       0|            }
 1167|   1.14M|            if (tok->done == E_DECODE) {
 1168|       0|                break;
 1169|       0|            }
 1170|   1.14M|            if (c == EOF || (quote_size == 1 && c == '\n')) {
 1171|     304|                assert(tok->multi_line_start != NULL);
 1172|        |                // shift the tok_state's location into
 1173|        |                // the start of string, and report the error
 1174|        |                // from the initial quote character
 1175|     304|                tok->cur = (char *)tok->start;
 1176|     304|                tok->cur++;
 1177|     304|                tok->line_start = tok->multi_line_start;
 1178|     304|                int start = tok->lineno;
 1179|     304|                tok->lineno = tok->first_lineno;
 1180|        |
 1181|     304|                if (INSIDE_FSTRING(tok)) {
 1182|        |                    /* When we are in an f-string, before raising the
 1183|        |                     * unterminated string literal error, check whether the
 1184|        |                     * initial quote matches the f-string's quotes; if it
 1185|        |                     * does, this must be a missing '}' token, so raise the
 1186|        |                     * proper error */
 1187|      30|                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
 1188|      30|                    if (the_current_tok->quote == quote &&
 1189|      26|                        the_current_tok->quote_size == quote_size) {
 1190|      23|                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
 1191|      23|                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
 1192|      23|                    }
 1193|      30|                }
 1194|        |
 1195|     281|                if (quote_size == 3) {
 1196|      20|                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
 1197|      20|                                     " (detected at line %d)", start);
 1198|      20|                    if (c != '\n') {
 1199|      20|                        tok->done = E_EOFS;
 1200|      20|                    }
 1201|      20|                    return MAKE_TOKEN(ERRORTOKEN);
 1202|      20|                }
 1203|     261|                else {
 1204|     261|                    if (has_escaped_quote) {
 1205|      11|                        _PyTokenizer_syntaxerror(
 1206|      11|                            tok,
 1207|      11|                            "unterminated string literal (detected at line %d); "
 1208|      11|                            "perhaps you escaped the end quote?",
 1209|      11|                            start
 1210|      11|                        );
 1211|     250|                    } else {
 1212|     250|                        _PyTokenizer_syntaxerror(
 1213|     250|                            tok, "unterminated string literal (detected at line %d)", start
 1214|     250|                        );
 1215|     250|                    }
 1216|     261|                    if (c != '\n') {
 1217|      14|                        tok->done = E_EOLS;
 1218|      14|                    }
 1219|     261|                    return MAKE_TOKEN(ERRORTOKEN);
 1220|     261|                }
 1221|     281|            }
 1222|   1.14M|            if (c == quote) {
 1223|   59.1k|                end_quote_size += 1;
 1224|   59.1k|            }
 1225|   1.08M|            else {
 1226|   1.08M|                end_quote_size = 0;
 1227|   1.08M|                if (c == '\\') {
 1228|   27.0k|                    c = tok_nextc(tok);  /* skip escaped char */
 1229|   27.0k|                    if (c == quote) {  /* but record whether the escaped char was a quote */
 1230|     949|                        has_escaped_quote = 1;
 1231|     949|                    }
 1232|   27.0k|                    if (c == '\r') {
 1233|      67|                        c = tok_nextc(tok);
 1234|      67|                    }
 1235|   27.0k|                }
 1236|   1.08M|            }
 1237|   1.14M|        }
 1238|        |
 1239|   57.2k|        p_start = tok->start;
 1240|   57.2k|        p_end = tok->cur;
 1241|   57.2k|        return MAKE_TOKEN(STRING);
 1242|   57.5k|    }
1243
1244
    /* Line continuation */
1245
739k
    if (c == '\\') {
1246
455
        if ((c = tok_continuation_line(tok)) == -1) {
1247
72
            return MAKE_TOKEN(ERRORTOKEN);
1248
72
        }
1249
383
        tok->cont_line = 1;
1250
383
        goto again; /* Read next line */
1251
455
    }
1252
1253
    /* Punctuation character */
1254
738k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
738k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
55.9k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
55.9k
        int in_format_spec = current_tok->in_format_spec;
1261
55.9k
         int cursor_in_format_with_debug =
1262
55.9k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
55.9k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
55.9k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
55.9k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
55.9k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.54k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.54k
            current_tok->in_format_spec = 1;
1274
4.54k
            p_start = tok->start;
1275
4.54k
            p_end = tok->cur;
1276
4.54k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.54k
        }
1278
55.9k
    }
1279
1280
    /* Check for two-character token */
1281
734k
    {
1282
734k
        int c2 = tok_nextc(tok);
1283
734k
        int current_token = _PyToken_TwoChars(c, c2);
1284
734k
        if (current_token != OP) {
1285
24.6k
            int c3 = tok_nextc(tok);
1286
24.6k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
24.6k
            if (current_token3 != OP) {
1288
922
                current_token = current_token3;
1289
922
            }
1290
23.7k
            else {
1291
23.7k
                tok_backup(tok, c3);
1292
23.7k
            }
1293
24.6k
            p_start = tok->start;
1294
24.6k
            p_end = tok->cur;
1295
24.6k
            return MAKE_TOKEN(current_token);
1296
24.6k
        }
1297
709k
        tok_backup(tok, c2);
1298
709k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
91.0k
    case '(':
1303
122k
    case '[':
1304
165k
    case '{':
1305
165k
        if (tok->level >= MAXLEVEL) {
1306
4
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
4
        }
1308
165k
        tok->parenstack[tok->level] = c;
1309
165k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
165k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
165k
        tok->level++;
1312
165k
        if (INSIDE_FSTRING(tok)) {
1313
29.8k
            current_tok->curly_bracket_depth++;
1314
29.8k
        }
1315
165k
        break;
1316
64.5k
    case ')':
1317
76.3k
    case ']':
1318
102k
    case '}':
1319
102k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
53
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
53
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
53
        }
1323
102k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
193
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
193
        }
1326
102k
        if (tok->level > 0) {
1327
102k
            tok->level--;
1328
102k
            int opening = tok->parenstack[tok->level];
1329
102k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
37.6k
                                            (opening == '[' && c == ']') ||
1331
25.8k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
38
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
9
                    assert(current_tok->curly_bracket_depth >= 0);
1339
9
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
9
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
7
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
7
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
7
                    }
1344
9
                }
1345
31
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
3
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
3
                            "closing parenthesis '%c' does not match "
1348
3
                            "opening parenthesis '%c' on line %d",
1349
3
                            c, opening, tok->parenlinenostack[tok->level]));
1350
3
                }
1351
28
                else {
1352
28
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
28
                            "closing parenthesis '%c' does not match "
1354
28
                            "opening parenthesis '%c'",
1355
28
                            c, opening));
1356
28
                }
1357
31
            }
1358
102k
        }
1359
1360
102k
        if (INSIDE_FSTRING(tok)) {
1361
22.3k
            current_tok->curly_bracket_depth--;
1362
22.3k
            if (current_tok->curly_bracket_depth < 0) {
1363
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
1
                    TOK_GET_STRING_PREFIX(tok), c));
1365
1
            }
1366
22.3k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
20.9k
                current_tok->curly_bracket_expr_start_depth--;
1368
20.9k
                current_tok->kind = TOK_FSTRING_MODE;
1369
20.9k
                current_tok->in_format_spec = 0;
1370
20.9k
                current_tok->in_debug = 0;
1371
20.9k
            }
1372
22.3k
        }
1373
102k
        break;
1374
441k
    default:
1375
441k
        break;
1376
709k
    }
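A self-contained sketch of the bookkeeping this switch performs: remember each opener together with its line, pop on a closer, and report a mismatch in the same spirit as the messages above (MAXLEVEL's value here is illustrative):

    #include <stdio.h>

    #define MAXLEVEL 200  /* illustrative value; CPython defines its own limit */

    int main(void) {
        const char *src = "(]";   /* mismatched on purpose */
        char parenstack[MAXLEVEL];
        int parenlinenostack[MAXLEVEL];
        int level = 0, lineno = 1;
        for (const char *p = src; *p; p++) {
            char c = *p;
            if (c == '\n') { lineno++; continue; }
            if (c == '(' || c == '[' || c == '{') {
                if (level >= MAXLEVEL) { puts("too many nested parentheses"); return 1; }
                parenstack[level] = c;
                parenlinenostack[level] = lineno;
                level++;
            }
            else if (c == ')' || c == ']' || c == '}') {
                if (level == 0) { printf("unmatched '%c'\n", c); return 1; }
                level--;
                char opening = parenstack[level];
                if (!((opening == '(' && c == ')') ||
                      (opening == '[' && c == ']') ||
                      (opening == '{' && c == '}'))) {
                    printf("closing parenthesis '%c' does not match "
                           "opening parenthesis '%c' on line %d\n",
                           c, opening, parenlinenostack[level]);
                    return 1;
                }
            }
        }
        return 0;
    }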
1377
1378
709k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
427
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
427
    }
1381
1382
708k
    if (c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.06k
        current_tok->in_debug = 1;
1384
5.06k
    }
1385
1386
    /* Punctuation character */
1387
708k
    p_start = tok->start;
1388
708k
    p_end = tok->cur;
1389
708k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
709k
}
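The in_debug flag set near the end of this function backs f-string debug expressions such as f"{x=}". Because '==', '!=', '<=' and '>=' were already consumed by the two-character check earlier, only a lone '=' reaches the one-char path. A hedged sketch of that distinction (is_debug_marker is invented for the example):

    #include <stdio.h>

    /* Hypothetical check: inside f"{...}", a '=' is a debug marker only when
       it is not part of ==, !=, <=, >= (the lexer has already consumed those
       as two-character tokens before this test runs). */
    static int is_debug_marker(const char *expr, const char *eq) {
        if (eq[1] == '=') return 0;                       /* '==' */
        if (eq > expr && (eq[-1] == '!' || eq[-1] == '<' ||
                          eq[-1] == '>' || eq[-1] == '=')) return 0;
        return 1;
    }

    int main(void) {
        const char *e1 = "x=";    /* f"{x=}"   -> debug expression */
        const char *e2 = "x==y";  /* f"{x==y}" -> comparison       */
        printf("%s -> %s\n", e1, is_debug_marker(e1, e1 + 1) ? "debug" : "operator");
        printf("%s -> %s\n", e2, is_debug_marker(e2, e2 + 1) ? "debug" : "operator");
        return 0;
    }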
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
52.6k
{
1395
52.6k
    const char *p_start = NULL;
1396
52.6k
    const char *p_end = NULL;
1397
52.6k
    int end_quote_size = 0;
1398
52.6k
    int unicode_escape = 0;
1399
1400
52.6k
    tok->start = tok->cur;
1401
52.6k
    tok->first_lineno = tok->lineno;
1402
52.6k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
52.6k
    int start_char = tok_nextc(tok);
1407
52.6k
    if (start_char == '{') {
1408
15.4k
        int peek1 = tok_nextc(tok);
1409
15.4k
        tok_backup(tok, peek1);
1410
15.4k
        tok_backup(tok, start_char);
1411
15.4k
        if (peek1 != '{') {
1412
12.7k
            current_tok->curly_bracket_expr_start_depth++;
1413
12.7k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
3
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
3
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
3
            }
1417
12.7k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
12.7k
            return tok_get_normal_mode(tok, current_tok, token);
1419
12.7k
        }
1420
15.4k
    }
1421
37.2k
    else {
1422
37.2k
        tok_backup(tok, start_char);
1423
37.2k
    }
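The peek-and-backup above distinguishes a replacement field from an escaped brace: f"{{x}}" contains literal braces, while f"{x}" opens an expression. The same one-character lookahead in miniature:

    #include <stdio.h>

    int main(void) {
        const char *samples[] = { "{x}", "{{x}}" };
        for (int i = 0; i < 2; i++) {
            const char *s = samples[i];
            /* Peek one character past the opening brace. */
            if (s[0] == '{' && s[1] == '{') {
                printf("%-6s -> literal '{' (escaped brace)\n", s);
            } else if (s[0] == '{') {
                printf("%-6s -> start of a replacement field\n", s);
            }
        }
        return 0;
    }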
1424
1425
    // Check if we are at the end of the string
1426
57.0k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
44.9k
        int quote = tok_nextc(tok);
1428
44.9k
        if (quote != current_tok->quote) {
1429
27.8k
            tok_backup(tok, quote);
1430
27.8k
            goto f_string_middle;
1431
27.8k
        }
1432
44.9k
    }
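The loop just above consumes exactly quote_size closing quotes (one for f"...", three for f"""..."""), bailing out to the middle-scanning path at the first mismatch. A sketch with quote and quote_size standing in for the tokenizer-mode fields:

    #include <stdio.h>

    /* Return 1 if `p` begins with `quote_size` copies of `quote`. */
    static int at_string_end(const char *p, char quote, int quote_size) {
        for (int i = 0; i < quote_size; i++) {
            if (p[i] != quote) {
                return 0;  /* the real lexer also backs up the char it read */
            }
        }
        return 1;
    }

    int main(void) {
        printf("%d\n", at_string_end("\"\"\" tail", '"', 3));  /* 1: triple quote */
        printf("%d\n", at_string_end("\"\" tail", '"', 3));    /* 0: only two    */
        return 0;
    }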
1433
1434
12.0k
    if (current_tok->last_expr_buffer != NULL) {
1435
7.05k
        PyMem_Free(current_tok->last_expr_buffer);
1436
7.05k
        current_tok->last_expr_buffer = NULL;
1437
7.05k
        current_tok->last_expr_size = 0;
1438
7.05k
        current_tok->last_expr_end = -1;
1439
7.05k
    }
1440
1441
12.0k
    p_start = tok->start;
1442
12.0k
    p_end = tok->cur;
1443
12.0k
    tok->tok_mode_stack_index--;
1444
12.0k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
1446
27.8k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
27.8k
    tok->multi_line_start = tok->line_start;
1451
165k
    while (end_quote_size != current_tok->quote_size) {
1452
159k
        int c = tok_nextc(tok);
1453
159k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
159k
        int in_format_spec = (
1457
159k
                current_tok->in_format_spec
1458
10.9k
                &&
1459
10.9k
                INSIDE_FSTRING_EXPR(current_tok)
1460
159k
        );
1461
1462
159k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
447
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we find a newline,
1468
            // it means the format spec ends here and we should
1469
            // return to the regular mode.
1470
447
            if (in_format_spec && c == '\n') {
1471
53
                if (current_tok->quote_size == 1) {
1472
53
                    return MAKE_TOKEN(
1473
53
                        _PyTokenizer_syntaxerror(
1474
53
                            tok,
1475
53
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
53
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
53
                        )
1478
53
                    );
1479
53
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
53
            }
1487
1488
447
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location back to
1490
            // the start of the string, and report the error
1491
            // from the initial quote character
1492
394
            tok->cur = (char *)current_tok->start;
1493
394
            tok->cur++;
1494
394
            tok->line_start = current_tok->multi_line_start;
1495
394
            int start = tok->lineno;
1496
1497
394
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
394
            tok->lineno = the_current_tok->first_line;
1499
1500
394
            if (current_tok->quote_size == 3) {
1501
42
                _PyTokenizer_syntaxerror(tok,
1502
42
                                    "unterminated triple-quoted %c-string literal"
1503
42
                                    " (detected at line %d)",
1504
42
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
42
                if (c != '\n') {
1506
42
                    tok->done = E_EOFS;
1507
42
                }
1508
42
                return MAKE_TOKEN(ERRORTOKEN);
1509
42
            }
1510
352
            else {
1511
352
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
352
                                    "unterminated %c-string literal (detected at"
1513
352
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
352
            }
1515
394
        }
1516
1517
159k
        if (c == current_tok->quote) {
1518
8.93k
            end_quote_size += 1;
1519
8.93k
            continue;
1520
150k
        } else {
1521
150k
            end_quote_size = 0;
1522
150k
        }
1523
1524
150k
        if (c == '{') {
1525
16.7k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
16.7k
            int peek = tok_nextc(tok);
1529
16.7k
            if (peek != '{' || in_format_spec) {
1530
13.5k
                tok_backup(tok, peek);
1531
13.5k
                tok_backup(tok, c);
1532
13.5k
                current_tok->curly_bracket_expr_start_depth++;
1533
13.5k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
5
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
5
                }
1537
13.5k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
13.5k
                current_tok->in_format_spec = 0;
1539
13.5k
                p_start = tok->start;
1540
13.5k
                p_end = tok->cur;
1541
13.5k
            } else {
1542
3.19k
                p_start = tok->start;
1543
3.19k
                p_end = tok->cur - 1;
1544
3.19k
            }
1545
16.7k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
133k
        } else if (c == '}') {
1547
5.28k
            if (unicode_escape) {
1548
400
                p_start = tok->start;
1549
400
                p_end = tok->cur;
1550
400
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
400
            }
1552
4.88k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already finished scanning
1555
            // the expression (indicated by the end of the expression being set) and we are not at
1556
            // the top level of the bracket stack (-1 is the top level). Since format specifiers
1557
            // can't legally contain doubled brackets, we can skip that handling here.
1558
4.88k
            int cursor = current_tok->curly_bracket_depth;
1559
4.88k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.62k
                p_start = tok->start;
1561
1.62k
                p_end = tok->cur - 1;
1562
3.26k
            } else {
1563
3.26k
                tok_backup(tok, peek);
1564
3.26k
                tok_backup(tok, c);
1565
3.26k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.26k
                current_tok->in_format_spec = 0;
1567
3.26k
                p_start = tok->start;
1568
3.26k
                p_end = tok->cur;
1569
3.26k
            }
1570
4.88k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
128k
        } else if (c == '\\') {
1572
5.59k
            int peek = tok_nextc(tok);
1573
5.59k
            if (peek == '\r') {
1574
67
                peek = tok_nextc(tok);
1575
67
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace: we have to restore the character and hand control
1578
            // back to the loop for the next iteration.
1579
5.59k
            if (peek == '{' || peek == '}') {
1580
1.39k
                if (!current_tok->raw) {
1581
1.19k
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
1
                        return MAKE_TOKEN(ERRORTOKEN);
1583
1
                    }
1584
1.19k
                }
1585
1.39k
                tok_backup(tok, peek);
1586
1.39k
                continue;
1587
1.39k
            }
1588
1589
4.20k
            if (!current_tok->raw) {
1590
4.06k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
537
                    peek = tok_nextc(tok);
1593
537
                    if (peek == '{') {
1594
451
                        unicode_escape = 1;
1595
451
                    } else {
1596
86
                        tok_backup(tok, peek);
1597
86
                    }
1598
537
                }
1599
4.06k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
4.20k
        }
1603
150k
    }
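The unicode_escape flag set in the backslash branch keeps the '}' that closes a named escape such as \N{BULLET} from being taken as the end of a replacement field. A hedged sketch of that disambiguation:

    #include <stdio.h>

    int main(void) {
        /* In f"\N{BULLET} {x}", the first '}' closes the named escape,
           the second closes the replacement field. */
        const char *s = "\\N{BULLET} {x}";
        int in_named_escape = 0;
        for (const char *p = s; *p; p++) {
            if (p[0] == '\\' && p[1] == 'N' && p[2] == '{') {
                in_named_escape = 1;
                printf("named escape opens at offset %d\n", (int)(p - s));
                p += 2;  /* skip past "N{" */
            }
            else if (*p == '}' && in_named_escape) {
                in_named_escape = 0;
                printf("named escape closes at offset %d\n", (int)(p - s));
            }
            else if (*p == '{') {
                printf("replacement field opens at offset %d\n", (int)(p - s));
            }
            else if (*p == '}') {
                printf("replacement field closes at offset %d\n", (int)(p - s));
            }
        }
        return 0;
    }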
1604
1605
    // Back up the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // attach the quotes to the FSTRING_END in the next tokenizer iteration.
1607
11.7k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.30k
        tok_backup(tok, current_tok->quote);
1609
6.30k
    }
1610
5.43k
    p_start = tok->start;
1611
5.43k
    p_end = tok->cur;
1612
5.43k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
27.8k
}
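The while condition above counts consecutive quote characters, and once a full run is seen the function backs the quotes up so they are re-read as FSTRING_END on the next call. The counting logic in isolation:

    #include <stdio.h>

    int main(void) {
        const char *body = "a\"b\"\"c\"\"\"";  /* middle of f"""a"b""c""" */
        char quote = '"';
        int quote_size = 3, end_quote_size = 0;
        const char *p = body;
        while (end_quote_size != quote_size && *p) {
            if (*p == quote) {
                end_quote_size += 1;   /* another candidate closing quote */
            } else {
                end_quote_size = 0;    /* run broken: quotes were literal text */
            }
            p++;
        }
        if (end_quote_size == quote_size) {
            /* Everything before the final run of quotes is FSTRING_MIDDLE text. */
            int middle_len = (int)(p - body) - quote_size;
            printf("middle is %d chars; closing quotes start at offset %d\n",
                   middle_len, middle_len);
        }
        return 0;
    }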
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.71M
{
1618
1.71M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.71M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.66M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.66M
    } else {
1622
52.6k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
52.6k
    }
1624
1.71M
}
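tok_get is a thin dispatcher over the mode stack: each f-string prefix pushes a mode and each FSTRING_END pops one, so the top of the stack always names the scanner to use. A minimal sketch of that shape (the enum, struct, and limit value are illustrative):

    #include <stdio.h>

    enum mode_kind { REGULAR_MODE, FSTRING_MODE };

    struct mode { enum mode_kind kind; };

    #define MAXFSTRINGLEVEL 150  /* illustrative nesting limit */

    struct scanner {
        struct mode stack[MAXFSTRINGLEVEL];
        int top;
    };

    static const char *get_token(struct scanner *s) {
        /* Dispatch on whatever mode sits on top of the stack. */
        if (s->stack[s->top].kind == REGULAR_MODE) {
            return "scanned with the regular-mode tokenizer";
        }
        return "scanned with the f-string-mode tokenizer";
    }

    int main(void) {
        struct scanner s = { .stack = {{REGULAR_MODE}}, .top = 0 };
        puts(get_token(&s));
        s.stack[++s.top].kind = FSTRING_MODE;  /* an f-string prefix was seen */
        puts(get_token(&s));
        s.top--;                               /* FSTRING_END pops the mode  */
        puts(get_token(&s));
        return 0;
    }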
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.71M
{
1629
1.71M
    int result = tok_get(tok, token);
1630
1.71M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.71M
    return result;
1635
1.71M
}
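The wrapper normalizes a late decoding failure into ERRORTOKEN so callers check only the returned token type. The same pattern in a self-contained sketch (all names here are invented):

    #include <stdio.h>

    enum { TOK_OK, TOK_ERROR };

    struct state { int decoding_erred; };

    /* Stand-in for the raw tokenizer step. */
    static int raw_get(struct state *st) {
        st->decoding_erred = 1;  /* pretend a decode error surfaced mid-token */
        return TOK_OK;
    }

    /* Wrapper: whatever raw_get returned, a decode error wins. */
    static int get_token(struct state *st) {
        int result = raw_get(st);
        if (st->decoding_erred) {
            result = TOK_ERROR;
        }
        return result;
    }

    int main(void) {
        struct state st = {0};
        printf("token kind = %s\n",
               get_token(&st) == TOK_ERROR ? "ERRORTOKEN" : "OK");
        return 0;
    }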