Coverage Report

Created: 2025-11-02 06:30

/src/cpython/Parser/lexer/lexer.c
Line | Count | Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
990
#define ALTTABSIZE 1
11
12
1.59M
#define is_potential_identifier_start(c) (\
13
1.59M
              (c >= 'a' && c <= 'z')\
14
1.59M
               || (c >= 'A' && c <= 'Z')\
15
1.59M
               || c == '_'\
16
1.59M
               || (c >= 128))
17
18
2.35M
#define is_potential_identifier_char(c) (\
19
2.35M
              (c >= 'a' && c <= 'z')\
20
2.35M
               || (c >= 'A' && c <= 'Z')\
21
2.35M
               || (c >= '0' && c <= '9')\
22
2.35M
               || c == '_'\
23
2.35M
               || (c >= 128))
24
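
A note on the two macros above: they classify raw bytes with no locale dependence (ASCII letters, '_', digits for continuation only, and any byte >= 128), deferring real non-ASCII validation to verify_identifier() further down in this file. A minimal standalone sketch of the same predicates, as a hypothetical harness that is not part of the measured file:

    #include <stdio.h>

    /* Same tests as the macros above, written as functions for readability. */
    static int ident_start(int c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
            || c == '_' || c >= 128;
    }
    static int ident_char(int c) {
        return ident_start(c) || (c >= '0' && c <= '9');
    }

    int main(void) {
        const char *samples = "a_9\xc3(";
        for (const unsigned char *p = (const unsigned char *)samples; *p; p++) {
            printf("0x%02x start=%d char=%d\n", *p, ident_start(*p), ident_char(*p));
        }
        return 0;   /* 0xc3 (a UTF-8 lead byte) passes both; '(' passes neither */
    }
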
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.71M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
15.9k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
40
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.60M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
228k
{
55
228k
    return memchr(str, 0, size) != NULL;
56
228k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
10.3M
{
62
10.3M
    int rc;
63
10.5M
    for (;;) {
64
10.5M
        if (tok->cur != tok->inp) {
65
10.2M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
10.2M
            tok->col_offset++;
70
10.2M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
10.2M
        }
72
276k
        if (tok->done != E_OK) {
73
32.0k
            return EOF;
74
32.0k
        }
75
244k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
244k
        if (!rc) {
84
16.1k
            tok->cur = tok->inp;
85
16.1k
            return EOF;
86
16.1k
        }
87
228k
        tok->line_start = tok->cur;
88
89
228k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
228k
    }
95
10.3M
    Py_UNREACHABLE();
96
10.3M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.39M
{
102
3.39M
    if (c != EOF) {
103
3.36M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.36M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.36M
        tok->col_offset--;
110
3.36M
    }
111
3.39M
}
112
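
tok_backup() is the inverse of tok_nextc(): it may push back at most the one character just read, and it verifies that the byte at the rewound position matches. Callers peek by pairing the two, as lookahead() below does. A minimal cursor sketch of the same contract, with hypothetical names:

    #include <assert.h>
    #include <stdio.h>

    struct cursor { const char *buf, *cur, *end; };

    static int cur_next(struct cursor *c) {
        return c->cur == c->end ? EOF : (unsigned char)*c->cur++;
    }

    /* Push back the character just read; it must match the buffer. */
    static void cur_backup(struct cursor *c, int ch) {
        if (ch != EOF) {
            assert(c->cur > c->buf);
            c->cur--;
            assert((unsigned char)*c->cur == (unsigned char)ch);
        }
    }

    int main(void) {
        const char *s = "if";
        struct cursor c = { s, s, s + 2 };
        int ch = cur_next(&c);            /* peek... */
        cur_backup(&c, ch);               /* ...and undo: cursor is unchanged */
        printf("%c\n", cur_next(&c));     /* prints 'i' */
        return 0;
    }
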
113
static int
114
23.4k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
23.4k
    assert(token != NULL);
116
23.4k
    assert(c == '}' || c == ':' || c == '!');
117
23.4k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
23.4k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
13.8k
        return 0;
121
13.8k
    }
122
9.65k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
9.65k
    int hash_detected = 0;
126
9.65k
    int in_string = 0;
127
9.65k
    char quote_char = 0;
128
129
1.01M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.01M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.01M
        if (ch == '\\') {
134
18.4k
            i++;
135
18.4k
            continue;
136
18.4k
        }
137
138
        // Handle quotes
139
992k
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
173k
            if (!in_string) {
148
64.3k
                in_string = 1;
149
64.3k
                quote_char = ch;
150
64.3k
            }
151
108k
            else if (ch == quote_char) {
152
63.6k
                in_string = 0;
153
63.6k
            }
154
173k
            continue;
155
173k
        }
156
157
        // Check for # outside strings
158
818k
        if (ch == '#' && !in_string) {
159
895
            hash_detected = 1;
160
895
            break;
161
895
        }
162
818k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
9.65k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
895
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
895
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
895
        Py_ssize_t i = 0;  // Input position
172
895
        Py_ssize_t j = 0;  // Output position
173
895
        in_string = 0;     // Whether we're in a string
174
895
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
63.5k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
62.6k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
62.6k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
9.64k
                if (!in_string) {
184
3.82k
                    in_string = 1;
185
3.82k
                    quote_char = ch;
186
5.82k
                } else if (ch == quote_char) {
187
3.81k
                    in_string = 0;
188
3.81k
                }
189
9.64k
                result[j++] = ch;
190
9.64k
            }
191
            // Skip comments
192
53.0k
            else if (ch == '#' && !in_string) {
193
46.8k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
46.1k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
45.7k
                    i++;
196
45.7k
                }
197
1.11k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
341
                    result[j++] = '\n';
199
341
                }
200
1.11k
            }
201
            // Copy other chars
202
51.8k
            else {
203
51.8k
                result[j++] = ch;
204
51.8k
            }
205
62.6k
            i++;
206
62.6k
        }
207
208
895
        result[j] = '\0';  // Null-terminate the result string
209
895
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
895
        PyMem_Free(result);
211
8.76k
    } else {
212
8.76k
        res = PyUnicode_DecodeUTF8(
213
8.76k
            tok_mode->last_expr_buffer,
214
8.76k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
8.76k
            NULL
216
8.76k
        );
217
8.76k
    }
218
219
9.65k
    if (!res) {
220
0
        return -1;
221
0
    }
222
9.65k
    token->metadata = res;
223
9.65k
    return 0;
224
9.65k
}
225
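
set_ftstring_expr() only materializes the expression text (token->metadata) for f-string debug expressions and for t-strings, per the early return above; the odd-quote-count observation lets a single in_string flag track string literals while '#' comments outside them are replaced by a newline. A standalone variant that combines the escape skipping of the detection loop with the rewriting of the second pass, as a hypothetical harness:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Strip '#' comments that occur outside string literals: escapes are
     * copied verbatim, and in_string is toggled by quote parity. */
    static char *strip_comments(const char *src) {
        size_t n = strlen(src);
        char *out = malloc(n + 1);
        if (out == NULL) {
            return NULL;
        }
        size_t j = 0;
        int in_string = 0;
        char quote = 0;
        for (size_t i = 0; i < n; i++) {
            char ch = src[i];
            if (ch == '\\') {                 /* copy escapes verbatim */
                out[j++] = ch;
                if (i + 1 < n) {
                    out[j++] = src[++i];
                }
            }
            else if (ch == '"' || ch == '\'') {
                if (!in_string) { in_string = 1; quote = ch; }
                else if (ch == quote) { in_string = 0; }
                out[j++] = ch;
            }
            else if (ch == '#' && !in_string) {
                while (i < n && src[i] != '\n') {
                    i++;                      /* drop the comment body */
                }
                if (i < n) {
                    out[j++] = '\n';          /* keep the line break */
                }
            }
            else {
                out[j++] = ch;
            }
        }
        out[j] = '\0';
        return out;
    }

    int main(void) {
        char *s = strip_comments("x + '#'  # trailing comment\n+ y");
        if (s != NULL) {
            puts(s);    /* the quoted '#' survives; the comment does not */
            free(s);
        }
        return 0;
    }
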
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
62.7k
{
229
62.7k
    assert(tok->cur != NULL);
230
231
62.7k
    Py_ssize_t size = strlen(tok->cur);
232
62.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
62.7k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
39.2k
        case '{':
252
39.2k
            if (tok_mode->last_expr_buffer != NULL) {
253
28.4k
                PyMem_Free(tok_mode->last_expr_buffer);
254
28.4k
            }
255
39.2k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
39.2k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
39.2k
            tok_mode->last_expr_size = size;
260
39.2k
            tok_mode->last_expr_end = -1;
261
39.2k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
39.2k
            break;
263
18.3k
        case '}':
264
19.9k
        case '!':
265
19.9k
            tok_mode->last_expr_end = strlen(tok->start);
266
19.9k
            break;
267
3.50k
        case ':':
268
3.50k
            if (tok_mode->last_expr_end == -1) {
269
3.17k
               tok_mode->last_expr_end = strlen(tok->start);
270
3.17k
            }
271
3.50k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
62.7k
    }
275
62.7k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
62.7k
}
280
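
Both the snapshot taken at '{' and the end markers recorded at '}', '!', and the first ':' are stored as lengths of the remaining input, so recovering the expression text needs no extra bookkeeping: it is the first last_expr_size - last_expr_end bytes of the saved buffer, which set_ftstring_expr() above slices out. A minimal sketch of that suffix arithmetic, with hypothetical values:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        /* Remaining input when '{' was seen, and when '}' was seen. */
        const char *at_open_brace  = "a + b}\"\n";
        const char *at_close_brace = "}\"\n";
        size_t last_expr_size = strlen(at_open_brace);    /* 8 */
        size_t last_expr_end  = strlen(at_close_brace);   /* 3 */
        /* The expression is exactly what was consumed in between. */
        printf("%.*s\n", (int)(last_expr_size - last_expr_end), at_open_brace);
        return 0;   /* prints: a + b */
    }
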
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
9.09k
{
284
9.09k
    const char *s = test;
285
9.09k
    int res = 0;
286
23.8k
    while (1) {
287
23.8k
        int c = tok_nextc(tok);
288
23.8k
        if (*s == 0) {
289
9.00k
            res = !is_potential_identifier_char(c);
290
9.00k
        }
291
14.8k
        else if (c == *s) {
292
14.7k
            s++;
293
14.7k
            continue;
294
14.7k
        }
295
296
9.09k
        tok_backup(tok, c);
297
23.8k
        while (s != test) {
298
14.7k
            tok_backup(tok, *--s);
299
14.7k
        }
300
9.09k
        return res;
301
23.8k
    }
302
9.09k
}
303
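
lookahead() answers one question: does the rest of a keyword, followed by a non-identifier character, come next in the stream? It always restores the stream character by character before returning. A standalone sketch of the same shape over a plain string cursor, with hypothetical names:

    #include <stdio.h>
    #include <string.h>

    /* Does `rest` follow at *p, ending at an identifier boundary?
     * The cursor is never advanced, mirroring lookahead()'s full restore. */
    static int follows_keyword(const char *p, const char *rest) {
        size_t n = strlen(rest);
        if (strncmp(p, rest, n) != 0) {
            return 0;
        }
        char c = p[n];
        return !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
              || (c >= '0' && c <= '9') || c == '_' || (unsigned char)c >= 128);
    }

    int main(void) {
        /* After reading '1' then 'a' in "1and x", check for "nd" + boundary. */
        printf("%d\n", follows_keyword("nd x", "nd"));   /* 1: "1and x" */
        printf("%d\n", follows_keyword("nds",  "nd"));   /* 0: "ands" is one name */
        return 0;
    }
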
304
static int
305
93.6k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
93.6k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of the keywords that can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * This allows gradually deprecating existing valid code without adding a
315
     * warning before the error in most cases of invalid numeric literals (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with a slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by some
319
     * other keyword or identifier.
320
     */
321
93.6k
    int r = 0;
322
93.6k
    if (c == 'a') {
323
1.16k
        r = lookahead(tok, "nd");
324
1.16k
    }
325
92.4k
    else if (c == 'e') {
326
513
        r = lookahead(tok, "lse");
327
513
    }
328
91.9k
    else if (c == 'f') {
329
3.35k
        r = lookahead(tok, "or");
330
3.35k
    }
331
88.5k
    else if (c == 'i') {
332
1.50k
        int c2 = tok_nextc(tok);
333
1.50k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.48k
            r = 1;
335
1.48k
        }
336
1.50k
        tok_backup(tok, c2);
337
1.50k
    }
338
87.0k
    else if (c == 'o') {
339
3.75k
        r = lookahead(tok, "r");
340
3.75k
    }
341
83.3k
    else if (c == 'n') {
342
306
        r = lookahead(tok, "ot");
343
306
    }
344
93.6k
    if (r) {
345
10.4k
        tok_backup(tok, c);
346
10.4k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.4k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.4k
        tok_nextc(tok);
352
10.4k
    }
353
83.1k
    else /* In future releases, only error will remain. */
354
83.1k
    if (c < 128 && is_potential_identifier_char(c)) {
355
194
        tok_backup(tok, c);
356
194
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
194
        return 0;
358
194
    }
359
93.4k
    return 1;
360
93.6k
}
361
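
Concretely: inputs such as "0in x" or "1if 0 else 2" only emit a SyntaxWarning for the invalid literal and keep tokenizing, while "1abc" fails immediately with a syntax error. A self-contained sketch of the same decision, with hypothetical names:

    #include <stdio.h>
    #include <string.h>

    static int ident_char(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9') || c == '_';
    }

    /* Mirror verify_end_of_number()'s decision: a keyword straight after a
     * number only warns (such code still runs, for now); any other
     * identifier character is a hard "invalid ... literal" error. */
    static const char *classify_after_number(const char *rest) {
        static const char *kws[] =
            { "and", "else", "for", "if", "in", "is", "or", "not", NULL };
        if (!ident_char(rest[0])) {
            return "ok";
        }
        for (int i = 0; kws[i]; i++) {
            size_t n = strlen(kws[i]);
            if (strncmp(rest, kws[i], n) == 0 && !ident_char(rest[n])) {
                return "SyntaxWarning: invalid literal";
            }
        }
        return "SyntaxError: invalid literal";
    }

    int main(void) {
        printf("%s\n", classify_after_number("in x"));  /* warning */
        printf("%s\n", classify_after_number("abc"));   /* error   */
        printf("%s\n", classify_after_number(" + 2"));  /* ok      */
        return 0;
    }
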
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.9k
{
366
11.9k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.9k
    PyObject *s;
370
11.9k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.9k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.9k
    if (s == NULL) {
374
1
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
1
            tok->done = E_DECODE;
376
1
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
1
        return 0;
381
1
    }
382
11.9k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.9k
    assert(invalid >= 0);
384
11.9k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.9k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
711
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
711
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
480
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
480
            if (s != NULL) {
391
480
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
480
            }
393
480
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
480
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
480
        }
399
711
        Py_DECREF(s);
400
711
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
370
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
370
        }
403
341
        else {
404
341
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
341
        }
406
711
        return 0;
407
711
    }
408
11.2k
    Py_DECREF(s);
409
11.2k
    return 1;
410
11.9k
}
411
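
The byte-level macros at the top accept any byte >= 128, so non-ASCII identifiers are only validated here, after decoding the candidate as UTF-8. With the interpreter embedded, the public C API exposes the same PEP 3131 check; a minimal sketch, assuming a normal CPython build with headers on the include path:

    #define PY_SSIZE_T_CLEAN
    #include <Python.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
        Py_Initialize();
        /* "caf\xc3\xa9" is "café"; the second sample hides a NO-BREAK SPACE. */
        const char *candidates[] = { "caf\xc3\xa9", "x\xc2\xa0y", NULL };
        for (int i = 0; candidates[i]; i++) {
            PyObject *s = PyUnicode_DecodeUTF8(candidates[i],
                                               (Py_ssize_t)strlen(candidates[i]),
                                               NULL);
            if (s != NULL) {
                /* Prints 1 for café, 0 for the NBSP sample. */
                printf("sample %d -> %d\n", i, PyUnicode_IsIdentifier(s));
                Py_DECREF(s);
            }
        }
        Py_Finalize();
        return 0;
    }
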
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
75.3k
{
415
75.3k
    int c;
416
417
75.8k
    while (1) {
418
218k
        do {
419
218k
            c = tok_nextc(tok);
420
218k
        } while (Py_ISDIGIT(c));
421
75.8k
        if (c != '_') {
422
75.3k
            break;
423
75.3k
        }
424
534
        c = tok_nextc(tok);
425
534
        if (!Py_ISDIGIT(c)) {
426
12
            tok_backup(tok, c);
427
12
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
12
            return 0;
429
12
        }
430
534
    }
431
75.3k
    return c;
432
75.3k
}
433
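
tok_decimal_tail() enforces PEP 515 grouping for the decimal case: underscores may appear only singly and only between digits, so "1_000_000" scans while "1__0" and a trailing "1_" stop with "invalid decimal literal". A standalone scanner with the same acceptance, as a hypothetical harness (the real function is entered with the first digit already consumed):

    #include <ctype.h>
    #include <stdio.h>

    /* Scan digits with single underscores between groups; return the number
     * of bytes consumed, or -1 on a misplaced underscore. */
    static int scan_decimal_tail(const char *s) {
        int i = 0;
        for (;;) {
            while (isdigit((unsigned char)s[i])) {
                i++;
            }
            if (s[i] != '_') {
                return i;
            }
            i++;   /* consume '_', which must be followed by a digit */
            if (!isdigit((unsigned char)s[i])) {
                return -1;
            }
        }
    }

    int main(void) {
        printf("%d\n", scan_decimal_tail("1_000_000+x"));  /* 9  */
        printf("%d\n", scan_decimal_tail("1__0"));         /* -1 */
        printf("%d\n", scan_decimal_tail("1_"));           /* -1 */
        return 0;
    }
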
434
static inline int
435
1.09k
tok_continuation_line(struct tok_state *tok) {
436
1.09k
    int c = tok_nextc(tok);
437
1.09k
    if (c == '\r') {
438
70
        c = tok_nextc(tok);
439
70
    }
440
1.09k
    if (c != '\n') {
441
51
        tok->done = E_LINECONT;
442
51
        return -1;
443
51
    }
444
1.04k
    c = tok_nextc(tok);
445
1.04k
    if (c == EOF) {
446
55
        tok->done = E_EOF;
447
55
        tok->cur = tok->inp;
448
55
        return -1;
449
987
    } else {
450
987
        tok_backup(tok, c);
451
987
    }
452
987
    return c;
453
1.04k
}
454
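
After a backslash, the tokenizer accepts an optional '\r', requires a '\n', and requires that the input not end immediately after it; otherwise it reports E_LINECONT or E_EOF respectively. A boolean sketch of the same acceptance, with '\0' standing in for EOF (hypothetical harness):

    #include <stdio.h>

    /* Accept "\r?\n" plus at least one more character after a backslash. */
    static int continuation_ok(const char *after_backslash) {
        const char *p = after_backslash;
        if (*p == '\r') {
            p++;
        }
        if (*p != '\n') {
            return 0;   /* E_LINECONT: stray text after the backslash */
        }
        p++;
        return *p != '\0';   /* E_EOF when the file ends right here */
    }

    int main(void) {
        printf("%d\n", continuation_ok("\r\n1 + 2"));  /* 1 */
        printf("%d\n", continuation_ok(" \n1 + 2"));   /* 0 */
        printf("%d\n", continuation_ok("\n"));         /* 0 */
        return 0;
    }
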
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
20.0k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
20.0k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
20.0k
    do {                                                                  \
464
7
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
7
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
7
            (int)(tok->cur - tok->line_start),                            \
467
7
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
7
        return -1;                                                        \
469
7
    } while (0)
470
471
20.0k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
20.0k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
20.0k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
20.0k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
20.0k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
20.0k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
20.0k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
19.9k
#undef RETURN_SYNTAX_ERROR
496
497
19.9k
    return 0;
498
20.0k
}
499
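
The pair checks above leave 'r' as the only prefix that combines with the others (rb, rf, rt in any order and case); 'u' combines with nothing, and 'b', 'f', 't' are mutually exclusive. A table-driven sketch of the same matrix, with hypothetical names:

    #include <stdio.h>

    /* Same pair checks as maybe_raise_syntax_error_for_string_prefixes(). */
    static const char *check_prefixes(int b, int r, int u, int f, int t) {
        if (u && b) return "'u' and 'b' prefixes are incompatible";
        if (u && r) return "'u' and 'r' prefixes are incompatible";
        if (u && f) return "'u' and 'f' prefixes are incompatible";
        if (u && t) return "'u' and 't' prefixes are incompatible";
        if (b && f) return "'b' and 'f' prefixes are incompatible";
        if (b && t) return "'b' and 't' prefixes are incompatible";
        if (f && t) return "'f' and 't' prefixes are incompatible";
        return NULL;   /* ok: lone prefixes, rb, rf, rt, br, fr, tr */
    }

    int main(void) {
        const char *err = check_prefixes(/*b=*/0, /*r=*/1, /*u=*/0, /*f=*/1, /*t=*/0);
        puts(err ? err : "rf: ok");          /* rf: ok */
        err = check_prefixes(1, 0, 0, 1, 0);
        puts(err ? err : "bf: ok");          /* 'b' and 'f' ... incompatible */
        return 0;
    }
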
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.56M
{
503
1.56M
    int c;
504
1.56M
    int blankline, nonascii;
505
506
1.56M
    const char *p_start = NULL;
507
1.56M
    const char *p_end = NULL;
508
1.65M
  nextline:
509
1.65M
    tok->start = NULL;
510
1.65M
    tok->starting_col_offset = -1;
511
1.65M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.65M
    if (tok->atbol) {
516
225k
        int col = 0;
517
225k
        int altcol = 0;
518
225k
        tok->atbol = 0;
519
225k
        int cont_line_col = 0;
520
961k
        for (;;) {
521
961k
            c = tok_nextc(tok);
522
961k
            if (c == ' ') {
523
733k
                col++, altcol++;
524
733k
            }
525
227k
            else if (c == '\t') {
526
495
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
495
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
495
            }
529
227k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.22k
                col = altcol = 0; /* For Emacs users */
531
1.22k
            }
532
226k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
655
                cont_line_col = cont_line_col ? cont_line_col : col;
538
655
                if ((c = tok_continuation_line(tok)) == -1) {
539
42
                    return MAKE_TOKEN(ERRORTOKEN);
540
42
                }
541
655
            }
542
225k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
225k
            else {
546
225k
                break;
547
225k
            }
548
961k
        }
549
225k
        tok_backup(tok, c);
550
225k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
53.3k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
53.3k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
53.3k
            else {
566
53.3k
                blankline = 1; /* Ignore completely */
567
53.3k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
53.3k
        }
571
225k
        if (!blankline && tok->level == 0) {
572
130k
            col = cont_line_col ? cont_line_col : col;
573
130k
            altcol = cont_line_col ? cont_line_col : altcol;
574
130k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
90.1k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
90.1k
            }
580
40.6k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
22.7k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
22.7k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
22.7k
                tok->pendin++;
591
22.7k
                tok->indstack[++tok->indent] = col;
592
22.7k
                tok->altindstack[tok->indent] = altcol;
593
22.7k
            }
594
17.8k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
39.9k
                while (tok->indent > 0 &&
597
35.1k
                    col < tok->indstack[tok->indent]) {
598
22.0k
                    tok->pendin--;
599
22.0k
                    tok->indent--;
600
22.0k
                }
601
17.8k
                if (col != tok->indstack[tok->indent]) {
602
6
                    tok->done = E_DEDENT;
603
6
                    tok->cur = tok->inp;
604
6
                    return MAKE_TOKEN(ERRORTOKEN);
605
6
                }
606
17.8k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
17.8k
            }
610
130k
        }
611
225k
    }
612
613
1.65M
    tok->start = tok->cur;
614
1.65M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
1.65M
    if (tok->pendin != 0) {
618
44.7k
        if (tok->pendin < 0) {
619
22.0k
            if (tok->tok_extra_tokens) {
620
0
                p_start = tok->cur;
621
0
                p_end = tok->cur;
622
0
            }
623
22.0k
            tok->pendin++;
624
22.0k
            return MAKE_TOKEN(DEDENT);
625
22.0k
        }
626
22.7k
        else {
627
22.7k
            if (tok->tok_extra_tokens) {
628
0
                p_start = tok->buf;
629
0
                p_end = tok->cur;
630
0
            }
631
22.7k
            tok->pendin--;
632
22.7k
            return MAKE_TOKEN(INDENT);
633
22.7k
        }
634
44.7k
    }
635
636
    /* Peek ahead at the next character */
637
1.61M
    c = tok_nextc(tok);
638
1.61M
    tok_backup(tok, c);
639
640
1.61M
 again:
641
1.61M
    tok->start = NULL;
642
    /* Skip spaces */
643
1.96M
    do {
644
1.96M
        c = tok_nextc(tok);
645
1.96M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.61M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.61M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.61M
    if (c == '#') {
653
654
39.0k
        const char* p = NULL;
655
39.0k
        const char *prefix, *type_start;
656
39.0k
        int current_starting_col_offset;
657
658
1.29M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.25M
            c = tok_nextc(tok);
660
1.25M
        }
661
662
39.0k
        if (tok->tok_extra_tokens) {
663
0
            p = tok->start;
664
0
        }
665
666
39.0k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
39.0k
        if (tok->tok_extra_tokens) {
721
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
0
            p_start = p;
723
0
            p_end = tok->cur;
724
0
            tok->comment_newline = blankline;
725
0
            return MAKE_TOKEN(COMMENT);
726
0
        }
727
39.0k
    }
728
729
1.61M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.61M
    if (c == EOF) {
735
16.0k
        if (tok->level) {
736
4.08k
            return MAKE_TOKEN(ERRORTOKEN);
737
4.08k
        }
738
11.9k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
16.0k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.59M
    nonascii = 0;
743
1.59M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", f"", and t"". */
745
518k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
639k
        while (1) {
747
639k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
20.9k
                saw_b = 1;
749
20.9k
            }
750
            /* Since this is a backwards-compatibility literal, we don't
751
               want to support it in arbitrary order the way we do byte literals. */
752
619k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
7.03k
                saw_u = 1;
754
7.03k
            }
755
            /* ur"" and ru"" are not supported */
756
612k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
35.9k
                saw_r = 1;
758
35.9k
            }
759
576k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
44.3k
                saw_f = 1;
761
44.3k
            }
762
531k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
33.4k
                saw_t = 1;
764
33.4k
            }
765
498k
            else {
766
498k
                break;
767
498k
            }
768
141k
            c = tok_nextc(tok);
769
141k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
20.0k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
20.0k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
20.0k
                if (status < 0) {
774
7
                    return MAKE_TOKEN(ERRORTOKEN);
775
7
                }
776
777
                // Handle valid f or t string creation:
778
19.9k
                if (saw_f || saw_t) {
779
15.9k
                    goto f_string_quote;
780
15.9k
                }
781
4.04k
                goto letter_quote;
782
19.9k
            }
783
141k
        }
784
2.26M
        while (is_potential_identifier_char(c)) {
785
1.76M
            if (c >= 128) {
786
109k
                nonascii = 1;
787
109k
            }
788
1.76M
            c = tok_nextc(tok);
789
1.76M
        }
790
498k
        tok_backup(tok, c);
791
498k
        if (nonascii && !verify_identifier(tok)) {
792
712
            return MAKE_TOKEN(ERRORTOKEN);
793
712
        }
794
795
497k
        p_start = tok->start;
796
497k
        p_end = tok->cur;
797
798
497k
        return MAKE_TOKEN(NAME);
799
498k
    }
800
801
1.08M
    if (c == '\r') {
802
416
        c = tok_nextc(tok);
803
416
    }
804
805
    /* Newline */
806
1.08M
    if (c == '\n') {
807
206k
        tok->atbol = 1;
808
206k
        if (blankline || tok->level > 0) {
809
94.5k
            if (tok->tok_extra_tokens) {
810
0
                if (tok->comment_newline) {
811
0
                    tok->comment_newline = 0;
812
0
                }
813
0
                p_start = tok->start;
814
0
                p_end = tok->cur;
815
0
                return MAKE_TOKEN(NL);
816
0
            }
817
94.5k
            goto nextline;
818
94.5k
        }
819
112k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
0
            tok->comment_newline = 0;
821
0
            p_start = tok->start;
822
0
            p_end = tok->cur;
823
0
            return MAKE_TOKEN(NL);
824
0
        }
825
112k
        p_start = tok->start;
826
112k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
112k
        tok->cont_line = 0;
828
112k
        return MAKE_TOKEN(NEWLINE);
829
112k
    }
830
831
    /* Period or number starting with period? */
832
873k
    if (c == '.') {
833
33.8k
        c = tok_nextc(tok);
834
33.8k
        if (Py_ISDIGIT(c)) {
835
3.27k
            goto fraction;
836
30.5k
        } else if (c == '.') {
837
1.41k
            c = tok_nextc(tok);
838
1.41k
            if (c == '.') {
839
742
                p_start = tok->start;
840
742
                p_end = tok->cur;
841
742
                return MAKE_TOKEN(ELLIPSIS);
842
742
            }
843
674
            else {
844
674
                tok_backup(tok, c);
845
674
            }
846
674
            tok_backup(tok, '.');
847
674
        }
848
29.1k
        else {
849
29.1k
            tok_backup(tok, c);
850
29.1k
        }
851
29.8k
        p_start = tok->start;
852
29.8k
        p_end = tok->cur;
853
29.8k
        return MAKE_TOKEN(DOT);
854
33.8k
    }
855
856
    /* Number */
857
839k
    if (Py_ISDIGIT(c)) {
858
90.4k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
31.7k
            c = tok_nextc(tok);
861
31.7k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
15.8k
                c = tok_nextc(tok);
864
16.3k
                do {
865
16.3k
                    if (c == '_') {
866
520
                        c = tok_nextc(tok);
867
520
                    }
868
16.3k
                    if (!Py_ISXDIGIT(c)) {
869
20
                        tok_backup(tok, c);
870
20
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
20
                    }
872
80.6k
                    do {
873
80.6k
                        c = tok_nextc(tok);
874
80.6k
                    } while (Py_ISXDIGIT(c));
875
16.3k
                } while (c == '_');
876
15.8k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
2
                    return MAKE_TOKEN(ERRORTOKEN);
878
2
                }
879
15.8k
            }
880
15.8k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
557
                c = tok_nextc(tok);
883
868
                do {
884
868
                    if (c == '_') {
885
317
                        c = tok_nextc(tok);
886
317
                    }
887
868
                    if (c < '0' || c >= '8') {
888
21
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
20
                        else {
893
20
                            tok_backup(tok, c);
894
20
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
20
                        }
896
21
                    }
897
2.05k
                    do {
898
2.05k
                        c = tok_nextc(tok);
899
2.05k
                    } while ('0' <= c && c < '8');
900
847
                } while (c == '_');
901
536
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
535
                if (!verify_end_of_number(tok, c, "octal")) {
906
6
                    return MAKE_TOKEN(ERRORTOKEN);
907
6
                }
908
535
            }
909
15.3k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
570
                c = tok_nextc(tok);
912
1.00k
                do {
913
1.00k
                    if (c == '_') {
914
444
                        c = tok_nextc(tok);
915
444
                    }
916
1.00k
                    if (c != '0' && c != '1') {
917
19
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
18
                        else {
921
18
                            tok_backup(tok, c);
922
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
18
                        }
924
19
                    }
925
4.04k
                    do {
926
4.04k
                        c = tok_nextc(tok);
927
4.04k
                    } while (c == '0' || c == '1');
928
985
                } while (c == '_');
929
551
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
549
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
549
            }
936
14.7k
            else {
937
14.7k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
16.9k
                while (1) {
941
16.9k
                    if (c == '_') {
942
90
                        c = tok_nextc(tok);
943
90
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
90
                    }
948
16.9k
                    if (c != '0') {
949
14.7k
                        break;
950
14.7k
                    }
951
2.19k
                    c = tok_nextc(tok);
952
2.19k
                }
953
14.7k
                char* zeros_end = tok->cur;
954
14.7k
                if (Py_ISDIGIT(c)) {
955
410
                    nonzero = 1;
956
410
                    c = tok_decimal_tail(tok);
957
410
                    if (c == 0) {
958
1
                        return MAKE_TOKEN(ERRORTOKEN);
959
1
                    }
960
410
                }
961
14.7k
                if (c == '.') {
962
920
                    c = tok_nextc(tok);
963
920
                    goto fraction;
964
920
                }
965
13.8k
                else if (c == 'e' || c == 'E') {
966
849
                    goto exponent;
967
849
                }
968
12.9k
                else if (c == 'j' || c == 'J') {
969
767
                    goto imaginary;
970
767
                }
971
12.2k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
28
                    tok_backup(tok, c);
974
28
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
28
                            tok, (int)(tok->start + 1 - tok->line_start),
976
28
                            (int)(zeros_end - tok->line_start),
977
28
                            "leading zeros in decimal integer "
978
28
                            "literals are not permitted; "
979
28
                            "use an 0o prefix for octal integers"));
980
28
                }
981
12.1k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
28
                    return MAKE_TOKEN(ERRORTOKEN);
983
28
                }
984
12.1k
            }
985
31.7k
        }
986
58.7k
        else {
987
            /* Decimal */
988
58.7k
            c = tok_decimal_tail(tok);
989
58.7k
            if (c == 0) {
990
9
                return MAKE_TOKEN(ERRORTOKEN);
991
9
            }
992
58.7k
            {
993
                /* Accept floating-point numbers. */
994
58.7k
                if (c == '.') {
995
3.93k
                    c = tok_nextc(tok);
996
8.13k
        fraction:
997
                    /* Fraction */
998
8.13k
                    if (Py_ISDIGIT(c)) {
999
6.07k
                        c = tok_decimal_tail(tok);
1000
6.07k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
6.07k
                    }
1004
8.13k
                }
1005
62.8k
                if (c == 'e' || c == 'E') {
1006
9.84k
                    int e;
1007
10.6k
                  exponent:
1008
10.6k
                    e = c;
1009
                    /* Exponent part */
1010
10.6k
                    c = tok_nextc(tok);
1011
10.6k
                    if (c == '+' || c == '-') {
1012
3.80k
                        c = tok_nextc(tok);
1013
3.80k
                        if (!Py_ISDIGIT(c)) {
1014
10
                            tok_backup(tok, c);
1015
10
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
10
                        }
1017
6.89k
                    } else if (!Py_ISDIGIT(c)) {
1018
514
                        tok_backup(tok, c);
1019
514
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
33
                            return MAKE_TOKEN(ERRORTOKEN);
1021
33
                        }
1022
481
                        tok_backup(tok, e);
1023
481
                        p_start = tok->start;
1024
481
                        p_end = tok->cur;
1025
481
                        return MAKE_TOKEN(NUMBER);
1026
514
                    }
1027
10.1k
                    c = tok_decimal_tail(tok);
1028
10.1k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
10.1k
                }
1032
63.2k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.97k
        imaginary:
1035
3.97k
                    c = tok_nextc(tok);
1036
3.97k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
13
                        return MAKE_TOKEN(ERRORTOKEN);
1038
13
                    }
1039
3.97k
                }
1040
60.0k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
111
                    return MAKE_TOKEN(ERRORTOKEN);
1042
111
                }
1043
63.2k
            }
1044
63.2k
        }
1045
92.9k
        tok_backup(tok, c);
1046
92.9k
        p_start = tok->start;
1047
92.9k
        p_end = tok->cur;
1048
92.9k
        return MAKE_TOKEN(NUMBER);
1049
90.4k
    }
1050
1051
765k
  f_string_quote:
1052
765k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
15.9k
        && (c == '\'' || c == '"'))) {
1054
1055
15.9k
        int quote = c;
1056
15.9k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi-line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
15.9k
        tok->first_lineno = tok->lineno;
1063
15.9k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
15.9k
        int after_quote = tok_nextc(tok);
1067
15.9k
        if (after_quote == quote) {
1068
2.45k
            int after_after_quote = tok_nextc(tok);
1069
2.45k
            if (after_after_quote == quote) {
1070
752
                quote_size = 3;
1071
752
            }
1072
1.70k
            else {
1073
                // TODO: Check this
1074
1.70k
                tok_backup(tok, after_after_quote);
1075
1.70k
                tok_backup(tok, after_quote);
1076
1.70k
            }
1077
2.45k
        }
1078
15.9k
        if (after_quote != quote) {
1079
13.5k
            tok_backup(tok, after_quote);
1080
13.5k
        }
1081
1082
1083
15.9k
        p_start = tok->start;
1084
15.9k
        p_end = tok->cur;
1085
15.9k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
15.9k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
15.9k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
15.9k
        the_current_tok->quote = quote;
1091
15.9k
        the_current_tok->quote_size = quote_size;
1092
15.9k
        the_current_tok->start = tok->start;
1093
15.9k
        the_current_tok->multi_line_start = tok->line_start;
1094
15.9k
        the_current_tok->first_line = tok->lineno;
1095
15.9k
        the_current_tok->start_offset = -1;
1096
15.9k
        the_current_tok->multi_line_start_offset = -1;
1097
15.9k
        the_current_tok->last_expr_buffer = NULL;
1098
15.9k
        the_current_tok->last_expr_size = 0;
1099
15.9k
        the_current_tok->last_expr_end = -1;
1100
15.9k
        the_current_tok->in_format_spec = 0;
1101
15.9k
        the_current_tok->in_debug = 0;
1102
1103
15.9k
        enum string_kind_t string_kind = FSTRING;
1104
15.9k
        switch (*tok->start) {
1105
548
            case 'T':
1106
4.26k
            case 't':
1107
4.26k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.26k
                string_kind = TSTRING;
1109
4.26k
                break;
1110
1.62k
            case 'F':
1111
11.3k
            case 'f':
1112
11.3k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
11.3k
                break;
1114
68
            case 'R':
1115
370
            case 'r':
1116
370
                the_current_tok->raw = 1;
1117
370
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
97
                    string_kind = TSTRING;
1119
97
                }
1120
370
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
15.9k
        }
1124
1125
15.9k
        the_current_tok->string_kind = string_kind;
1126
15.9k
        the_current_tok->curly_bracket_depth = 0;
1127
15.9k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
15.9k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
15.9k
    }
1130
1131
753k
  letter_quote:
1132
    /* String */
1133
753k
    if (c == '\'' || c == '"') {
1134
54.8k
        int quote = c;
1135
54.8k
        int quote_size = 1;             /* 1 or 3 */
1136
54.8k
        int end_quote_size = 0;
1137
54.8k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi-line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
54.8k
        tok->first_lineno = tok->lineno;
1144
54.8k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
54.8k
        c = tok_nextc(tok);
1148
54.8k
        if (c == quote) {
1149
8.97k
            c = tok_nextc(tok);
1150
8.97k
            if (c == quote) {
1151
2.78k
                quote_size = 3;
1152
2.78k
            }
1153
6.18k
            else {
1154
6.18k
                end_quote_size = 1;     /* empty string found */
1155
6.18k
            }
1156
8.97k
        }
1157
54.8k
        if (c != quote) {
1158
52.0k
            tok_backup(tok, c);
1159
52.0k
        }
1160
1161
        /* Get rest of string */
1162
1.08M
        while (end_quote_size != quote_size) {
1163
1.03M
            c = tok_nextc(tok);
1164
1.03M
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
1.03M
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
1.03M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
315
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
315
                tok->cur = (char *)tok->start;
1176
315
                tok->cur++;
1177
315
                tok->line_start = tok->multi_line_start;
1178
315
                int start = tok->lineno;
1179
315
                tok->lineno = tok->first_lineno;
1180
1181
315
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes;
1185
                     * if it does, this must be a missing '}' token,
1186
                     * so raise the proper error */
1187
30
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
30
                    if (the_current_tok->quote == quote &&
1189
24
                        the_current_tok->quote_size == quote_size) {
1190
19
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
19
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
19
                    }
1193
30
                }
1194
1195
296
                if (quote_size == 3) {
1196
17
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
17
                                     " (detected at line %d)", start);
1198
17
                    if (c != '\n') {
1199
17
                        tok->done = E_EOFS;
1200
17
                    }
1201
17
                    return MAKE_TOKEN(ERRORTOKEN);
1202
17
                }
1203
279
                else {
1204
279
                    if (has_escaped_quote) {
1205
10
                        _PyTokenizer_syntaxerror(
1206
10
                            tok,
1207
10
                            "unterminated string literal (detected at line %d); "
1208
10
                            "perhaps you escaped the end quote?",
1209
10
                            start
1210
10
                        );
1211
269
                    } else {
1212
269
                        _PyTokenizer_syntaxerror(
1213
269
                            tok, "unterminated string literal (detected at line %d)", start
1214
269
                        );
1215
269
                    }
1216
279
                    if (c != '\n') {
1217
14
                        tok->done = E_EOLS;
1218
14
                    }
1219
279
                    return MAKE_TOKEN(ERRORTOKEN);
1220
279
                }
1221
296
            }
1222
1.03M
            if (c == quote) {
1223
55.4k
                end_quote_size += 1;
1224
55.4k
            }
1225
976k
            else {
1226
976k
                end_quote_size = 0;
1227
976k
                if (c == '\\') {
1228
26.8k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
26.8k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
953
                        has_escaped_quote = 1;
1231
953
                    }
1232
26.8k
                    if (c == '\r') {
1233
67
                        c = tok_nextc(tok);
1234
67
                    }
1235
26.8k
                }
1236
976k
            }
1237
1.03M
        }
1238
1239
54.5k
        p_start = tok->start;
1240
54.5k
        p_end = tok->cur;
1241
54.5k
        return MAKE_TOKEN(STRING);
1242
54.8k
    }
1243
1244
    /* Line continuation */
1245
698k
    if (c == '\\') {
1246
438
        if ((c = tok_continuation_line(tok)) == -1) {
1247
64
            return MAKE_TOKEN(ERRORTOKEN);
1248
64
        }
1249
374
        tok->cont_line = 1;
1250
374
        goto again; /* Read next line */
1251
438
    }
1252
1253
    /* Punctuation character */
1254
698k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
698k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so to check that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
54.4k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
54.4k
        int in_format_spec = current_tok->in_format_spec;
1261
54.4k
        int cursor_in_format_with_debug =
1262
54.4k
            cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
54.4k
        int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
54.4k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
54.4k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
54.4k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.69k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.69k
            current_tok->in_format_spec = 1;
1274
4.69k
            p_start = tok->start;
1275
4.69k
            p_end = tok->cur;
1276
4.69k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.69k
        }
1278
54.4k
    }
1279
1280
    /* Check for two-character token */
1281
693k
    {
1282
693k
        int c2 = tok_nextc(tok);
1283
693k
        int current_token = _PyToken_TwoChars(c, c2);
1284
693k
        if (current_token != OP) {
1285
23.5k
            int c3 = tok_nextc(tok);
1286
23.5k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
23.5k
            if (current_token3 != OP) {
1288
938
                current_token = current_token3;
1289
938
            }
1290
22.5k
            else {
1291
22.5k
                tok_backup(tok, c3);
1292
22.5k
            }
1293
23.5k
            p_start = tok->start;
1294
23.5k
            p_end = tok->cur;
1295
23.5k
            return MAKE_TOKEN(current_token);
1296
23.5k
        }
1297
669k
        tok_backup(tok, c2);
1298
669k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
86.3k
    case '(':
1303
115k
    case '[':
1304
158k
    case '{':
1305
158k
        if (tok->level >= MAXLEVEL) {
1306
4
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
4
        }
1308
158k
        tok->parenstack[tok->level] = c;
1309
158k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
158k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
158k
        tok->level++;
1312
158k
        if (INSIDE_FSTRING(tok)) {
1313
28.7k
            current_tok->curly_bracket_depth++;
1314
28.7k
        }
1315
158k
        break;
1316
59.5k
    case ')':
1317
70.6k
    case ']':
1318
96.0k
    case '}':
1319
96.0k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
58
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
58
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
58
        }
1323
96.0k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
194
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
194
        }
1326
95.8k
        if (tok->level > 0) {
1327
95.8k
            tok->level--;
1328
95.8k
            int opening = tok->parenstack[tok->level];
1329
95.8k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
36.3k
                                            (opening == '[' && c == ']') ||
1331
25.3k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactic construct with it, we raise an unmatched
1336
                parentheses error. */
1337
39
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
7
                    assert(current_tok->curly_bracket_depth >= 0);
1339
7
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
7
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
5
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
5
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
5
                    }
1344
7
                }
1345
34
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
2
                            "closing parenthesis '%c' does not match "
1348
2
                            "opening parenthesis '%c' on line %d",
1349
2
                            c, opening, tok->parenlinenostack[tok->level]));
1350
2
                }
1351
32
                else {
1352
32
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
32
                            "closing parenthesis '%c' does not match "
1354
32
                            "opening parenthesis '%c'",
1355
32
                            c, opening));
1356
32
                }
1357
34
            }
1358
95.8k
        }
1359
1360
95.8k
        if (INSIDE_FSTRING(tok)) {
1361
21.6k
            current_tok->curly_bracket_depth--;
1362
21.6k
            if (current_tok->curly_bracket_depth < 0) {
1363
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
1
                    TOK_GET_STRING_PREFIX(tok), c));
1365
1
            }
1366
21.6k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
20.3k
                current_tok->curly_bracket_expr_start_depth--;
1368
20.3k
                current_tok->kind = TOK_FSTRING_MODE;
1369
20.3k
                current_tok->in_format_spec = 0;
1370
20.3k
                current_tok->in_debug = 0;
1371
20.3k
            }
1372
21.6k
        }
1373
95.8k
        break;
1374
415k
    default:
1375
415k
        break;
1376
669k
    }
1377
1378
669k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
427
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
427
    }
1381
1382
669k
    if (c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
4.88k
        current_tok->in_debug = 1;
1384
4.88k
    }
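    // (Illustrative: the '=' in f"{x=}" sets in_debug so the expression text
    // itself can be reproduced in the output, per the self-documenting
    // expression feature.)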
1385
1386
    /* Punctuation character */
1387
669k
    p_start = tok->start;
1388
669k
    p_end = tok->cur;
1389
669k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
669k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
51.7k
{
1395
51.7k
    const char *p_start = NULL;
1396
51.7k
    const char *p_end = NULL;
1397
51.7k
    int end_quote_size = 0;
1398
51.7k
    int unicode_escape = 0;
1399
1400
51.7k
    tok->start = tok->cur;
1401
51.7k
    tok->first_lineno = tok->lineno;
1402
51.7k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
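    // (Illustrative: right after the FSTRING_START of f"{x}" the next
    // character is already '{', so we back everything up below and let
    // tok_get_normal_mode tokenize the expression.)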
1406
51.7k
    int start_char = tok_nextc(tok);
1407
51.7k
    if (start_char == '{') {
1408
14.9k
        int peek1 = tok_nextc(tok);
1409
14.9k
        tok_backup(tok, peek1);
1410
14.9k
        tok_backup(tok, start_char);
1411
14.9k
        if (peek1 != '{') {
1412
12.1k
            current_tok->curly_bracket_expr_start_depth++;
1413
12.1k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
3
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
3
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
3
            }
1417
12.1k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
12.1k
            return tok_get_normal_mode(tok, current_tok, token);
1419
12.1k
        }
1420
14.9k
    }
1421
36.8k
    else {
1422
36.8k
        tok_backup(tok, start_char);
1423
36.8k
    }
1424
1425
    // Check if we are at the end of the string
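    // (Illustrative: for f"" the quote check below matches immediately and we
    // emit FSTRING_END without ever producing an FSTRING_MIDDLE.)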
1426
56.4k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
44.5k
        int quote = tok_nextc(tok);
1428
44.5k
        if (quote != current_tok->quote) {
1429
27.7k
            tok_backup(tok, quote);
1430
27.7k
            goto f_string_middle;
1431
27.7k
        }
1432
44.5k
    }
1433
1434
11.8k
    if (current_tok->last_expr_buffer != NULL) {
1435
6.85k
        PyMem_Free(current_tok->last_expr_buffer);
1436
6.85k
        current_tok->last_expr_buffer = NULL;
1437
6.85k
        current_tok->last_expr_size = 0;
1438
6.85k
        current_tok->last_expr_end = -1;
1439
6.85k
    }
1440
1441
11.8k
    p_start = tok->start;
1442
11.8k
    p_end = tok->cur;
1443
11.8k
    tok->tok_mode_stack_index--;
1444
11.8k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
1446
27.7k
f_string_middle:
1447
1448
    // TODO: Resetting multi_line_start here is a bit of a hack, but it works
1449
    // for now; we need to find a cleaner way to handle this.
1450
27.7k
    tok->multi_line_start = tok->line_start;
1451
170k
    while (end_quote_size != current_tok->quote_size) {
1452
165k
        int c = tok_nextc(tok);
1453
165k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
165k
        int in_format_spec = (
1457
165k
                current_tok->in_format_spec
1458
10.9k
                &&
1459
10.9k
                INSIDE_FSTRING_EXPR(current_tok)
1460
165k
        );
1461
1462
165k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
439
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we find a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to regular mode.
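            // (Illustrative: a literal newline inside the format spec of a
            // single-quoted f-string, e.g. splitting f"{x:>10}" right after
            // the ':', lands in the error branch below.)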
1470
439
            if (in_format_spec && c == '\n') {
1471
49
                if (current_tok->quote_size == 1) {
1472
49
                    return MAKE_TOKEN(
1473
49
                        _PyTokenizer_syntaxerror(
1474
49
                            tok,
1475
49
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
49
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
49
                        )
1478
49
                    );
1479
49
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
49
            }
1487
1488
439
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
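            // (Illustrative: for an f-string whose closing quote is missing,
            // the error reported below points at the opening quote of the
            // literal rather than at the spot where we noticed the problem.)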
1492
390
            tok->cur = (char *)current_tok->start;
1493
390
            tok->cur++;
1494
390
            tok->line_start = current_tok->multi_line_start;
1495
390
            int start = tok->lineno;
1496
1497
390
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
390
            tok->lineno = the_current_tok->first_line;
1499
1500
390
            if (current_tok->quote_size == 3) {
1501
40
                _PyTokenizer_syntaxerror(tok,
1502
40
                                    "unterminated triple-quoted %c-string literal"
1503
40
                                    " (detected at line %d)",
1504
40
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
40
                if (c != '\n') {
1506
40
                    tok->done = E_EOFS;
1507
40
                }
1508
40
                return MAKE_TOKEN(ERRORTOKEN);
1509
40
            }
1510
350
            else {
1511
350
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
350
                                    "unterminated %c-string literal (detected at"
1513
350
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
350
            }
1515
390
        }
1516
1517
164k
        if (c == current_tok->quote) {
1518
9.02k
            end_quote_size += 1;
1519
9.02k
            continue;
1520
155k
        } else {
1521
155k
            end_quote_size = 0;
1522
155k
        }
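        // (Illustrative: '{{' is an escaped literal brace, so f"{{}}" yields
        // the two-character text "{}", while a lone '{' switches us back to
        // regular mode to tokenize a replacement field.)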
1523
1524
155k
        if (c == '{') {
1525
16.3k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
16.3k
            int peek = tok_nextc(tok);
1529
16.3k
            if (peek != '{' || in_format_spec) {
1530
13.1k
                tok_backup(tok, peek);
1531
13.1k
                tok_backup(tok, c);
1532
13.1k
                current_tok->curly_bracket_expr_start_depth++;
1533
13.1k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
5
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
5
                }
1537
13.0k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
13.0k
                current_tok->in_format_spec = 0;
1539
13.0k
                p_start = tok->start;
1540
13.0k
                p_end = tok->cur;
1541
13.0k
            } else {
1542
3.28k
                p_start = tok->start;
1543
3.28k
                p_end = tok->cur - 1;
1544
3.28k
            }
1545
16.3k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
139k
        } else if (c == '}') {
1547
5.45k
            if (unicode_escape) {
1548
381
                p_start = tok->start;
1549
381
                p_end = tok->cur;
1550
381
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
381
            }
1552
5.07k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass it here.
1558
5.07k
            int cursor = current_tok->curly_bracket_depth;
1559
5.07k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.64k
                p_start = tok->start;
1561
1.64k
                p_end = tok->cur - 1;
1562
3.43k
            } else {
1563
3.43k
                tok_backup(tok, peek);
1564
3.43k
                tok_backup(tok, c);
1565
3.43k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.43k
                current_tok->in_format_spec = 0;
1567
3.43k
                p_start = tok->start;
1568
3.43k
                p_end = tok->cur;
1569
3.43k
            }
1570
5.07k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
133k
        } else if (c == '\\') {
1572
5.60k
            int peek = tok_nextc(tok);
1573
5.60k
            if (peek == '\r') {
1574
66
                peek = tok_nextc(tok);
1575
66
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore the character and hand control
1578
            // back to the loop for the next iteration.
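            // (Illustrative: in f"\{x}" the backslash draws an invalid-escape
            // warning, the '{' is backed up, and the next iteration treats it
            // as the start of a replacement field.)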
1579
5.60k
            if (peek == '{' || peek == '}') {
1580
1.40k
                if (!current_tok->raw) {
1581
1.20k
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
1
                        return MAKE_TOKEN(ERRORTOKEN);
1583
1
                    }
1584
1.20k
                }
1585
1.40k
                tok_backup(tok, peek);
1586
1.40k
                continue;
1587
1.40k
            }
1588
1589
4.19k
            if (!current_tok->raw) {
1590
4.07k
                if (peek == 'N') {
1591
                    /* Handle named Unicode escapes (\N{BULLET}) */
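                    /* (Illustrative: in f"\N{BULLET}" the brace after \N
                       opens a character name rather than an expression, so
                       the matching '}' must stay inside FSTRING_MIDDLE.) */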
1592
483
                    peek = tok_nextc(tok);
1593
483
                    if (peek == '{') {
1594
406
                        unicode_escape = 1;
1595
406
                    } else {
1596
77
                        tok_backup(tok, peek);
1597
77
                    }
1598
483
                }
1599
4.07k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
4.19k
        }
1603
155k
    }
1604
1605
    // Back up over the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
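    // (Illustrative: for f"abc" this emits FSTRING_MIDDLE covering "abc" now;
    // the backed-up quote is consumed as FSTRING_END on the next call.)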
1607
11.8k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.37k
        tok_backup(tok, current_tok->quote);
1609
6.37k
    }
1610
5.50k
    p_start = tok->start;
1611
5.50k
    p_end = tok->cur;
1612
5.50k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
27.7k
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.60M
{
1618
1.60M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.60M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.55M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.55M
    } else {
1622
51.7k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
51.7k
    }
1624
1.60M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.60M
{
1629
1.60M
    int result = tok_get(tok, token);
1630
1.60M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.60M
    return result;
1635
1.60M
}