Coverage Report

Created: 2026-06-21 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
#define ALTTABSIZE 1

/* True if `c` may begin an identifier: an ASCII letter, '_' or any value
   >= 128.  The argument is parenthesized so that compound expressions
   (e.g. a conditional) expand with the intended precedence.  It is still
   evaluated several times, so it must not have side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))

/* True if `c` may continue an identifier: everything accepted by
   is_potential_identifier_start() plus the ASCII digits.  Same caveat about
   multiple evaluation of the argument. */
#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))
24
25
/* Accessors for the tokenizer mode stack.
   TOK_GET_MODE yields a pointer to the entry at tok_mode_stack_index;
   TOK_NEXT_MODE first increments tok_mode_stack_index and then yields a
   pointer to the new entry.  Debug builds use functions that assert the
   index stays within MAXFSTRINGLEVEL; other builds use unchecked macros. */
#ifdef Py_DEBUG
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
/* The argument is parenthesized so that any pointer expression (for example
   `&state`) can be passed, matching what the debug functions accept. */
#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
#endif
40
41
/* Pick the t-string or f-string flavour of a token type from the mode's
   string_kind.  `tok_mode` is parenthesized so any pointer expression can be
   passed. */
#define FTSTRING_MIDDLE(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
#define FTSTRING_END(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
/* Prefix letter ('t' or 'f') of the string kind of the current mode. */
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
/* Token-construction shorthands.  They expand to calls that use the local
   variables `tok`, `token`, `p_start` and `p_end` of the calling function. */
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))

/* Spaces in this constant are treated as "zero or more spaces or tabs" when
   tokenizing. */
static const char* type_comment_prefix = "# type: ";
51
52
/* Report whether any of the first `size` bytes at `str` is a zero byte.
   Returns 1 if one is found and 0 otherwise. */
static inline int
contains_null_bytes(const char* str, size_t size)
{
    const void *first_nul = memchr(str, 0, size);
    return first_nul != NULL;
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.5M
{
62
11.5M
    int rc;
63
11.7M
    for (;;) {
64
11.7M
        if (tok->cur != tok->inp) {
65
11.1M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.1M
            tok->col_offset++;
70
11.1M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.1M
        }
72
594k
        if (tok->done != E_OK) {
73
209k
            return EOF;
74
209k
        }
75
384k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
384k
        if (!rc) {
84
104k
            tok->cur = tok->inp;
85
104k
            return EOF;
86
104k
        }
87
279k
        tok->line_start = tok->cur;
88
89
279k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
279k
    }
95
11.5M
    Py_UNREACHABLE();
96
11.5M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.63M
{
102
4.63M
    if (c != EOF) {
103
4.43M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.43M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.43M
        tok->col_offset--;
110
4.43M
    }
111
4.63M
}
112
113
static int
114
26.5k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
26.5k
    assert(token != NULL);
116
26.5k
    assert(c == '}' || c == ':' || c == '!');
117
26.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
26.5k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
14.9k
        return 0;
121
14.9k
    }
122
11.5k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
11.5k
    int hash_detected = 0;
126
11.5k
    int in_string = 0;
127
11.5k
    char quote_char = 0;
128
129
1.42M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.41M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.41M
        if (ch == '\\') {
134
13.8k
            i++;
135
13.8k
            continue;
136
13.8k
        }
137
138
        // Handle quotes
139
1.40M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works becase there is an off number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
220k
            if (!in_string) {
148
83.3k
                in_string = 1;
149
83.3k
                quote_char = ch;
150
83.3k
            }
151
136k
            else if (ch == quote_char) {
152
82.4k
                in_string = 0;
153
82.4k
            }
154
220k
            continue;
155
220k
        }
156
157
        // Check for # outside strings
158
1.18M
        if (ch == '#' && !in_string) {
159
1.00k
            hash_detected = 1;
160
1.00k
            break;
161
1.00k
        }
162
1.18M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
11.5k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
1.00k
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
1.00k
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
1.00k
        Py_ssize_t i = 0;  // Input position
172
1.00k
        Py_ssize_t j = 0;  // Output position
173
1.00k
        in_string = 0;     // Whether we're in a string
174
1.00k
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
213k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
212k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
212k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
20.3k
                if (!in_string) {
184
7.29k
                    in_string = 1;
185
7.29k
                    quote_char = ch;
186
13.0k
                } else if (ch == quote_char) {
187
7.25k
                    in_string = 0;
188
7.25k
                }
189
20.3k
                result[j++] = ch;
190
20.3k
            }
191
            // Skip comments
192
192k
            else if (ch == '#' && !in_string) {
193
121k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
120k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
120k
                    i++;
196
120k
                }
197
1.25k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
348
                    result[j++] = '\n';
199
348
                }
200
1.25k
            }
201
            // Copy other chars
202
190k
            else {
203
190k
                result[j++] = ch;
204
190k
            }
205
212k
            i++;
206
212k
        }
207
208
1.00k
        result[j] = '\0';  // Null-terminate the result string
209
1.00k
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
1.00k
        PyMem_Free(result);
211
10.5k
    } else {
212
10.5k
        res = PyUnicode_DecodeUTF8(
213
10.5k
            tok_mode->last_expr_buffer,
214
10.5k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
10.5k
            NULL
216
10.5k
        );
217
10.5k
    }
218
219
11.5k
    if (!res) {
220
0
        return -1;
221
0
    }
222
11.5k
    token->metadata = res;
223
11.5k
    return 0;
224
11.5k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
67.7k
{
229
67.7k
    assert(tok->cur != NULL);
230
231
67.7k
    Py_ssize_t size = strlen(tok->cur);
232
67.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
67.7k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
41.2k
        case '{':
252
41.2k
            if (tok_mode->last_expr_buffer != NULL) {
253
29.1k
                PyMem_Free(tok_mode->last_expr_buffer);
254
29.1k
            }
255
41.2k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
41.2k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
41.2k
            tok_mode->last_expr_size = size;
260
41.2k
            tok_mode->last_expr_end = -1;
261
41.2k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
41.2k
            break;
263
20.5k
        case '}':
264
22.7k
        case '!':
265
22.7k
            tok_mode->last_expr_end = strlen(tok->start);
266
22.7k
            break;
267
3.83k
        case ':':
268
3.83k
            if (tok_mode->last_expr_end == -1) {
269
3.57k
               tok_mode->last_expr_end = strlen(tok->start);
270
3.57k
            }
271
3.83k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
67.7k
    }
275
67.7k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
67.7k
}
280
281
/* Check whether the upcoming input is exactly `test` followed by a
   character that cannot continue an identifier.  All characters read are
   pushed back, so the tokenizer position is unchanged on return.
   Returns 1 on a match and 0 otherwise. */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *cursor = test;
    int c = tok_nextc(tok);

    /* Consume input for as long as it agrees with `test`. */
    while (*cursor != 0 && c == *cursor) {
        cursor++;
        c = tok_nextc(tok);
    }

    /* A match needs all of `test` consumed and a non-identifier character
       right after it. */
    int matched = 0;
    if (*cursor == 0) {
        matched = !is_potential_identifier_char(c);
    }

    /* Undo every read, last character first. */
    tok_backup(tok, c);
    while (cursor != test) {
        tok_backup(tok, *--cursor);
    }
    return matched;
}
303
304
static int
305
94.9k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
94.9k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
94.8k
    int r = 0;
322
94.8k
    if (c == 'a') {
323
1.15k
        r = lookahead(tok, "nd");
324
1.15k
    }
325
93.6k
    else if (c == 'e') {
326
674
        r = lookahead(tok, "lse");
327
674
    }
328
93.0k
    else if (c == 'f') {
329
2.78k
        r = lookahead(tok, "or");
330
2.78k
    }
331
90.2k
    else if (c == 'i') {
332
1.36k
        int c2 = tok_nextc(tok);
333
1.36k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.35k
            r = 1;
335
1.35k
        }
336
1.36k
        tok_backup(tok, c2);
337
1.36k
    }
338
88.8k
    else if (c == 'o') {
339
4.33k
        r = lookahead(tok, "r");
340
4.33k
    }
341
84.5k
    else if (c == 'n') {
342
176
        r = lookahead(tok, "ot");
343
176
    }
344
94.8k
    if (r) {
345
10.3k
        tok_backup(tok, c);
346
10.3k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.3k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.3k
        tok_nextc(tok);
352
10.3k
    }
353
84.4k
    else /* In future releases, only error will remain. */
354
84.4k
    if (c < 128 && is_potential_identifier_char(c)) {
355
251
        tok_backup(tok, c);
356
251
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
251
        return 0;
358
251
    }
359
94.5k
    return 1;
360
94.8k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
10.4k
{
366
10.4k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
10.4k
    PyObject *s;
370
10.4k
    if (tok->decoding_erred)
371
0
        return 0;
372
10.4k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
10.4k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
10.4k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
10.4k
    assert(invalid >= 0);
384
10.4k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
10.4k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
534
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
534
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
367
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
367
            if (s != NULL) {
391
367
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
367
            }
393
367
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
367
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
367
        }
399
534
        Py_DECREF(s);
400
534
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
284
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
284
        }
403
250
        else {
404
250
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
250
        }
406
534
        return 0;
407
534
    }
408
9.93k
    Py_DECREF(s);
409
9.93k
    return 1;
410
10.4k
}
411
412
/* Consume the remainder of a decimal digit sequence, allowing single
   underscores between digits.
   Returns the first character after the digits (already consumed), or 0
   after raising a syntax error because an underscore was not followed by a
   digit. */
static int
tok_decimal_tail(struct tok_state *tok)
{
    for (;;) {
        /* Read one character, then keep reading past any run of digits. */
        int c = tok_nextc(tok);
        while (Py_ISDIGIT(c)) {
            c = tok_nextc(tok);
        }
        if (c != '_') {
            return c;
        }

        /* An underscore must be directly followed by another digit. */
        c = tok_nextc(tok);
        if (!Py_ISDIGIT(c)) {
            tok_backup(tok, c);
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
            return 0;
        }
    }
}
433
434
static inline int
435
1.03k
tok_continuation_line(struct tok_state *tok) {
436
1.03k
    int c = tok_nextc(tok);
437
1.03k
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
1.03k
    if (c != '\n') {
441
76
        tok->done = E_LINECONT;
442
76
        return -1;
443
76
    }
444
958
    c = tok_nextc(tok);
445
958
    if (c == EOF) {
446
47
        tok->done = E_EOF;
447
47
        tok->cur = tok->inp;
448
47
        return -1;
449
911
    } else {
450
911
        tok_backup(tok, c);
451
911
    }
452
911
    return c;
453
958
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.0k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.0k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.0k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
21.0k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.0k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.0k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.0k
    if (saw_u && saw_t) {
481
2
        RETURN_SYNTAX_ERROR("u", "t");
482
2
    }
483
484
21.0k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
21.0k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
21.0k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
21.0k
#undef RETURN_SYNTAX_ERROR
496
497
21.0k
    return 0;
498
21.0k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.14M
{
503
2.14M
    int c;
504
2.14M
    int blankline, nonascii;
505
506
2.14M
    const char *p_start = NULL;
507
2.14M
    const char *p_end = NULL;
508
2.25M
  nextline:
509
2.25M
    tok->start = NULL;
510
2.25M
    tok->starting_col_offset = -1;
511
2.25M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.25M
    if (tok->atbol) {
516
377k
        int col = 0;
517
377k
        int altcol = 0;
518
377k
        tok->atbol = 0;
519
377k
        int cont_line_col = 0;
520
641k
        for (;;) {
521
641k
            c = tok_nextc(tok);
522
641k
            if (c == ' ') {
523
261k
                col++, altcol++;
524
261k
            }
525
380k
            else if (c == '\t') {
526
982
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
982
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
982
            }
529
379k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
741
                col = altcol = 0; /* For Emacs users */
531
741
            }
532
378k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
632
                cont_line_col = cont_line_col ? cont_line_col : col;
538
632
                if ((c = tok_continuation_line(tok)) == -1) {
539
25
                    return MAKE_TOKEN(ERRORTOKEN);
540
25
                }
541
632
            }
542
377k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
377k
            else {
546
377k
                break;
547
377k
            }
548
641k
        }
549
377k
        tok_backup(tok, c);
550
377k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
71.0k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
71.0k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
71.0k
            else {
566
71.0k
                blankline = 1; /* Ignore completely */
567
71.0k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
71.0k
        }
571
377k
        if (!blankline && tok->level == 0) {
572
272k
            col = cont_line_col ? cont_line_col : col;
573
272k
            altcol = cont_line_col ? cont_line_col : altcol;
574
272k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
255k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
255k
            }
580
17.1k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
9.52k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
9.52k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
9.52k
                tok->pendin++;
591
9.52k
                tok->indstack[++tok->indent] = col;
592
9.52k
                tok->altindstack[tok->indent] = altcol;
593
9.52k
            }
594
7.61k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
16.5k
                while (tok->indent > 0 &&
597
13.0k
                    col < tok->indstack[tok->indent]) {
598
8.96k
                    tok->pendin--;
599
8.96k
                    tok->indent--;
600
8.96k
                }
601
7.61k
                if (col != tok->indstack[tok->indent]) {
602
10
                    tok->done = E_DEDENT;
603
10
                    tok->cur = tok->inp;
604
10
                    return MAKE_TOKEN(ERRORTOKEN);
605
10
                }
606
7.60k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
7.60k
            }
610
272k
        }
611
377k
    }
612
613
2.25M
    tok->start = tok->cur;
614
2.25M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.25M
    if (tok->pendin != 0) {
618
18.4k
        if (tok->pendin < 0) {
619
8.94k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
8.94k
            tok->pendin++;
624
8.94k
            return MAKE_TOKEN(DEDENT);
625
8.94k
        }
626
9.52k
        else {
627
9.52k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
9.52k
            tok->pendin--;
632
9.52k
            return MAKE_TOKEN(INDENT);
633
9.52k
        }
634
18.4k
    }
635
636
    /* Peek ahead at the next character */
637
2.23M
    c = tok_nextc(tok);
638
2.23M
    tok_backup(tok, c);
639
640
2.23M
 again:
641
2.23M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.67M
    do {
644
2.67M
        c = tok_nextc(tok);
645
2.67M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.23M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.23M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.23M
    if (c == '#') {
653
654
32.0k
        const char* p = NULL;
655
32.0k
        const char *prefix, *type_start;
656
32.0k
        int current_starting_col_offset;
657
658
1.00M
        while (c != EOF && c != '\n' && c != '\r') {
659
969k
            c = tok_nextc(tok);
660
969k
        }
661
662
32.0k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
32.0k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
32.0k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
32.0k
    }
728
729
2.23M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.23M
    if (c == EOF) {
735
104k
        if (tok->level) {
736
3.97k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.97k
        }
738
100k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
104k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.12M
    nonascii = 0;
743
2.12M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
692k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
880k
        while (1) {
747
880k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
18.9k
                saw_b = 1;
749
18.9k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
861k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
93.6k
                saw_u = 1;
754
93.6k
            }
755
            /* ur"" and ru"" are not supported */
756
767k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
23.1k
                saw_r = 1;
758
23.1k
            }
759
744k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
41.6k
                saw_f = 1;
761
41.6k
            }
762
702k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
30.9k
                saw_t = 1;
764
30.9k
            }
765
671k
            else {
766
671k
                break;
767
671k
            }
768
208k
            c = tok_nextc(tok);
769
208k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
21.0k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
21.0k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
21.0k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
21.0k
                if (saw_f || saw_t) {
779
17.3k
                    goto f_string_quote;
780
17.3k
                }
781
3.72k
                goto letter_quote;
782
21.0k
            }
783
208k
        }
784
3.11M
        while (is_potential_identifier_char(c)) {
785
2.44M
            if (c >= 128) {
786
175k
                nonascii = 1;
787
175k
            }
788
2.44M
            c = tok_nextc(tok);
789
2.44M
        }
790
671k
        tok_backup(tok, c);
791
671k
        if (nonascii && !verify_identifier(tok)) {
792
534
            return MAKE_TOKEN(ERRORTOKEN);
793
534
        }
794
795
671k
        p_start = tok->start;
796
671k
        p_end = tok->cur;
797
798
671k
        return MAKE_TOKEN(NAME);
799
671k
    }
800
801
1.43M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.43M
    if (c == '\n') {
807
270k
        tok->atbol = 1;
808
270k
        if (blankline || tok->level > 0) {
809
105k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
105k
            goto nextline;
818
105k
        }
819
165k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
165k
        p_start = tok->start;
826
165k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
165k
        tok->cont_line = 0;
828
165k
        return MAKE_TOKEN(NEWLINE);
829
165k
    }
830
831
    /* Period or number starting with period? */
832
1.16M
    if (c == '.') {
833
26.2k
        c = tok_nextc(tok);
834
26.2k
        if (Py_ISDIGIT(c)) {
835
3.21k
            goto fraction;
836
23.0k
        } else if (c == '.') {
837
1.15k
            c = tok_nextc(tok);
838
1.15k
            if (c == '.') {
839
611
                p_start = tok->start;
840
611
                p_end = tok->cur;
841
611
                return MAKE_TOKEN(ELLIPSIS);
842
611
            }
843
545
            else {
844
545
                tok_backup(tok, c);
845
545
            }
846
545
            tok_backup(tok, '.');
847
545
        }
848
21.8k
        else {
849
21.8k
            tok_backup(tok, c);
850
21.8k
        }
851
22.4k
        p_start = tok->start;
852
22.4k
        p_end = tok->cur;
853
22.4k
        return MAKE_TOKEN(DOT);
854
26.2k
    }
855
856
    /* Number */
857
1.13M
    if (Py_ISDIGIT(c)) {
858
91.8k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
32.0k
            c = tok_nextc(tok);
861
32.0k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
14.6k
                c = tok_nextc(tok);
864
15.8k
                do {
865
15.8k
                    if (c == '_') {
866
1.20k
                        c = tok_nextc(tok);
867
1.20k
                    }
868
15.8k
                    if (!Py_ISXDIGIT(c)) {
869
19
                        tok_backup(tok, c);
870
19
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
19
                    }
872
75.3k
                    do {
873
75.3k
                        c = tok_nextc(tok);
874
75.3k
                    } while (Py_ISXDIGIT(c));
875
15.8k
                } while (c == '_');
876
14.6k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
3
                    return MAKE_TOKEN(ERRORTOKEN);
878
3
                }
879
14.6k
            }
880
17.3k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
613
                c = tok_nextc(tok);
883
1.05k
                do {
884
1.05k
                    if (c == '_') {
885
443
                        c = tok_nextc(tok);
886
443
                    }
887
1.05k
                    if (c < '0' || c >= '8') {
888
20
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
19
                        else {
893
19
                            tok_backup(tok, c);
894
19
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
19
                        }
896
20
                    }
897
3.86k
                    do {
898
3.86k
                        c = tok_nextc(tok);
899
3.86k
                    } while ('0' <= c && c < '8');
900
1.03k
                } while (c == '_');
901
593
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
592
                if (!verify_end_of_number(tok, c, "octal")) {
906
3
                    return MAKE_TOKEN(ERRORTOKEN);
907
3
                }
908
592
            }
909
16.7k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
397
                c = tok_nextc(tok);
912
694
                do {
913
694
                    if (c == '_') {
914
304
                        c = tok_nextc(tok);
915
304
                    }
916
694
                    if (c != '0' && c != '1') {
917
23
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
22
                        else {
921
22
                            tok_backup(tok, c);
922
22
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
22
                        }
924
23
                    }
925
2.80k
                    do {
926
2.80k
                        c = tok_nextc(tok);
927
2.80k
                    } while (c == '0' || c == '1');
928
671
                } while (c == '_');
929
374
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
372
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
372
            }
936
16.3k
            else {
937
16.3k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
18.5k
                while (1) {
941
18.5k
                    if (c == '_') {
942
302
                        c = tok_nextc(tok);
943
302
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
302
                    }
948
18.5k
                    if (c != '0') {
949
16.3k
                        break;
950
16.3k
                    }
951
2.14k
                    c = tok_nextc(tok);
952
2.14k
                }
953
16.3k
                char* zeros_end = tok->cur;
954
16.3k
                if (Py_ISDIGIT(c)) {
955
486
                    nonzero = 1;
956
486
                    c = tok_decimal_tail(tok);
957
486
                    if (c == 0) {
958
1
                        return MAKE_TOKEN(ERRORTOKEN);
959
1
                    }
960
486
                }
961
16.3k
                if (c == '.') {
962
1.13k
                    c = tok_nextc(tok);
963
1.13k
                    goto fraction;
964
1.13k
                }
965
15.2k
                else if (c == 'e' || c == 'E') {
966
911
                    goto exponent;
967
911
                }
968
14.3k
                else if (c == 'j' || c == 'J') {
969
710
                    goto imaginary;
970
710
                }
971
13.6k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
38
                    tok_backup(tok, c);
974
38
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
38
                            tok, (int)(tok->start + 1 - tok->line_start),
976
38
                            (int)(zeros_end - tok->line_start),
977
38
                            "leading zeros in decimal integer "
978
38
                            "literals are not permitted; "
979
38
                            "use an 0o prefix for octal integers"));
980
38
                }
981
13.5k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
30
                    return MAKE_TOKEN(ERRORTOKEN);
983
30
                }
984
13.5k
            }
985
32.0k
        }
986
59.7k
        else {
987
            /* Decimal */
988
59.7k
            c = tok_decimal_tail(tok);
989
59.7k
            if (c == 0) {
990
14
                return MAKE_TOKEN(ERRORTOKEN);
991
14
            }
992
59.7k
            {
993
                /* Accept floating-point numbers. */
994
59.7k
                if (c == '.') {
995
3.79k
                    c = tok_nextc(tok);
996
8.15k
        fraction:
997
                    /* Fraction */
998
8.15k
                    if (Py_ISDIGIT(c)) {
999
6.48k
                        c = tok_decimal_tail(tok);
1000
6.48k
                        if (c == 0) {
1001
2
                            return MAKE_TOKEN(ERRORTOKEN);
1002
2
                        }
1003
6.48k
                    }
1004
8.15k
                }
1005
64.0k
                if (c == 'e' || c == 'E') {
1006
8.52k
                    int e;
1007
9.43k
                  exponent:
1008
9.43k
                    e = c;
1009
                    /* Exponent part */
1010
9.43k
                    c = tok_nextc(tok);
1011
9.43k
                    if (c == '+' || c == '-') {
1012
3.99k
                        c = tok_nextc(tok);
1013
3.99k
                        if (!Py_ISDIGIT(c)) {
1014
12
                            tok_backup(tok, c);
1015
12
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
12
                        }
1017
5.43k
                    } else if (!Py_ISDIGIT(c)) {
1018
664
                        tok_backup(tok, c);
1019
664
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
46
                            return MAKE_TOKEN(ERRORTOKEN);
1021
46
                        }
1022
618
                        tok_backup(tok, e);
1023
618
                        p_start = tok->start;
1024
618
                        p_end = tok->cur;
1025
618
                        return MAKE_TOKEN(NUMBER);
1026
664
                    }
1027
8.75k
                    c = tok_decimal_tail(tok);
1028
8.75k
                    if (c == 0) {
1029
2
                        return MAKE_TOKEN(ERRORTOKEN);
1030
2
                    }
1031
8.75k
                }
1032
64.3k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
5.00k
        imaginary:
1035
5.00k
                    c = tok_nextc(tok);
1036
5.00k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
16
                        return MAKE_TOKEN(ERRORTOKEN);
1038
16
                    }
1039
5.00k
                }
1040
60.0k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
152
                    return MAKE_TOKEN(ERRORTOKEN);
1042
152
                }
1043
64.3k
            }
1044
64.3k
        }
1045
94.0k
        tok_backup(tok, c);
1046
94.0k
        p_start = tok->start;
1047
94.0k
        p_end = tok->cur;
1048
94.0k
        return MAKE_TOKEN(NUMBER);
1049
91.8k
    }
1050
1051
1.06M
  f_string_quote:
1052
1.06M
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
17.3k
        && (c == '\'' || c == '"'))) {
1054
1055
17.3k
        int quote = c;
1056
17.3k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
17.3k
        tok->first_lineno = tok->lineno;
1063
17.3k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
17.3k
        int after_quote = tok_nextc(tok);
1067
17.3k
        if (after_quote == quote) {
1068
2.86k
            int after_after_quote = tok_nextc(tok);
1069
2.86k
            if (after_after_quote == quote) {
1070
809
                quote_size = 3;
1071
809
            }
1072
2.05k
            else {
1073
                // TODO: Check this
1074
2.05k
                tok_backup(tok, after_after_quote);
1075
2.05k
                tok_backup(tok, after_quote);
1076
2.05k
            }
1077
2.86k
        }
1078
17.3k
        if (after_quote != quote) {
1079
14.4k
            tok_backup(tok, after_quote);
1080
14.4k
        }
1081
1082
1083
17.3k
        p_start = tok->start;
1084
17.3k
        p_end = tok->cur;
1085
17.3k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
17.3k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
17.3k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
17.3k
        the_current_tok->quote = quote;
1091
17.3k
        the_current_tok->quote_size = quote_size;
1092
17.3k
        the_current_tok->start = tok->start;
1093
17.3k
        the_current_tok->multi_line_start = tok->line_start;
1094
17.3k
        the_current_tok->first_line = tok->lineno;
1095
17.3k
        the_current_tok->start_offset = -1;
1096
17.3k
        the_current_tok->multi_line_start_offset = -1;
1097
17.3k
        the_current_tok->last_expr_buffer = NULL;
1098
17.3k
        the_current_tok->last_expr_size = 0;
1099
17.3k
        the_current_tok->last_expr_end = -1;
1100
17.3k
        the_current_tok->in_format_spec = 0;
1101
17.3k
        the_current_tok->in_debug = 0;
1102
1103
17.3k
        enum string_kind_t string_kind = FSTRING;
1104
17.3k
        switch (*tok->start) {
1105
935
            case 'T':
1106
5.08k
            case 't':
1107
5.08k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
5.08k
                string_kind = TSTRING;
1109
5.08k
                break;
1110
1.44k
            case 'F':
1111
11.3k
            case 'f':
1112
11.3k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
11.3k
                break;
1114
196
            case 'R':
1115
922
            case 'r':
1116
922
                the_current_tok->raw = 1;
1117
922
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
366
                    string_kind = TSTRING;
1119
366
                }
1120
922
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
17.3k
        }
1124
1125
17.3k
        the_current_tok->string_kind = string_kind;
1126
17.3k
        the_current_tok->curly_bracket_depth = 0;
1127
17.3k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
17.3k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
17.3k
    }
1130
1131
1.05M
  letter_quote:
1132
    /* String */
1133
1.05M
    if (c == '\'' || c == '"') {
1134
37.2k
        int quote = c;
1135
37.2k
        int quote_size = 1;             /* 1 or 3 */
1136
37.2k
        int end_quote_size = 0;
1137
37.2k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
37.2k
        tok->first_lineno = tok->lineno;
1144
37.2k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
37.2k
        c = tok_nextc(tok);
1148
37.2k
        if (c == quote) {
1149
6.86k
            c = tok_nextc(tok);
1150
6.86k
            if (c == quote) {
1151
1.11k
                quote_size = 3;
1152
1.11k
            }
1153
5.74k
            else {
1154
5.74k
                end_quote_size = 1;     /* empty string found */
1155
5.74k
            }
1156
6.86k
        }
1157
37.2k
        if (c != quote) {
1158
36.1k
            tok_backup(tok, c);
1159
36.1k
        }
1160
1161
        /* Get rest of string */
1162
457k
        while (end_quote_size != quote_size) {
1163
421k
            c = tok_nextc(tok);
1164
421k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
421k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
421k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
412
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
412
                tok->cur = (char *)tok->start;
1176
412
                tok->cur++;
1177
412
                tok->line_start = tok->multi_line_start;
1178
412
                int start = tok->lineno;
1179
412
                tok->lineno = tok->first_lineno;
1180
1181
412
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes;
1185
                     * if it does, then this must be a missing '}' token,
1186
                     * so raise the proper error */
1187
51
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
51
                    if (the_current_tok->quote == quote &&
1189
39
                        the_current_tok->quote_size == quote_size) {
1190
36
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
36
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
36
                    }
1193
51
                }
1194
1195
376
                if (quote_size == 3) {
1196
45
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
45
                                     " (detected at line %d)", start);
1198
45
                    if (c != '\n') {
1199
45
                        tok->done = E_EOFS;
1200
45
                    }
1201
45
                    return MAKE_TOKEN(ERRORTOKEN);
1202
45
                }
1203
331
                else {
1204
331
                    if (has_escaped_quote) {
1205
8
                        _PyTokenizer_syntaxerror(
1206
8
                            tok,
1207
8
                            "unterminated string literal (detected at line %d); "
1208
8
                            "perhaps you escaped the end quote?",
1209
8
                            start
1210
8
                        );
1211
323
                    } else {
1212
323
                        _PyTokenizer_syntaxerror(
1213
323
                            tok, "unterminated string literal (detected at line %d)", start
1214
323
                        );
1215
323
                    }
1216
331
                    if (c != '\n') {
1217
7
                        tok->done = E_EOLS;
1218
7
                    }
1219
331
                    return MAKE_TOKEN(ERRORTOKEN);
1220
331
                }
1221
376
            }
1222
420k
            if (c == quote) {
1223
34.3k
                end_quote_size += 1;
1224
34.3k
            }
1225
386k
            else {
1226
386k
                end_quote_size = 0;
1227
386k
                if (c == '\\') {
1228
22.9k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
22.9k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
663
                        has_escaped_quote = 1;
1231
663
                    }
1232
22.9k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
22.9k
                }
1236
386k
            }
1237
420k
        }
1238
1239
36.8k
        p_start = tok->start;
1240
36.8k
        p_end = tok->cur;
1241
36.8k
        return MAKE_TOKEN(STRING);
1242
37.2k
    }
1243
1244
    /* Line continuation */
1245
1.01M
    if (c == '\\') {
1246
402
        if ((c = tok_continuation_line(tok)) == -1) {
1247
98
            return MAKE_TOKEN(ERRORTOKEN);
1248
98
        }
1249
304
        tok->cont_line = 1;
1250
304
        goto again; /* Read next line */
1251
402
    }
1252
1253
    /* Punctuation character */
1254
1.01M
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
1.01M
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so to ensure that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
60.8k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
60.8k
        int in_format_spec = current_tok->in_format_spec;
1261
60.8k
         int cursor_in_format_with_debug =
1262
60.8k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
60.8k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
60.8k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
60.8k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
60.8k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
5.41k
            current_tok->kind = TOK_FSTRING_MODE;
1273
5.41k
            current_tok->in_format_spec = 1;
1274
5.41k
            p_start = tok->start;
1275
5.41k
            p_end = tok->cur;
1276
5.41k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
5.41k
        }
1278
60.8k
    }
1279
1280
    /* Check for two-character token */
1281
1.00M
    {
1282
1.00M
        int c2 = tok_nextc(tok);
1283
1.00M
        int current_token = _PyToken_TwoChars(c, c2);
1284
1.00M
        if (current_token != OP) {
1285
23.0k
            int c3 = tok_nextc(tok);
1286
23.0k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
23.0k
            if (current_token3 != OP) {
1288
1.39k
                current_token = current_token3;
1289
1.39k
            }
1290
21.6k
            else {
1291
21.6k
                tok_backup(tok, c3);
1292
21.6k
            }
1293
23.0k
            p_start = tok->start;
1294
23.0k
            p_end = tok->cur;
1295
23.0k
            return MAKE_TOKEN(current_token);
1296
23.0k
        }
1297
984k
        tok_backup(tok, c2);
1298
984k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
71.0k
    case '(':
1303
113k
    case '[':
1304
159k
    case '{':
1305
159k
        if (tok->level >= MAXLEVEL) {
1306
16
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
16
        }
1308
159k
        tok->parenstack[tok->level] = c;
1309
159k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
159k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
159k
        tok->level++;
1312
159k
        if (INSIDE_FSTRING(tok)) {
1313
34.1k
            current_tok->curly_bracket_depth++;
1314
34.1k
        }
1315
159k
        break;
1316
44.7k
    case ')':
1317
51.8k
    case ']':
1318
79.2k
    case '}':
1319
79.2k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
46
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
46
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
46
        }
1323
79.2k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
227
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
227
        }
1326
79.0k
        if (tok->level > 0) {
1327
79.0k
            tok->level--;
1328
79.0k
            int opening = tok->parenstack[tok->level];
1329
79.0k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
34.4k
                                            (opening == '[' && c == ']') ||
1331
27.3k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it, we'll throw an unmatched
1336
                parentheses error. */
1337
49
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
5
                    assert(current_tok->curly_bracket_depth >= 0);
1339
5
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
5
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
2
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
2
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
2
                    }
1344
5
                }
1345
47
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
7
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
7
                            "closing parenthesis '%c' does not match "
1348
7
                            "opening parenthesis '%c' on line %d",
1349
7
                            c, opening, tok->parenlinenostack[tok->level]));
1350
7
                }
1351
40
                else {
1352
40
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
40
                            "closing parenthesis '%c' does not match "
1354
40
                            "opening parenthesis '%c'",
1355
40
                            c, opening));
1356
40
                }
1357
47
            }
1358
79.0k
        }
1359
1360
78.9k
        if (INSIDE_FSTRING(tok)) {
1361
24.5k
            current_tok->curly_bracket_depth--;
1362
24.5k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
24.5k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
22.7k
                current_tok->curly_bracket_expr_start_depth--;
1368
22.7k
                current_tok->kind = TOK_FSTRING_MODE;
1369
22.7k
                current_tok->in_format_spec = 0;
1370
22.7k
                current_tok->in_debug = 0;
1371
22.7k
            }
1372
24.5k
        }
1373
78.9k
        break;
1374
746k
    default:
1375
746k
        break;
1376
984k
    }
1377
1378
984k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
442
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
442
    }
1381
1382
984k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.40k
        current_tok->in_debug = 1;
1384
5.40k
    }
1385
1386
    /* Punctuation character */
1387
984k
    p_start = tok->start;
1388
984k
    p_end = tok->cur;
1389
984k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
984k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
54.1k
{
1395
54.1k
    const char *p_start = NULL;
1396
54.1k
    const char *p_end = NULL;
1397
54.1k
    int end_quote_size = 0;
1398
54.1k
    int unicode_escape = 0;
1399
1400
54.1k
    tok->start = tok->cur;
1401
54.1k
    tok->first_lineno = tok->lineno;
1402
54.1k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
54.1k
    int start_char = tok_nextc(tok);
1407
54.1k
    if (start_char == '{') {
1408
14.8k
        int peek1 = tok_nextc(tok);
1409
14.8k
        tok_backup(tok, peek1);
1410
14.8k
        tok_backup(tok, start_char);
1411
14.8k
        if (peek1 != '{') {
1412
13.6k
            current_tok->curly_bracket_expr_start_depth++;
1413
13.6k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
4
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
4
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
4
            }
1417
13.6k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
13.6k
            return tok_get_normal_mode(tok, current_tok, token);
1419
13.6k
        }
1420
14.8k
    }
1421
39.3k
    else {
1422
39.3k
        tok_backup(tok, start_char);
1423
39.3k
    }
1424
1425
    // Check if we are at the end of the string
1426
57.4k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
44.3k
        int quote = tok_nextc(tok);
1428
44.3k
        if (quote != current_tok->quote) {
1429
27.5k
            tok_backup(tok, quote);
1430
27.5k
            goto f_string_middle;
1431
27.5k
        }
1432
44.3k
    }
1433
1434
13.0k
    if (current_tok->last_expr_buffer != NULL) {
1435
7.99k
        PyMem_Free(current_tok->last_expr_buffer);
1436
7.99k
        current_tok->last_expr_buffer = NULL;
1437
7.99k
        current_tok->last_expr_size = 0;
1438
7.99k
        current_tok->last_expr_end = -1;
1439
7.99k
    }
1440
1441
13.0k
    p_start = tok->start;
1442
13.0k
    p_end = tok->cur;
1443
13.0k
    tok->tok_mode_stack_index--;
1444
13.0k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
1446
27.5k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
27.5k
    tok->multi_line_start = tok->line_start;
1451
198k
    while (end_quote_size != current_tok->quote_size) {
1452
192k
        int c = tok_nextc(tok);
1453
192k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
192k
        int in_format_spec = (
1457
192k
                current_tok->in_format_spec
1458
12.4k
                &&
1459
12.4k
                INSIDE_FSTRING_EXPR(current_tok)
1460
192k
        );
1461
1462
192k
       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
343
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
343
            if (in_format_spec && c == '\n') {
1471
41
                if (current_tok->quote_size == 1) {
1472
41
                    return MAKE_TOKEN(
1473
41
                        _PyTokenizer_syntaxerror(
1474
41
                            tok,
1475
41
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
41
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
41
                        )
1478
41
                    );
1479
41
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
41
            }
1487
1488
343
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
1492
302
            tok->cur = (char *)current_tok->start;
1493
302
            tok->cur++;
1494
302
            tok->line_start = current_tok->multi_line_start;
1495
302
            int start = tok->lineno;
1496
1497
302
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
302
            tok->lineno = the_current_tok->first_line;
1499
1500
302
            if (current_tok->quote_size == 3) {
1501
21
                _PyTokenizer_syntaxerror(tok,
1502
21
                                    "unterminated triple-quoted %c-string literal"
1503
21
                                    " (detected at line %d)",
1504
21
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
21
                if (c != '\n') {
1506
21
                    tok->done = E_EOFS;
1507
21
                }
1508
21
                return MAKE_TOKEN(ERRORTOKEN);
1509
21
            }
1510
281
            else {
1511
281
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
281
                                    "unterminated %c-string literal (detected at"
1513
281
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
281
            }
1515
302
        }
1516
1517
192k
        if (c == current_tok->quote) {
1518
9.28k
            end_quote_size += 1;
1519
9.28k
            continue;
1520
183k
        } else {
1521
183k
            end_quote_size = 0;
1522
183k
        }
1523
1524
183k
        if (c == '{') {
1525
16.0k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
16.0k
            int peek = tok_nextc(tok);
1529
16.0k
            if (peek != '{' || in_format_spec) {
1530
14.3k
                tok_backup(tok, peek);
1531
14.3k
                tok_backup(tok, c);
1532
14.3k
                current_tok->curly_bracket_expr_start_depth++;
1533
14.3k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
5
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
5
                }
1537
14.2k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
14.2k
                current_tok->in_format_spec = 0;
1539
14.2k
                p_start = tok->start;
1540
14.2k
                p_end = tok->cur;
1541
14.2k
            } else {
1542
1.78k
                p_start = tok->start;
1543
1.78k
                p_end = tok->cur - 1;
1544
1.78k
            }
1545
16.0k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
166k
        } else if (c == '}') {
1547
5.58k
            if (unicode_escape) {
1548
391
                p_start = tok->start;
1549
391
                p_end = tok->cur;
1550
391
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
391
            }
1552
5.19k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass it here.
1558
5.19k
            int cursor = current_tok->curly_bracket_depth;
1559
5.19k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.01k
                p_start = tok->start;
1561
1.01k
                p_end = tok->cur - 1;
1562
4.18k
            } else {
1563
4.18k
                tok_backup(tok, peek);
1564
4.18k
                tok_backup(tok, c);
1565
4.18k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
4.18k
                current_tok->in_format_spec = 0;
1567
4.18k
                p_start = tok->start;
1568
4.18k
                p_end = tok->cur;
1569
4.18k
            }
1570
5.19k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
161k
        } else if (c == '\\') {
1572
5.40k
            int peek = tok_nextc(tok);
1573
5.40k
            if (peek == '\r') {
1574
0
                peek = tok_nextc(tok);
1575
0
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore and return the control back
1578
            // to the loop for the next iteration.
1579
5.40k
            if (peek == '{' || peek == '}') {
1580
645
                if (!current_tok->raw) {
1581
579
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
0
                        return MAKE_TOKEN(ERRORTOKEN);
1583
0
                    }
1584
579
                }
1585
645
                tok_backup(tok, peek);
1586
645
                continue;
1587
645
            }
1588
1589
4.76k
            if (!current_tok->raw) {
1590
4.53k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
609
                    peek = tok_nextc(tok);
1593
609
                    if (peek == '{') {
1594
399
                        unicode_escape = 1;
1595
399
                    } else {
1596
210
                        tok_backup(tok, peek);
1597
210
                    }
1598
609
                }
1599
4.53k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
4.76k
        }
1603
183k
    }
1604
1605
    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1607
12.0k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.60k
        tok_backup(tok, current_tok->quote);
1609
6.60k
    }
1610
5.48k
    p_start = tok->start;
1611
5.48k
    p_end = tok->cur;
1612
5.48k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
27.5k
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.18M
{
1618
2.18M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.18M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.13M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.13M
    } else {
1622
54.1k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
54.1k
    }
1624
2.18M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.18M
{
1629
2.18M
    int result = tok_get(tok, token);
1630
2.18M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.18M
    return result;
1635
2.18M
}