Coverage Report

Created: 2026-01-17 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.29k
#define ALTTABSIZE 1
11
12
1.73M
#define is_potential_identifier_start(c) (\
13
1.73M
              (c >= 'a' && c <= 'z')\
14
1.73M
               || (c >= 'A' && c <= 'Z')\
15
1.73M
               || c == '_'\
16
1.73M
               || (c >= 128))
17
18
2.75M
#define is_potential_identifier_char(c) (\
19
2.75M
              (c >= 'a' && c <= 'z')\
20
2.75M
               || (c >= 'A' && c <= 'Z')\
21
2.75M
               || (c >= '0' && c <= '9')\
22
2.75M
               || c == '_'\
23
2.75M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.86M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
18.2k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
41
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.74M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
257k
{
55
257k
    return memchr(str, 0, size) != NULL;
56
257k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.4M
{
62
11.4M
    int rc;
63
11.7M
    for (;;) {
64
11.7M
        if (tok->cur != tok->inp) {
65
11.4M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.4M
            tok->col_offset++;
70
11.4M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.4M
        }
72
304k
        if (tok->done != E_OK) {
73
31.4k
            return EOF;
74
31.4k
        }
75
273k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
273k
        if (!rc) {
84
15.8k
            tok->cur = tok->inp;
85
15.8k
            return EOF;
86
15.8k
        }
87
257k
        tok->line_start = tok->cur;
88
89
257k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
257k
    }
95
11.4M
    Py_UNREACHABLE();
96
11.4M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.67M
{
102
3.67M
    if (c != EOF) {
103
3.64M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.64M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.64M
        tok->col_offset--;
110
3.64M
    }
111
3.67M
}
112
113
static int
114
24.5k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
24.5k
    assert(token != NULL);
116
24.5k
    assert(c == '}' || c == ':' || c == '!');
117
24.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
24.5k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
14.0k
        return 0;
121
14.0k
    }
122
10.5k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
10.5k
    int hash_detected = 0;
126
10.5k
    int in_string = 0;
127
10.5k
    char quote_char = 0;
128
129
1.13M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.12M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.12M
        if (ch == '\\') {
134
18.5k
            i++;
135
18.5k
            continue;
136
18.5k
        }
137
138
        // Handle quotes
139
1.10M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
195k
            if (!in_string) {
148
71.9k
                in_string = 1;
149
71.9k
                quote_char = ch;
150
71.9k
            }
151
123k
            else if (ch == quote_char) {
152
71.1k
                in_string = 0;
153
71.1k
            }
154
195k
            continue;
155
195k
        }
156
157
        // Check for # outside strings
158
913k
        if (ch == '#' && !in_string) {
159
851
            hash_detected = 1;
160
851
            break;
161
851
        }
162
913k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
10.5k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
851
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
851
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
851
        Py_ssize_t i = 0;  // Input position
172
851
        Py_ssize_t j = 0;  // Output position
173
851
        in_string = 0;     // Whether we're in a string
174
851
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
61.0k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
60.1k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
60.1k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
8.46k
                if (!in_string) {
184
3.38k
                    in_string = 1;
185
3.38k
                    quote_char = ch;
186
5.08k
                } else if (ch == quote_char) {
187
3.36k
                    in_string = 0;
188
3.36k
                }
189
8.46k
                result[j++] = ch;
190
8.46k
            }
191
            // Skip comments
192
51.6k
            else if (ch == '#' && !in_string) {
193
31.7k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
31.0k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
30.4k
                    i++;
196
30.4k
                }
197
1.31k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
563
                    result[j++] = '\n';
199
563
                }
200
1.31k
            }
201
            // Copy other chars
202
50.3k
            else {
203
50.3k
                result[j++] = ch;
204
50.3k
            }
205
60.1k
            i++;
206
60.1k
        }
207
208
851
        result[j] = '\0';  // Null-terminate the result string
209
851
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
851
        PyMem_Free(result);
211
9.68k
    } else {
212
9.68k
        res = PyUnicode_DecodeUTF8(
213
9.68k
            tok_mode->last_expr_buffer,
214
9.68k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
9.68k
            NULL
216
9.68k
        );
217
9.68k
    }
218
219
10.5k
    if (!res) {
220
0
        return -1;
221
0
    }
222
10.5k
    token->metadata = res;
223
10.5k
    return 0;
224
10.5k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
65.2k
{
229
65.2k
    assert(tok->cur != NULL);
230
231
65.2k
    Py_ssize_t size = strlen(tok->cur);
232
65.2k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
65.2k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
40.6k
        case '{':
252
40.6k
            if (tok_mode->last_expr_buffer != NULL) {
253
28.3k
                PyMem_Free(tok_mode->last_expr_buffer);
254
28.3k
            }
255
40.6k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
40.6k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
40.6k
            tok_mode->last_expr_size = size;
260
40.6k
            tok_mode->last_expr_end = -1;
261
40.6k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
40.6k
            break;
263
19.5k
        case '}':
264
21.2k
        case '!':
265
21.2k
            tok_mode->last_expr_end = strlen(tok->start);
266
21.2k
            break;
267
3.36k
        case ':':
268
3.36k
            if (tok_mode->last_expr_end == -1) {
269
2.97k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.97k
            }
271
3.36k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
65.2k
    }
275
65.2k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
65.2k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
8.93k
{
284
8.93k
    const char *s = test;
285
8.93k
    int res = 0;
286
23.9k
    while (1) {
287
23.9k
        int c = tok_nextc(tok);
288
23.9k
        if (*s == 0) {
289
8.84k
            res = !is_potential_identifier_char(c);
290
8.84k
        }
291
15.0k
        else if (c == *s) {
292
15.0k
            s++;
293
15.0k
            continue;
294
15.0k
        }
295
296
8.93k
        tok_backup(tok, c);
297
23.9k
        while (s != test) {
298
15.0k
            tok_backup(tok, *--s);
299
15.0k
        }
300
8.93k
        return res;
301
23.9k
    }
302
8.93k
}
303
304
static int
305
96.2k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
96.2k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
96.2k
    int r = 0;
322
96.2k
    if (c == 'a') {
323
1.15k
        r = lookahead(tok, "nd");
324
1.15k
    }
325
95.1k
    else if (c == 'e') {
326
614
        r = lookahead(tok, "lse");
327
614
    }
328
94.5k
    else if (c == 'f') {
329
3.50k
        r = lookahead(tok, "or");
330
3.50k
    }
331
91.0k
    else if (c == 'i') {
332
1.65k
        int c2 = tok_nextc(tok);
333
1.65k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.63k
            r = 1;
335
1.63k
        }
336
1.65k
        tok_backup(tok, c2);
337
1.65k
    }
338
89.3k
    else if (c == 'o') {
339
3.31k
        r = lookahead(tok, "r");
340
3.31k
    }
341
86.0k
    else if (c == 'n') {
342
344
        r = lookahead(tok, "ot");
343
344
    }
344
96.2k
    if (r) {
345
10.4k
        tok_backup(tok, c);
346
10.4k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.4k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.4k
        tok_nextc(tok);
352
10.4k
    }
353
85.8k
    else /* In future releases, only error will remain. */
354
85.8k
    if (c < 128 && is_potential_identifier_char(c)) {
355
195
        tok_backup(tok, c);
356
195
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
195
        return 0;
358
195
    }
359
96.0k
    return 1;
360
96.2k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
13.1k
{
366
13.1k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
13.1k
    PyObject *s;
370
13.1k
    if (tok->decoding_erred)
371
0
        return 0;
372
13.1k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
13.1k
    if (s == NULL) {
374
2
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
2
            tok->done = E_DECODE;
376
2
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
2
        return 0;
381
2
    }
382
13.1k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
13.1k
    assert(invalid >= 0);
384
13.1k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
13.1k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
697
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
697
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
488
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
488
            if (s != NULL) {
391
488
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
488
            }
393
488
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
488
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
488
        }
399
697
        Py_DECREF(s);
400
697
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
358
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
358
        }
403
339
        else {
404
339
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
339
        }
406
697
        return 0;
407
697
    }
408
12.4k
    Py_DECREF(s);
409
12.4k
    return 1;
410
13.1k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
75.4k
{
415
75.4k
    int c;
416
417
75.9k
    while (1) {
418
222k
        do {
419
222k
            c = tok_nextc(tok);
420
222k
        } while (Py_ISDIGIT(c));
421
75.9k
        if (c != '_') {
422
75.3k
            break;
423
75.3k
        }
424
526
        c = tok_nextc(tok);
425
526
        if (!Py_ISDIGIT(c)) {
426
15
            tok_backup(tok, c);
427
15
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
15
            return 0;
429
15
        }
430
526
    }
431
75.3k
    return c;
432
75.4k
}
433
434
static inline int
435
1.13k
tok_continuation_line(struct tok_state *tok) {
436
1.13k
    int c = tok_nextc(tok);
437
1.13k
    if (c == '\r') {
438
78
        c = tok_nextc(tok);
439
78
    }
440
1.13k
    if (c != '\n') {
441
64
        tok->done = E_LINECONT;
442
64
        return -1;
443
64
    }
444
1.07k
    c = tok_nextc(tok);
445
1.07k
    if (c == EOF) {
446
53
        tok->done = E_EOF;
447
53
        tok->cur = tok->inp;
448
53
        return -1;
449
1.02k
    } else {
450
1.02k
        tok_backup(tok, c);
451
1.02k
    }
452
1.02k
    return c;
453
1.07k
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
23.4k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
23.4k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
23.4k
    do {                                                                  \
464
7
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
7
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
7
            (int)(tok->cur - tok->line_start),                            \
467
7
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
7
        return -1;                                                        \
469
7
    } while (0)
470
471
23.4k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
23.4k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
23.4k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
23.4k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
23.4k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
23.4k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
23.4k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
23.4k
#undef RETURN_SYNTAX_ERROR
496
497
23.4k
    return 0;
498
23.4k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.70M
{
503
1.70M
    int c;
504
1.70M
    int blankline, nonascii;
505
506
1.70M
    const char *p_start = NULL;
507
1.70M
    const char *p_end = NULL;
508
1.80M
  nextline:
509
1.80M
    tok->start = NULL;
510
1.80M
    tok->starting_col_offset = -1;
511
1.80M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.80M
    if (tok->atbol) {
516
250k
        int col = 0;
517
250k
        int altcol = 0;
518
250k
        tok->atbol = 0;
519
250k
        int cont_line_col = 0;
520
1.14M
        for (;;) {
521
1.14M
            c = tok_nextc(tok);
522
1.14M
            if (c == ' ') {
523
895k
                col++, altcol++;
524
895k
            }
525
252k
            else if (c == '\t') {
526
645
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
645
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
645
            }
529
251k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
789
                col = altcol = 0; /* For Emacs users */
531
789
            }
532
251k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
676
                cont_line_col = cont_line_col ? cont_line_col : col;
538
676
                if ((c = tok_continuation_line(tok)) == -1) {
539
53
                    return MAKE_TOKEN(ERRORTOKEN);
540
53
                }
541
676
            }
542
250k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
250k
            else {
546
250k
                break;
547
250k
            }
548
1.14M
        }
549
250k
        tok_backup(tok, c);
550
250k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
63.3k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
63.3k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
63.3k
            else {
566
63.3k
                blankline = 1; /* Ignore completely */
567
63.3k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
63.3k
        }
571
250k
        if (!blankline && tok->level == 0) {
572
144k
            col = cont_line_col ? cont_line_col : col;
573
144k
            altcol = cont_line_col ? cont_line_col : altcol;
574
144k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
94.7k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
94.7k
            }
580
49.3k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
27.5k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
27.5k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
27.5k
                tok->pendin++;
591
27.5k
                tok->indstack[++tok->indent] = col;
592
27.5k
                tok->altindstack[tok->indent] = altcol;
593
27.5k
            }
594
21.7k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
48.6k
                while (tok->indent > 0 &&
597
43.5k
                    col < tok->indstack[tok->indent]) {
598
26.8k
                    tok->pendin--;
599
26.8k
                    tok->indent--;
600
26.8k
                }
601
21.7k
                if (col != tok->indstack[tok->indent]) {
602
6
                    tok->done = E_DEDENT;
603
6
                    tok->cur = tok->inp;
604
6
                    return MAKE_TOKEN(ERRORTOKEN);
605
6
                }
606
21.7k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
21.7k
            }
610
144k
        }
611
250k
    }
612
613
1.80M
    tok->start = tok->cur;
614
1.80M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
1.80M
    if (tok->pendin != 0) {
618
54.4k
        if (tok->pendin < 0) {
619
26.8k
            if (tok->tok_extra_tokens) {
620
0
                p_start = tok->cur;
621
0
                p_end = tok->cur;
622
0
            }
623
26.8k
            tok->pendin++;
624
26.8k
            return MAKE_TOKEN(DEDENT);
625
26.8k
        }
626
27.5k
        else {
627
27.5k
            if (tok->tok_extra_tokens) {
628
0
                p_start = tok->buf;
629
0
                p_end = tok->cur;
630
0
            }
631
27.5k
            tok->pendin--;
632
27.5k
            return MAKE_TOKEN(INDENT);
633
27.5k
        }
634
54.4k
    }
635
636
    /* Peek ahead at the next character */
637
1.75M
    c = tok_nextc(tok);
638
1.75M
    tok_backup(tok, c);
639
640
1.75M
 again:
641
1.75M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.13M
    do {
644
2.13M
        c = tok_nextc(tok);
645
2.13M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.75M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.75M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.75M
    if (c == '#') {
653
654
40.7k
        const char* p = NULL;
655
40.7k
        const char *prefix, *type_start;
656
40.7k
        int current_starting_col_offset;
657
658
1.36M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.32M
            c = tok_nextc(tok);
660
1.32M
        }
661
662
40.7k
        if (tok->tok_extra_tokens) {
663
0
            p = tok->start;
664
0
        }
665
666
40.7k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
40.7k
        if (tok->tok_extra_tokens) {
721
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
0
            p_start = p;
723
0
            p_end = tok->cur;
724
0
            tok->comment_newline = blankline;
725
0
            return MAKE_TOKEN(COMMENT);
726
0
        }
727
40.7k
    }
728
729
1.75M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.75M
    if (c == EOF) {
735
15.7k
        if (tok->level) {
736
4.07k
            return MAKE_TOKEN(ERRORTOKEN);
737
4.07k
        }
738
11.6k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
15.7k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.73M
    nonascii = 0;
743
1.73M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
581k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
713k
        while (1) {
747
713k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
22.7k
                saw_b = 1;
749
22.7k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
690k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
8.64k
                saw_u = 1;
754
8.64k
            }
755
            /* ur"" and ru"" are not supported */
756
681k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
38.8k
                saw_r = 1;
758
38.8k
            }
759
642k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
49.0k
                saw_f = 1;
761
49.0k
            }
762
593k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
36.1k
                saw_t = 1;
764
36.1k
            }
765
557k
            else {
766
557k
                break;
767
557k
            }
768
155k
            c = tok_nextc(tok);
769
155k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
23.4k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
23.4k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
23.4k
                if (status < 0) {
774
7
                    return MAKE_TOKEN(ERRORTOKEN);
775
7
                }
776
777
                // Handle valid f or t string creation:
778
23.4k
                if (saw_f || saw_t) {
779
18.2k
                    goto f_string_quote;
780
18.2k
                }
781
5.24k
                goto letter_quote;
782
23.4k
            }
783
155k
        }
784
2.65M
        while (is_potential_identifier_char(c)) {
785
2.10M
            if (c >= 128) {
786
138k
                nonascii = 1;
787
138k
            }
788
2.10M
            c = tok_nextc(tok);
789
2.10M
        }
790
557k
        tok_backup(tok, c);
791
557k
        if (nonascii && !verify_identifier(tok)) {
792
699
            return MAKE_TOKEN(ERRORTOKEN);
793
699
        }
794
795
557k
        p_start = tok->start;
796
557k
        p_end = tok->cur;
797
798
557k
        return MAKE_TOKEN(NAME);
799
557k
    }
800
801
1.15M
    if (c == '\r') {
802
395
        c = tok_nextc(tok);
803
395
    }
804
805
    /* Newline */
806
1.15M
    if (c == '\n') {
807
232k
        tok->atbol = 1;
808
232k
        if (blankline || tok->level > 0) {
809
106k
            if (tok->tok_extra_tokens) {
810
0
                if (tok->comment_newline) {
811
0
                    tok->comment_newline = 0;
812
0
                }
813
0
                p_start = tok->start;
814
0
                p_end = tok->cur;
815
0
                return MAKE_TOKEN(NL);
816
0
            }
817
106k
            goto nextline;
818
106k
        }
819
125k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
0
            tok->comment_newline = 0;
821
0
            p_start = tok->start;
822
0
            p_end = tok->cur;
823
0
            return MAKE_TOKEN(NL);
824
0
        }
825
125k
        p_start = tok->start;
826
125k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
125k
        tok->cont_line = 0;
828
125k
        return MAKE_TOKEN(NEWLINE);
829
125k
    }
830
831
    /* Period or number starting with period? */
832
924k
    if (c == '.') {
833
41.3k
        c = tok_nextc(tok);
834
41.3k
        if (Py_ISDIGIT(c)) {
835
3.46k
            goto fraction;
836
37.8k
        } else if (c == '.') {
837
1.33k
            c = tok_nextc(tok);
838
1.33k
            if (c == '.') {
839
607
                p_start = tok->start;
840
607
                p_end = tok->cur;
841
607
                return MAKE_TOKEN(ELLIPSIS);
842
607
            }
843
732
            else {
844
732
                tok_backup(tok, c);
845
732
            }
846
732
            tok_backup(tok, '.');
847
732
        }
848
36.5k
        else {
849
36.5k
            tok_backup(tok, c);
850
36.5k
        }
851
37.2k
        p_start = tok->start;
852
37.2k
        p_end = tok->cur;
853
37.2k
        return MAKE_TOKEN(DOT);
854
41.3k
    }
855
856
    /* Number */
857
883k
    if (Py_ISDIGIT(c)) {
858
92.9k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
33.6k
            c = tok_nextc(tok);
861
33.6k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
15.8k
                c = tok_nextc(tok);
864
16.0k
                do {
865
16.0k
                    if (c == '_') {
866
259
                        c = tok_nextc(tok);
867
259
                    }
868
16.0k
                    if (!Py_ISXDIGIT(c)) {
869
19
                        tok_backup(tok, c);
870
19
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
19
                    }
872
75.6k
                    do {
873
75.6k
                        c = tok_nextc(tok);
874
75.6k
                    } while (Py_ISXDIGIT(c));
875
16.0k
                } while (c == '_');
876
15.8k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
15.8k
            }
880
17.8k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
573
                c = tok_nextc(tok);
883
946
                do {
884
946
                    if (c == '_') {
885
377
                        c = tok_nextc(tok);
886
377
                    }
887
946
                    if (c < '0' || c >= '8') {
888
22
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
21
                        else {
893
21
                            tok_backup(tok, c);
894
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
21
                        }
896
22
                    }
897
2.98k
                    do {
898
2.98k
                        c = tok_nextc(tok);
899
2.98k
                    } while ('0' <= c && c < '8');
900
924
                } while (c == '_');
901
551
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
550
                if (!verify_end_of_number(tok, c, "octal")) {
906
3
                    return MAKE_TOKEN(ERRORTOKEN);
907
3
                }
908
550
            }
909
17.2k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
541
                c = tok_nextc(tok);
912
1.02k
                do {
913
1.02k
                    if (c == '_') {
914
492
                        c = tok_nextc(tok);
915
492
                    }
916
1.02k
                    if (c != '0' && c != '1') {
917
27
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
26
                        else {
921
26
                            tok_backup(tok, c);
922
26
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
26
                        }
924
27
                    }
925
4.04k
                    do {
926
4.04k
                        c = tok_nextc(tok);
927
4.04k
                    } while (c == '0' || c == '1');
928
995
                } while (c == '_');
929
514
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
512
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
512
            }
936
16.7k
            else {
937
16.7k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
17.7k
                while (1) {
941
17.7k
                    if (c == '_') {
942
120
                        c = tok_nextc(tok);
943
120
                        if (!Py_ISDIGIT(c)) {
944
2
                            tok_backup(tok, c);
945
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
2
                        }
947
120
                    }
948
17.7k
                    if (c != '0') {
949
16.7k
                        break;
950
16.7k
                    }
951
1.02k
                    c = tok_nextc(tok);
952
1.02k
                }
953
16.7k
                char* zeros_end = tok->cur;
954
16.7k
                if (Py_ISDIGIT(c)) {
955
373
                    nonzero = 1;
956
373
                    c = tok_decimal_tail(tok);
957
373
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
373
                }
961
16.7k
                if (c == '.') {
962
1.09k
                    c = tok_nextc(tok);
963
1.09k
                    goto fraction;
964
1.09k
                }
965
15.6k
                else if (c == 'e' || c == 'E') {
966
892
                    goto exponent;
967
892
                }
968
14.7k
                else if (c == 'j' || c == 'J') {
969
759
                    goto imaginary;
970
759
                }
971
13.9k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
30
                    tok_backup(tok, c);
974
30
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
30
                            tok, (int)(tok->start + 1 - tok->line_start),
976
30
                            (int)(zeros_end - tok->line_start),
977
30
                            "leading zeros in decimal integer "
978
30
                            "literals are not permitted; "
979
30
                            "use an 0o prefix for octal integers"));
980
30
                }
981
13.9k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
24
                    return MAKE_TOKEN(ERRORTOKEN);
983
24
                }
984
13.9k
            }
985
33.6k
        }
986
59.2k
        else {
987
            /* Decimal */
988
59.2k
            c = tok_decimal_tail(tok);
989
59.2k
            if (c == 0) {
990
10
                return MAKE_TOKEN(ERRORTOKEN);
991
10
            }
992
59.2k
            {
993
                /* Accept floating-point numbers. */
994
59.2k
                if (c == '.') {
995
3.41k
                    c = tok_nextc(tok);
996
7.97k
        fraction:
997
                    /* Fraction */
998
7.97k
                    if (Py_ISDIGIT(c)) {
999
6.22k
                        c = tok_decimal_tail(tok);
1000
6.22k
                        if (c == 0) {
1001
2
                            return MAKE_TOKEN(ERRORTOKEN);
1002
2
                        }
1003
6.22k
                    }
1004
7.97k
                }
1005
63.8k
                if (c == 'e' || c == 'E') {
1006
9.27k
                    int e;
1007
10.1k
                  exponent:
1008
10.1k
                    e = c;
1009
                    /* Exponent part */
1010
10.1k
                    c = tok_nextc(tok);
1011
10.1k
                    if (c == '+' || c == '-') {
1012
3.68k
                        c = tok_nextc(tok);
1013
3.68k
                        if (!Py_ISDIGIT(c)) {
1014
14
                            tok_backup(tok, c);
1015
14
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
14
                        }
1017
6.48k
                    } else if (!Py_ISDIGIT(c)) {
1018
615
                        tok_backup(tok, c);
1019
615
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
32
                            return MAKE_TOKEN(ERRORTOKEN);
1021
32
                        }
1022
583
                        tok_backup(tok, e);
1023
583
                        p_start = tok->start;
1024
583
                        p_end = tok->cur;
1025
583
                        return MAKE_TOKEN(NUMBER);
1026
615
                    }
1027
9.53k
                    c = tok_decimal_tail(tok);
1028
9.53k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
9.53k
                }
1032
64.0k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.60k
        imaginary:
1035
3.60k
                    c = tok_nextc(tok);
1036
3.60k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
9
                        return MAKE_TOKEN(ERRORTOKEN);
1038
9
                    }
1039
3.60k
                }
1040
61.2k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
125
                    return MAKE_TOKEN(ERRORTOKEN);
1042
125
                }
1043
64.0k
            }
1044
64.0k
        }
1045
95.5k
        tok_backup(tok, c);
1046
95.5k
        p_start = tok->start;
1047
95.5k
        p_end = tok->cur;
1048
95.5k
        return MAKE_TOKEN(NUMBER);
1049
92.9k
    }
1050
1051
808k
  f_string_quote:
1052
808k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
18.2k
        && (c == '\'' || c == '"'))) {
1054
1055
18.2k
        int quote = c;
1056
18.2k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
18.2k
        tok->first_lineno = tok->lineno;
1063
18.2k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
18.2k
        int after_quote = tok_nextc(tok);
1067
18.2k
        if (after_quote == quote) {
1068
3.04k
            int after_after_quote = tok_nextc(tok);
1069
3.04k
            if (after_after_quote == quote) {
1070
970
                quote_size = 3;
1071
970
            }
1072
2.07k
            else {
1073
                // TODO: Check this
1074
2.07k
                tok_backup(tok, after_after_quote);
1075
2.07k
                tok_backup(tok, after_quote);
1076
2.07k
            }
1077
3.04k
        }
1078
18.2k
        if (after_quote != quote) {
1079
15.1k
            tok_backup(tok, after_quote);
1080
15.1k
        }
1081
1082
1083
18.2k
        p_start = tok->start;
1084
18.2k
        p_end = tok->cur;
1085
18.2k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
2
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
2
        }
1088
18.2k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
18.2k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
18.2k
        the_current_tok->quote = quote;
1091
18.2k
        the_current_tok->quote_size = quote_size;
1092
18.2k
        the_current_tok->start = tok->start;
1093
18.2k
        the_current_tok->multi_line_start = tok->line_start;
1094
18.2k
        the_current_tok->first_line = tok->lineno;
1095
18.2k
        the_current_tok->start_offset = -1;
1096
18.2k
        the_current_tok->multi_line_start_offset = -1;
1097
18.2k
        the_current_tok->last_expr_buffer = NULL;
1098
18.2k
        the_current_tok->last_expr_size = 0;
1099
18.2k
        the_current_tok->last_expr_end = -1;
1100
18.2k
        the_current_tok->in_format_spec = 0;
1101
18.2k
        the_current_tok->in_debug = 0;
1102
1103
18.2k
        enum string_kind_t string_kind = FSTRING;
1104
18.2k
        switch (*tok->start) {
1105
984
            case 'T':
1106
4.95k
            case 't':
1107
4.95k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.95k
                string_kind = TSTRING;
1109
4.95k
                break;
1110
2.18k
            case 'F':
1111
12.7k
            case 'f':
1112
12.7k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
12.7k
                break;
1114
212
            case 'R':
1115
481
            case 'r':
1116
481
                the_current_tok->raw = 1;
1117
481
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
200
                    string_kind = TSTRING;
1119
200
                }
1120
481
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
18.2k
        }
1124
1125
18.2k
        the_current_tok->string_kind = string_kind;
1126
18.2k
        the_current_tok->curly_bracket_depth = 0;
1127
18.2k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
18.2k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
18.2k
    }
1130
1131
795k
  letter_quote:
1132
    /* String */
1133
795k
    if (c == '\'' || c == '"') {
1134
60.6k
        int quote = c;
1135
60.6k
        int quote_size = 1;             /* 1 or 3 */
1136
60.6k
        int end_quote_size = 0;
1137
60.6k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
60.6k
        tok->first_lineno = tok->lineno;
1144
60.6k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
60.6k
        c = tok_nextc(tok);
1148
60.6k
        if (c == quote) {
1149
10.1k
            c = tok_nextc(tok);
1150
10.1k
            if (c == quote) {
1151
2.97k
                quote_size = 3;
1152
2.97k
            }
1153
7.18k
            else {
1154
7.18k
                end_quote_size = 1;     /* empty string found */
1155
7.18k
            }
1156
10.1k
        }
1157
60.6k
        if (c != quote) {
1158
57.7k
            tok_backup(tok, c);
1159
57.7k
        }
1160
1161
        /* Get rest of string */
1162
1.25M
        while (end_quote_size != quote_size) {
1163
1.19M
            c = tok_nextc(tok);
1164
1.19M
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
1.19M
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
1.19M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
300
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
300
                tok->cur = (char *)tok->start;
1176
300
                tok->cur++;
1177
300
                tok->line_start = tok->multi_line_start;
1178
300
                int start = tok->lineno;
1179
300
                tok->lineno = tok->first_lineno;
1180
1181
300
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes
1185
                     * and if it does, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
37
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
37
                    if (the_current_tok->quote == quote &&
1189
32
                        the_current_tok->quote_size == quote_size) {
1190
25
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
25
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
25
                    }
1193
37
                }
1194
1195
275
                if (quote_size == 3) {
1196
23
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
23
                                     " (detected at line %d)", start);
1198
23
                    if (c != '\n') {
1199
23
                        tok->done = E_EOFS;
1200
23
                    }
1201
23
                    return MAKE_TOKEN(ERRORTOKEN);
1202
23
                }
1203
252
                else {
1204
252
                    if (has_escaped_quote) {
1205
10
                        _PyTokenizer_syntaxerror(
1206
10
                            tok,
1207
10
                            "unterminated string literal (detected at line %d); "
1208
10
                            "perhaps you escaped the end quote?",
1209
10
                            start
1210
10
                        );
1211
242
                    } else {
1212
242
                        _PyTokenizer_syntaxerror(
1213
242
                            tok, "unterminated string literal (detected at line %d)", start
1214
242
                        );
1215
242
                    }
1216
252
                    if (c != '\n') {
1217
14
                        tok->done = E_EOLS;
1218
14
                    }
1219
252
                    return MAKE_TOKEN(ERRORTOKEN);
1220
252
                }
1221
275
            }
1222
1.19M
            if (c == quote) {
1223
60.6k
                end_quote_size += 1;
1224
60.6k
            }
1225
1.13M
            else {
1226
1.13M
                end_quote_size = 0;
1227
1.13M
                if (c == '\\') {
1228
29.6k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
29.6k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
1.32k
                        has_escaped_quote = 1;
1231
1.32k
                    }
1232
29.6k
                    if (c == '\r') {
1233
69
                        c = tok_nextc(tok);
1234
69
                    }
1235
29.6k
                }
1236
1.13M
            }
1237
1.19M
        }
1238
1239
60.3k
        p_start = tok->start;
1240
60.3k
        p_end = tok->cur;
1241
60.3k
        return MAKE_TOKEN(STRING);
1242
60.6k
    }
1243
1244
    /* Line continuation */
1245
734k
    if (c == '\\') {
1246
462
        if ((c = tok_continuation_line(tok)) == -1) {
1247
64
            return MAKE_TOKEN(ERRORTOKEN);
1248
64
        }
1249
398
        tok->cont_line = 1;
1250
398
        goto again; /* Read next line */
1251
462
    }
1252
1253
    /* Punctuation character */
1254
734k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
734k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
57.3k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
57.3k
        int in_format_spec = current_tok->in_format_spec;
1261
57.3k
         int cursor_in_format_with_debug =
1262
57.3k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
57.3k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
57.3k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
57.3k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
57.3k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.37k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.37k
            current_tok->in_format_spec = 1;
1274
4.37k
            p_start = tok->start;
1275
4.37k
            p_end = tok->cur;
1276
4.37k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.37k
        }
1278
57.3k
    }
1279
1280
    /* Check for two-character token */
1281
729k
    {
1282
729k
        int c2 = tok_nextc(tok);
1283
729k
        int current_token = _PyToken_TwoChars(c, c2);
1284
729k
        if (current_token != OP) {
1285
27.7k
            int c3 = tok_nextc(tok);
1286
27.7k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
27.7k
            if (current_token3 != OP) {
1288
1.66k
                current_token = current_token3;
1289
1.66k
            }
1290
26.0k
            else {
1291
26.0k
                tok_backup(tok, c3);
1292
26.0k
            }
1293
27.7k
            p_start = tok->start;
1294
27.7k
            p_end = tok->cur;
1295
27.7k
            return MAKE_TOKEN(current_token);
1296
27.7k
        }
1297
702k
        tok_backup(tok, c2);
1298
702k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
93.2k
    case '(':
1303
126k
    case '[':
1304
170k
    case '{':
1305
170k
        if (tok->level >= MAXLEVEL) {
1306
10
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
10
        }
1308
170k
        tok->parenstack[tok->level] = c;
1309
170k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
170k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
170k
        tok->level++;
1312
170k
        if (INSIDE_FSTRING(tok)) {
1313
30.5k
            current_tok->curly_bracket_depth++;
1314
30.5k
        }
1315
170k
        break;
1316
66.8k
    case ')':
1317
79.1k
    case ']':
1318
105k
    case '}':
1319
105k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
58
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
58
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
58
        }
1323
105k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
199
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
199
        }
1326
105k
        if (tok->level > 0) {
1327
105k
            tok->level--;
1328
105k
            int opening = tok->parenstack[tok->level];
1329
105k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
38.6k
                                            (opening == '[' && c == ']') ||
1331
26.4k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
43
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
5
                    assert(current_tok->curly_bracket_depth >= 0);
1339
5
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
5
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
3
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
3
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
3
                    }
1344
5
                }
1345
40
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
4
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
4
                            "closing parenthesis '%c' does not match "
1348
4
                            "opening parenthesis '%c' on line %d",
1349
4
                            c, opening, tok->parenlinenostack[tok->level]));
1350
4
                }
1351
36
                else {
1352
36
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
36
                            "closing parenthesis '%c' does not match "
1354
36
                            "opening parenthesis '%c'",
1355
36
                            c, opening));
1356
36
                }
1357
40
            }
1358
105k
        }
1359
1360
105k
        if (INSIDE_FSTRING(tok)) {
1361
22.9k
            current_tok->curly_bracket_depth--;
1362
22.9k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
22.9k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
21.5k
                current_tok->curly_bracket_expr_start_depth--;
1368
21.5k
                current_tok->kind = TOK_FSTRING_MODE;
1369
21.5k
                current_tok->in_format_spec = 0;
1370
21.5k
                current_tok->in_debug = 0;
1371
21.5k
            }
1372
22.9k
        }
1373
105k
        break;
1374
425k
    default:
1375
425k
        break;
1376
702k
    }
1377
1378
701k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
433
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
433
    }
1381
1382
701k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.40k
        current_tok->in_debug = 1;
1384
5.40k
    }
1385
1386
    /* Punctuation character */
1387
701k
    p_start = tok->start;
1388
701k
    p_end = tok->cur;
1389
701k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
701k
}
1391
1392
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    /* Tokenize the literal-text portion of an f-string or t-string.
     *
     * Emits FSTRING_MIDDLE/TSTRING_MIDDLE tokens for runs of literal text,
     * switches the tokenizer back to regular mode when a '{' opens an
     * interpolated expression, and emits FSTRING_END/TSTRING_END (popping
     * the mode stack) when the closing quote(s) are reached.
     *
     * Returns the token type produced via MAKE_TOKEN; `token` receives the
     * token bounds through p_start/p_end.
     */
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;     // consecutive quote chars seen (terminator needs quote_size of them)
    int unicode_escape = 0;     // set while inside a \N{...} named escape, so '}' stays literal

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            // A genuine interpolation start (not the '{{' escape): record
            // the expression nesting depth and let the regular tokenizer
            // scan the expression.
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // String terminated cleanly: release any buffered expression text kept
    // for '=' debug expansion, then pop this f/t-string mode off the stack.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        // Only honour in_format_spec while we are actually inside an
        // interpolation's bracket stack.
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                // NOTE(review): with quote_size == 1 ruled out above this
                // branch looks unreachable from this condition — confirm.
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        if (c == current_tok->quote) {
            // Accumulate consecutive quote characters toward a possible
            // terminator (triple-quoted strings need three in a row).
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            if (peek != '{' || in_format_spec) {
                // Interpolation start: hand the expression back to the
                // regular tokenizer (inside a format spec even '{{' starts
                // an expression).
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // '{{' escape: emit the literal run up to (and including)
                // only the first brace.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // Closing brace of a \N{...} escape belongs to the literal.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // '}}' escape: emit up to only the first brace.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                // Skip the CR of a CRLF pair after a backslash.
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.74M
{
1618
1.74M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.74M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.68M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.68M
    } else {
1622
54.7k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
54.7k
    }
1624
1.74M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.74M
{
1629
1.74M
    int result = tok_get(tok, token);
1630
1.74M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.74M
    return result;
1635
1.74M
}