Coverage Report

Created: 2025-12-14 07:06

/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.29k
#define ALTTABSIZE 1
11
12
1.75M
#define is_potential_identifier_start(c) (\
13
1.75M
              (c >= 'a' && c <= 'z')\
14
1.75M
               || (c >= 'A' && c <= 'Z')\
15
1.75M
               || c == '_'\
16
1.75M
               || (c >= 128))
17
18
2.72M
#define is_potential_identifier_char(c) (\
19
2.72M
              (c >= 'a' && c <= 'z')\
20
2.72M
               || (c >= 'A' && c <= 'Z')\
21
2.72M
               || (c >= '0' && c <= '9')\
22
2.72M
               || c == '_'\
23
2.72M
               || (c >= 128))
24
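
These two macros are the ASCII fast path for identifier characters: any byte >= 128 is provisionally accepted here and only validated against PEP 3131 later, by verify_identifier(). A minimal standalone sketch of the classification (hypothetical driver, not part of lexer.c; the macro body is copied from above):

#include <stdio.h>

#define is_potential_identifier_start(c) (\
              (c >= 'a' && c <= 'z')\
               || (c >= 'A' && c <= 'Z')\
               || c == '_'\
               || (c >= 128))

int main(void) {
    /* 'x' and '_' can start an identifier; '1' cannot; 0xC3 (a UTF-8
       lead byte) is provisionally accepted and checked later. */
    printf("%d %d %d %d\n",
           is_potential_identifier_start('x'),
           is_potential_identifier_start('_'),
           is_potential_identifier_start('1'),
           is_potential_identifier_start(0xC3));   /* prints: 1 1 0 1 */
    return 0;
}
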
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.86M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
16.6k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
38
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.75M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
261k
{
55
261k
    return memchr(str, 0, size) != NULL;
56
261k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.4M
{
62
11.4M
    int rc;
63
11.7M
    for (;;) {
64
11.7M
        if (tok->cur != tok->inp) {
65
11.4M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.4M
            tok->col_offset++;
70
11.4M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.4M
        }
72
310k
        if (tok->done != E_OK) {
73
32.2k
            return EOF;
74
32.2k
        }
75
277k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
277k
        if (!rc) {
84
16.3k
            tok->cur = tok->inp;
85
16.3k
            return EOF;
86
16.3k
        }
87
261k
        tok->line_start = tok->cur;
88
89
261k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
261k
    }
95
11.4M
    Py_UNREACHABLE();
96
11.4M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.69M
{
102
3.69M
    if (c != EOF) {
103
3.66M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.66M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.66M
        tok->col_offset--;
110
3.66M
    }
111
3.69M
}
112
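
tok_nextc() and tok_backup() form the one-character pushback protocol that the rest of this file is written against: read until a character no longer extends the current token, then push that character back so the next token starts with it. A toy reimplementation over a flat buffer (hedged sketch; toy_tok, toy_nextc and toy_backup are hypothetical, and the real tok_state also tracks lines, columns and buffer refills):

#include <stdio.h>
#include <string.h>

struct toy_tok { const char *cur, *end; };

static int toy_nextc(struct toy_tok *t) {
    return t->cur == t->end ? EOF : (unsigned char)*t->cur++;
}

static void toy_backup(struct toy_tok *t, int c) {
    if (c != EOF) {      /* EOF is never pushed back, as above */
        t->cur--;
    }
}

int main(void) {
    const char *src = "123+";
    struct toy_tok t = { src, src + strlen(src) };
    int c = toy_nextc(&t);
    while (c >= '0' && c <= '9') {   /* consume the number */
        c = toy_nextc(&t);
    }
    toy_backup(&t, c);               /* '+' belongs to the next token */
    printf("next token starts at: %c\n", *t.cur);   /* prints: + */
    return 0;
}
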
113
static int
114
23.6k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
23.6k
    assert(token != NULL);
116
23.6k
    assert(c == '}' || c == ':' || c == '!');
117
23.6k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
23.6k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
12.8k
        return 0;
121
12.8k
    }
122
10.8k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
10.8k
    int hash_detected = 0;
126
10.8k
    int in_string = 0;
127
10.8k
    char quote_char = 0;
128
129
1.07M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.06M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.06M
        if (ch == '\\') {
134
18.3k
            i++;
135
18.3k
            continue;
136
18.3k
        }
137
138
        // Handle quotes
139
1.04M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
181k
            if (!in_string) {
148
67.9k
                in_string = 1;
149
67.9k
                quote_char = ch;
150
67.9k
            }
151
113k
            else if (ch == quote_char) {
152
67.1k
                in_string = 0;
153
67.1k
            }
154
181k
            continue;
155
181k
        }
156
157
        // Check for # outside strings
158
865k
        if (ch == '#' && !in_string) {
159
892
            hash_detected = 1;
160
892
            break;
161
892
        }
162
865k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
10.8k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
892
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
892
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
892
        Py_ssize_t i = 0;  // Input position
172
892
        Py_ssize_t j = 0;  // Output position
173
892
        in_string = 0;     // Whether we're in a string
174
892
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
65.7k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
64.9k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
64.9k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
9.96k
                if (!in_string) {
184
3.94k
                    in_string = 1;
185
3.94k
                    quote_char = ch;
186
6.02k
                } else if (ch == quote_char) {
187
3.93k
                    in_string = 0;
188
3.93k
                }
189
9.96k
                result[j++] = ch;
190
9.96k
            }
191
            // Skip comments
192
54.9k
            else if (ch == '#' && !in_string) {
193
47.3k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
46.5k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
46.2k
                    i++;
196
46.2k
                }
197
1.08k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
306
                    result[j++] = '\n';
199
306
                }
200
1.08k
            }
201
            // Copy other chars
202
53.8k
            else {
203
53.8k
                result[j++] = ch;
204
53.8k
            }
205
64.9k
            i++;
206
64.9k
        }
207
208
892
        result[j] = '\0';  // Null-terminate the result string
209
892
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
892
        PyMem_Free(result);
211
9.94k
    } else {
212
9.94k
        res = PyUnicode_DecodeUTF8(
213
9.94k
            tok_mode->last_expr_buffer,
214
9.94k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
9.94k
            NULL
216
9.94k
        );
217
9.94k
    }
218
219
10.8k
    if (!res) {
220
0
        return -1;
221
0
    }
222
10.8k
    token->metadata = res;
223
10.8k
    return 0;
224
10.8k
}
225
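
The quote-toggling above leans on the invariant stated in the comment: the expression buffer only ever holds complete, valid STRING tokens, so plain toggling cannot get out of sync. A self-contained sketch of the same first-pass scan (hash_outside_strings is a hypothetical stand-in for the detection loop above):

#include <stdio.h>
#include <string.h>

static int hash_outside_strings(const char *buf, size_t n) {
    int in_string = 0;
    char quote_char = 0;
    for (size_t i = 0; i < n; i++) {
        char ch = buf[i];
        if (ch == '\\') { i++; continue; }   /* skip escaped characters */
        if (ch == '"' || ch == '\'') {
            if (!in_string) { in_string = 1; quote_char = ch; }
            else if (ch == quote_char) { in_string = 0; }
            continue;
        }
        if (ch == '#' && !in_string) { return 1; }
    }
    return 0;
}

int main(void) {
    const char *a = "x + '#not a comment'";
    const char *b = "x  # a real comment";
    printf("%d %d\n",
           hash_outside_strings(a, strlen(a)),
           hash_outside_strings(b, strlen(b)));   /* prints: 0 1 */
    return 0;
}
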
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
62.6k
{
229
62.6k
    assert(tok->cur != NULL);
230
231
62.6k
    Py_ssize_t size = strlen(tok->cur);
232
62.6k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
62.6k
    switch (cur) {
235
0
        case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
39.0k
        case '{':
252
39.0k
            if (tok_mode->last_expr_buffer != NULL) {
253
27.4k
                PyMem_Free(tok_mode->last_expr_buffer);
254
27.4k
            }
255
39.0k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
39.0k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
39.0k
            tok_mode->last_expr_size = size;
260
39.0k
            tok_mode->last_expr_end = -1;
261
39.0k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
39.0k
            break;
263
18.5k
        case '}':
264
20.2k
        case '!':
265
20.2k
            tok_mode->last_expr_end = strlen(tok->start);
266
20.2k
            break;
267
3.43k
        case ':':
268
3.43k
            if (tok_mode->last_expr_end == -1) {
269
3.04k
                tok_mode->last_expr_end = strlen(tok->start);
270
3.04k
            }
271
3.43k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
62.6k
    }
275
62.6k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
62.6k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
9.09k
{
284
9.09k
    const char *s = test;
285
9.09k
    int res = 0;
286
24.3k
    while (1) {
287
24.3k
        int c = tok_nextc(tok);
288
24.3k
        if (*s == 0) {
289
9.00k
            res = !is_potential_identifier_char(c);
290
9.00k
        }
291
15.3k
        else if (c == *s) {
292
15.2k
            s++;
293
15.2k
            continue;
294
15.2k
        }
295
296
9.09k
        tok_backup(tok, c);
297
24.3k
        while (s != test) {
298
15.2k
            tok_backup(tok, *--s);
299
15.2k
        }
300
9.09k
        return res;
301
24.3k
    }
302
9.09k
}
303
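
lookahead() is a fully backtracking probe: it consumes input only while it matches `test`, checks that the next character cannot extend an identifier, and then restores everything it consumed via tok_backup(), so the stream is unchanged whatever the answer. A standalone sketch of the same probe-and-restore structure (toy_lookahead is hypothetical, working over a plain buffer instead of tok_state):

#include <ctype.h>
#include <stdio.h>

static int toy_lookahead(const char *buf, size_t *pos, const char *test) {
    const char *s = test;
    int res = 0;
    while (1) {
        char c = buf[*pos];
        if (*s == 0) {   /* whole suffix matched: the next char must not
                            extend an identifier */
            res = !(isalnum((unsigned char)c) || c == '_');
        }
        else if (c == *s) {
            (*pos)++, s++;
            continue;
        }
        /* rewind everything we consumed, like the tok_backup loop above */
        *pos -= (size_t)(s - test);
        return res;
    }
}

int main(void) {
    const char *src = "nd x";   /* stream state after reading "1a" in "1and x" */
    size_t pos = 0;
    int is_kw = toy_lookahead(src, &pos, "nd");
    printf("%d %zu\n", is_kw, pos);   /* prints: 1 0 -- stream unchanged */
    return 0;
}
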
304
static int
305
98.3k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
98.3k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of the keywords that can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * This allows gradually deprecating existing valid code without emitting
315
     * a warning before the error in most cases of an invalid numeric literal
316
     * (which would be confusing and break existing tests).
317
     * Raise a syntax error with a slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * some other keyword or identifier.
320
     */
321
98.3k
    int r = 0;
322
98.3k
    if (c == 'a') {
323
1.15k
        r = lookahead(tok, "nd");
324
1.15k
    }
325
97.2k
    else if (c == 'e') {
326
532
        r = lookahead(tok, "lse");
327
532
    }
328
96.6k
    else if (c == 'f') {
329
3.78k
        r = lookahead(tok, "or");
330
3.78k
    }
331
92.8k
    else if (c == 'i') {
332
1.52k
        int c2 = tok_nextc(tok);
333
1.52k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.50k
            r = 1;
335
1.50k
        }
336
1.52k
        tok_backup(tok, c2);
337
1.52k
    }
338
91.3k
    else if (c == 'o') {
339
3.31k
        r = lookahead(tok, "r");
340
3.31k
    }
341
88.0k
    else if (c == 'n') {
342
306
        r = lookahead(tok, "ot");
343
306
    }
344
98.3k
    if (r) {
345
10.5k
        tok_backup(tok, c);
346
10.5k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.5k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.5k
        tok_nextc(tok);
352
10.5k
    }
353
87.8k
    else /* In future releases, only error will remain. */
354
87.8k
    if (c < 128 && is_potential_identifier_char(c)) {
355
189
        tok_backup(tok, c);
356
189
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
189
        return 0;
358
189
    }
359
98.1k
    return 1;
360
98.3k
}
361
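
Concretely, and assuming tok_extra_tokens is off, a few illustrative outcomes of the rules above:

/* "1if x else 2"  ->  SyntaxWarning "invalid decimal literal"; the token
 *                     stream still continues (keyword 'if' follows).
 * "1in y"         ->  SyntaxWarning via the 'i' special case ('if'/'in'/'is').
 * "1abc"          ->  SyntaxError "invalid decimal literal": an ordinary
 *                     identifier follows, so no deprecation path applies.
 */
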
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.8k
{
366
11.8k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.8k
    PyObject *s;
370
11.8k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.8k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.8k
    if (s == NULL) {
374
2
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
2
            tok->done = E_DECODE;
376
2
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
2
        return 0;
381
2
    }
382
11.8k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.8k
    assert(invalid >= 0);
384
11.8k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.8k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
607
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
607
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
411
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
411
            if (s != NULL) {
391
411
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
411
            }
393
411
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
411
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
411
        }
399
607
        Py_DECREF(s);
400
607
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
321
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
321
        }
403
286
        else {
404
286
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
286
        }
406
607
        return 0;
407
607
    }
408
11.2k
    Py_DECREF(s);
409
11.2k
    return 1;
410
11.8k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
76.3k
{
415
76.3k
    int c;
416
417
76.8k
    while (1) {
418
220k
        do {
419
220k
            c = tok_nextc(tok);
420
220k
        } while (Py_ISDIGIT(c));
421
76.8k
        if (c != '_') {
422
76.3k
            break;
423
76.3k
        }
424
540
        c = tok_nextc(tok);
425
540
        if (!Py_ISDIGIT(c)) {
426
13
            tok_backup(tok, c);
427
13
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
13
            return 0;
429
13
        }
430
540
    }
431
76.3k
    return c;
432
76.3k
}
433
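
tok_decimal_tail() encodes the underscore rule for decimal digit runs: an underscore is only valid between two digits. Illustrative outcomes (hedged):

/* "1_000"  ->  ok: every '_' is followed by a digit, one NUMBER token
 * "1__0"   ->  SyntaxError "invalid decimal literal" ('_' after '_')
 * "1_"     ->  SyntaxError "invalid decimal literal" (trailing '_')
 */
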
434
static inline int
435
1.11k
tok_continuation_line(struct tok_state *tok) {
436
1.11k
    int c = tok_nextc(tok);
437
1.11k
    if (c == '\r') {
438
69
        c = tok_nextc(tok);
439
69
    }
440
1.11k
    if (c != '\n') {
441
61
        tok->done = E_LINECONT;
442
61
        return -1;
443
61
    }
444
1.05k
    c = tok_nextc(tok);
445
1.05k
    if (c == EOF) {
446
54
        tok->done = E_EOF;
447
54
        tok->cur = tok->inp;
448
54
        return -1;
449
1.00k
    } else {
450
1.00k
        tok_backup(tok, c);
451
1.00k
    }
452
1.00k
    return c;
453
1.05k
}
454
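
tok_continuation_line() accepts a backslash only when the line break follows it directly (with an optional '\r' before the '\n'); anything else is an error. Illustrative outcomes (hedged):

/* "x = 1 + \<newline>2"       -> continuation consumed, logical line goes on
 * "x = 1 + \ <newline>2"      -> stray space after '\': tok->done = E_LINECONT
 * "x = 1 + \<newline><EOF>"   -> tok->done = E_EOF (nothing after the break)
 */
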
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.7k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.7k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.7k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
21.7k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.7k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.7k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.7k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
21.7k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
21.7k
    if (saw_b && saw_t) {
488
2
        RETURN_SYNTAX_ERROR("b", "t");
489
2
    }
490
491
21.7k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
21.7k
#undef RETURN_SYNTAX_ERROR
496
497
21.7k
    return 0;
498
21.7k
}
499
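
For reference, the combinations this check accepts and rejects (in any order and any case), as the comment at the top of the function summarizes:

/* accepted:  rb"" / br"",  rf"" / fr"",  rt"" / tr""
 * rejected:  ub"", ur"", uf"", ut"", bf"", bt"", ft""
 * e.g. bf"x" fails with: 'b' and 'f' prefixes are incompatible
 */
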
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.71M
{
503
1.71M
    int c;
504
1.71M
    int blankline, nonascii;
505
506
1.71M
    const char *p_start = NULL;
507
1.71M
    const char *p_end = NULL;
508
1.82M
  nextline:
509
1.82M
    tok->start = NULL;
510
1.82M
    tok->starting_col_offset = -1;
511
1.82M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.82M
    if (tok->atbol) {
516
255k
        int col = 0;
517
255k
        int altcol = 0;
518
255k
        tok->atbol = 0;
519
255k
        int cont_line_col = 0;
520
1.16M
        for (;;) {
521
1.16M
            c = tok_nextc(tok);
522
1.16M
            if (c == ' ') {
523
909k
                col++, altcol++;
524
909k
            }
525
257k
            else if (c == '\t') {
526
646
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
646
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
646
            }
529
256k
            else if (c == '\014') { /* Control-L (formfeed) */
530
1.06k
                col = altcol = 0; /* For Emacs users */
531
1.06k
            }
532
255k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
660
                cont_line_col = cont_line_col ? cont_line_col : col;
538
660
                if ((c = tok_continuation_line(tok)) == -1) {
539
44
                    return MAKE_TOKEN(ERRORTOKEN);
540
44
                }
541
660
            }
542
255k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
255k
            else {
546
255k
                break;
547
255k
            }
548
1.16M
        }
549
255k
        tok_backup(tok, c);
550
255k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
65.2k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
65.2k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
65.2k
            else {
566
65.2k
                blankline = 1; /* Ignore completely */
567
65.2k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
65.2k
        }
571
255k
        if (!blankline && tok->level == 0) {
572
146k
            col = cont_line_col ? cont_line_col : col;
573
146k
            altcol = cont_line_col ? cont_line_col : altcol;
574
146k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
96.7k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
96.7k
            }
580
50.1k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
28.0k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
28.0k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
28.0k
                tok->pendin++;
591
28.0k
                tok->indstack[++tok->indent] = col;
592
28.0k
                tok->altindstack[tok->indent] = altcol;
593
28.0k
            }
594
22.1k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
49.3k
                while (tok->indent > 0 &&
597
44.2k
                    col < tok->indstack[tok->indent]) {
598
27.2k
                    tok->pendin--;
599
27.2k
                    tok->indent--;
600
27.2k
                }
601
22.1k
                if (col != tok->indstack[tok->indent]) {
602
7
                    tok->done = E_DEDENT;
603
7
                    tok->cur = tok->inp;
604
7
                    return MAKE_TOKEN(ERRORTOKEN);
605
7
                }
606
22.0k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
22.0k
            }
610
146k
        }
611
255k
    }
612
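
The double bookkeeping above (col with tok->tabsize, altcol with ALTTABSIZE == 1) is what detects ambiguous tab/space mixtures: an indentation level only counts as unchanged when both measures agree. A worked example, assuming the default tabsize of 8:

/* line 1: "        x"  (8 spaces)  ->  col 8, altcol 8   (pushed on the stacks)
 * line 2: "\tx"        (1 tab)     ->  col 8, altcol 1
 *
 * col matches the stack top (8 == 8, "no change"), but altcol does not
 * (1 != 8), so _PyTokenizer_indenterror() reports inconsistent use of
 * tabs and spaces instead of silently equating them.
 */
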
613
1.82M
    tok->start = tok->cur;
614
1.82M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
1.82M
    if (tok->pendin != 0) {
618
55.2k
        if (tok->pendin < 0) {
619
27.2k
            if (tok->tok_extra_tokens) {
620
0
                p_start = tok->cur;
621
0
                p_end = tok->cur;
622
0
            }
623
27.2k
            tok->pendin++;
624
27.2k
            return MAKE_TOKEN(DEDENT);
625
27.2k
        }
626
28.0k
        else {
627
28.0k
            if (tok->tok_extra_tokens) {
628
0
                p_start = tok->buf;
629
0
                p_end = tok->cur;
630
0
            }
631
28.0k
            tok->pendin--;
632
28.0k
            return MAKE_TOKEN(INDENT);
633
28.0k
        }
634
55.2k
    }
635
636
    /* Peek ahead at the next character */
637
1.76M
    c = tok_nextc(tok);
638
1.76M
    tok_backup(tok, c);
639
640
1.76M
 again:
641
1.76M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.15M
    do {
644
2.15M
        c = tok_nextc(tok);
645
2.15M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.76M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.76M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.76M
    if (c == '#') {
653
654
40.6k
        const char* p = NULL;
655
40.6k
        const char *prefix, *type_start;
656
40.6k
        int current_starting_col_offset;
657
658
1.34M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.30M
            c = tok_nextc(tok);
660
1.30M
        }
661
662
40.6k
        if (tok->tok_extra_tokens) {
663
0
            p = tok->start;
664
0
        }
665
666
40.6k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
40.6k
        if (tok->tok_extra_tokens) {
721
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
0
            p_start = p;
723
0
            p_end = tok->cur;
724
0
            tok->comment_newline = blankline;
725
0
            return MAKE_TOKEN(COMMENT);
726
0
        }
727
40.6k
    }
728
729
1.76M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.76M
    if (c == EOF) {
735
16.1k
        if (tok->level) {
736
4.19k
            return MAKE_TOKEN(ERRORTOKEN);
737
4.19k
        }
738
11.9k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
16.1k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.75M
    nonascii = 0;
743
1.75M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", f"", and t"". */
745
581k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
714k
        while (1) {
747
714k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
22.5k
                saw_b = 1;
749
22.5k
            }
750
            /* Since this is a backwards-compatibility literal, we don't
751
               want to support it in arbitrary order like byte literals. */
752
692k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
7.78k
                saw_u = 1;
754
7.78k
            }
755
            /* ur"" and ru"" are not supported */
756
684k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
40.5k
                saw_r = 1;
758
40.5k
            }
759
643k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
48.6k
                saw_f = 1;
761
48.6k
            }
762
595k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
35.8k
                saw_t = 1;
764
35.8k
            }
765
559k
            else {
766
559k
                break;
767
559k
            }
768
155k
            c = tok_nextc(tok);
769
155k
            if (c == '"' || c == '\'') {
770
                // Raise an error on incompatible string prefixes:
771
21.7k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
21.7k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
21.7k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f- or t-string creation:
778
21.7k
                if (saw_f || saw_t) {
779
16.6k
                    goto f_string_quote;
780
16.6k
                }
781
5.06k
                goto letter_quote;
782
21.7k
            }
783
155k
        }
784
2.62M
        while (is_potential_identifier_char(c)) {
785
2.06M
            if (c >= 128) {
786
116k
                nonascii = 1;
787
116k
            }
788
2.06M
            c = tok_nextc(tok);
789
2.06M
        }
790
559k
        tok_backup(tok, c);
791
559k
        if (nonascii && !verify_identifier(tok)) {
792
609
            return MAKE_TOKEN(ERRORTOKEN);
793
609
        }
794
795
558k
        p_start = tok->start;
796
558k
        p_end = tok->cur;
797
798
558k
        return MAKE_TOKEN(NAME);
799
559k
    }
800
801
1.17M
    if (c == '\r') {
802
414
        c = tok_nextc(tok);
803
414
    }
804
805
    /* Newline */
806
1.17M
    if (c == '\n') {
807
236k
        tok->atbol = 1;
808
236k
        if (blankline || tok->level > 0) {
809
108k
            if (tok->tok_extra_tokens) {
810
0
                if (tok->comment_newline) {
811
0
                    tok->comment_newline = 0;
812
0
                }
813
0
                p_start = tok->start;
814
0
                p_end = tok->cur;
815
0
                return MAKE_TOKEN(NL);
816
0
            }
817
108k
            goto nextline;
818
108k
        }
819
128k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
0
            tok->comment_newline = 0;
821
0
            p_start = tok->start;
822
0
            p_end = tok->cur;
823
0
            return MAKE_TOKEN(NL);
824
0
        }
825
128k
        p_start = tok->start;
826
128k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
128k
        tok->cont_line = 0;
828
128k
        return MAKE_TOKEN(NEWLINE);
829
128k
    }
830
831
    /* Period or number starting with period? */
832
933k
    if (c == '.') {
833
41.1k
        c = tok_nextc(tok);
834
41.1k
        if (Py_ISDIGIT(c)) {
835
3.32k
            goto fraction;
836
37.8k
        } else if (c == '.') {
837
1.33k
            c = tok_nextc(tok);
838
1.33k
            if (c == '.') {
839
619
                p_start = tok->start;
840
619
                p_end = tok->cur;
841
619
                return MAKE_TOKEN(ELLIPSIS);
842
619
            }
843
711
            else {
844
711
                tok_backup(tok, c);
845
711
            }
846
711
            tok_backup(tok, '.');
847
711
        }
848
36.5k
        else {
849
36.5k
            tok_backup(tok, c);
850
36.5k
        }
851
37.2k
        p_start = tok->start;
852
37.2k
        p_end = tok->cur;
853
37.2k
        return MAKE_TOKEN(DOT);
854
41.1k
    }
855
856
    /* Number */
857
892k
    if (Py_ISDIGIT(c)) {
858
95.1k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
35.1k
            c = tok_nextc(tok);
861
35.1k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
15.8k
                c = tok_nextc(tok);
864
16.0k
                do {
865
16.0k
                    if (c == '_') {
866
210
                        c = tok_nextc(tok);
867
210
                    }
868
16.0k
                    if (!Py_ISXDIGIT(c)) {
869
20
                        tok_backup(tok, c);
870
20
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
20
                    }
872
79.0k
                    do {
873
79.0k
                        c = tok_nextc(tok);
874
79.0k
                    } while (Py_ISXDIGIT(c));
875
16.0k
                } while (c == '_');
876
15.8k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
2
                    return MAKE_TOKEN(ERRORTOKEN);
878
2
                }
879
15.8k
            }
880
19.3k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
612
                c = tok_nextc(tok);
883
964
                do {
884
964
                    if (c == '_') {
885
358
                        c = tok_nextc(tok);
886
358
                    }
887
964
                    if (c < '0' || c >= '8') {
888
24
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
23
                        else {
893
23
                            tok_backup(tok, c);
894
23
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
23
                        }
896
24
                    }
897
2.42k
                    do {
898
2.42k
                        c = tok_nextc(tok);
899
2.42k
                    } while ('0' <= c && c < '8');
900
940
                } while (c == '_');
901
588
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
587
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
587
            }
909
18.7k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
558
                c = tok_nextc(tok);
912
1.04k
                do {
913
1.04k
                    if (c == '_') {
914
497
                        c = tok_nextc(tok);
915
497
                    }
916
1.04k
                    if (c != '0' && c != '1') {
917
19
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
18
                        else {
921
18
                            tok_backup(tok, c);
922
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
18
                        }
924
19
                    }
925
4.06k
                    do {
926
4.06k
                        c = tok_nextc(tok);
927
4.06k
                    } while (c == '0' || c == '1');
928
1.02k
                } while (c == '_');
929
539
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
537
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
537
            }
936
18.1k
            else {
937
18.1k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
19.4k
                while (1) {
941
19.4k
                    if (c == '_') {
942
95
                        c = tok_nextc(tok);
943
95
                        if (!Py_ISDIGIT(c)) {
944
4
                            tok_backup(tok, c);
945
4
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
4
                        }
947
95
                    }
948
19.4k
                    if (c != '0') {
949
18.1k
                        break;
950
18.1k
                    }
951
1.21k
                    c = tok_nextc(tok);
952
1.21k
                }
953
18.1k
                char* zeros_end = tok->cur;
954
18.1k
                if (Py_ISDIGIT(c)) {
955
393
                    nonzero = 1;
956
393
                    c = tok_decimal_tail(tok);
957
393
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
393
                }
961
18.1k
                if (c == '.') {
962
928
                    c = tok_nextc(tok);
963
928
                    goto fraction;
964
928
                }
965
17.2k
                else if (c == 'e' || c == 'E') {
966
843
                    goto exponent;
967
843
                }
968
16.4k
                else if (c == 'j' || c == 'J') {
969
859
                    goto imaginary;
970
859
                }
971
15.5k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
26
                    tok_backup(tok, c);
974
26
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
26
                            tok, (int)(tok->start + 1 - tok->line_start),
976
26
                            (int)(zeros_end - tok->line_start),
977
26
                            "leading zeros in decimal integer "
978
26
                            "literals are not permitted; "
979
26
                            "use an 0o prefix for octal integers"));
980
26
                }
981
15.5k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
32
                    return MAKE_TOKEN(ERRORTOKEN);
983
32
                }
984
15.5k
            }
985
35.1k
        }
986
59.9k
        else {
987
            /* Decimal */
988
59.9k
            c = tok_decimal_tail(tok);
989
59.9k
            if (c == 0) {
990
9
                return MAKE_TOKEN(ERRORTOKEN);
991
9
            }
992
59.9k
            {
993
                /* Accept floating-point numbers. */
994
59.9k
                if (c == '.') {
995
3.55k
                    c = tok_nextc(tok);
996
7.80k
        fraction:
997
                    /* Fraction */
998
7.80k
                    if (Py_ISDIGIT(c)) {
999
6.02k
                        c = tok_decimal_tail(tok);
1000
6.02k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
6.02k
                    }
1004
7.80k
                }
1005
64.2k
                if (c == 'e' || c == 'E') {
1006
9.66k
                    int e;
1007
10.5k
                  exponent:
1008
10.5k
                    e = c;
1009
                    /* Exponent part */
1010
10.5k
                    c = tok_nextc(tok);
1011
10.5k
                    if (c == '+' || c == '-') {
1012
3.81k
                        c = tok_nextc(tok);
1013
3.81k
                        if (!Py_ISDIGIT(c)) {
1014
12
                            tok_backup(tok, c);
1015
12
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
12
                        }
1017
6.69k
                    } else if (!Py_ISDIGIT(c)) {
1018
534
                        tok_backup(tok, c);
1019
534
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
34
                            return MAKE_TOKEN(ERRORTOKEN);
1021
34
                        }
1022
500
                        tok_backup(tok, e);
1023
500
                        p_start = tok->start;
1024
500
                        p_end = tok->cur;
1025
500
                        return MAKE_TOKEN(NUMBER);
1026
534
                    }
1027
9.96k
                    c = tok_decimal_tail(tok);
1028
9.96k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
9.96k
                }
1032
64.5k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
4.18k
        imaginary:
1035
4.18k
                    c = tok_nextc(tok);
1036
4.18k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
10
                        return MAKE_TOKEN(ERRORTOKEN);
1038
10
                    }
1039
4.18k
                }
1040
61.1k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
106
                    return MAKE_TOKEN(ERRORTOKEN);
1042
106
                }
1043
64.5k
            }
1044
64.5k
        }
1045
97.6k
        tok_backup(tok, c);
1046
97.6k
        p_start = tok->start;
1047
97.6k
        p_end = tok->cur;
1048
97.6k
        return MAKE_TOKEN(NUMBER);
1049
95.1k
    }
1050
1051
813k
  f_string_quote:
1052
813k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
16.6k
        && (c == '\'' || c == '"'))) {
1054
1055
16.6k
        int quote = c;
1056
16.6k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi-line strings,
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
16.6k
        tok->first_lineno = tok->lineno;
1063
16.6k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
16.6k
        int after_quote = tok_nextc(tok);
1067
16.6k
        if (after_quote == quote) {
1068
2.70k
            int after_after_quote = tok_nextc(tok);
1069
2.70k
            if (after_after_quote == quote) {
1070
888
                quote_size = 3;
1071
888
            }
1072
1.81k
            else {
1073
                // TODO: Check this
1074
1.81k
                tok_backup(tok, after_after_quote);
1075
1.81k
                tok_backup(tok, after_quote);
1076
1.81k
            }
1077
2.70k
        }
1078
16.6k
        if (after_quote != quote) {
1079
13.9k
            tok_backup(tok, after_quote);
1080
13.9k
        }
1081
1082
1083
16.6k
        p_start = tok->start;
1084
16.6k
        p_end = tok->cur;
1085
16.6k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
3
        }
1088
16.6k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
16.6k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
16.6k
        the_current_tok->quote = quote;
1091
16.6k
        the_current_tok->quote_size = quote_size;
1092
16.6k
        the_current_tok->start = tok->start;
1093
16.6k
        the_current_tok->multi_line_start = tok->line_start;
1094
16.6k
        the_current_tok->first_line = tok->lineno;
1095
16.6k
        the_current_tok->start_offset = -1;
1096
16.6k
        the_current_tok->multi_line_start_offset = -1;
1097
16.6k
        the_current_tok->last_expr_buffer = NULL;
1098
16.6k
        the_current_tok->last_expr_size = 0;
1099
16.6k
        the_current_tok->last_expr_end = -1;
1100
16.6k
        the_current_tok->in_format_spec = 0;
1101
16.6k
        the_current_tok->in_debug = 0;
1102
1103
16.6k
        enum string_kind_t string_kind = FSTRING;
1104
16.6k
        switch (*tok->start) {
1105
579
            case 'T':
1106
4.27k
            case 't':
1107
4.27k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.27k
                string_kind = TSTRING;
1109
4.27k
                break;
1110
1.89k
            case 'F':
1111
11.9k
            case 'f':
1112
11.9k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
11.9k
                break;
1114
196
            case 'R':
1115
477
            case 'r':
1116
477
                the_current_tok->raw = 1;
1117
477
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
202
                    string_kind = TSTRING;
1119
202
                }
1120
477
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
16.6k
        }
1124
1125
16.6k
        the_current_tok->string_kind = string_kind;
1126
16.6k
        the_current_tok->curly_bracket_depth = 0;
1127
16.6k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
16.6k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
16.6k
    }
1130
1131
802k
  letter_quote:
1132
    /* String */
1133
802k
    if (c == '\'' || c == '"') {
1134
60.2k
        int quote = c;
1135
60.2k
        int quote_size = 1;             /* 1 or 3 */
1136
60.2k
        int end_quote_size = 0;
1137
60.2k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi-line strings,
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
60.2k
        tok->first_lineno = tok->lineno;
1144
60.2k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
60.2k
        c = tok_nextc(tok);
1148
60.2k
        if (c == quote) {
1149
9.90k
            c = tok_nextc(tok);
1150
9.90k
            if (c == quote) {
1151
3.09k
                quote_size = 3;
1152
3.09k
            }
1153
6.81k
            else {
1154
6.81k
                end_quote_size = 1;     /* empty string found */
1155
6.81k
            }
1156
9.90k
        }
1157
60.2k
        if (c != quote) {
1158
57.1k
            tok_backup(tok, c);
1159
57.1k
        }
1160
1161
        /* Get rest of string */
1162
1.27M
        while (end_quote_size != quote_size) {
1163
1.21M
            c = tok_nextc(tok);
1164
1.21M
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
1.21M
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
1.21M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
290
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location to
1173
                // the start of the string, and report the error
1174
                // from the initial quote character
1175
290
                tok->cur = (char *)tok->start;
1176
290
                tok->cur++;
1177
290
                tok->line_start = tok->multi_line_start;
1178
290
                int start = tok->lineno;
1179
290
                tok->lineno = tok->first_lineno;
1180
1181
290
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes;
1185
                     * if it does, this must be a missing '}' token,
1186
                     * so raise the proper error. */
1187
31
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
31
                    if (the_current_tok->quote == quote &&
1189
26
                        the_current_tok->quote_size == quote_size) {
1190
21
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
21
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
21
                    }
1193
31
                }
1194
1195
269
                if (quote_size == 3) {
1196
17
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
17
                                     " (detected at line %d)", start);
1198
17
                    if (c != '\n') {
1199
17
                        tok->done = E_EOFS;
1200
17
                    }
1201
17
                    return MAKE_TOKEN(ERRORTOKEN);
1202
17
                }
1203
252
                else {
1204
252
                    if (has_escaped_quote) {
1205
10
                        _PyTokenizer_syntaxerror(
1206
10
                            tok,
1207
10
                            "unterminated string literal (detected at line %d); "
1208
10
                            "perhaps you escaped the end quote?",
1209
10
                            start
1210
10
                        );
1211
242
                    } else {
1212
242
                        _PyTokenizer_syntaxerror(
1213
242
                            tok, "unterminated string literal (detected at line %d)", start
1214
242
                        );
1215
242
                    }
1216
252
                    if (c != '\n') {
1217
14
                        tok->done = E_EOLS;
1218
14
                    }
1219
252
                    return MAKE_TOKEN(ERRORTOKEN);
1220
252
                }
1221
269
            }
1222
1.21M
            if (c == quote) {
1223
61.1k
                end_quote_size += 1;
1224
61.1k
            }
1225
1.15M
            else {
1226
1.15M
                end_quote_size = 0;
1227
1.15M
                if (c == '\\') {
1228
27.1k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
27.1k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
1.15k
                        has_escaped_quote = 1;
1231
1.15k
                    }
1232
27.1k
                    if (c == '\r') {
1233
67
                        c = tok_nextc(tok);
1234
67
                    }
1235
27.1k
                }
1236
1.15M
            }
1237
1.21M
        }
1238
1239
59.9k
        p_start = tok->start;
1240
59.9k
        p_end = tok->cur;
1241
59.9k
        return MAKE_TOKEN(STRING);
1242
60.2k
    }
1243
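
The cursor rewinding above means unterminated-string errors always point at the opening quote's line, not at wherever the newline or EOF was hit. Illustrative messages (hedged):

/* s = "abc<newline>        -> unterminated string literal (detected at line 1)
 * s = "abc\"<newline>      -> same, plus: perhaps you escaped the end quote?
 * s = """abc ... <EOF>     -> unterminated triple-quoted string literal
 *                             (detected at line 1)
 */
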
1244
    /* Line continuation */
1245
742k
    if (c == '\\') {
1246
459
        if ((c = tok_continuation_line(tok)) == -1) {
1247
71
            return MAKE_TOKEN(ERRORTOKEN);
1248
71
        }
1249
388
        tok->cont_line = 1;
1250
388
        goto again; /* Read next line */
1251
459
    }
1252
1253
    /* Punctuation character */
1254
741k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
741k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
55.3k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
55.3k
        int in_format_spec = current_tok->in_format_spec;
1261
55.3k
        int cursor_in_format_with_debug =
1262
55.3k
            cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
55.3k
        int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
55.3k
        if (cursor_valid && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
55.3k
        if (cursor_valid && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
55.3k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.45k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.45k
            current_tok->in_format_spec = 1;
1274
4.45k
            p_start = tok->start;
1275
4.45k
            p_end = tok->cur;
1276
4.45k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.45k
        }
1278
55.3k
    }
1279
1280
    /* Check for two-character token */
1281
737k
    {
1282
737k
        int c2 = tok_nextc(tok);
1283
737k
        int current_token = _PyToken_TwoChars(c, c2);
1284
737k
        if (current_token != OP) {
1285
25.9k
            int c3 = tok_nextc(tok);
1286
25.9k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
25.9k
            if (current_token3 != OP) {
1288
1.06k
                current_token = current_token3;
1289
1.06k
            }
1290
24.8k
            else {
1291
24.8k
                tok_backup(tok, c3);
1292
24.8k
            }
1293
25.9k
            p_start = tok->start;
1294
25.9k
            p_end = tok->cur;
1295
25.9k
            return MAKE_TOKEN(current_token);
1296
25.9k
        }
1297
711k
        tok_backup(tok, c2);
1298
711k
    }
1299
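
Operators are matched greedily, longest first: a two-character probe, then a three-character probe, backing up whatever did not pan out. For example, with "**=" versus "*=" (token names from pycore_token.h):

/* "**=" : _PyToken_TwoChars('*','*')       -> DOUBLESTAR (not OP), read c3;
 *         _PyToken_ThreeChars('*','*','=') -> DOUBLESTAREQUAL, emitted.
 * "*="  : _PyToken_TwoChars('*','=')       -> STAREQUAL; the three-char
 *         probe yields OP, so c3 is backed up and STAREQUAL is emitted.
 */
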
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
93.9k
    case '(':
1303
126k
    case '[':
1304
168k
    case '{':
1305
168k
        if (tok->level >= MAXLEVEL) {
1306
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
3
        }
1308
168k
        tok->parenstack[tok->level] = c;
1309
168k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
168k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
168k
        tok->level++;
1312
168k
        if (INSIDE_FSTRING(tok)) {
1313
29.1k
            current_tok->curly_bracket_depth++;
1314
29.1k
        }
1315
168k
        break;
1316
66.8k
    case ')':
1317
79.3k
    case ']':
1318
105k
    case '}':
1319
105k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
51
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
51
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
51
        }
1323
105k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
176
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
176
        }
1326
104k
        if (tok->level > 0) {
1327
104k
            tok->level--;
1328
104k
            int opening = tok->parenstack[tok->level];
1329
104k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
38.1k
                                            (opening == '[' && c == ']') ||
1331
25.7k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching it with a
1335
                different syntactical construct, we raise an unmatched
1336
                parentheses error. */
1337
32
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
8
                    assert(current_tok->curly_bracket_depth >= 0);
1339
8
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
8
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
6
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
6
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
6
                    }
1344
8
                }
1345
26
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
2
                            "closing parenthesis '%c' does not match "
1348
2
                            "opening parenthesis '%c' on line %d",
1349
2
                            c, opening, tok->parenlinenostack[tok->level]));
1350
2
                }
1351
24
                else {
1352
24
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
24
                            "closing parenthesis '%c' does not match "
1354
24
                            "opening parenthesis '%c'",
1355
24
                            c, opening));
1356
24
                }
1357
26
            }
1358
104k
        }
1359
1360
104k
        if (INSIDE_FSTRING(tok)) {
1361
21.6k
            current_tok->curly_bracket_depth--;
1362
21.6k
            if (current_tok->curly_bracket_depth < 0) {
1363
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
1
                    TOK_GET_STRING_PREFIX(tok), c));
1365
1
            }
1366
21.6k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
20.6k
                current_tok->curly_bracket_expr_start_depth--;
1368
20.6k
                current_tok->kind = TOK_FSTRING_MODE;
1369
20.6k
                current_tok->in_format_spec = 0;
1370
20.6k
                current_tok->in_debug = 0;
1371
20.6k
            }
1372
21.6k
        }
1373
104k
        break;
1374
437k
    default:
1375
437k
        break;
1376
711k
    }
1377
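Editor's note: the switch above is a textbook bracket stack: openers push the character and its line number, closers pop and verify the pair, and the stored line feeds the "does not match ... on line %d" diagnostic. A reduced model of parenstack/parenlinenostack follows; MAXLEVEL's real value lives in state.h, the cap here is illustrative.

#include <stdio.h>

#define MAXLEVEL 200    /* illustrative nesting cap */

struct parens {
    char opener[MAXLEVEL];
    int  lineno[MAXLEVEL];
    int  level;
};

static int pairs(char open, char close)
{
    return (open == '(' && close == ')')
        || (open == '[' && close == ']')
        || (open == '{' && close == '}');
}

static int feed(struct parens *st, char c, int lineno)
{
    if (c == '(' || c == '[' || c == '{') {
        if (st->level >= MAXLEVEL) {
            printf("too many nested parentheses\n");
            return -1;
        }
        st->opener[st->level] = c;
        st->lineno[st->level] = lineno;
        st->level++;
    }
    else if (c == ')' || c == ']' || c == '}') {
        if (st->level == 0) {
            printf("unmatched '%c'\n", c);
            return -1;
        }
        st->level--;
        if (!pairs(st->opener[st->level], c)) {
            printf("closing parenthesis '%c' does not match "
                   "opening parenthesis '%c' on line %d\n",
                   c, st->opener[st->level], st->lineno[st->level]);
            return -1;
        }
    }
    return 0;
}

int main(void)
{
    struct parens st = {0};
    const char *src = "([{}])";
    for (int i = 0; src[i]; i++)
        if (feed(&st, src[i], 1) < 0)
            return 1;
    return feed(&st, ']', 2) < 0;   /* stack is empty: reports unmatched ']' */
}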
1378
710k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
389
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
389
    }
1381
1382
710k
    if (c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.14k
        current_tok->in_debug = 1;
1384
5.14k
    }
1385
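Editor's note: the in_debug flag set above is what powers self-documenting f-strings: with f"{value=}", a later stage replays the expression's source text (accumulated in last_expr_buffer) before the value. A toy sketch of that replay, with made-up names:

#include <stdio.h>

struct field {
    const char *expr_text;  /* what last_expr_buffer would have captured */
    int in_debug;           /* set when '=' closes the expression at top level */
};

static void render(const struct field *f, int value)
{
    if (f->in_debug)
        printf("%s=%d\n", f->expr_text, value);  /* prints "value=42" */
    else
        printf("%d\n", value);
}

int main(void)
{
    struct field f = { "value", 1 };
    render(&f, 42);
    return 0;
}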
1386
    /* Punctuation character */
1387
710k
    p_start = tok->start;
1388
710k
    p_end = tok->cur;
1389
710k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
710k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
51.1k
{
1395
51.1k
    const char *p_start = NULL;
1396
51.1k
    const char *p_end = NULL;
1397
51.1k
    int end_quote_size = 0;
1398
51.1k
    int unicode_escape = 0;
1399
1400
51.1k
    tok->start = tok->cur;
1401
51.1k
    tok->first_lineno = tok->lineno;
1402
51.1k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
51.1k
    int start_char = tok_nextc(tok);
1407
51.1k
    if (start_char == '{') {
1408
14.5k
        int peek1 = tok_nextc(tok);
1409
14.5k
        tok_backup(tok, peek1);
1410
14.5k
        tok_backup(tok, start_char);
1411
14.5k
        if (peek1 != '{') {
1412
12.8k
            current_tok->curly_bracket_expr_start_depth++;
1413
12.8k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
3
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
3
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
3
            }
1417
12.8k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
12.8k
            return tok_get_normal_mode(tok, current_tok, token);
1419
12.8k
        }
1420
14.5k
    }
1421
36.6k
    else {
1422
36.6k
        tok_backup(tok, start_char);
1423
36.6k
    }
1424
1425
    // Check if we are at the end of the string
1426
54.9k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
42.8k
        int quote = tok_nextc(tok);
1428
42.8k
        if (quote != current_tok->quote) {
1429
26.2k
            tok_backup(tok, quote);
1430
26.2k
            goto f_string_middle;
1431
26.2k
        }
1432
42.8k
    }
1433
1434
12.0k
    if (current_tok->last_expr_buffer != NULL) {
1435
7.12k
        PyMem_Free(current_tok->last_expr_buffer);
1436
7.12k
        current_tok->last_expr_buffer = NULL;
1437
7.12k
        current_tok->last_expr_size = 0;
1438
7.12k
        current_tok->last_expr_end = -1;
1439
7.12k
    }
1440
1441
12.0k
    p_start = tok->start;
1442
12.0k
    p_end = tok->cur;
1443
12.0k
    tok->tok_mode_stack_index--;
1444
12.0k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
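Editor's note: the quote-matching loop above decides whether the cursor sits on the closing quotes: it consumes up to quote_size quote characters and bails to f_string_middle on the first mismatch. A standalone sketch of the idea follows; unlike the listing, it restores every consumed quote instead of letting a partial match flow into the middle token.

#include <stdio.h>

/* Buffer-backed scanner with backup; names are illustrative. */
struct scanner { const char *p; };
static int next_ch(struct scanner *s) { return *s->p ? *s->p++ : EOF; }
static void back_ch(struct scanner *s) { s->p--; }  /* undo a non-EOF read */

/* Are we sitting on the closing quotes?  Consume quote_size copies of
   `quote`, giving everything back on the first mismatch. */
static int at_string_end(struct scanner *s, int quote, int quote_size)
{
    int matched = 0;
    while (matched < quote_size) {
        int c = next_ch(s);
        if (c != quote) {
            if (c != EOF)
                back_ch(s);
            while (matched--)
                back_ch(s);          /* restore the consumed quotes */
            return 0;
        }
        matched++;
    }
    return 1;
}

int main(void)
{
    struct scanner closed = { "\"\"\"" };   /* triple quote: the end */
    struct scanner open   = { "\"\" x" };   /* only two quotes: body text */
    printf("%d %d\n", at_string_end(&closed, '"', 3),
                      at_string_end(&open, '"', 3));
    return 0;
}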
1446
26.2k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
26.2k
    tok->multi_line_start = tok->line_start;
1451
164k
    while (end_quote_size != current_tok->quote_size) {
1452
158k
        int c = tok_nextc(tok);
1453
158k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
158k
        int in_format_spec = (
1457
158k
                current_tok->in_format_spec
1458
10.7k
                &&
1459
10.7k
                INSIDE_FSTRING_EXPR(current_tok)
1460
158k
        );
1461
1462
158k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
439
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
439
            if (in_format_spec && c == '\n') {
1471
52
                if (current_tok->quote_size == 1) {
1472
52
                    return MAKE_TOKEN(
1473
52
                        _PyTokenizer_syntaxerror(
1474
52
                            tok,
1475
52
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
52
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
52
                        )
1478
52
                    );
1479
52
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
52
            }
1487
1488
439
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
1492
387
            tok->cur = (char *)current_tok->start;
1493
387
            tok->cur++;
1494
387
            tok->line_start = current_tok->multi_line_start;
1495
387
            int start = tok->lineno;
1496
1497
387
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
387
            tok->lineno = the_current_tok->first_line;
1499
1500
387
            if (current_tok->quote_size == 3) {
1501
38
                _PyTokenizer_syntaxerror(tok,
1502
38
                                    "unterminated triple-quoted %c-string literal"
1503
38
                                    " (detected at line %d)",
1504
38
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
38
                if (c != '\n') {
1506
38
                    tok->done = E_EOFS;
1507
38
                }
1508
38
                return MAKE_TOKEN(ERRORTOKEN);
1509
38
            }
1510
349
            else {
1511
349
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
349
                                    "unterminated %c-string literal (detected at"
1513
349
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
349
            }
1515
387
        }
1516
1517
158k
        if (c == current_tok->quote) {
1518
8.41k
            end_quote_size += 1;
1519
8.41k
            continue;
1520
149k
        } else {
1521
149k
            end_quote_size = 0;
1522
149k
        }
1523
1524
149k
        if (c == '{') {
1525
15.3k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
15.3k
            int peek = tok_nextc(tok);
1529
15.3k
            if (peek != '{' || in_format_spec) {
1530
13.2k
                tok_backup(tok, peek);
1531
13.2k
                tok_backup(tok, c);
1532
13.2k
                current_tok->curly_bracket_expr_start_depth++;
1533
13.2k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
5
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
5
                }
1537
13.2k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
13.2k
                current_tok->in_format_spec = 0;
1539
13.2k
                p_start = tok->start;
1540
13.2k
                p_end = tok->cur;
1541
13.2k
            } else {
1542
2.14k
                p_start = tok->start;
1543
2.14k
                p_end = tok->cur - 1;
1544
2.14k
            }
1545
15.3k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
134k
        } else if (c == '}') {
1547
5.10k
            if (unicode_escape) {
1548
337
                p_start = tok->start;
1549
337
                p_end = tok->cur;
1550
337
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
337
            }
1552
4.76k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass that check here.
1558
4.76k
            int cursor = current_tok->curly_bracket_depth;
1559
4.76k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.61k
                p_start = tok->start;
1561
1.61k
                p_end = tok->cur - 1;
1562
3.14k
            } else {
1563
3.14k
                tok_backup(tok, peek);
1564
3.14k
                tok_backup(tok, c);
1565
3.14k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.14k
                current_tok->in_format_spec = 0;
1567
3.14k
                p_start = tok->start;
1568
3.14k
                p_end = tok->cur;
1569
3.14k
            }
1570
4.76k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
129k
        } else if (c == '\\') {
1572
5.46k
            int peek = tok_nextc(tok);
1573
5.46k
            if (peek == '\r') {
1574
67
                peek = tok_nextc(tok);
1575
67
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore and return the control back
1578
            // to the loop for the next iteration.
1579
5.46k
            if (peek == '{' || peek == '}') {
1580
1.40k
                if (!current_tok->raw) {
1581
1.21k
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
1
                        return MAKE_TOKEN(ERRORTOKEN);
1583
1
                    }
1584
1.21k
                }
1585
1.40k
                tok_backup(tok, peek);
1586
1.40k
                continue;
1587
1.40k
            }
1588
1589
4.05k
            if (!current_tok->raw) {
1590
3.94k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
463
                    peek = tok_nextc(tok);
1593
463
                    if (peek == '{') {
1594
376
                        unicode_escape = 1;
1595
376
                    } else {
1596
87
                        tok_backup(tok, peek);
1597
87
                    }
1598
463
                }
1599
3.94k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
4.05k
        }
1603
149k
    }
1604
1605
    // Back up the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1607
11.4k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.15k
        tok_backup(tok, current_tok->quote);
1609
6.15k
    }
1610
5.31k
    p_start = tok->start;
1611
5.31k
    p_end = tok->cur;
1612
5.31k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
26.2k
}
1614
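Editor's note: tok_get_fstring_mode's main loop treats doubled braces as escapes: a peeked second '{' or '}' means a literal brace that stays in the FSTRING_MIDDLE text (note how p_end = tok->cur - 1 trims the duplicate), while a lone brace is pushed back so the expression tokenizer takes over. A sketch of that classification with an illustrative scanner:

#include <stdio.h>

struct scanner { const char *p; };
static int next_ch(struct scanner *s) { return *s->p ? *s->p++ : EOF; }
static void back_ch(struct scanner *s) { s->p--; }  /* undo a non-EOF read */

/* Classify a brace seen while scanning f-string text. */
static const char *classify_brace(struct scanner *s)
{
    int c = next_ch(s);              /* the brace itself */
    int peek = next_ch(s);
    if (peek == c) {
        /* Escaped brace: keep the text up to one brace and drop the
           duplicate (cf. p_end = tok->cur - 1 in the listing). */
        return "literal brace";
    }
    if (peek != EOF)
        back_ch(s);                  /* give the peeked char back */
    back_ch(s);                      /* and the brace: a field starts here */
    return "replacement field";
}

int main(void)
{
    struct scanner a = { "{{escaped" };
    struct scanner b = { "{x}" };
    printf("%s\n%s\n", classify_brace(&a), classify_brace(&b));
    return 0;
}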
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.75M
{
1618
1.75M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.75M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.70M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.70M
    } else {
1622
51.1k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
51.1k
    }
1624
1.75M
}
1625
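Editor's note: tok_get is a dispatcher over the mode stack: the top of tok_mode_stack decides whether the regular or the f-string scanner runs next, and nested f-strings simply push more levels. A toy model of that stack discipline (MAXFSTRINGLEVEL's real value lives in state.h; the cap here is illustrative):

#include <stdio.h>

#define MAXFSTRINGLEVEL 16   /* illustrative cap */

enum kind { REGULAR_MODE, FSTRING_MODE };

struct state {
    enum kind stack[MAXFSTRINGLEVEL];
    int top;   /* index of the active mode; 0 is plain Python source */
};

static const char *scanner_for(const struct state *st)
{
    return st->stack[st->top] == REGULAR_MODE ? "regular scanner"
                                              : "f-string scanner";
}

int main(void)
{
    struct state st = { {REGULAR_MODE}, 0 };
    printf("%s\n", scanner_for(&st));       /* regular scanner */
    st.stack[++st.top] = FSTRING_MODE;      /* saw: f" */
    printf("%s\n", scanner_for(&st));       /* f-string scanner */
    st.top--;                               /* saw the closing quote */
    printf("%s\n", scanner_for(&st));       /* regular scanner again */
    return 0;
}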
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.75M
{
1629
1.75M
    int result = tok_get(tok, token);
1630
1.75M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.75M
    return result;
1635
1.75M
}
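Editor's note: _PyTokenizer_Get shows a latched-error pattern: whatever token the scanner produced, a decoding error recorded anywhere along the way downgrades the result to ERRORTOKEN and sets tok->done, so callers check one return value and one status field. A minimal sketch of the pattern under made-up names:

#include <stdio.h>

/* Stand-ins for the real token and error codes. */
enum { GOODTOKEN, ERRORTOKEN };
enum { E_OK, E_DECODE };

struct state { int decoding_erred; int done; };

static int scan_one(struct state *st) { (void)st; return GOODTOKEN; }

/* Entry point: a decoding error latched during scanning overrides
   whatever the scanner returned. */
static int get_token(struct state *st)
{
    int result = scan_one(st);
    if (st->decoding_erred) {
        result = ERRORTOKEN;
        st->done = E_DECODE;
    }
    return result;
}

int main(void)
{
    struct state st = { 1, E_OK };   /* pretend a decode error was latched */
    printf("token=%d done=%d\n", get_token(&st), st.done);
    return 0;
}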