Coverage Report

Created: 2026-03-23 06:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.62k
#define ALTTABSIZE 1
11
12
1.90M
#define is_potential_identifier_start(c) (\
13
1.90M
              (c >= 'a' && c <= 'z')\
14
1.90M
               || (c >= 'A' && c <= 'Z')\
15
1.90M
               || c == '_'\
16
1.90M
               || (c >= 128))
17
18
3.07M
#define is_potential_identifier_char(c) (\
19
3.07M
              (c >= 'a' && c <= 'z')\
20
3.07M
               || (c >= 'A' && c <= 'Z')\
21
3.07M
               || (c >= '0' && c <= '9')\
22
3.07M
               || c == '_'\
23
3.07M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
2.10M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
18.9k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
24
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.97M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
239k
{
55
239k
    return memchr(str, 0, size) != NULL;
56
239k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
10.8M
{
62
10.8M
    int rc;
63
11.0M
    for (;;) {
64
11.0M
        if (tok->cur != tok->inp) {
65
10.5M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
10.5M
            tok->col_offset++;
70
10.5M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
10.5M
        }
72
504k
        if (tok->done != E_OK) {
73
176k
            return EOF;
74
176k
        }
75
327k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
327k
        if (!rc) {
84
88.2k
            tok->cur = tok->inp;
85
88.2k
            return EOF;
86
88.2k
        }
87
239k
        tok->line_start = tok->cur;
88
89
239k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
239k
    }
95
10.8M
    Py_UNREACHABLE();
96
10.8M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.14M
{
102
4.14M
    if (c != EOF) {
103
3.96M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.96M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.96M
        tok->col_offset--;
110
3.96M
    }
111
4.14M
}
112
113
static int
114
26.3k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
26.3k
    assert(token != NULL);
116
26.3k
    assert(c == '}' || c == ':' || c == '!');
117
26.3k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
26.3k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
15.3k
        return 0;
121
15.3k
    }
122
11.0k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
11.0k
    int hash_detected = 0;
126
11.0k
    int in_string = 0;
127
11.0k
    char quote_char = 0;
128
129
1.34M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.33M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.33M
        if (ch == '\\') {
134
28.0k
            i++;
135
28.0k
            continue;
136
28.0k
        }
137
138
        // Handle quotes
139
1.30M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works becase there is an off number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
240k
            if (!in_string) {
148
88.5k
                in_string = 1;
149
88.5k
                quote_char = ch;
150
88.5k
            }
151
151k
            else if (ch == quote_char) {
152
87.5k
                in_string = 0;
153
87.5k
            }
154
240k
            continue;
155
240k
        }
156
157
        // Check for # outside strings
158
1.06M
        if (ch == '#' && !in_string) {
159
859
            hash_detected = 1;
160
859
            break;
161
859
        }
162
1.06M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
11.0k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
859
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
859
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
859
        Py_ssize_t i = 0;  // Input position
172
859
        Py_ssize_t j = 0;  // Output position
173
859
        in_string = 0;     // Whether we're in a string
174
859
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
50.9k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
50.1k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
50.1k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
8.75k
                if (!in_string) {
184
3.01k
                    in_string = 1;
185
3.01k
                    quote_char = ch;
186
5.73k
                } else if (ch == quote_char) {
187
3.00k
                    in_string = 0;
188
3.00k
                }
189
8.75k
                result[j++] = ch;
190
8.75k
            }
191
            // Skip comments
192
41.3k
            else if (ch == '#' && !in_string) {
193
28.6k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
27.8k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
27.6k
                    i++;
196
27.6k
                }
197
1.04k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
268
                    result[j++] = '\n';
199
268
                }
200
1.04k
            }
201
            // Copy other chars
202
40.3k
            else {
203
40.3k
                result[j++] = ch;
204
40.3k
            }
205
50.1k
            i++;
206
50.1k
        }
207
208
859
        result[j] = '\0';  // Null-terminate the result string
209
859
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
859
        PyMem_Free(result);
211
10.2k
    } else {
212
10.2k
        res = PyUnicode_DecodeUTF8(
213
10.2k
            tok_mode->last_expr_buffer,
214
10.2k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
10.2k
            NULL
216
10.2k
        );
217
10.2k
    }
218
219
11.0k
    if (!res) {
220
0
        return -1;
221
0
    }
222
11.0k
    token->metadata = res;
223
11.0k
    return 0;
224
11.0k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
69.0k
{
229
69.0k
    assert(tok->cur != NULL);
230
231
69.0k
    Py_ssize_t size = strlen(tok->cur);
232
69.0k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
69.0k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
42.6k
        case '{':
252
42.6k
            if (tok_mode->last_expr_buffer != NULL) {
253
28.9k
                PyMem_Free(tok_mode->last_expr_buffer);
254
28.9k
            }
255
42.6k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
42.6k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
42.6k
            tok_mode->last_expr_size = size;
260
42.6k
            tok_mode->last_expr_end = -1;
261
42.6k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
42.6k
            break;
263
20.8k
        case '}':
264
23.2k
        case '!':
265
23.2k
            tok_mode->last_expr_end = strlen(tok->start);
266
23.2k
            break;
267
3.15k
        case ':':
268
3.15k
            if (tok_mode->last_expr_end == -1) {
269
2.82k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.82k
            }
271
3.15k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
69.0k
    }
275
69.0k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
69.0k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
6.95k
{
284
6.95k
    const char *s = test;
285
6.95k
    int res = 0;
286
18.7k
    while (1) {
287
18.7k
        int c = tok_nextc(tok);
288
18.7k
        if (*s == 0) {
289
6.86k
            res = !is_potential_identifier_char(c);
290
6.86k
        }
291
11.8k
        else if (c == *s) {
292
11.7k
            s++;
293
11.7k
            continue;
294
11.7k
        }
295
296
6.95k
        tok_backup(tok, c);
297
18.7k
        while (s != test) {
298
11.7k
            tok_backup(tok, *--s);
299
11.7k
        }
300
6.95k
        return res;
301
18.7k
    }
302
6.95k
}
303
304
static int
305
78.8k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
78.8k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
78.8k
    int r = 0;
322
78.8k
    if (c == 'a') {
323
1.00k
        r = lookahead(tok, "nd");
324
1.00k
    }
325
77.8k
    else if (c == 'e') {
326
354
        r = lookahead(tok, "lse");
327
354
    }
328
77.4k
    else if (c == 'f') {
329
2.98k
        r = lookahead(tok, "or");
330
2.98k
    }
331
74.4k
    else if (c == 'i') {
332
1.08k
        int c2 = tok_nextc(tok);
333
1.08k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.07k
            r = 1;
335
1.07k
        }
336
1.08k
        tok_backup(tok, c2);
337
1.08k
    }
338
73.4k
    else if (c == 'o') {
339
2.30k
        r = lookahead(tok, "r");
340
2.30k
    }
341
71.1k
    else if (c == 'n') {
342
308
        r = lookahead(tok, "ot");
343
308
    }
344
78.8k
    if (r) {
345
7.92k
        tok_backup(tok, c);
346
7.92k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
7.92k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
7.92k
        tok_nextc(tok);
352
7.92k
    }
353
70.9k
    else /* In future releases, only error will remain. */
354
70.9k
    if (c < 128 && is_potential_identifier_char(c)) {
355
201
        tok_backup(tok, c);
356
201
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
201
        return 0;
358
201
    }
359
78.6k
    return 1;
360
78.8k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
10.8k
{
366
10.8k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
10.8k
    PyObject *s;
370
10.8k
    if (tok->decoding_erred)
371
0
        return 0;
372
10.8k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
10.8k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
10.8k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
10.8k
    assert(invalid >= 0);
384
10.8k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
10.8k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
530
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
530
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
360
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
360
            if (s != NULL) {
391
360
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
360
            }
393
360
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
360
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
360
        }
399
530
        Py_DECREF(s);
400
530
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
292
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
292
        }
403
238
        else {
404
238
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
238
        }
406
530
        return 0;
407
530
    }
408
10.2k
    Py_DECREF(s);
409
10.2k
    return 1;
410
10.8k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
59.6k
{
415
59.6k
    int c;
416
417
59.9k
    while (1) {
418
199k
        do {
419
199k
            c = tok_nextc(tok);
420
199k
        } while (Py_ISDIGIT(c));
421
59.9k
        if (c != '_') {
422
59.6k
            break;
423
59.6k
        }
424
304
        c = tok_nextc(tok);
425
304
        if (!Py_ISDIGIT(c)) {
426
17
            tok_backup(tok, c);
427
17
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
17
            return 0;
429
17
        }
430
304
    }
431
59.6k
    return c;
432
59.6k
}
433
434
static inline int
435
851
tok_continuation_line(struct tok_state *tok) {
436
851
    int c = tok_nextc(tok);
437
851
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
851
    if (c != '\n') {
441
52
        tok->done = E_LINECONT;
442
52
        return -1;
443
52
    }
444
799
    c = tok_nextc(tok);
445
799
    if (c == EOF) {
446
37
        tok->done = E_EOF;
447
37
        tok->cur = tok->inp;
448
37
        return -1;
449
762
    } else {
450
762
        tok_backup(tok, c);
451
762
    }
452
762
    return c;
453
799
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
22.3k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
22.3k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
22.3k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
22.3k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
22.3k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
22.3k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
22.3k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
22.3k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
22.3k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
22.3k
    if (saw_f && saw_t) {
492
2
        RETURN_SYNTAX_ERROR("f", "t");
493
2
    }
494
495
22.3k
#undef RETURN_SYNTAX_ERROR
496
497
22.3k
    return 0;
498
22.3k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.93M
{
503
1.93M
    int c;
504
1.93M
    int blankline, nonascii;
505
506
1.93M
    const char *p_start = NULL;
507
1.93M
    const char *p_end = NULL;
508
2.01M
  nextline:
509
2.01M
    tok->start = NULL;
510
2.01M
    tok->starting_col_offset = -1;
511
2.01M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.01M
    if (tok->atbol) {
516
317k
        int col = 0;
517
317k
        int altcol = 0;
518
317k
        tok->atbol = 0;
519
317k
        int cont_line_col = 0;
520
670k
        for (;;) {
521
670k
            c = tok_nextc(tok);
522
670k
            if (c == ' ') {
523
350k
                col++, altcol++;
524
350k
            }
525
319k
            else if (c == '\t') {
526
813
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
813
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
813
            }
529
318k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
774
                col = altcol = 0; /* For Emacs users */
531
774
            }
532
317k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
535
                cont_line_col = cont_line_col ? cont_line_col : col;
538
535
                if ((c = tok_continuation_line(tok)) == -1) {
539
31
                    return MAKE_TOKEN(ERRORTOKEN);
540
31
                }
541
535
            }
542
317k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
317k
            else {
546
317k
                break;
547
317k
            }
548
670k
        }
549
317k
        tok_backup(tok, c);
550
317k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
46.8k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
46.8k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
46.8k
            else {
566
46.8k
                blankline = 1; /* Ignore completely */
567
46.8k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
46.8k
        }
571
317k
        if (!blankline && tok->level == 0) {
572
237k
            col = cont_line_col ? cont_line_col : col;
573
237k
            altcol = cont_line_col ? cont_line_col : altcol;
574
237k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
215k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
215k
            }
580
22.0k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.3k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.3k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.3k
                tok->pendin++;
591
12.3k
                tok->indstack[++tok->indent] = col;
592
12.3k
                tok->altindstack[tok->indent] = altcol;
593
12.3k
            }
594
9.69k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
21.4k
                while (tok->indent > 0 &&
597
18.3k
                    col < tok->indstack[tok->indent]) {
598
11.7k
                    tok->pendin--;
599
11.7k
                    tok->indent--;
600
11.7k
                }
601
9.69k
                if (col != tok->indstack[tok->indent]) {
602
8
                    tok->done = E_DEDENT;
603
8
                    tok->cur = tok->inp;
604
8
                    return MAKE_TOKEN(ERRORTOKEN);
605
8
                }
606
9.68k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.68k
            }
610
237k
        }
611
317k
    }
612
613
2.01M
    tok->start = tok->cur;
614
2.01M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.01M
    if (tok->pendin != 0) {
618
24.0k
        if (tok->pendin < 0) {
619
11.7k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
11.7k
            tok->pendin++;
624
11.7k
            return MAKE_TOKEN(DEDENT);
625
11.7k
        }
626
12.3k
        else {
627
12.3k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.3k
            tok->pendin--;
632
12.3k
            return MAKE_TOKEN(INDENT);
633
12.3k
        }
634
24.0k
    }
635
636
    /* Peek ahead at the next character */
637
1.99M
    c = tok_nextc(tok);
638
1.99M
    tok_backup(tok, c);
639
640
1.99M
 again:
641
1.99M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.40M
    do {
644
2.40M
        c = tok_nextc(tok);
645
2.40M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.99M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.99M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.99M
    if (c == '#') {
653
654
31.1k
        const char* p = NULL;
655
31.1k
        const char *prefix, *type_start;
656
31.1k
        int current_starting_col_offset;
657
658
989k
        while (c != EOF && c != '\n' && c != '\r') {
659
958k
            c = tok_nextc(tok);
660
958k
        }
661
662
31.1k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
31.1k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
31.1k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
31.1k
    }
728
729
1.99M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.99M
    if (c == EOF) {
735
88.1k
        if (tok->level) {
736
3.67k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.67k
        }
738
84.5k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
88.1k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.90M
    nonascii = 0;
743
1.90M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
648k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
824k
        while (1) {
747
824k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
17.3k
                saw_b = 1;
749
17.3k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
807k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
79.6k
                saw_u = 1;
754
79.6k
            }
755
            /* ur"" and ru"" are not supported */
756
727k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
26.4k
                saw_r = 1;
758
26.4k
            }
759
701k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
42.6k
                saw_f = 1;
761
42.6k
            }
762
658k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
32.8k
                saw_t = 1;
764
32.8k
            }
765
626k
            else {
766
626k
                break;
767
626k
            }
768
198k
            c = tok_nextc(tok);
769
198k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
22.3k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
22.3k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
22.3k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
22.3k
                if (saw_f || saw_t) {
779
18.9k
                    goto f_string_quote;
780
18.9k
                }
781
3.35k
                goto letter_quote;
782
22.3k
            }
783
198k
        }
784
2.99M
        while (is_potential_identifier_char(c)) {
785
2.36M
            if (c >= 128) {
786
118k
                nonascii = 1;
787
118k
            }
788
2.36M
            c = tok_nextc(tok);
789
2.36M
        }
790
626k
        tok_backup(tok, c);
791
626k
        if (nonascii && !verify_identifier(tok)) {
792
530
            return MAKE_TOKEN(ERRORTOKEN);
793
530
        }
794
795
625k
        p_start = tok->start;
796
625k
        p_end = tok->cur;
797
798
625k
        return MAKE_TOKEN(NAME);
799
626k
    }
800
801
1.25M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.25M
    if (c == '\n') {
807
226k
        tok->atbol = 1;
808
226k
        if (blankline || tok->level > 0) {
809
80.1k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
80.0k
            goto nextline;
818
80.1k
        }
819
146k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
146k
        p_start = tok->start;
826
146k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
146k
        tok->cont_line = 0;
828
146k
        return MAKE_TOKEN(NEWLINE);
829
146k
    }
830
831
    /* Period or number starting with period? */
832
1.02M
    if (c == '.') {
833
29.6k
        c = tok_nextc(tok);
834
29.6k
        if (Py_ISDIGIT(c)) {
835
3.37k
            goto fraction;
836
26.2k
        } else if (c == '.') {
837
1.33k
            c = tok_nextc(tok);
838
1.33k
            if (c == '.') {
839
698
                p_start = tok->start;
840
698
                p_end = tok->cur;
841
698
                return MAKE_TOKEN(ELLIPSIS);
842
698
            }
843
636
            else {
844
636
                tok_backup(tok, c);
845
636
            }
846
636
            tok_backup(tok, '.');
847
636
        }
848
24.9k
        else {
849
24.9k
            tok_backup(tok, c);
850
24.9k
        }
851
25.5k
        p_start = tok->start;
852
25.5k
        p_end = tok->cur;
853
25.5k
        return MAKE_TOKEN(DOT);
854
29.6k
    }
855
856
    /* Number */
857
997k
    if (Py_ISDIGIT(c)) {
858
75.6k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
30.0k
            c = tok_nextc(tok);
861
30.0k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
13.7k
                c = tok_nextc(tok);
864
13.8k
                do {
865
13.8k
                    if (c == '_') {
866
74
                        c = tok_nextc(tok);
867
74
                    }
868
13.8k
                    if (!Py_ISXDIGIT(c)) {
869
15
                        tok_backup(tok, c);
870
15
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
15
                    }
872
69.8k
                    do {
873
69.8k
                        c = tok_nextc(tok);
874
69.8k
                    } while (Py_ISXDIGIT(c));
875
13.7k
                } while (c == '_');
876
13.7k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
2
                    return MAKE_TOKEN(ERRORTOKEN);
878
2
                }
879
13.7k
            }
880
16.2k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
616
                c = tok_nextc(tok);
883
828
                do {
884
828
                    if (c == '_') {
885
213
                        c = tok_nextc(tok);
886
213
                    }
887
828
                    if (c < '0' || c >= '8') {
888
20
                        if (Py_ISDIGIT(c)) {
889
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
2
                                    "invalid digit '%c' in octal literal", c));
891
2
                        }
892
18
                        else {
893
18
                            tok_backup(tok, c);
894
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
18
                        }
896
20
                    }
897
2.07k
                    do {
898
2.07k
                        c = tok_nextc(tok);
899
2.07k
                    } while ('0' <= c && c < '8');
900
808
                } while (c == '_');
901
596
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
595
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
595
            }
909
15.6k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
388
                c = tok_nextc(tok);
912
535
                do {
913
535
                    if (c == '_') {
914
154
                        c = tok_nextc(tok);
915
154
                    }
916
535
                    if (c != '0' && c != '1') {
917
23
                        if (Py_ISDIGIT(c)) {
918
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
2
                        }
920
21
                        else {
921
21
                            tok_backup(tok, c);
922
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
21
                        }
924
23
                    }
925
3.17k
                    do {
926
3.17k
                        c = tok_nextc(tok);
927
3.17k
                    } while (c == '0' || c == '1');
928
512
                } while (c == '_');
929
365
                if (Py_ISDIGIT(c)) {
930
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
1
                }
932
364
                if (!verify_end_of_number(tok, c, "binary")) {
933
3
                    return MAKE_TOKEN(ERRORTOKEN);
934
3
                }
935
364
            }
936
15.2k
            else {
937
15.2k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
17.2k
                while (1) {
941
17.2k
                    if (c == '_') {
942
241
                        c = tok_nextc(tok);
943
241
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
241
                    }
948
17.2k
                    if (c != '0') {
949
15.2k
                        break;
950
15.2k
                    }
951
2.00k
                    c = tok_nextc(tok);
952
2.00k
                }
953
15.2k
                char* zeros_end = tok->cur;
954
15.2k
                if (Py_ISDIGIT(c)) {
955
359
                    nonzero = 1;
956
359
                    c = tok_decimal_tail(tok);
957
359
                    if (c == 0) {
958
3
                        return MAKE_TOKEN(ERRORTOKEN);
959
3
                    }
960
359
                }
961
15.2k
                if (c == '.') {
962
1.11k
                    c = tok_nextc(tok);
963
1.11k
                    goto fraction;
964
1.11k
                }
965
14.1k
                else if (c == 'e' || c == 'E') {
966
768
                    goto exponent;
967
768
                }
968
13.3k
                else if (c == 'j' || c == 'J') {
969
657
                    goto imaginary;
970
657
                }
971
12.7k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
30
                    tok_backup(tok, c);
974
30
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
30
                            tok, (int)(tok->start + 1 - tok->line_start),
976
30
                            (int)(zeros_end - tok->line_start),
977
30
                            "leading zeros in decimal integer "
978
30
                            "literals are not permitted; "
979
30
                            "use an 0o prefix for octal integers"));
980
30
                }
981
12.7k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
28
                    return MAKE_TOKEN(ERRORTOKEN);
983
28
                }
984
12.7k
            }
985
30.0k
        }
986
45.6k
        else {
987
            /* Decimal */
988
45.6k
            c = tok_decimal_tail(tok);
989
45.6k
            if (c == 0) {
990
11
                return MAKE_TOKEN(ERRORTOKEN);
991
11
            }
992
45.5k
            {
993
                /* Accept floating-point numbers. */
994
45.5k
                if (c == '.') {
995
3.09k
                    c = tok_nextc(tok);
996
7.58k
        fraction:
997
                    /* Fraction */
998
7.58k
                    if (Py_ISDIGIT(c)) {
999
5.79k
                        c = tok_decimal_tail(tok);
1000
5.79k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
5.79k
                    }
1004
7.58k
                }
1005
50.0k
                if (c == 'e' || c == 'E') {
1006
7.49k
                    int e;
1007
8.26k
                  exponent:
1008
8.26k
                    e = c;
1009
                    /* Exponent part */
1010
8.26k
                    c = tok_nextc(tok);
1011
8.26k
                    if (c == '+' || c == '-') {
1012
3.06k
                        c = tok_nextc(tok);
1013
3.06k
                        if (!Py_ISDIGIT(c)) {
1014
11
                            tok_backup(tok, c);
1015
11
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
11
                        }
1017
5.19k
                    } else if (!Py_ISDIGIT(c)) {
1018
359
                        tok_backup(tok, c);
1019
359
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
33
                            return MAKE_TOKEN(ERRORTOKEN);
1021
33
                        }
1022
326
                        tok_backup(tok, e);
1023
326
                        p_start = tok->start;
1024
326
                        p_end = tok->cur;
1025
326
                        return MAKE_TOKEN(NUMBER);
1026
359
                    }
1027
7.89k
                    c = tok_decimal_tail(tok);
1028
7.89k
                    if (c == 0) {
1029
2
                        return MAKE_TOKEN(ERRORTOKEN);
1030
2
                    }
1031
7.89k
                }
1032
50.4k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.87k
        imaginary:
1035
3.87k
                    c = tok_nextc(tok);
1036
3.87k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
8
                        return MAKE_TOKEN(ERRORTOKEN);
1038
8
                    }
1039
3.87k
                }
1040
47.2k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
123
                    return MAKE_TOKEN(ERRORTOKEN);
1042
123
                }
1043
50.4k
            }
1044
50.4k
        }
1045
78.3k
        tok_backup(tok, c);
1046
78.3k
        p_start = tok->start;
1047
78.3k
        p_end = tok->cur;
1048
78.3k
        return MAKE_TOKEN(NUMBER);
1049
75.6k
    }
1050
1051
940k
  f_string_quote:
1052
940k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
18.9k
        && (c == '\'' || c == '"'))) {
1054
1055
18.9k
        int quote = c;
1056
18.9k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
18.9k
        tok->first_lineno = tok->lineno;
1063
18.9k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
18.9k
        int after_quote = tok_nextc(tok);
1067
18.9k
        if (after_quote == quote) {
1068
2.72k
            int after_after_quote = tok_nextc(tok);
1069
2.72k
            if (after_after_quote == quote) {
1070
764
                quote_size = 3;
1071
764
            }
1072
1.96k
            else {
1073
                // TODO: Check this
1074
1.96k
                tok_backup(tok, after_after_quote);
1075
1.96k
                tok_backup(tok, after_quote);
1076
1.96k
            }
1077
2.72k
        }
1078
18.9k
        if (after_quote != quote) {
1079
16.2k
            tok_backup(tok, after_quote);
1080
16.2k
        }
1081
1082
1083
18.9k
        p_start = tok->start;
1084
18.9k
        p_end = tok->cur;
1085
18.9k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
18.9k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
18.9k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
18.9k
        the_current_tok->quote = quote;
1091
18.9k
        the_current_tok->quote_size = quote_size;
1092
18.9k
        the_current_tok->start = tok->start;
1093
18.9k
        the_current_tok->multi_line_start = tok->line_start;
1094
18.9k
        the_current_tok->first_line = tok->lineno;
1095
18.9k
        the_current_tok->start_offset = -1;
1096
18.9k
        the_current_tok->multi_line_start_offset = -1;
1097
18.9k
        the_current_tok->last_expr_buffer = NULL;
1098
18.9k
        the_current_tok->last_expr_size = 0;
1099
18.9k
        the_current_tok->last_expr_end = -1;
1100
18.9k
        the_current_tok->in_format_spec = 0;
1101
18.9k
        the_current_tok->in_debug = 0;
1102
1103
18.9k
        enum string_kind_t string_kind = FSTRING;
1104
18.9k
        switch (*tok->start) {
1105
961
            case 'T':
1106
4.64k
            case 't':
1107
4.64k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.64k
                string_kind = TSTRING;
1109
4.64k
                break;
1110
3.35k
            case 'F':
1111
13.7k
            case 'f':
1112
13.7k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
13.7k
                break;
1114
325
            case 'R':
1115
604
            case 'r':
1116
604
                the_current_tok->raw = 1;
1117
604
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
73
                    string_kind = TSTRING;
1119
73
                }
1120
604
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
18.9k
        }
1124
1125
18.9k
        the_current_tok->string_kind = string_kind;
1126
18.9k
        the_current_tok->curly_bracket_depth = 0;
1127
18.9k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
18.9k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
18.9k
    }
1130
1131
925k
  letter_quote:
1132
    /* String */
1133
925k
    if (c == '\'' || c == '"') {
1134
38.2k
        int quote = c;
1135
38.2k
        int quote_size = 1;             /* 1 or 3 */
1136
38.2k
        int end_quote_size = 0;
1137
38.2k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
38.2k
        tok->first_lineno = tok->lineno;
1144
38.2k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
38.2k
        c = tok_nextc(tok);
1148
38.2k
        if (c == quote) {
1149
7.13k
            c = tok_nextc(tok);
1150
7.13k
            if (c == quote) {
1151
1.22k
                quote_size = 3;
1152
1.22k
            }
1153
5.90k
            else {
1154
5.90k
                end_quote_size = 1;     /* empty string found */
1155
5.90k
            }
1156
7.13k
        }
1157
38.2k
        if (c != quote) {
1158
37.0k
            tok_backup(tok, c);
1159
37.0k
        }
1160
1161
        /* Get rest of string */
1162
555k
        while (end_quote_size != quote_size) {
1163
517k
            c = tok_nextc(tok);
1164
517k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
517k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
517k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
306
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
306
                tok->cur = (char *)tok->start;
1176
306
                tok->cur++;
1177
306
                tok->line_start = tok->multi_line_start;
1178
306
                int start = tok->lineno;
1179
306
                tok->lineno = tok->first_lineno;
1180
1181
306
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * does the initial quote matches with f-strings quotes
1185
                     * and if it is, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
36
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
36
                    if (the_current_tok->quote == quote &&
1189
26
                        the_current_tok->quote_size == quote_size) {
1190
22
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
22
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
22
                    }
1193
36
                }
1194
1195
284
                if (quote_size == 3) {
1196
22
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
22
                                     " (detected at line %d)", start);
1198
22
                    if (c != '\n') {
1199
22
                        tok->done = E_EOFS;
1200
22
                    }
1201
22
                    return MAKE_TOKEN(ERRORTOKEN);
1202
22
                }
1203
262
                else {
1204
262
                    if (has_escaped_quote) {
1205
8
                        _PyTokenizer_syntaxerror(
1206
8
                            tok,
1207
8
                            "unterminated string literal (detected at line %d); "
1208
8
                            "perhaps you escaped the end quote?",
1209
8
                            start
1210
8
                        );
1211
254
                    } else {
1212
254
                        _PyTokenizer_syntaxerror(
1213
254
                            tok, "unterminated string literal (detected at line %d)", start
1214
254
                        );
1215
254
                    }
1216
262
                    if (c != '\n') {
1217
7
                        tok->done = E_EOLS;
1218
7
                    }
1219
262
                    return MAKE_TOKEN(ERRORTOKEN);
1220
262
                }
1221
284
            }
1222
516k
            if (c == quote) {
1223
35.7k
                end_quote_size += 1;
1224
35.7k
            }
1225
481k
            else {
1226
481k
                end_quote_size = 0;
1227
481k
                if (c == '\\') {
1228
25.0k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
25.0k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
976
                        has_escaped_quote = 1;
1231
976
                    }
1232
25.0k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
25.0k
                }
1236
481k
            }
1237
516k
        }
1238
1239
37.9k
        p_start = tok->start;
1240
37.9k
        p_end = tok->cur;
1241
37.9k
        return MAKE_TOKEN(STRING);
1242
38.2k
    }
1243
1244
    /* Line continuation */
1245
886k
    if (c == '\\') {
1246
316
        if ((c = tok_continuation_line(tok)) == -1) {
1247
58
            return MAKE_TOKEN(ERRORTOKEN);
1248
58
        }
1249
258
        tok->cont_line = 1;
1250
258
        goto again; /* Read next line */
1251
316
    }
1252
1253
    /* Punctuation character */
1254
886k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
886k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
59.6k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
59.6k
        int in_format_spec = current_tok->in_format_spec;
1261
59.6k
         int cursor_in_format_with_debug =
1262
59.6k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
59.6k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
59.6k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
59.6k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
59.6k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
3.81k
            current_tok->kind = TOK_FSTRING_MODE;
1273
3.81k
            current_tok->in_format_spec = 1;
1274
3.81k
            p_start = tok->start;
1275
3.81k
            p_end = tok->cur;
1276
3.81k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
3.81k
        }
1278
59.6k
    }
1279
1280
    /* Check for two-character token */
1281
882k
    {
1282
882k
        int c2 = tok_nextc(tok);
1283
882k
        int current_token = _PyToken_TwoChars(c, c2);
1284
882k
        if (current_token != OP) {
1285
22.1k
            int c3 = tok_nextc(tok);
1286
22.1k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
22.1k
            if (current_token3 != OP) {
1288
1.15k
                current_token = current_token3;
1289
1.15k
            }
1290
20.9k
            else {
1291
20.9k
                tok_backup(tok, c3);
1292
20.9k
            }
1293
22.1k
            p_start = tok->start;
1294
22.1k
            p_end = tok->cur;
1295
22.1k
            return MAKE_TOKEN(current_token);
1296
22.1k
        }
1297
860k
        tok_backup(tok, c2);
1298
860k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
70.5k
    case '(':
1303
100k
    case '[':
1304
144k
    case '{':
1305
144k
        if (tok->level >= MAXLEVEL) {
1306
16
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
16
        }
1308
144k
        tok->parenstack[tok->level] = c;
1309
144k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
144k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
144k
        tok->level++;
1312
144k
        if (INSIDE_FSTRING(tok)) {
1313
33.0k
            current_tok->curly_bracket_depth++;
1314
33.0k
        }
1315
144k
        break;
1316
43.9k
    case ')':
1317
49.9k
    case ']':
1318
77.2k
    case '}':
1319
77.2k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
49
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
49
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
49
        }
1323
77.1k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
193
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
193
        }
1326
76.9k
        if (tok->level > 0) {
1327
76.9k
            tok->level--;
1328
76.9k
            int opening = tok->parenstack[tok->level];
1329
76.9k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
33.1k
                                            (opening == '[' && c == ']') ||
1331
27.1k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
44
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
2
                    assert(current_tok->curly_bracket_depth >= 0);
1339
2
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
2
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
1
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
1
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
1
                    }
1344
2
                }
1345
43
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
4
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
4
                            "closing parenthesis '%c' does not match "
1348
4
                            "opening parenthesis '%c' on line %d",
1349
4
                            c, opening, tok->parenlinenostack[tok->level]));
1350
4
                }
1351
39
                else {
1352
39
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
39
                            "closing parenthesis '%c' does not match "
1354
39
                            "opening parenthesis '%c'",
1355
39
                            c, opening));
1356
39
                }
1357
43
            }
1358
76.9k
        }
1359
1360
76.9k
        if (INSIDE_FSTRING(tok)) {
1361
23.5k
            current_tok->curly_bracket_depth--;
1362
23.5k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
23.5k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
22.4k
                current_tok->curly_bracket_expr_start_depth--;
1368
22.4k
                current_tok->kind = TOK_FSTRING_MODE;
1369
22.4k
                current_tok->in_format_spec = 0;
1370
22.4k
                current_tok->in_debug = 0;
1371
22.4k
            }
1372
23.5k
        }
1373
76.9k
        break;
1374
638k
    default:
1375
638k
        break;
1376
860k
    }
1377
1378
860k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
414
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
414
    }
1381
1382
859k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
6.14k
        current_tok->in_debug = 1;
1384
6.14k
    }
1385
1386
    /* Punctuation character */
1387
859k
    p_start = tok->start;
1388
859k
    p_end = tok->cur;
1389
859k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
860k
}
1391
1392
/* Tokenize the literal-text portion of an f-string or t-string.
 *
 * Called when the current tokenizer mode is TOK_FSTRING_MODE, i.e. the
 * cursor sits inside an f/t-string body (between the opening quotes and
 * either the closing quotes or the next '{' expression).  Emits exactly one
 * token per call:
 *   - FSTRING_END / TSTRING_END when the closing quotes are reached,
 *   - FSTRING_MIDDLE / TSTRING_MIDDLE for a run of literal text,
 *   - or an error token (via _PyTokenizer_syntaxerror / ERRORTOKEN).
 *
 * tok         - tokenizer state; tok->cur is advanced past the consumed text.
 * current_tok - the innermost f/t-string mode entry on tok's mode stack.
 * token       - out-parameter filled by MAKE_TOKEN.
 *
 * Returns the token type produced (the value of MAKE_TOKEN).
 */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;     /* consecutive closing-quote chars matched so far */
    int unicode_escape = 0;     /* inside a \N{...} named escape, so '}' is literal */

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        // Peek one char to distinguish an expression '{' from the escaped '{{'.
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            // Hand the '{...}' expression over to the regular tokenizer.
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // All quote_size closing quotes matched: the string ends here.  Free the
    // buffered last-expression text used for '=' debug specifiers.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    // Pop this f/t-string mode off the stack before emitting the END token.
    tok->tok_mode_stack_index--;
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            // A lone '{' (or any '{' inside a format spec) starts an
            // expression; '{{' outside a format spec is an escaped brace.
            if (peek != '{' || in_format_spec) {
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // Escaped '{{': emit the text up to (and including) one brace.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            // Inside \N{...} the '}' closes the named escape, not an expression.
            if (unicode_escape) {
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // Escaped '}}': emit the text up to one brace.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // Unmatched '}': return to regular mode, which reports it.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.97M
{
1618
1.97M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.97M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.91M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.91M
    } else {
1622
54.8k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
54.8k
    }
1624
1.97M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.97M
{
1629
1.97M
    int result = tok_get(tok, token);
1630
1.97M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.97M
    return result;
1635
1.97M
}