Coverage Report

Created: 2026-04-20 06:11

/src/cpython/Parser/lexer/lexer.c
Line | Count | Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.45k
#define ALTTABSIZE 1
11
12
2.11M
#define is_potential_identifier_start(c) (\
13
2.11M
              (c >= 'a' && c <= 'z')\
14
2.11M
               || (c >= 'A' && c <= 'Z')\
15
2.11M
               || c == '_'\
16
2.11M
               || (c >= 128))
17
18
3.32M
#define is_potential_identifier_char(c) (\
19
3.32M
              (c >= 'a' && c <= 'z')\
20
3.32M
               || (c >= 'A' && c <= 'Z')\
21
3.32M
               || (c >= '0' && c <= '9')\
22
3.32M
               || c == '_'\
23
3.32M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
2.32M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
19.0k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
22
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
2.18M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
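/* Added illustrative note (not part of the counted source): because each space
   in the prefix matches zero or more spaces or tabs, "#type:int" and
   "#   type:   int" are both recognized as matching type_comment_prefix. */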
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
272k
{
55
272k
    return memchr(str, 0, size) != NULL;
56
272k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.8M
{
62
11.8M
    int rc;
63
12.1M
    for (;;) {
64
12.1M
        if (tok->cur != tok->inp) {
65
11.5M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.5M
            tok->col_offset++;
70
11.5M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.5M
        }
72
556k
        if (tok->done != E_OK) {
73
188k
            return EOF;
74
188k
        }
75
367k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
367k
        if (!rc) {
84
94.5k
            tok->cur = tok->inp;
85
94.5k
            return EOF;
86
94.5k
        }
87
272k
        tok->line_start = tok->cur;
88
89
272k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
272k
    }
95
11.8M
    Py_UNREACHABLE();
96
11.8M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.61M
{
102
4.61M
    if (c != EOF) {
103
4.42M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.42M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.42M
        tok->col_offset--;
110
4.42M
    }
111
4.61M
}
112
113
static int
114
28.2k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
28.2k
    assert(token != NULL);
116
28.2k
    assert(c == '}' || c == ':' || c == '!');
117
28.2k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
28.2k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
16.4k
        return 0;
121
16.4k
    }
122
11.7k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
11.7k
    int hash_detected = 0;
126
11.7k
    int in_string = 0;
127
11.7k
    char quote_char = 0;
128
129
1.82M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.81M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.81M
        if (ch == '\\') {
134
38.6k
            i++;
135
38.6k
            continue;
136
38.6k
        }
137
138
        // Handle quotes
139
1.77M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
319k
            if (!in_string) {
148
118k
                in_string = 1;
149
118k
                quote_char = ch;
150
118k
            }
151
200k
            else if (ch == quote_char) {
152
116k
                in_string = 0;
153
116k
            }
154
319k
            continue;
155
319k
        }
156
157
        // Check for # outside strings
158
1.45M
        if (ch == '#' && !in_string) {
159
895
            hash_detected = 1;
160
895
            break;
161
895
        }
162
1.45M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
11.7k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
895
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
895
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
895
        Py_ssize_t i = 0;  // Input position
172
895
        Py_ssize_t j = 0;  // Output position
173
895
        in_string = 0;     // Whether we're in a string
174
895
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
73.0k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
72.1k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
72.1k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
12.0k
                if (!in_string) {
184
4.30k
                    in_string = 1;
185
4.30k
                    quote_char = ch;
186
7.76k
                } else if (ch == quote_char) {
187
4.28k
                    in_string = 0;
188
4.28k
                }
189
12.0k
                result[j++] = ch;
190
12.0k
            }
191
            // Skip comments
192
60.0k
            else if (ch == '#' && !in_string) {
193
33.6k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
32.7k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
32.5k
                    i++;
196
32.5k
                }
197
1.07k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
245
                    result[j++] = '\n';
199
245
                }
200
1.07k
            }
201
            // Copy other chars
202
59.0k
            else {
203
59.0k
                result[j++] = ch;
204
59.0k
            }
205
72.1k
            i++;
206
72.1k
        }
207
208
895
        result[j] = '\0';  // Null-terminate the result string
209
895
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
895
        PyMem_Free(result);
211
10.8k
    } else {
212
10.8k
        res = PyUnicode_DecodeUTF8(
213
10.8k
            tok_mode->last_expr_buffer,
214
10.8k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
10.8k
            NULL
216
10.8k
        );
217
10.8k
    }
218
219
11.7k
    if (!res) {
220
0
        return -1;
221
0
    }
222
11.7k
    token->metadata = res;
223
11.7k
    return 0;
224
11.7k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
75.5k
{
229
75.5k
    assert(tok->cur != NULL);
230
231
75.5k
    Py_ssize_t size = strlen(tok->cur);
232
75.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
75.5k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
47.3k
        case '{':
252
47.3k
            if (tok_mode->last_expr_buffer != NULL) {
253
33.6k
                PyMem_Free(tok_mode->last_expr_buffer);
254
33.6k
            }
255
47.3k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
47.3k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
47.3k
            tok_mode->last_expr_size = size;
260
47.3k
            tok_mode->last_expr_end = -1;
261
47.3k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
47.3k
            break;
263
22.1k
        case '}':
264
24.8k
        case '!':
265
24.8k
            tok_mode->last_expr_end = strlen(tok->start);
266
24.8k
            break;
267
3.39k
        case ':':
268
3.39k
            if (tok_mode->last_expr_end == -1) {
269
3.15k
               tok_mode->last_expr_end = strlen(tok->start);
270
3.15k
            }
271
3.39k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
75.5k
    }
275
75.5k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
75.5k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
9.51k
{
284
9.51k
    const char *s = test;
285
9.51k
    int res = 0;
286
23.7k
    while (1) {
287
23.7k
        int c = tok_nextc(tok);
288
23.7k
        if (*s == 0) {
289
9.39k
            res = !is_potential_identifier_char(c);
290
9.39k
        }
291
14.3k
        else if (c == *s) {
292
14.1k
            s++;
293
14.1k
            continue;
294
14.1k
        }
295
296
9.51k
        tok_backup(tok, c);
297
23.7k
        while (s != test) {
298
14.1k
            tok_backup(tok, *--s);
299
14.1k
        }
300
9.51k
        return res;
301
23.7k
    }
302
9.51k
}
303
304
static int
305
91.5k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
91.5k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
91.5k
    int r = 0;
322
91.5k
    if (c == 'a') {
323
889
        r = lookahead(tok, "nd");
324
889
    }
325
90.6k
    else if (c == 'e') {
326
532
        r = lookahead(tok, "lse");
327
532
    }
328
90.1k
    else if (c == 'f') {
329
2.68k
        r = lookahead(tok, "or");
330
2.68k
    }
331
87.4k
    else if (c == 'i') {
332
1.22k
        int c2 = tok_nextc(tok);
333
1.22k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.21k
            r = 1;
335
1.21k
        }
336
1.22k
        tok_backup(tok, c2);
337
1.22k
    }
338
86.2k
    else if (c == 'o') {
339
5.10k
        r = lookahead(tok, "r");
340
5.10k
    }
341
81.1k
    else if (c == 'n') {
342
303
        r = lookahead(tok, "ot");
343
303
    }
344
91.5k
    if (r) {
345
10.5k
        tok_backup(tok, c);
346
10.5k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.5k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.5k
        tok_nextc(tok);
352
10.5k
    }
353
80.9k
    else /* In future releases, only error will remain. */
354
80.9k
    if (c < 128 && is_potential_identifier_char(c)) {
355
264
        tok_backup(tok, c);
356
264
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
264
        return 0;
358
264
    }
359
91.2k
    return 1;
360
91.5k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.8k
{
366
11.8k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.8k
    PyObject *s;
370
11.8k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.8k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.8k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
11.8k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.8k
    assert(invalid >= 0);
384
11.8k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.8k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
543
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
543
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
361
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
361
            if (s != NULL) {
391
361
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
361
            }
393
361
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
361
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
361
        }
399
543
        Py_DECREF(s);
400
543
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
285
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
285
        }
403
258
        else {
404
258
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
258
        }
406
543
        return 0;
407
543
    }
408
11.3k
    Py_DECREF(s);
409
11.3k
    return 1;
410
11.8k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
70.4k
{
415
70.4k
    int c;
416
417
70.8k
    while (1) {
418
225k
        do {
419
225k
            c = tok_nextc(tok);
420
225k
        } while (Py_ISDIGIT(c));
421
70.8k
        if (c != '_') {
422
70.3k
            break;
423
70.3k
        }
424
493
        c = tok_nextc(tok);
425
493
        if (!Py_ISDIGIT(c)) {
426
22
            tok_backup(tok, c);
427
22
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
22
            return 0;
429
22
        }
430
493
    }
431
70.3k
    return c;
432
70.4k
}
433
434
static inline int
435
859
tok_continuation_line(struct tok_state *tok) {
436
859
    int c = tok_nextc(tok);
437
859
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
859
    if (c != '\n') {
441
78
        tok->done = E_LINECONT;
442
78
        return -1;
443
78
    }
444
781
    c = tok_nextc(tok);
445
781
    if (c == EOF) {
446
44
        tok->done = E_EOF;
447
44
        tok->cur = tok->inp;
448
44
        return -1;
449
737
    } else {
450
737
        tok_backup(tok, c);
451
737
    }
452
737
    return c;
453
781
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
22.3k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
22.3k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
22.3k
    do {                                                                  \
464
10
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
10
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
10
            (int)(tok->cur - tok->line_start),                            \
467
10
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
10
        return -1;                                                        \
469
10
    } while (0)
470
471
22.3k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
22.3k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
22.3k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
22.3k
    if (saw_u && saw_t) {
481
3
        RETURN_SYNTAX_ERROR("u", "t");
482
3
    }
483
484
22.3k
    if (saw_b && saw_f) {
485
2
        RETURN_SYNTAX_ERROR("b", "f");
486
2
    }
487
22.3k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
22.3k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
22.3k
#undef RETURN_SYNTAX_ERROR
496
497
22.3k
    return 0;
498
22.3k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.13M
{
503
2.13M
    int c;
504
2.13M
    int blankline, nonascii;
505
506
2.13M
    const char *p_start = NULL;
507
2.13M
    const char *p_end = NULL;
508
2.23M
  nextline:
509
2.23M
    tok->start = NULL;
510
2.23M
    tok->starting_col_offset = -1;
511
2.23M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.23M
    if (tok->atbol) {
516
358k
        int col = 0;
517
358k
        int altcol = 0;
518
358k
        tok->atbol = 0;
519
358k
        int cont_line_col = 0;
520
713k
        for (;;) {
521
713k
            c = tok_nextc(tok);
522
713k
            if (c == ' ') {
523
353k
                col++, altcol++;
524
353k
            }
525
360k
            else if (c == '\t') {
526
727
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
727
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
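                /* Added illustrative note: this rounds the column up to the
                 * next tab stop. With tok->tabsize == 8, a tab at col 3 yields
                 * (3/8 + 1) * 8 == 8 and a tab at col 8 yields 16; for altcol
                 * the stop size is ALTTABSIZE == 1, so each tab just adds 1. */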
528
727
            }
529
359k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
634
                col = altcol = 0; /* For Emacs users */
531
634
            }
532
358k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
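                // Added illustrative note: for a logical line that starts with
                // two spaces, a backslash and a newline, followed by a more
                // deeply indented continuation, the indentation used below is 2
                // (the column of the first backslash), not the whitespace on
                // the continued physical line.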
537
512
                cont_line_col = cont_line_col ? cont_line_col : col;
538
512
                if ((c = tok_continuation_line(tok)) == -1) {
539
29
                    return MAKE_TOKEN(ERRORTOKEN);
540
29
                }
541
512
            }
542
358k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
358k
            else {
546
358k
                break;
547
358k
            }
548
713k
        }
549
358k
        tok_backup(tok, c);
550
358k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
63.1k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
63.1k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
63.1k
            else {
566
63.1k
                blankline = 1; /* Ignore completely */
567
63.1k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
63.1k
        }
571
358k
        if (!blankline && tok->level == 0) {
572
260k
            col = cont_line_col ? cont_line_col : col;
573
260k
            altcol = cont_line_col ? cont_line_col : altcol;
574
260k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
237k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
237k
            }
580
22.5k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.5k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.5k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.5k
                tok->pendin++;
591
12.5k
                tok->indstack[++tok->indent] = col;
592
12.5k
                tok->altindstack[tok->indent] = altcol;
593
12.5k
            }
594
10.0k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
21.9k
                while (tok->indent > 0 &&
597
18.7k
                    col < tok->indstack[tok->indent]) {
598
11.9k
                    tok->pendin--;
599
11.9k
                    tok->indent--;
600
11.9k
                }
601
10.0k
                if (col != tok->indstack[tok->indent]) {
602
9
                    tok->done = E_DEDENT;
603
9
                    tok->cur = tok->inp;
604
9
                    return MAKE_TOKEN(ERRORTOKEN);
605
9
                }
606
9.99k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.99k
            }
610
260k
        }
611
358k
    }
612
613
2.23M
    tok->start = tok->cur;
614
2.23M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.23M
    if (tok->pendin != 0) {
618
24.4k
        if (tok->pendin < 0) {
619
11.9k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
11.9k
            tok->pendin++;
624
11.9k
            return MAKE_TOKEN(DEDENT);
625
11.9k
        }
626
12.5k
        else {
627
12.5k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.5k
            tok->pendin--;
632
12.5k
            return MAKE_TOKEN(INDENT);
633
12.5k
        }
634
24.4k
    }
635
636
    /* Peek ahead at the next character */
637
2.21M
    c = tok_nextc(tok);
638
2.21M
    tok_backup(tok, c);
639
640
2.21M
 again:
641
2.21M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.64M
    do {
644
2.64M
        c = tok_nextc(tok);
645
2.64M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.21M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.21M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.21M
    if (c == '#') {
653
654
32.1k
        const char* p = NULL;
655
32.1k
        const char *prefix, *type_start;
656
32.1k
        int current_starting_col_offset;
657
658
1.04M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.00M
            c = tok_nextc(tok);
660
1.00M
        }
661
662
32.1k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
32.1k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
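                /* Added illustrative note: "# type: ignore[attr-defined]" still
                 * counts as a TYPE_IGNORE because '[' is ASCII and
                 * non-alphanumeric, whereas "# type: ignores" does not and is
                 * emitted as a plain TYPE_COMMENT instead. */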
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
32.1k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
32.1k
    }
728
729
2.21M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.21M
    if (c == EOF) {
735
94.4k
        if (tok->level) {
736
3.90k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.90k
        }
738
90.5k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
94.4k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.11M
    nonascii = 0;
743
2.11M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", f"", and t"". */
745
699k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
888k
        while (1) {
747
888k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
18.7k
                saw_b = 1;
749
18.7k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
869k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
84.8k
                saw_u = 1;
754
84.8k
            }
755
            /* ur"" and ru"" are not supported */
756
784k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
27.7k
                saw_r = 1;
758
27.7k
            }
759
756k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
44.9k
                saw_f = 1;
761
44.9k
            }
762
711k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
34.8k
                saw_t = 1;
764
34.8k
            }
765
676k
            else {
766
676k
                break;
767
676k
            }
768
211k
            c = tok_nextc(tok);
769
211k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
22.3k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
22.3k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
22.3k
                if (status < 0) {
774
10
                    return MAKE_TOKEN(ERRORTOKEN);
775
10
                }
776
777
                // Handle valid f or t string creation:
778
22.3k
                if (saw_f || saw_t) {
779
19.0k
                    goto f_string_quote;
780
19.0k
                }
781
3.30k
                goto letter_quote;
782
22.3k
            }
783
211k
        }
784
3.23M
        while (is_potential_identifier_char(c)) {
785
2.55M
            if (c >= 128) {
786
130k
                nonascii = 1;
787
130k
            }
788
2.55M
            c = tok_nextc(tok);
789
2.55M
        }
790
676k
        tok_backup(tok, c);
791
676k
        if (nonascii && !verify_identifier(tok)) {
792
543
            return MAKE_TOKEN(ERRORTOKEN);
793
543
        }
794
795
676k
        p_start = tok->start;
796
676k
        p_end = tok->cur;
797
798
676k
        return MAKE_TOKEN(NAME);
799
676k
    }
800
801
1.41M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.41M
    if (c == '\n') {
807
260k
        tok->atbol = 1;
808
260k
        if (blankline || tok->level > 0) {
809
97.6k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
97.5k
            goto nextline;
818
97.6k
        }
819
163k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
163k
        p_start = tok->start;
826
163k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
163k
        tok->cont_line = 0;
828
163k
        return MAKE_TOKEN(NEWLINE);
829
163k
    }
830
831
    /* Period or number starting with period? */
832
1.15M
    if (c == '.') {
833
35.5k
        c = tok_nextc(tok);
834
35.5k
        if (Py_ISDIGIT(c)) {
835
4.30k
            goto fraction;
836
31.2k
        } else if (c == '.') {
837
1.83k
            c = tok_nextc(tok);
838
1.83k
            if (c == '.') {
839
1.01k
                p_start = tok->start;
840
1.01k
                p_end = tok->cur;
841
1.01k
                return MAKE_TOKEN(ELLIPSIS);
842
1.01k
            }
843
814
            else {
844
814
                tok_backup(tok, c);
845
814
            }
846
814
            tok_backup(tok, '.');
847
814
        }
848
29.4k
        else {
849
29.4k
            tok_backup(tok, c);
850
29.4k
        }
851
30.2k
        p_start = tok->start;
852
30.2k
        p_end = tok->cur;
853
30.2k
        return MAKE_TOKEN(DOT);
854
35.5k
    }
855
856
    /* Number */
857
1.12M
    if (Py_ISDIGIT(c)) {
858
87.4k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
32.9k
            c = tok_nextc(tok);
861
32.9k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
13.8k
                c = tok_nextc(tok);
864
14.0k
                do {
865
14.0k
                    if (c == '_') {
866
224
                        c = tok_nextc(tok);
867
224
                    }
868
14.0k
                    if (!Py_ISXDIGIT(c)) {
869
15
                        tok_backup(tok, c);
870
15
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
15
                    }
872
73.5k
                    do {
873
73.5k
                        c = tok_nextc(tok);
874
73.5k
                    } while (Py_ISXDIGIT(c));
875
14.0k
                } while (c == '_');
876
13.8k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
3
                    return MAKE_TOKEN(ERRORTOKEN);
878
3
                }
879
13.8k
            }
880
19.1k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
624
                c = tok_nextc(tok);
883
888
                do {
884
888
                    if (c == '_') {
885
265
                        c = tok_nextc(tok);
886
265
                    }
887
888
                    if (c < '0' || c >= '8') {
888
17
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
16
                        else {
893
16
                            tok_backup(tok, c);
894
16
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
16
                        }
896
17
                    }
897
3.11k
                    do {
898
3.11k
                        c = tok_nextc(tok);
899
3.11k
                    } while ('0' <= c && c < '8');
900
871
                } while (c == '_');
901
607
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
606
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
606
            }
909
18.5k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
541
                c = tok_nextc(tok);
912
829
                do {
913
829
                    if (c == '_') {
914
297
                        c = tok_nextc(tok);
915
297
                    }
916
829
                    if (c != '0' && c != '1') {
917
17
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
16
                        else {
921
16
                            tok_backup(tok, c);
922
16
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
16
                        }
924
17
                    }
925
3.64k
                    do {
926
3.64k
                        c = tok_nextc(tok);
927
3.64k
                    } while (c == '0' || c == '1');
928
812
                } while (c == '_');
929
524
                if (Py_ISDIGIT(c)) {
930
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
2
                }
932
522
                if (!verify_end_of_number(tok, c, "binary")) {
933
3
                    return MAKE_TOKEN(ERRORTOKEN);
934
3
                }
935
522
            }
936
17.9k
            else {
937
17.9k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
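                /* Added illustrative note: "0" and "000" tokenize as NUMBER,
                 * "0.5", "0e1" and "0j" jump to the fraction/exponent/imaginary
                 * handling below, while "0777" takes the error path that
                 * suggests using an 0o prefix for octal integers. */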
940
20.9k
                while (1) {
941
20.9k
                    if (c == '_') {
942
250
                        c = tok_nextc(tok);
943
250
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
250
                    }
948
20.9k
                    if (c != '0') {
949
17.9k
                        break;
950
17.9k
                    }
951
2.93k
                    c = tok_nextc(tok);
952
2.93k
                }
953
17.9k
                char* zeros_end = tok->cur;
954
17.9k
                if (Py_ISDIGIT(c)) {
955
419
                    nonzero = 1;
956
419
                    c = tok_decimal_tail(tok);
957
419
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
419
                }
961
17.9k
                if (c == '.') {
962
762
                    c = tok_nextc(tok);
963
762
                    goto fraction;
964
762
                }
965
17.2k
                else if (c == 'e' || c == 'E') {
966
802
                    goto exponent;
967
802
                }
968
16.4k
                else if (c == 'j' || c == 'J') {
969
721
                    goto imaginary;
970
721
                }
971
15.6k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
26
                    tok_backup(tok, c);
974
26
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
26
                            tok, (int)(tok->start + 1 - tok->line_start),
976
26
                            (int)(zeros_end - tok->line_start),
977
26
                            "leading zeros in decimal integer "
978
26
                            "literals are not permitted; "
979
26
                            "use an 0o prefix for octal integers"));
980
26
                }
981
15.6k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
39
                    return MAKE_TOKEN(ERRORTOKEN);
983
39
                }
984
15.6k
            }
985
32.9k
        }
986
54.4k
        else {
987
            /* Decimal */
988
54.4k
            c = tok_decimal_tail(tok);
989
54.4k
            if (c == 0) {
990
17
                return MAKE_TOKEN(ERRORTOKEN);
991
17
            }
992
54.4k
            {
993
                /* Accept floating-point numbers. */
994
54.4k
                if (c == '.') {
995
3.12k
                    c = tok_nextc(tok);
996
8.19k
        fraction:
997
                    /* Fraction */
998
8.19k
                    if (Py_ISDIGIT(c)) {
999
6.69k
                        c = tok_decimal_tail(tok);
1000
6.69k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
6.69k
                    }
1004
8.19k
                }
1005
59.4k
                if (c == 'e' || c == 'E') {
1006
8.60k
                    int e;
1007
9.40k
                  exponent:
1008
9.40k
                    e = c;
1009
                    /* Exponent part */
1010
9.40k
                    c = tok_nextc(tok);
1011
9.40k
                    if (c == '+' || c == '-') {
1012
3.62k
                        c = tok_nextc(tok);
1013
3.62k
                        if (!Py_ISDIGIT(c)) {
1014
12
                            tok_backup(tok, c);
1015
12
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
12
                        }
1017
5.78k
                    } else if (!Py_ISDIGIT(c)) {
1018
528
                        tok_backup(tok, c);
1019
528
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
46
                            return MAKE_TOKEN(ERRORTOKEN);
1021
46
                        }
1022
482
                        tok_backup(tok, e);
1023
482
                        p_start = tok->start;
1024
482
                        p_end = tok->cur;
1025
482
                        return MAKE_TOKEN(NUMBER);
1026
528
                    }
1027
8.86k
                    c = tok_decimal_tail(tok);
1028
8.86k
                    if (c == 0) {
1029
2
                        return MAKE_TOKEN(ERRORTOKEN);
1030
2
                    }
1031
8.86k
                }
1032
59.7k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.14k
        imaginary:
1035
3.14k
                    c = tok_nextc(tok);
1036
3.14k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
12
                        return MAKE_TOKEN(ERRORTOKEN);
1038
12
                    }
1039
3.14k
                }
1040
57.3k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
157
                    return MAKE_TOKEN(ERRORTOKEN);
1042
157
                }
1043
59.7k
            }
1044
59.7k
        }
1045
90.8k
        tok_backup(tok, c);
1046
90.8k
        p_start = tok->start;
1047
90.8k
        p_end = tok->cur;
1048
90.8k
        return MAKE_TOKEN(NUMBER);
1049
87.4k
    }
1050
1051
1.05M
  f_string_quote:
1052
1.05M
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
19.0k
        && (c == '\'' || c == '"'))) {
1054
1055
19.0k
        int quote = c;
1056
19.0k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi-line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
19.0k
        tok->first_lineno = tok->lineno;
1063
19.0k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
19.0k
        int after_quote = tok_nextc(tok);
1067
19.0k
        if (after_quote == quote) {
1068
2.88k
            int after_after_quote = tok_nextc(tok);
1069
2.88k
            if (after_after_quote == quote) {
1070
784
                quote_size = 3;
1071
784
            }
1072
2.10k
            else {
1073
                // TODO: Check this
1074
2.10k
                tok_backup(tok, after_after_quote);
1075
2.10k
                tok_backup(tok, after_quote);
1076
2.10k
            }
1077
2.88k
        }
1078
19.0k
        if (after_quote != quote) {
1079
16.1k
            tok_backup(tok, after_quote);
1080
16.1k
        }
1081
1082
1083
19.0k
        p_start = tok->start;
1084
19.0k
        p_end = tok->cur;
1085
19.0k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
19.0k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
19.0k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
19.0k
        the_current_tok->quote = quote;
1091
19.0k
        the_current_tok->quote_size = quote_size;
1092
19.0k
        the_current_tok->start = tok->start;
1093
19.0k
        the_current_tok->multi_line_start = tok->line_start;
1094
19.0k
        the_current_tok->first_line = tok->lineno;
1095
19.0k
        the_current_tok->start_offset = -1;
1096
19.0k
        the_current_tok->multi_line_start_offset = -1;
1097
19.0k
        the_current_tok->last_expr_buffer = NULL;
1098
19.0k
        the_current_tok->last_expr_size = 0;
1099
19.0k
        the_current_tok->last_expr_end = -1;
1100
19.0k
        the_current_tok->in_format_spec = 0;
1101
19.0k
        the_current_tok->in_debug = 0;
1102
1103
19.0k
        enum string_kind_t string_kind = FSTRING;
1104
19.0k
        switch (*tok->start) {
1105
1.10k
            case 'T':
1106
5.01k
            case 't':
1107
5.01k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
5.01k
                string_kind = TSTRING;
1109
5.01k
                break;
1110
2.38k
            case 'F':
1111
13.4k
            case 'f':
1112
13.4k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
13.4k
                break;
1114
328
            case 'R':
1115
622
            case 'r':
1116
622
                the_current_tok->raw = 1;
1117
622
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
86
                    string_kind = TSTRING;
1119
86
                }
1120
622
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
19.0k
        }
1124
1125
19.0k
        the_current_tok->string_kind = string_kind;
1126
19.0k
        the_current_tok->curly_bracket_depth = 0;
1127
19.0k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
19.0k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
19.0k
    }
1130
1131
1.03M
  letter_quote:
1132
    /* String */
1133
1.03M
    if (c == '\'' || c == '"') {
1134
41.9k
        int quote = c;
1135
41.9k
        int quote_size = 1;             /* 1 or 3 */
1136
41.9k
        int end_quote_size = 0;
1137
41.9k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi-line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
41.9k
        tok->first_lineno = tok->lineno;
1144
41.9k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
41.9k
        c = tok_nextc(tok);
1148
41.9k
        if (c == quote) {
1149
9.05k
            c = tok_nextc(tok);
1150
9.05k
            if (c == quote) {
1151
2.49k
                quote_size = 3;
1152
2.49k
            }
1153
6.56k
            else {
1154
6.56k
                end_quote_size = 1;     /* empty string found */
1155
6.56k
            }
1156
9.05k
        }
1157
41.9k
        if (c != quote) {
1158
39.4k
            tok_backup(tok, c);
1159
39.4k
        }
1160
1161
        /* Get rest of string */
1162
617k
        while (end_quote_size != quote_size) {
1163
576k
            c = tok_nextc(tok);
1164
576k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
576k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
576k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
391
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
391
                tok->cur = (char *)tok->start;
1176
391
                tok->cur++;
1177
391
                tok->line_start = tok->multi_line_start;
1178
391
                int start = tok->lineno;
1179
391
                tok->lineno = tok->first_lineno;
1180
1181
391
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quotes,
1185
                     * and if it does, then this must be a missing '}' token,
1186
                     * so raise the proper error */
1187
52
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
52
                    if (the_current_tok->quote == quote &&
1189
34
                        the_current_tok->quote_size == quote_size) {
1190
30
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
30
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
30
                    }
1193
52
                }
1194
1195
361
                if (quote_size == 3) {
1196
32
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
32
                                     " (detected at line %d)", start);
1198
32
                    if (c != '\n') {
1199
32
                        tok->done = E_EOFS;
1200
32
                    }
1201
32
                    return MAKE_TOKEN(ERRORTOKEN);
1202
32
                }
1203
329
                else {
1204
329
                    if (has_escaped_quote) {
1205
8
                        _PyTokenizer_syntaxerror(
1206
8
                            tok,
1207
8
                            "unterminated string literal (detected at line %d); "
1208
8
                            "perhaps you escaped the end quote?",
1209
8
                            start
1210
8
                        );
1211
321
                    } else {
1212
321
                        _PyTokenizer_syntaxerror(
1213
321
                            tok, "unterminated string literal (detected at line %d)", start
1214
321
                        );
1215
321
                    }
1216
329
                    if (c != '\n') {
1217
5
                        tok->done = E_EOLS;
1218
5
                    }
1219
329
                    return MAKE_TOKEN(ERRORTOKEN);
1220
329
                }
1221
361
            }
1222
575k
            if (c == quote) {
1223
41.6k
                end_quote_size += 1;
1224
41.6k
            }
1225
534k
            else {
1226
534k
                end_quote_size = 0;
1227
534k
                if (c == '\\') {
1228
23.4k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
23.4k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
709
                        has_escaped_quote = 1;
1231
709
                    }
1232
23.4k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
23.4k
                }
1236
534k
            }
1237
575k
        }
1238
1239
41.5k
        p_start = tok->start;
1240
41.5k
        p_end = tok->cur;
1241
41.5k
        return MAKE_TOKEN(STRING);
1242
41.9k
    }
1243
1244
    /* Line continuation */
1245
996k
    if (c == '\\') {
1246
347
        if ((c = tok_continuation_line(tok)) == -1) {
1247
93
            return MAKE_TOKEN(ERRORTOKEN);
1248
93
        }
1249
254
        tok->cont_line = 1;
1250
254
        goto again; /* Read next line */
1251
347
    }
1252
1253
    /* Punctuation character */
1254
995k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
995k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so to ensure that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
64.2k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
64.2k
        int in_format_spec = current_tok->in_format_spec;
1261
64.2k
         int cursor_in_format_with_debug =
1262
64.2k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
64.2k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
64.2k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
64.2k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
64.2k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.97k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.97k
            current_tok->in_format_spec = 1;
1274
4.97k
            p_start = tok->start;
1275
4.97k
            p_end = tok->cur;
1276
4.97k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.97k
        }
1278
64.2k
    }
1279
1280
    /* Check for two-character token */
1281
990k
    {
1282
990k
        int c2 = tok_nextc(tok);
1283
990k
        int current_token = _PyToken_TwoChars(c, c2);
1284
990k
        if (current_token != OP) {
1285
24.8k
            int c3 = tok_nextc(tok);
1286
24.8k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
24.8k
            if (current_token3 != OP) {
1288
1.22k
                current_token = current_token3;
1289
1.22k
            }
1290
23.5k
            else {
1291
23.5k
                tok_backup(tok, c3);
1292
23.5k
            }
1293
24.8k
            p_start = tok->start;
1294
24.8k
            p_end = tok->cur;
1295
24.8k
            return MAKE_TOKEN(current_token);
1296
24.8k
        }
1297
966k
        tok_backup(tok, c2);
1298
966k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
71.5k
    case '(':
1303
106k
    case '[':
1304
154k
    case '{':
1305
154k
        if (tok->level >= MAXLEVEL) {
1306
14
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
14
        }
1308
154k
        tok->parenstack[tok->level] = c;
1309
154k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
154k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
154k
        tok->level++;
1312
154k
        if (INSIDE_FSTRING(tok)) {
1313
35.5k
            current_tok->curly_bracket_depth++;
1314
35.5k
        }
1315
154k
        break;
1316
44.7k
    case ')':
1317
52.0k
    case ']':
1318
81.6k
    case '}':
1319
81.6k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
46
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
46
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
46
        }
1323
81.5k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
222
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
222
        }
1326
81.3k
        if (tok->level > 0) {
1327
81.3k
            tok->level--;
1328
81.3k
            int opening = tok->parenstack[tok->level];
1329
81.3k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
36.7k
                                            (opening == '[' && c == ']') ||
1331
29.4k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactic construct with it, we'll throw an unmatched
1336
                parentheses error. */
1337
64
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
5
                    assert(current_tok->curly_bracket_depth >= 0);
1339
5
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
5
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
2
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
2
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
2
                    }
1344
5
                }
1345
62
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
13
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
13
                            "closing parenthesis '%c' does not match "
1348
13
                            "opening parenthesis '%c' on line %d",
1349
13
                            c, opening, tok->parenlinenostack[tok->level]));
1350
13
                }
1351
49
                else {
1352
49
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
49
                            "closing parenthesis '%c' does not match "
1354
49
                            "opening parenthesis '%c'",
1355
49
                            c, opening));
1356
49
                }
1357
62
            }
1358
81.3k
        }
1359
1360
81.2k
        if (INSIDE_FSTRING(tok)) {
1361
26.0k
            current_tok->curly_bracket_depth--;
1362
26.0k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
26.0k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
24.0k
                current_tok->curly_bracket_expr_start_depth--;
1368
24.0k
                current_tok->kind = TOK_FSTRING_MODE;
1369
24.0k
                current_tok->in_format_spec = 0;
1370
24.0k
                current_tok->in_debug = 0;
1371
24.0k
            }
1372
26.0k
        }
1373
81.2k
        break;
1374
730k
    default:
1375
730k
        break;
1376
966k
    }
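
Openers are pushed onto parenstack together with their line and column; a closer either pops its matching opener or produces one of the diagnostics above. A hedged sketch of the mismatch message (wording taken from this source; exact text may differ across versions):

    # The mismatch diagnostic names the opener recorded on the paren stack.
    try:
        compile("(1, 2]", "<demo>", "eval")
    except SyntaxError as e:
        print(e.msg)   # closing parenthesis ']' does not match opening parenthesis '('
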
1377
1378
965k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
440
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
440
    }
1381
1382
965k
    if (c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
6.04k
        current_tok->in_debug = 1;
1384
6.04k
    }
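
A bare '=' at the top level of an f-string expression marks the self-documenting (debug) form; setting in_debug lets later stages reproduce the expression text ahead of its value. For example:

    # The '=' debug specifier echoes the expression text before its value.
    width = 3
    print(f"{width=}")     # width=3
    print(f"{width = }")   # width = 3  (whitespace around '=' is preserved)
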
1385
1386
    /* Punctuation character */
1387
965k
    p_start = tok->start;
1388
965k
    p_end = tok->cur;
1389
965k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
965k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
59.8k
{
1395
59.8k
    const char *p_start = NULL;
1396
59.8k
    const char *p_end = NULL;
1397
59.8k
    int end_quote_size = 0;
1398
59.8k
    int unicode_escape = 0;
1399
1400
59.8k
    tok->start = tok->cur;
1401
59.8k
    tok->first_lineno = tok->lineno;
1402
59.8k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
59.8k
    int start_char = tok_nextc(tok);
1407
59.8k
    if (start_char == '{') {
1408
18.1k
        int peek1 = tok_nextc(tok);
1409
18.1k
        tok_backup(tok, peek1);
1410
18.1k
        tok_backup(tok, start_char);
1411
18.1k
        if (peek1 != '{') {
1412
14.2k
            current_tok->curly_bracket_expr_start_depth++;
1413
14.2k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
3
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
3
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
3
            }
1417
14.2k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
14.2k
            return tok_get_normal_mode(tok, current_tok, token);
1419
14.2k
        }
1420
18.1k
    }
1421
41.6k
    else {
1422
41.6k
        tok_backup(tok, start_char);
1423
41.6k
    }
1424
1425
    // Check if we are at the end of the string
1426
64.2k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
50.2k
        int quote = tok_nextc(tok);
1428
50.2k
        if (quote != current_tok->quote) {
1429
31.4k
            tok_backup(tok, quote);
1430
31.4k
            goto f_string_middle;
1431
31.4k
        }
1432
50.2k
    }
1433
1434
14.0k
    if (current_tok->last_expr_buffer != NULL) {
1435
8.83k
        PyMem_Free(current_tok->last_expr_buffer);
1436
8.83k
        current_tok->last_expr_buffer = NULL;
1437
8.83k
        current_tok->last_expr_size = 0;
1438
8.83k
        current_tok->last_expr_end = -1;
1439
8.83k
    }
1440
1441
14.0k
    p_start = tok->start;
1442
14.0k
    p_end = tok->cur;
1443
14.0k
    tok->tok_mode_stack_index--;
1444
14.0k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
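
When the closing quotes are found, the saved expression buffer is released, the f-string mode is popped from tok_mode_stack, and FSTRING_END/TSTRING_END is emitted. A rough sketch of the full token stream for a simple f-string, assuming CPython 3.12+ where the tokenize module mirrors these kinds:

    import io, tokenize

    # Roughly: FSTRING_START 'f"', FSTRING_MIDDLE 'a', OP '{', NUMBER '1', OP '+',
    # NUMBER '1', OP '}', FSTRING_MIDDLE 'b', FSTRING_END '"', then NEWLINE/ENDMARKER.
    for tok in tokenize.generate_tokens(io.StringIO('f"a{1 + 1}b"\n').readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
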
1445
1446
31.4k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
31.4k
    tok->multi_line_start = tok->line_start;
1451
229k
    while (end_quote_size != current_tok->quote_size) {
1452
224k
        int c = tok_nextc(tok);
1453
224k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
224k
        int in_format_spec = (
1457
224k
                current_tok->in_format_spec
1458
12.1k
                &&
1459
12.1k
                INSIDE_FSTRING_EXPR(current_tok)
1460
224k
        );
1461
1462
224k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
383
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
383
            if (in_format_spec && c == '\n') {
1471
47
                if (current_tok->quote_size == 1) {
1472
47
                    return MAKE_TOKEN(
1473
47
                        _PyTokenizer_syntaxerror(
1474
47
                            tok,
1475
47
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
47
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
47
                        )
1478
47
                    );
1479
47
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
47
            }
1487
1488
383
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of the string, and report the error
1491
            // from the initial quote character
1492
336
            tok->cur = (char *)current_tok->start;
1493
336
            tok->cur++;
1494
336
            tok->line_start = current_tok->multi_line_start;
1495
336
            int start = tok->lineno;
1496
1497
336
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
336
            tok->lineno = the_current_tok->first_line;
1499
1500
336
            if (current_tok->quote_size == 3) {
1501
22
                _PyTokenizer_syntaxerror(tok,
1502
22
                                    "unterminated triple-quoted %c-string literal"
1503
22
                                    " (detected at line %d)",
1504
22
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
22
                if (c != '\n') {
1506
22
                    tok->done = E_EOFS;
1507
22
                }
1508
22
                return MAKE_TOKEN(ERRORTOKEN);
1509
22
            }
1510
314
            else {
1511
314
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
314
                                    "unterminated %c-string literal (detected at"
1513
314
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
314
            }
1515
336
        }
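
Hitting EOF, or a newline inside a single-quoted literal, rewinds the reported location to the opening quote so the error points at the line where the f-string started. A hedged illustration (the exact message text may vary by version):

    # Unterminated f-strings are reported from the line of the opening quote.
    try:
        compile('x = f"never closed', "<demo>", "exec")
    except SyntaxError as e:
        print(e.msg)   # e.g. "unterminated f-string literal (detected at line 1)"
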
1516
1517
223k
        if (c == current_tok->quote) {
1518
10.2k
            end_quote_size += 1;
1519
10.2k
            continue;
1520
213k
        } else {
1521
213k
            end_quote_size = 0;
1522
213k
        }
1523
1524
213k
        if (c == '{') {
1525
19.9k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
19.9k
            int peek = tok_nextc(tok);
1529
19.9k
            if (peek != '{' || in_format_spec) {
1530
15.7k
                tok_backup(tok, peek);
1531
15.7k
                tok_backup(tok, c);
1532
15.7k
                current_tok->curly_bracket_expr_start_depth++;
1533
15.7k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
7
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
7
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
7
                }
1537
15.7k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
15.7k
                current_tok->in_format_spec = 0;
1539
15.7k
                p_start = tok->start;
1540
15.7k
                p_end = tok->cur;
1541
15.7k
            } else {
1542
4.25k
                p_start = tok->start;
1543
4.25k
                p_end = tok->cur - 1;
1544
4.25k
            }
1545
19.9k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
193k
        } else if (c == '}') {
1547
5.42k
            if (unicode_escape) {
1548
382
                p_start = tok->start;
1549
382
                p_end = tok->cur;
1550
382
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
382
            }
1552
5.04k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass it here.
1558
5.04k
            int cursor = current_tok->curly_bracket_depth;
1559
5.04k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.44k
                p_start = tok->start;
1561
1.44k
                p_end = tok->cur - 1;
1562
3.60k
            } else {
1563
3.60k
                tok_backup(tok, peek);
1564
3.60k
                tok_backup(tok, c);
1565
3.60k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.60k
                current_tok->in_format_spec = 0;
1567
3.60k
                p_start = tok->start;
1568
3.60k
                p_end = tok->cur;
1569
3.60k
            }
1570
5.04k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
187k
        } else if (c == '\\') {
1572
6.06k
            int peek = tok_nextc(tok);
1573
6.06k
            if (peek == '\r') {
1574
0
                peek = tok_nextc(tok);
1575
0
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore it and return control back
1578
            // to the loop for the next iteration.
1579
6.06k
            if (peek == '{' || peek == '}') {
1580
899
                if (!current_tok->raw) {
1581
705
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
0
                        return MAKE_TOKEN(ERRORTOKEN);
1583
0
                    }
1584
705
                }
1585
899
                tok_backup(tok, peek);
1586
899
                continue;
1587
899
            }
1588
1589
5.16k
            if (!current_tok->raw) {
1590
4.91k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
596
                    peek = tok_nextc(tok);
1593
596
                    if (peek == '{') {
1594
390
                        unicode_escape = 1;
1595
390
                    } else {
1596
206
                        tok_backup(tok, peek);
1597
206
                    }
1598
596
                }
1599
4.91k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
5.16k
        }
1603
213k
    }
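
Within the literal part, doubled braces remain literal text, and \N{...} named escapes set unicode_escape so the name's closing '}' is not mistaken for the end of an expression. For example:

    # Doubled braces stay literal; \N{...} names are consumed as literal text.
    print(f"{{not an expression}} \N{BULLET}")   # prints: {not an expression} •
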
1604
1605
    // Back up the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1607
12.6k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.91k
        tok_backup(tok, current_tok->quote);
1609
6.91k
    }
1610
5.69k
    p_start = tok->start;
1611
5.69k
    p_end = tok->cur;
1612
5.69k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
31.4k
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.18M
{
1618
2.18M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.18M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.12M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.12M
    } else {
1622
59.8k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
59.8k
    }
1624
2.18M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.18M
{
1629
2.18M
    int result = tok_get(tok, token);
1630
2.18M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.18M
    return result;
1635
2.18M
}
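
The decoding_erred check turns a pending decoding failure into ERRORTOKEN with tok->done = E_DECODE, so callers see an error instead of a half-tokenized stream. A hedged illustration at the Python level (the exact message text varies):

    # A source-decoding failure surfaces as a SyntaxError, not as a stream of tokens.
    try:
        compile(b'x = "\xff"\n', "<demo>", "exec")
    except SyntaxError as e:
        print(e.msg)   # e.g. "Non-UTF-8 code starting with '\xff' ..."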