Coverage Report

Created: 2026-04-12 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.35k
#define ALTTABSIZE 1
11
12
2.11M
#define is_potential_identifier_start(c) (\
13
2.11M
              (c >= 'a' && c <= 'z')\
14
2.11M
               || (c >= 'A' && c <= 'Z')\
15
2.11M
               || c == '_'\
16
2.11M
               || (c >= 128))
17
18
3.34M
#define is_potential_identifier_char(c) (\
19
3.34M
              (c >= 'a' && c <= 'z')\
20
3.34M
               || (c >= 'A' && c <= 'Z')\
21
3.34M
               || (c >= '0' && c <= '9')\
22
3.34M
               || c == '_'\
23
3.34M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
2.31M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
18.7k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
21
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
2.18M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
266k
{
55
266k
    return memchr(str, 0, size) != NULL;
56
266k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.8M
{
62
11.8M
    int rc;
63
12.1M
    for (;;) {
64
12.1M
        if (tok->cur != tok->inp) {
65
11.5M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.5M
            tok->col_offset++;
70
11.5M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.5M
        }
72
557k
        if (tok->done != E_OK) {
73
193k
            return EOF;
74
193k
        }
75
363k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
363k
        if (!rc) {
84
96.8k
            tok->cur = tok->inp;
85
96.8k
            return EOF;
86
96.8k
        }
87
266k
        tok->line_start = tok->cur;
88
89
266k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
266k
    }
95
11.8M
    Py_UNREACHABLE();
96
11.8M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.59M
{
102
4.59M
    if (c != EOF) {
103
4.40M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.40M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.40M
        tok->col_offset--;
110
4.40M
    }
111
4.59M
}
112
113
static int
114
27.3k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
27.3k
    assert(token != NULL);
116
27.3k
    assert(c == '}' || c == ':' || c == '!');
117
27.3k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
27.3k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
16.8k
        return 0;
121
16.8k
    }
122
10.5k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
10.5k
    int hash_detected = 0;
126
10.5k
    int in_string = 0;
127
10.5k
    char quote_char = 0;
128
129
1.73M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.72M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.72M
        if (ch == '\\') {
134
32.7k
            i++;
135
32.7k
            continue;
136
32.7k
        }
137
138
        // Handle quotes
139
1.68M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
303k
            if (!in_string) {
148
111k
                in_string = 1;
149
111k
                quote_char = ch;
150
111k
            }
151
192k
            else if (ch == quote_char) {
152
110k
                in_string = 0;
153
110k
            }
154
303k
            continue;
155
303k
        }
156
157
        // Check for # outside strings
158
1.38M
        if (ch == '#' && !in_string) {
159
827
            hash_detected = 1;
160
827
            break;
161
827
        }
162
1.38M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
10.5k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
827
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
827
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
827
        Py_ssize_t i = 0;  // Input position
172
827
        Py_ssize_t j = 0;  // Output position
173
827
        in_string = 0;     // Whether we're in a string
174
827
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
52.0k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
51.2k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
51.2k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
7.30k
                if (!in_string) {
184
3.12k
                    in_string = 1;
185
3.12k
                    quote_char = ch;
186
4.17k
                } else if (ch == quote_char) {
187
3.11k
                    in_string = 0;
188
3.11k
                }
189
7.30k
                result[j++] = ch;
190
7.30k
            }
191
            // Skip comments
192
43.9k
            else if (ch == '#' && !in_string) {
193
31.3k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
30.5k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
30.3k
                    i++;
196
30.3k
                }
197
1.00k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
234
                    result[j++] = '\n';
199
234
                }
200
1.00k
            }
201
            // Copy other chars
202
42.9k
            else {
203
42.9k
                result[j++] = ch;
204
42.9k
            }
205
51.2k
            i++;
206
51.2k
        }
207
208
827
        result[j] = '\0';  // Null-terminate the result string
209
827
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
827
        PyMem_Free(result);
211
9.69k
    } else {
212
9.69k
        res = PyUnicode_DecodeUTF8(
213
9.69k
            tok_mode->last_expr_buffer,
214
9.69k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
9.69k
            NULL
216
9.69k
        );
217
9.69k
    }
218
219
10.5k
    if (!res) {
220
0
        return -1;
221
0
    }
222
10.5k
    token->metadata = res;
223
10.5k
    return 0;
224
10.5k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
73.2k
{
229
73.2k
    assert(tok->cur != NULL);
230
231
73.2k
    Py_ssize_t size = strlen(tok->cur);
232
73.2k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
73.2k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
45.9k
        case '{':
252
45.9k
            if (tok_mode->last_expr_buffer != NULL) {
253
32.6k
                PyMem_Free(tok_mode->last_expr_buffer);
254
32.6k
            }
255
45.9k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
45.9k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
45.9k
            tok_mode->last_expr_size = size;
260
45.9k
            tok_mode->last_expr_end = -1;
261
45.9k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
45.9k
            break;
263
21.7k
        case '}':
264
24.1k
        case '!':
265
24.1k
            tok_mode->last_expr_end = strlen(tok->start);
266
24.1k
            break;
267
3.24k
        case ':':
268
3.24k
            if (tok_mode->last_expr_end == -1) {
269
2.99k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.99k
            }
271
3.24k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
73.2k
    }
275
73.2k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
73.2k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
8.78k
{
284
8.78k
    const char *s = test;
285
8.78k
    int res = 0;
286
22.4k
    while (1) {
287
22.4k
        int c = tok_nextc(tok);
288
22.4k
        if (*s == 0) {
289
8.66k
            res = !is_potential_identifier_char(c);
290
8.66k
        }
291
13.8k
        else if (c == *s) {
292
13.7k
            s++;
293
13.7k
            continue;
294
13.7k
        }
295
296
8.78k
        tok_backup(tok, c);
297
22.4k
        while (s != test) {
298
13.7k
            tok_backup(tok, *--s);
299
13.7k
        }
300
8.78k
        return res;
301
22.4k
    }
302
8.78k
}
303
304
static int
305
92.1k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
92.1k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
92.0k
    int r = 0;
322
92.0k
    if (c == 'a') {
323
894
        r = lookahead(tok, "nd");
324
894
    }
325
91.1k
    else if (c == 'e') {
326
528
        r = lookahead(tok, "lse");
327
528
    }
328
90.6k
    else if (c == 'f') {
329
2.92k
        r = lookahead(tok, "or");
330
2.92k
    }
331
87.7k
    else if (c == 'i') {
332
1.61k
        int c2 = tok_nextc(tok);
333
1.61k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.60k
            r = 1;
335
1.60k
        }
336
1.61k
        tok_backup(tok, c2);
337
1.61k
    }
338
86.0k
    else if (c == 'o') {
339
4.14k
        r = lookahead(tok, "r");
340
4.14k
    }
341
81.9k
    else if (c == 'n') {
342
287
        r = lookahead(tok, "ot");
343
287
    }
344
92.0k
    if (r) {
345
10.2k
        tok_backup(tok, c);
346
10.2k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.2k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.2k
        tok_nextc(tok);
352
10.2k
    }
353
81.7k
    else /* In future releases, only error will remain. */
354
81.7k
    if (c < 128 && is_potential_identifier_char(c)) {
355
241
        tok_backup(tok, c);
356
241
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
241
        return 0;
358
241
    }
359
91.8k
    return 1;
360
92.0k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.5k
{
366
11.5k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.5k
    PyObject *s;
370
11.5k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.5k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.5k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
11.5k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.5k
    assert(invalid >= 0);
384
11.5k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.5k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
524
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
524
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
352
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
352
            if (s != NULL) {
391
352
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
352
            }
393
352
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
352
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
352
        }
399
524
        Py_DECREF(s);
400
524
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
269
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
269
        }
403
255
        else {
404
255
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
255
        }
406
524
        return 0;
407
524
    }
408
11.0k
    Py_DECREF(s);
409
11.0k
    return 1;
410
11.5k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
64.5k
{
415
64.5k
    int c;
416
417
65.0k
    while (1) {
418
212k
        do {
419
212k
            c = tok_nextc(tok);
420
212k
        } while (Py_ISDIGIT(c));
421
65.0k
        if (c != '_') {
422
64.5k
            break;
423
64.5k
        }
424
467
        c = tok_nextc(tok);
425
467
        if (!Py_ISDIGIT(c)) {
426
21
            tok_backup(tok, c);
427
21
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
21
            return 0;
429
21
        }
430
467
    }
431
64.5k
    return c;
432
64.5k
}
433
434
static inline int
435
849
tok_continuation_line(struct tok_state *tok) {
436
849
    int c = tok_nextc(tok);
437
849
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
849
    if (c != '\n') {
441
78
        tok->done = E_LINECONT;
442
78
        return -1;
443
78
    }
444
771
    c = tok_nextc(tok);
445
771
    if (c == EOF) {
446
40
        tok->done = E_EOF;
447
40
        tok->cur = tok->inp;
448
40
        return -1;
449
731
    } else {
450
731
        tok_backup(tok, c);
451
731
    }
452
731
    return c;
453
771
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.9k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.9k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.9k
    do {                                                                  \
464
11
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
11
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
11
            (int)(tok->cur - tok->line_start),                            \
467
11
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
11
        return -1;                                                        \
469
11
    } while (0)
470
471
21.9k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.9k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.9k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.9k
    if (saw_u && saw_t) {
481
3
        RETURN_SYNTAX_ERROR("u", "t");
482
3
    }
483
484
21.9k
    if (saw_b && saw_f) {
485
3
        RETURN_SYNTAX_ERROR("b", "f");
486
3
    }
487
21.9k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
21.9k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
21.9k
#undef RETURN_SYNTAX_ERROR
496
497
21.9k
    return 0;
498
21.9k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.13M
{
503
2.13M
    int c;
504
2.13M
    int blankline, nonascii;
505
506
2.13M
    const char *p_start = NULL;
507
2.13M
    const char *p_end = NULL;
508
2.23M
  nextline:
509
2.23M
    tok->start = NULL;
510
2.23M
    tok->starting_col_offset = -1;
511
2.23M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.23M
    if (tok->atbol) {
516
354k
        int col = 0;
517
354k
        int altcol = 0;
518
354k
        tok->atbol = 0;
519
354k
        int cont_line_col = 0;
520
719k
        for (;;) {
521
719k
            c = tok_nextc(tok);
522
719k
            if (c == ' ') {
523
363k
                col++, altcol++;
524
363k
            }
525
356k
            else if (c == '\t') {
526
679
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
679
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
679
            }
529
355k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
626
                col = altcol = 0; /* For Emacs users */
531
626
            }
532
354k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
505
                cont_line_col = cont_line_col ? cont_line_col : col;
538
505
                if ((c = tok_continuation_line(tok)) == -1) {
539
31
                    return MAKE_TOKEN(ERRORTOKEN);
540
31
                }
541
505
            }
542
354k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
354k
            else {
546
354k
                break;
547
354k
            }
548
719k
        }
549
354k
        tok_backup(tok, c);
550
354k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
60.5k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
60.5k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
60.5k
            else {
566
60.5k
                blankline = 1; /* Ignore completely */
567
60.5k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
60.5k
        }
571
354k
        if (!blankline && tok->level == 0) {
572
258k
            col = cont_line_col ? cont_line_col : col;
573
258k
            altcol = cont_line_col ? cont_line_col : altcol;
574
258k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
235k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
235k
            }
580
22.8k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.7k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.7k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.7k
                tok->pendin++;
591
12.7k
                tok->indstack[++tok->indent] = col;
592
12.7k
                tok->altindstack[tok->indent] = altcol;
593
12.7k
            }
594
10.1k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
22.3k
                while (tok->indent > 0 &&
597
19.0k
                    col < tok->indstack[tok->indent]) {
598
12.1k
                    tok->pendin--;
599
12.1k
                    tok->indent--;
600
12.1k
                }
601
10.1k
                if (col != tok->indstack[tok->indent]) {
602
9
                    tok->done = E_DEDENT;
603
9
                    tok->cur = tok->inp;
604
9
                    return MAKE_TOKEN(ERRORTOKEN);
605
9
                }
606
10.1k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
10.1k
            }
610
258k
        }
611
354k
    }
612
613
2.23M
    tok->start = tok->cur;
614
2.23M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.23M
    if (tok->pendin != 0) {
618
24.8k
        if (tok->pendin < 0) {
619
12.1k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
12.1k
            tok->pendin++;
624
12.1k
            return MAKE_TOKEN(DEDENT);
625
12.1k
        }
626
12.7k
        else {
627
12.7k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.7k
            tok->pendin--;
632
12.7k
            return MAKE_TOKEN(INDENT);
633
12.7k
        }
634
24.8k
    }
635
636
    /* Peek ahead at the next character */
637
2.20M
    c = tok_nextc(tok);
638
2.20M
    tok_backup(tok, c);
639
640
2.20M
 again:
641
2.20M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.64M
    do {
644
2.64M
        c = tok_nextc(tok);
645
2.64M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.20M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.20M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.20M
    if (c == '#') {
653
654
33.0k
        const char* p = NULL;
655
33.0k
        const char *prefix, *type_start;
656
33.0k
        int current_starting_col_offset;
657
658
1.06M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.03M
            c = tok_nextc(tok);
660
1.03M
        }
661
662
33.0k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
33.0k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
33.0k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
33.0k
    }
728
729
2.20M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.20M
    if (c == EOF) {
735
96.8k
        if (tok->level) {
736
3.74k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.74k
        }
738
93.0k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
96.8k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.11M
    nonascii = 0;
743
2.11M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
690k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
875k
        while (1) {
747
875k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
18.2k
                saw_b = 1;
749
18.2k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
857k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
87.7k
                saw_u = 1;
754
87.7k
            }
755
            /* ur"" and ru"" are not supported */
756
769k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
26.4k
                saw_r = 1;
758
26.4k
            }
759
743k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
41.7k
                saw_f = 1;
761
41.7k
            }
762
701k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
32.5k
                saw_t = 1;
764
32.5k
            }
765
669k
            else {
766
669k
                break;
767
669k
            }
768
206k
            c = tok_nextc(tok);
769
206k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
21.9k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
21.9k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
21.9k
                if (status < 0) {
774
11
                    return MAKE_TOKEN(ERRORTOKEN);
775
11
                }
776
777
                // Handle valid f or t string creation:
778
21.9k
                if (saw_f || saw_t) {
779
18.7k
                    goto f_string_quote;
780
18.7k
                }
781
3.21k
                goto letter_quote;
782
21.9k
            }
783
206k
        }
784
3.25M
        while (is_potential_identifier_char(c)) {
785
2.58M
            if (c >= 128) {
786
126k
                nonascii = 1;
787
126k
            }
788
2.58M
            c = tok_nextc(tok);
789
2.58M
        }
790
669k
        tok_backup(tok, c);
791
669k
        if (nonascii && !verify_identifier(tok)) {
792
524
            return MAKE_TOKEN(ERRORTOKEN);
793
524
        }
794
795
668k
        p_start = tok->start;
796
668k
        p_end = tok->cur;
797
798
668k
        return MAKE_TOKEN(NAME);
799
669k
    }
800
801
1.41M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.41M
    if (c == '\n') {
807
254k
        tok->atbol = 1;
808
254k
        if (blankline || tok->level > 0) {
809
95.7k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
95.6k
            goto nextline;
818
95.7k
        }
819
159k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
159k
        p_start = tok->start;
826
159k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
159k
        tok->cont_line = 0;
828
159k
        return MAKE_TOKEN(NEWLINE);
829
159k
    }
830
831
    /* Period or number starting with period? */
832
1.16M
    if (c == '.') {
833
33.8k
        c = tok_nextc(tok);
834
33.8k
        if (Py_ISDIGIT(c)) {
835
4.32k
            goto fraction;
836
29.5k
        } else if (c == '.') {
837
1.50k
            c = tok_nextc(tok);
838
1.50k
            if (c == '.') {
839
942
                p_start = tok->start;
840
942
                p_end = tok->cur;
841
942
                return MAKE_TOKEN(ELLIPSIS);
842
942
            }
843
561
            else {
844
561
                tok_backup(tok, c);
845
561
            }
846
561
            tok_backup(tok, '.');
847
561
        }
848
28.0k
        else {
849
28.0k
            tok_backup(tok, c);
850
28.0k
        }
851
28.6k
        p_start = tok->start;
852
28.6k
        p_end = tok->cur;
853
28.6k
        return MAKE_TOKEN(DOT);
854
33.8k
    }
855
856
    /* Number */
857
1.13M
    if (Py_ISDIGIT(c)) {
858
87.8k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
38.6k
            c = tok_nextc(tok);
861
38.6k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
14.4k
                c = tok_nextc(tok);
864
14.5k
                do {
865
14.5k
                    if (c == '_') {
866
73
                        c = tok_nextc(tok);
867
73
                    }
868
14.5k
                    if (!Py_ISXDIGIT(c)) {
869
14
                        tok_backup(tok, c);
870
14
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
14
                    }
872
74.0k
                    do {
873
74.0k
                        c = tok_nextc(tok);
874
74.0k
                    } while (Py_ISXDIGIT(c));
875
14.5k
                } while (c == '_');
876
14.4k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
14.4k
            }
880
24.1k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
612
                c = tok_nextc(tok);
883
863
                do {
884
863
                    if (c == '_') {
885
252
                        c = tok_nextc(tok);
886
252
                    }
887
863
                    if (c < '0' || c >= '8') {
888
16
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
15
                        else {
893
15
                            tok_backup(tok, c);
894
15
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
15
                        }
896
16
                    }
897
3.14k
                    do {
898
3.14k
                        c = tok_nextc(tok);
899
3.14k
                    } while ('0' <= c && c < '8');
900
847
                } while (c == '_');
901
596
                if (Py_ISDIGIT(c)) {
902
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
2
                            "invalid digit '%c' in octal literal", c));
904
2
                }
905
594
                if (!verify_end_of_number(tok, c, "octal")) {
906
5
                    return MAKE_TOKEN(ERRORTOKEN);
907
5
                }
908
594
            }
909
23.5k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
536
                c = tok_nextc(tok);
912
818
                do {
913
818
                    if (c == '_') {
914
289
                        c = tok_nextc(tok);
915
289
                    }
916
818
                    if (c != '0' && c != '1') {
917
20
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
19
                        else {
921
19
                            tok_backup(tok, c);
922
19
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
19
                        }
924
20
                    }
925
3.35k
                    do {
926
3.35k
                        c = tok_nextc(tok);
927
3.35k
                    } while (c == '0' || c == '1');
928
798
                } while (c == '_');
929
516
                if (Py_ISDIGIT(c)) {
930
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
1
                }
932
515
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
515
            }
936
23.0k
            else {
937
23.0k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
26.2k
                while (1) {
941
26.2k
                    if (c == '_') {
942
226
                        c = tok_nextc(tok);
943
226
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
226
                    }
948
26.2k
                    if (c != '0') {
949
23.0k
                        break;
950
23.0k
                    }
951
3.17k
                    c = tok_nextc(tok);
952
3.17k
                }
953
23.0k
                char* zeros_end = tok->cur;
954
23.0k
                if (Py_ISDIGIT(c)) {
955
399
                    nonzero = 1;
956
399
                    c = tok_decimal_tail(tok);
957
399
                    if (c == 0) {
958
3
                        return MAKE_TOKEN(ERRORTOKEN);
959
3
                    }
960
399
                }
961
23.0k
                if (c == '.') {
962
756
                    c = tok_nextc(tok);
963
756
                    goto fraction;
964
756
                }
965
22.2k
                else if (c == 'e' || c == 'E') {
966
792
                    goto exponent;
967
792
                }
968
21.4k
                else if (c == 'j' || c == 'J') {
969
530
                    goto imaginary;
970
530
                }
971
20.9k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
24
                    tok_backup(tok, c);
974
24
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
24
                            tok, (int)(tok->start + 1 - tok->line_start),
976
24
                            (int)(zeros_end - tok->line_start),
977
24
                            "leading zeros in decimal integer "
978
24
                            "literals are not permitted; "
979
24
                            "use an 0o prefix for octal integers"));
980
24
                }
981
20.9k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
32
                    return MAKE_TOKEN(ERRORTOKEN);
983
32
                }
984
20.9k
            }
985
38.6k
        }
986
49.2k
        else {
987
            /* Decimal */
988
49.2k
            c = tok_decimal_tail(tok);
989
49.2k
            if (c == 0) {
990
15
                return MAKE_TOKEN(ERRORTOKEN);
991
15
            }
992
49.2k
            {
993
                /* Accept floating-point numbers. */
994
49.2k
                if (c == '.') {
995
2.86k
                    c = tok_nextc(tok);
996
7.95k
        fraction:
997
                    /* Fraction */
998
7.95k
                    if (Py_ISDIGIT(c)) {
999
6.67k
                        c = tok_decimal_tail(tok);
1000
6.67k
                        if (c == 0) {
1001
2
                            return MAKE_TOKEN(ERRORTOKEN);
1002
2
                        }
1003
6.67k
                    }
1004
7.95k
                }
1005
54.3k
                if (c == 'e' || c == 'E') {
1006
8.01k
                    int e;
1007
8.80k
                  exponent:
1008
8.80k
                    e = c;
1009
                    /* Exponent part */
1010
8.80k
                    c = tok_nextc(tok);
1011
8.80k
                    if (c == '+' || c == '-') {
1012
2.68k
                        c = tok_nextc(tok);
1013
2.68k
                        if (!Py_ISDIGIT(c)) {
1014
11
                            tok_backup(tok, c);
1015
11
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
11
                        }
1017
6.12k
                    } else if (!Py_ISDIGIT(c)) {
1018
526
                        tok_backup(tok, c);
1019
526
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
43
                            return MAKE_TOKEN(ERRORTOKEN);
1021
43
                        }
1022
483
                        tok_backup(tok, e);
1023
483
                        p_start = tok->start;
1024
483
                        p_end = tok->cur;
1025
483
                        return MAKE_TOKEN(NUMBER);
1026
526
                    }
1027
8.26k
                    c = tok_decimal_tail(tok);
1028
8.26k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
8.26k
                }
1032
54.5k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.05k
        imaginary:
1035
3.05k
                    c = tok_nextc(tok);
1036
3.05k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
8
                        return MAKE_TOKEN(ERRORTOKEN);
1038
8
                    }
1039
3.05k
                }
1040
52.0k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
151
                    return MAKE_TOKEN(ERRORTOKEN);
1042
151
                }
1043
54.5k
            }
1044
54.5k
        }
1045
91.3k
        tok_backup(tok, c);
1046
91.3k
        p_start = tok->start;
1047
91.3k
        p_end = tok->cur;
1048
91.3k
        return MAKE_TOKEN(NUMBER);
1049
87.8k
    }
1050
1051
1.06M
  f_string_quote:
1052
1.06M
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
18.7k
        && (c == '\'' || c == '"'))) {
1054
1055
18.7k
        int quote = c;
1056
18.7k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
18.7k
        tok->first_lineno = tok->lineno;
1063
18.7k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
18.7k
        int after_quote = tok_nextc(tok);
1067
18.7k
        if (after_quote == quote) {
1068
2.91k
            int after_after_quote = tok_nextc(tok);
1069
2.91k
            if (after_after_quote == quote) {
1070
729
                quote_size = 3;
1071
729
            }
1072
2.18k
            else {
1073
                // TODO: Check this
1074
2.18k
                tok_backup(tok, after_after_quote);
1075
2.18k
                tok_backup(tok, after_quote);
1076
2.18k
            }
1077
2.91k
        }
1078
18.7k
        if (after_quote != quote) {
1079
15.8k
            tok_backup(tok, after_quote);
1080
15.8k
        }
1081
1082
1083
18.7k
        p_start = tok->start;
1084
18.7k
        p_end = tok->cur;
1085
18.7k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
2
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
2
        }
1088
18.7k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
18.7k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
18.7k
        the_current_tok->quote = quote;
1091
18.7k
        the_current_tok->quote_size = quote_size;
1092
18.7k
        the_current_tok->start = tok->start;
1093
18.7k
        the_current_tok->multi_line_start = tok->line_start;
1094
18.7k
        the_current_tok->first_line = tok->lineno;
1095
18.7k
        the_current_tok->start_offset = -1;
1096
18.7k
        the_current_tok->multi_line_start_offset = -1;
1097
18.7k
        the_current_tok->last_expr_buffer = NULL;
1098
18.7k
        the_current_tok->last_expr_size = 0;
1099
18.7k
        the_current_tok->last_expr_end = -1;
1100
18.7k
        the_current_tok->in_format_spec = 0;
1101
18.7k
        the_current_tok->in_debug = 0;
1102
1103
18.7k
        enum string_kind_t string_kind = FSTRING;
1104
18.7k
        switch (*tok->start) {
1105
980
            case 'T':
1106
5.45k
            case 't':
1107
5.45k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
5.45k
                string_kind = TSTRING;
1109
5.45k
                break;
1110
2.23k
            case 'F':
1111
12.6k
            case 'f':
1112
12.6k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
12.6k
                break;
1114
331
            case 'R':
1115
619
            case 'r':
1116
619
                the_current_tok->raw = 1;
1117
619
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
83
                    string_kind = TSTRING;
1119
83
                }
1120
619
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
18.7k
        }
1124
1125
18.7k
        the_current_tok->string_kind = string_kind;
1126
18.7k
        the_current_tok->curly_bracket_depth = 0;
1127
18.7k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
18.7k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
18.7k
    }
1130
1131
1.04M
  letter_quote:
1132
    /* String */
1133
1.04M
    if (c == '\'' || c == '"') {
1134
41.1k
        int quote = c;
1135
41.1k
        int quote_size = 1;             /* 1 or 3 */
1136
41.1k
        int end_quote_size = 0;
1137
41.1k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
41.1k
        tok->first_lineno = tok->lineno;
1144
41.1k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
41.1k
        c = tok_nextc(tok);
1148
41.1k
        if (c == quote) {
1149
7.76k
            c = tok_nextc(tok);
1150
7.76k
            if (c == quote) {
1151
1.68k
                quote_size = 3;
1152
1.68k
            }
1153
6.07k
            else {
1154
6.07k
                end_quote_size = 1;     /* empty string found */
1155
6.07k
            }
1156
7.76k
        }
1157
41.1k
        if (c != quote) {
1158
39.4k
            tok_backup(tok, c);
1159
39.4k
        }
1160
1161
        /* Get rest of string */
1162
616k
        while (end_quote_size != quote_size) {
1163
575k
            c = tok_nextc(tok);
1164
575k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
575k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
575k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
353
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
353
                tok->cur = (char *)tok->start;
1176
353
                tok->cur++;
1177
353
                tok->line_start = tok->multi_line_start;
1178
353
                int start = tok->lineno;
1179
353
                tok->lineno = tok->first_lineno;
1180
1181
353
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * does the initial quote matches with f-strings quotes
1185
                     * and if it is, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
41
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
41
                    if (the_current_tok->quote == quote &&
1189
31
                        the_current_tok->quote_size == quote_size) {
1190
28
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
28
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
28
                    }
1193
41
                }
1194
1195
325
                if (quote_size == 3) {
1196
27
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
27
                                     " (detected at line %d)", start);
1198
27
                    if (c != '\n') {
1199
27
                        tok->done = E_EOFS;
1200
27
                    }
1201
27
                    return MAKE_TOKEN(ERRORTOKEN);
1202
27
                }
1203
298
                else {
1204
298
                    if (has_escaped_quote) {
1205
8
                        _PyTokenizer_syntaxerror(
1206
8
                            tok,
1207
8
                            "unterminated string literal (detected at line %d); "
1208
8
                            "perhaps you escaped the end quote?",
1209
8
                            start
1210
8
                        );
1211
290
                    } else {
1212
290
                        _PyTokenizer_syntaxerror(
1213
290
                            tok, "unterminated string literal (detected at line %d)", start
1214
290
                        );
1215
290
                    }
1216
298
                    if (c != '\n') {
1217
6
                        tok->done = E_EOLS;
1218
6
                    }
1219
298
                    return MAKE_TOKEN(ERRORTOKEN);
1220
298
                }
1221
325
            }
1222
575k
            if (c == quote) {
1223
41.3k
                end_quote_size += 1;
1224
41.3k
            }
1225
533k
            else {
1226
533k
                end_quote_size = 0;
1227
533k
                if (c == '\\') {
1228
25.7k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
25.7k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
751
                        has_escaped_quote = 1;
1231
751
                    }
1232
25.7k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
25.7k
                }
1236
533k
            }
1237
575k
        }
1238
1239
40.7k
        p_start = tok->start;
1240
40.7k
        p_end = tok->cur;
1241
40.7k
        return MAKE_TOKEN(STRING);
1242
41.1k
    }
1243
1244
    /* Line continuation */
1245
1.00M
    if (c == '\\') {
1246
344
        if ((c = tok_continuation_line(tok)) == -1) {
1247
87
            return MAKE_TOKEN(ERRORTOKEN);
1248
87
        }
1249
257
        tok->cont_line = 1;
1250
257
        goto again; /* Read next line */
1251
344
    }
1252
1253
    /* Punctuation character */
1254
1.00M
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
1.00M
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
62.9k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
62.9k
        int in_format_spec = current_tok->in_format_spec;
1261
62.9k
         int cursor_in_format_with_debug =
1262
62.9k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
62.9k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
62.9k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
62.9k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
62.9k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.86k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.86k
            current_tok->in_format_spec = 1;
1274
4.86k
            p_start = tok->start;
1275
4.86k
            p_end = tok->cur;
1276
4.86k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.86k
        }
1278
62.9k
    }
1279
1280
    /* Check for two-character token */
1281
1.00M
    {
1282
1.00M
        int c2 = tok_nextc(tok);
1283
1.00M
        int current_token = _PyToken_TwoChars(c, c2);
1284
1.00M
        if (current_token != OP) {
1285
24.3k
            int c3 = tok_nextc(tok);
1286
24.3k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
24.3k
            if (current_token3 != OP) {
1288
1.19k
                current_token = current_token3;
1289
1.19k
            }
1290
23.1k
            else {
1291
23.1k
                tok_backup(tok, c3);
1292
23.1k
            }
1293
24.3k
            p_start = tok->start;
1294
24.3k
            p_end = tok->cur;
1295
24.3k
            return MAKE_TOKEN(current_token);
1296
24.3k
        }
1297
975k
        tok_backup(tok, c2);
1298
975k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
69.4k
    case '(':
1303
104k
    case '[':
1304
151k
    case '{':
1305
151k
        if (tok->level >= MAXLEVEL) {
1306
13
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
13
        }
1308
151k
        tok->parenstack[tok->level] = c;
1309
151k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
151k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
151k
        tok->level++;
1312
151k
        if (INSIDE_FSTRING(tok)) {
1313
35.0k
            current_tok->curly_bracket_depth++;
1314
35.0k
        }
1315
151k
        break;
1316
44.0k
    case ')':
1317
50.5k
    case ']':
1318
79.5k
    case '}':
1319
79.5k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
53
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
53
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
53
        }
1323
79.4k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
220
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
220
        }
1326
79.2k
        if (tok->level > 0) {
1327
79.2k
            tok->level--;
1328
79.2k
            int opening = tok->parenstack[tok->level];
1329
79.2k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
35.2k
                                            (opening == '[' && c == ']') ||
1331
28.8k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
52
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
3
                    assert(current_tok->curly_bracket_depth >= 0);
1339
3
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
3
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
1
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
1
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
1
                    }
1344
3
                }
1345
51
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
10
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
10
                            "closing parenthesis '%c' does not match "
1348
10
                            "opening parenthesis '%c' on line %d",
1349
10
                            c, opening, tok->parenlinenostack[tok->level]));
1350
10
                }
1351
41
                else {
1352
41
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
41
                            "closing parenthesis '%c' does not match "
1354
41
                            "opening parenthesis '%c'",
1355
41
                            c, opening));
1356
41
                }
1357
51
            }
1358
79.2k
        }
1359
1360
79.2k
        if (INSIDE_FSTRING(tok)) {
1361
25.3k
            current_tok->curly_bracket_depth--;
1362
25.3k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
25.3k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
23.7k
                current_tok->curly_bracket_expr_start_depth--;
1368
23.7k
                current_tok->kind = TOK_FSTRING_MODE;
1369
23.7k
                current_tok->in_format_spec = 0;
1370
23.7k
                current_tok->in_debug = 0;
1371
23.7k
            }
1372
25.3k
        }
1373
79.2k
        break;
1374
744k
    default:
1375
744k
        break;
1376
975k
    }
1377
1378
975k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
429
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
429
    }
1381
1382
974k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.71k
        current_tok->in_debug = 1;
1384
5.71k
    }
1385
1386
    /* Punctuation character */
1387
974k
    p_start = tok->start;
1388
974k
    p_end = tok->cur;
1389
974k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
975k
}
1391
1392
/* Lex the literal ("middle") portion of an f-string or t-string.
 *
 * Called while the tokenizer mode stack's top entry is in TOK_FSTRING_MODE.
 * Emits one of:
 *   - FSTRING_END / TSTRING_END when the closing quote(s) are found,
 *   - FSTRING_MIDDLE / TSTRING_MIDDLE for a run of literal text (possibly
 *     ending just before a '{' or '}'),
 *   - the result of tok_get_normal_mode() when an expression '{' opens,
 *   - ERRORTOKEN on unterminated strings, over-deep nesting, decode errors.
 *
 * Returns the token type (also written into *token via MAKE_TOKEN). */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;      // consecutive quote chars seen (vs. quote_size)
    int unicode_escape = 0;      // inside a \N{...} named escape

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            // A real replacement field opens here: record the nesting level
            // and hand the expression over to the regular-mode lexer.
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
        // peek1 == '{': "{{" is an escaped brace, fall through into the
        // literal-text scan below (both chars were backed up).
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // Full closing quote sequence consumed: release the buffered expression
    // text (used for '=' debug specs) and pop the f-string mode.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                // NOTE(review): unreachable for quote_size != 1 given the
                // enclosing condition; kept for safety — confirm upstream.
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            if (peek != '{' || in_format_spec) {
                // Expression start: back both chars up so normal mode
                // re-reads the '{', and bump the nesting depth.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // "{{" escape: emit the text up to (and including) one '{'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // Closing brace of a \N{...} escape — plain literal text.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // "}}" escape: emit text up to one '}'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // A real closing brace: let normal mode consume it.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.18M
{
1618
2.18M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.18M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.12M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.12M
    } else {
1622
57.6k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
57.6k
    }
1624
2.18M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.18M
{
1629
2.18M
    int result = tok_get(tok, token);
1630
2.18M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.18M
    return result;
1635
2.18M
}