Coverage Report

Created: 2026-02-26 06:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.19k
#define ALTTABSIZE 1
11
12
1.19M
#define is_potential_identifier_start(c) (\
13
1.19M
              (c >= 'a' && c <= 'z')\
14
1.19M
               || (c >= 'A' && c <= 'Z')\
15
1.19M
               || c == '_'\
16
1.19M
               || (c >= 128))
17
18
1.78M
#define is_potential_identifier_char(c) (\
19
1.78M
              (c >= 'a' && c <= 'z')\
20
1.78M
               || (c >= 'A' && c <= 'Z')\
21
1.78M
               || (c >= '0' && c <= '9')\
22
1.78M
               || c == '_'\
23
1.78M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.32M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
16.1k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
31
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.19M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
155k
{
55
155k
    return memchr(str, 0, size) != NULL;
56
155k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
7.42M
{
62
7.42M
    int rc;
63
7.57M
    for (;;) {
64
7.57M
        if (tok->cur != tok->inp) {
65
7.38M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
7.38M
            tok->col_offset++;
70
7.38M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
7.38M
        }
72
196k
        if (tok->done != E_OK) {
73
27.0k
            return EOF;
74
27.0k
        }
75
169k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
169k
        if (!rc) {
84
13.6k
            tok->cur = tok->inp;
85
13.6k
            return EOF;
86
13.6k
        }
87
155k
        tok->line_start = tok->cur;
88
89
155k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
155k
    }
95
7.42M
    Py_UNREACHABLE();
96
7.42M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
2.57M
{
102
2.57M
    if (c != EOF) {
103
2.55M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
2.55M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
2.55M
        tok->col_offset--;
110
2.55M
    }
111
2.57M
}
112
113
static int
114
25.7k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
25.7k
    assert(token != NULL);
116
25.7k
    assert(c == '}' || c == ':' || c == '!');
117
25.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
25.7k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
16.6k
        return 0;
121
16.6k
    }
122
9.12k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
9.12k
    int hash_detected = 0;
126
9.12k
    int in_string = 0;
127
9.12k
    char quote_char = 0;
128
129
852k
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
844k
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
844k
        if (ch == '\\') {
134
14.2k
            i++;
135
14.2k
            continue;
136
14.2k
        }
137
138
        // Handle quotes
139
829k
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
155k
            if (!in_string) {
148
57.0k
                in_string = 1;
149
57.0k
                quote_char = ch;
150
57.0k
            }
151
98.0k
            else if (ch == quote_char) {
152
56.4k
                in_string = 0;
153
56.4k
            }
154
155k
            continue;
155
155k
        }
156
157
        // Check for # outside strings
158
674k
        if (ch == '#' && !in_string) {
159
731
            hash_detected = 1;
160
731
            break;
161
731
        }
162
674k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
9.12k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
731
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
731
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
731
        Py_ssize_t i = 0;  // Input position
172
731
        Py_ssize_t j = 0;  // Output position
173
731
        in_string = 0;     // Whether we're in a string
174
731
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
47.9k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
47.1k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
47.1k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
6.68k
                if (!in_string) {
184
2.70k
                    in_string = 1;
185
2.70k
                    quote_char = ch;
186
3.98k
                } else if (ch == quote_char) {
187
2.68k
                    in_string = 0;
188
2.68k
                }
189
6.68k
                result[j++] = ch;
190
6.68k
            }
191
            // Skip comments
192
40.5k
            else if (ch == '#' && !in_string) {
193
22.4k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
21.7k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
21.4k
                    i++;
196
21.4k
                }
197
973
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
333
                    result[j++] = '\n';
199
333
                }
200
973
            }
201
            // Copy other chars
202
39.5k
            else {
203
39.5k
                result[j++] = ch;
204
39.5k
            }
205
47.1k
            i++;
206
47.1k
        }
207
208
731
        result[j] = '\0';  // Null-terminate the result string
209
731
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
731
        PyMem_Free(result);
211
8.39k
    } else {
212
8.39k
        res = PyUnicode_DecodeUTF8(
213
8.39k
            tok_mode->last_expr_buffer,
214
8.39k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
8.39k
            NULL
216
8.39k
        );
217
8.39k
    }
218
219
9.12k
    if (!res) {
220
0
        return -1;
221
0
    }
222
9.12k
    token->metadata = res;
223
9.12k
    return 0;
224
9.12k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
69.2k
{
229
69.2k
    assert(tok->cur != NULL);
230
231
69.2k
    Py_ssize_t size = strlen(tok->cur);
232
69.2k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
69.2k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
43.4k
        case '{':
252
43.4k
            if (tok_mode->last_expr_buffer != NULL) {
253
32.2k
                PyMem_Free(tok_mode->last_expr_buffer);
254
32.2k
            }
255
43.4k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
43.4k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
43.4k
            tok_mode->last_expr_size = size;
260
43.4k
            tok_mode->last_expr_end = -1;
261
43.4k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
43.4k
            break;
263
21.2k
        case '}':
264
22.8k
        case '!':
265
22.8k
            tok_mode->last_expr_end = strlen(tok->start);
266
22.8k
            break;
267
2.96k
        case ':':
268
2.96k
            if (tok_mode->last_expr_end == -1) {
269
2.55k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.55k
            }
271
2.96k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
69.2k
    }
275
69.2k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
69.2k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
7.23k
{
284
7.23k
    const char *s = test;
285
7.23k
    int res = 0;
286
19.5k
    while (1) {
287
19.5k
        int c = tok_nextc(tok);
288
19.5k
        if (*s == 0) {
289
7.14k
            res = !is_potential_identifier_char(c);
290
7.14k
        }
291
12.4k
        else if (c == *s) {
292
12.3k
            s++;
293
12.3k
            continue;
294
12.3k
        }
295
296
7.23k
        tok_backup(tok, c);
297
19.5k
        while (s != test) {
298
12.3k
            tok_backup(tok, *--s);
299
12.3k
        }
300
7.23k
        return res;
301
19.5k
    }
302
7.23k
}
303
304
static int
305
72.9k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
72.9k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
28
        return 1;
310
28
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
72.9k
    int r = 0;
322
72.9k
    if (c == 'a') {
323
670
        r = lookahead(tok, "nd");
324
670
    }
325
72.2k
    else if (c == 'e') {
326
480
        r = lookahead(tok, "lse");
327
480
    }
328
71.7k
    else if (c == 'f') {
329
3.40k
        r = lookahead(tok, "or");
330
3.40k
    }
331
68.3k
    else if (c == 'i') {
332
1.11k
        int c2 = tok_nextc(tok);
333
1.11k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.10k
            r = 1;
335
1.10k
        }
336
1.11k
        tok_backup(tok, c2);
337
1.11k
    }
338
67.2k
    else if (c == 'o') {
339
2.41k
        r = lookahead(tok, "r");
340
2.41k
    }
341
64.8k
    else if (c == 'n') {
342
268
        r = lookahead(tok, "ot");
343
268
    }
344
72.9k
    if (r) {
345
8.23k
        tok_backup(tok, c);
346
8.23k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
8.23k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
8.23k
        tok_nextc(tok);
352
8.23k
    }
353
64.6k
    else /* In future releases, only error will remain. */
354
64.6k
    if (c < 128 && is_potential_identifier_char(c)) {
355
183
        tok_backup(tok, c);
356
183
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
183
        return 0;
358
183
    }
359
72.7k
    return 1;
360
72.9k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
13.3k
{
366
13.3k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
13.3k
    PyObject *s;
370
13.3k
    if (tok->decoding_erred)
371
0
        return 0;
372
13.3k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
13.3k
    if (s == NULL) {
374
2
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
2
            tok->done = E_DECODE;
376
2
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
2
        return 0;
381
2
    }
382
13.3k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
13.3k
    assert(invalid >= 0);
384
13.3k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
13.3k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
596
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
596
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
413
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
413
            if (s != NULL) {
391
413
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
413
            }
393
413
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
413
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
413
        }
399
596
        Py_DECREF(s);
400
596
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
298
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
298
        }
403
298
        else {
404
298
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
298
        }
406
596
        return 0;
407
596
    }
408
12.7k
    Py_DECREF(s);
409
12.7k
    return 1;
410
13.3k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
55.1k
{
415
55.1k
    int c;
416
417
55.2k
    while (1) {
418
175k
        do {
419
175k
            c = tok_nextc(tok);
420
175k
        } while (Py_ISDIGIT(c));
421
55.2k
        if (c != '_') {
422
55.0k
            break;
423
55.0k
        }
424
126
        c = tok_nextc(tok);
425
126
        if (!Py_ISDIGIT(c)) {
426
10
            tok_backup(tok, c);
427
10
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
10
            return 0;
429
10
        }
430
126
    }
431
55.0k
    return c;
432
55.1k
}
433
434
static inline int
435
656
tok_continuation_line(struct tok_state *tok) {
436
656
    int c = tok_nextc(tok);
437
656
    if (c == '\r') {
438
42
        c = tok_nextc(tok);
439
42
    }
440
656
    if (c != '\n') {
441
38
        tok->done = E_LINECONT;
442
38
        return -1;
443
38
    }
444
618
    c = tok_nextc(tok);
445
618
    if (c == EOF) {
446
42
        tok->done = E_EOF;
447
42
        tok->cur = tok->inp;
448
42
        return -1;
449
576
    } else {
450
576
        tok_backup(tok, c);
451
576
    }
452
576
    return c;
453
618
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
19.4k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
19.4k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
19.4k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
19.4k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
19.4k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
19.4k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
19.4k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
19.4k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
19.4k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
19.4k
    if (saw_f && saw_t) {
492
2
        RETURN_SYNTAX_ERROR("f", "t");
493
2
    }
494
495
19.4k
#undef RETURN_SYNTAX_ERROR
496
497
19.4k
    return 0;
498
19.4k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.16M
{
503
1.16M
    int c;
504
1.16M
    int blankline, nonascii;
505
506
1.16M
    const char *p_start = NULL;
507
1.16M
    const char *p_end = NULL;
508
1.23M
  nextline:
509
1.23M
    tok->start = NULL;
510
1.23M
    tok->starting_col_offset = -1;
511
1.23M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.23M
    if (tok->atbol) {
516
159k
        int col = 0;
517
159k
        int altcol = 0;
518
159k
        tok->atbol = 0;
519
159k
        int cont_line_col = 0;
520
526k
        for (;;) {
521
526k
            c = tok_nextc(tok);
522
526k
            if (c == ' ') {
523
365k
                col++, altcol++;
524
365k
            }
525
160k
            else if (c == '\t') {
526
598
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
598
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
598
            }
529
160k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
553
                col = altcol = 0; /* For Emacs users */
531
553
            }
532
159k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
478
                cont_line_col = cont_line_col ? cont_line_col : col;
538
478
                if ((c = tok_continuation_line(tok)) == -1) {
539
39
                    return MAKE_TOKEN(ERRORTOKEN);
540
39
                }
541
478
            }
542
159k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
159k
            else {
546
159k
                break;
547
159k
            }
548
526k
        }
549
159k
        tok_backup(tok, c);
550
159k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
37.1k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
37.1k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
37.1k
            else {
566
37.1k
                blankline = 1; /* Ignore completely */
567
37.1k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
37.1k
        }
571
159k
        if (!blankline && tok->level == 0) {
572
88.2k
            col = cont_line_col ? cont_line_col : col;
573
88.2k
            altcol = cont_line_col ? cont_line_col : altcol;
574
88.2k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
67.1k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
67.1k
            }
580
21.0k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
11.9k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
11.9k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
11.9k
                tok->pendin++;
591
11.9k
                tok->indstack[++tok->indent] = col;
592
11.9k
                tok->altindstack[tok->indent] = altcol;
593
11.9k
            }
594
9.14k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
20.4k
                while (tok->indent > 0 &&
597
17.6k
                    col < tok->indstack[tok->indent]) {
598
11.2k
                    tok->pendin--;
599
11.2k
                    tok->indent--;
600
11.2k
                }
601
9.14k
                if (col != tok->indstack[tok->indent]) {
602
4
                    tok->done = E_DEDENT;
603
4
                    tok->cur = tok->inp;
604
4
                    return MAKE_TOKEN(ERRORTOKEN);
605
4
                }
606
9.14k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.14k
            }
610
88.2k
        }
611
159k
    }
612
613
1.23M
    tok->start = tok->cur;
614
1.23M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
1.23M
    if (tok->pendin != 0) {
618
23.1k
        if (tok->pendin < 0) {
619
11.2k
            if (tok->tok_extra_tokens) {
620
30
                p_start = tok->cur;
621
30
                p_end = tok->cur;
622
30
            }
623
11.2k
            tok->pendin++;
624
11.2k
            return MAKE_TOKEN(DEDENT);
625
11.2k
        }
626
11.9k
        else {
627
11.9k
            if (tok->tok_extra_tokens) {
628
32
                p_start = tok->buf;
629
32
                p_end = tok->cur;
630
32
            }
631
11.9k
            tok->pendin--;
632
11.9k
            return MAKE_TOKEN(INDENT);
633
11.9k
        }
634
23.1k
    }
635
636
    /* Peek ahead at the next character */
637
1.20M
    c = tok_nextc(tok);
638
1.20M
    tok_backup(tok, c);
639
640
1.20M
 again:
641
1.20M
    tok->start = NULL;
642
    /* Skip spaces */
643
1.46M
    do {
644
1.46M
        c = tok_nextc(tok);
645
1.46M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.20M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.20M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.20M
    if (c == '#') {
653
654
31.5k
        const char* p = NULL;
655
31.5k
        const char *prefix, *type_start;
656
31.5k
        int current_starting_col_offset;
657
658
979k
        while (c != EOF && c != '\n' && c != '\r') {
659
947k
            c = tok_nextc(tok);
660
947k
        }
661
662
31.5k
        if (tok->tok_extra_tokens) {
663
22
            p = tok->start;
664
22
        }
665
666
31.5k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
31.5k
        if (tok->tok_extra_tokens) {
721
22
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
22
            p_start = p;
723
22
            p_end = tok->cur;
724
22
            tok->comment_newline = blankline;
725
22
            return MAKE_TOKEN(COMMENT);
726
22
        }
727
31.5k
    }
728
729
1.20M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.20M
    if (c == EOF) {
735
13.5k
        if (tok->level) {
736
3.39k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.39k
        }
738
10.1k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
13.5k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.19M
    nonascii = 0;
743
1.19M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
394k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
489k
        while (1) {
747
489k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
16.0k
                saw_b = 1;
749
16.0k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
473k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
5.50k
                saw_u = 1;
754
5.50k
            }
755
            /* ur"" and ru"" are not supported */
756
467k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
26.2k
                saw_r = 1;
758
26.2k
            }
759
441k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
37.8k
                saw_f = 1;
761
37.8k
            }
762
403k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
28.4k
                saw_t = 1;
764
28.4k
            }
765
374k
            else {
766
374k
                break;
767
374k
            }
768
114k
            c = tok_nextc(tok);
769
114k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
19.4k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
19.4k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
19.4k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
19.4k
                if (saw_f || saw_t) {
779
16.1k
                    goto f_string_quote;
780
16.1k
                }
781
3.23k
                goto letter_quote;
782
19.4k
            }
783
114k
        }
784
1.71M
        while (is_potential_identifier_char(c)) {
785
1.33M
            if (c >= 128) {
786
160k
                nonascii = 1;
787
160k
            }
788
1.33M
            c = tok_nextc(tok);
789
1.33M
        }
790
374k
        tok_backup(tok, c);
791
374k
        if (nonascii && !verify_identifier(tok)) {
792
598
            return MAKE_TOKEN(ERRORTOKEN);
793
598
        }
794
795
374k
        p_start = tok->start;
796
374k
        p_end = tok->cur;
797
798
374k
        return MAKE_TOKEN(NAME);
799
374k
    }
800
801
799k
    if (c == '\r') {
802
184
        c = tok_nextc(tok);
803
184
    }
804
805
    /* Newline */
806
799k
    if (c == '\n') {
807
143k
        tok->atbol = 1;
808
143k
        if (blankline || tok->level > 0) {
809
70.8k
            if (tok->tok_extra_tokens) {
810
64
                if (tok->comment_newline) {
811
12
                    tok->comment_newline = 0;
812
12
                }
813
64
                p_start = tok->start;
814
64
                p_end = tok->cur;
815
64
                return MAKE_TOKEN(NL);
816
64
            }
817
70.7k
            goto nextline;
818
70.8k
        }
819
72.4k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
6
            tok->comment_newline = 0;
821
6
            p_start = tok->start;
822
6
            p_end = tok->cur;
823
6
            return MAKE_TOKEN(NL);
824
6
        }
825
72.4k
        p_start = tok->start;
826
72.4k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
72.4k
        tok->cont_line = 0;
828
72.4k
        return MAKE_TOKEN(NEWLINE);
829
72.4k
    }
830
831
    /* Period or number starting with period? */
832
656k
    if (c == '.') {
833
25.9k
        c = tok_nextc(tok);
834
25.9k
        if (Py_ISDIGIT(c)) {
835
2.54k
            goto fraction;
836
23.3k
        } else if (c == '.') {
837
1.19k
            c = tok_nextc(tok);
838
1.19k
            if (c == '.') {
839
558
                p_start = tok->start;
840
558
                p_end = tok->cur;
841
558
                return MAKE_TOKEN(ELLIPSIS);
842
558
            }
843
633
            else {
844
633
                tok_backup(tok, c);
845
633
            }
846
633
            tok_backup(tok, '.');
847
633
        }
848
22.1k
        else {
849
22.1k
            tok_backup(tok, c);
850
22.1k
        }
851
22.8k
        p_start = tok->start;
852
22.8k
        p_end = tok->cur;
853
22.8k
        return MAKE_TOKEN(DOT);
854
25.9k
    }
855
856
    /* Number */
857
630k
    if (Py_ISDIGIT(c)) {
858
70.5k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
28.3k
            c = tok_nextc(tok);
861
28.3k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
14.5k
                c = tok_nextc(tok);
864
14.6k
                do {
865
14.6k
                    if (c == '_') {
866
73
                        c = tok_nextc(tok);
867
73
                    }
868
14.6k
                    if (!Py_ISXDIGIT(c)) {
869
15
                        tok_backup(tok, c);
870
15
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
15
                    }
872
77.7k
                    do {
873
77.7k
                        c = tok_nextc(tok);
874
77.7k
                    } while (Py_ISXDIGIT(c));
875
14.6k
                } while (c == '_');
876
14.5k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
2
                    return MAKE_TOKEN(ERRORTOKEN);
878
2
                }
879
14.5k
            }
880
13.7k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
450
                c = tok_nextc(tok);
883
592
                do {
884
592
                    if (c == '_') {
885
143
                        c = tok_nextc(tok);
886
143
                    }
887
592
                    if (c < '0' || c >= '8') {
888
19
                        if (Py_ISDIGIT(c)) {
889
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
2
                                    "invalid digit '%c' in octal literal", c));
891
2
                        }
892
17
                        else {
893
17
                            tok_backup(tok, c);
894
17
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
17
                        }
896
19
                    }
897
1.31k
                    do {
898
1.31k
                        c = tok_nextc(tok);
899
1.31k
                    } while ('0' <= c && c < '8');
900
573
                } while (c == '_');
901
431
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
430
                if (!verify_end_of_number(tok, c, "octal")) {
906
3
                    return MAKE_TOKEN(ERRORTOKEN);
907
3
                }
908
430
            }
909
13.3k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
351
                c = tok_nextc(tok);
912
644
                do {
913
644
                    if (c == '_') {
914
302
                        c = tok_nextc(tok);
915
302
                    }
916
644
                    if (c != '0' && c != '1') {
917
23
                        if (Py_ISDIGIT(c)) {
918
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
2
                        }
920
21
                        else {
921
21
                            tok_backup(tok, c);
922
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
21
                        }
924
23
                    }
925
3.09k
                    do {
926
3.09k
                        c = tok_nextc(tok);
927
3.09k
                    } while (c == '0' || c == '1');
928
621
                } while (c == '_');
929
328
                if (Py_ISDIGIT(c)) {
930
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
1
                }
932
327
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
327
            }
936
12.9k
            else {
937
12.9k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
14.4k
                while (1) {
941
14.4k
                    if (c == '_') {
942
105
                        c = tok_nextc(tok);
943
105
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
105
                    }
948
14.4k
                    if (c != '0') {
949
12.9k
                        break;
950
12.9k
                    }
951
1.51k
                    c = tok_nextc(tok);
952
1.51k
                }
953
12.9k
                char* zeros_end = tok->cur;
954
12.9k
                if (Py_ISDIGIT(c)) {
955
395
                    nonzero = 1;
956
395
                    c = tok_decimal_tail(tok);
957
395
                    if (c == 0) {
958
1
                        return MAKE_TOKEN(ERRORTOKEN);
959
1
                    }
960
395
                }
961
12.9k
                if (c == '.') {
962
742
                    c = tok_nextc(tok);
963
742
                    goto fraction;
964
742
                }
965
12.2k
                else if (c == 'e' || c == 'E') {
966
923
                    goto exponent;
967
923
                }
968
11.3k
                else if (c == 'j' || c == 'J') {
969
960
                    goto imaginary;
970
960
                }
971
10.3k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
29
                    tok_backup(tok, c);
974
29
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
29
                            tok, (int)(tok->start + 1 - tok->line_start),
976
29
                            (int)(zeros_end - tok->line_start),
977
29
                            "leading zeros in decimal integer "
978
29
                            "literals are not permitted; "
979
29
                            "use an 0o prefix for octal integers"));
980
29
                }
981
10.3k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
23
                    return MAKE_TOKEN(ERRORTOKEN);
983
23
                }
984
10.3k
            }
985
28.3k
        }
986
42.1k
        else {
987
            /* Decimal */
988
42.1k
            c = tok_decimal_tail(tok);
989
42.1k
            if (c == 0) {
990
8
                return MAKE_TOKEN(ERRORTOKEN);
991
8
            }
992
42.1k
            {
993
                /* Accept floating-point numbers. */
994
42.1k
                if (c == '.') {
995
2.58k
                    c = tok_nextc(tok);
996
5.87k
        fraction:
997
                    /* Fraction */
998
5.87k
                    if (Py_ISDIGIT(c)) {
999
4.65k
                        c = tok_decimal_tail(tok);
1000
4.65k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
4.65k
                    }
1004
5.87k
                }
1005
45.4k
                if (c == 'e' || c == 'E') {
1006
7.46k
                    int e;
1007
8.38k
                  exponent:
1008
8.38k
                    e = c;
1009
                    /* Exponent part */
1010
8.38k
                    c = tok_nextc(tok);
1011
8.38k
                    if (c == '+' || c == '-') {
1012
2.94k
                        c = tok_nextc(tok);
1013
2.94k
                        if (!Py_ISDIGIT(c)) {
1014
12
                            tok_backup(tok, c);
1015
12
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
12
                        }
1017
5.44k
                    } else if (!Py_ISDIGIT(c)) {
1018
480
                        tok_backup(tok, c);
1019
480
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
39
                            return MAKE_TOKEN(ERRORTOKEN);
1021
39
                        }
1022
441
                        tok_backup(tok, e);
1023
441
                        p_start = tok->start;
1024
441
                        p_end = tok->cur;
1025
441
                        return MAKE_TOKEN(NUMBER);
1026
480
                    }
1027
7.89k
                    c = tok_decimal_tail(tok);
1028
7.89k
                    if (c == 0) {
1029
0
                        return MAKE_TOKEN(ERRORTOKEN);
1030
0
                    }
1031
7.89k
                }
1032
45.8k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.72k
        imaginary:
1035
3.72k
                    c = tok_nextc(tok);
1036
3.72k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
6
                        return MAKE_TOKEN(ERRORTOKEN);
1038
6
                    }
1039
3.72k
                }
1040
43.1k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
109
                    return MAKE_TOKEN(ERRORTOKEN);
1042
109
                }
1043
45.8k
            }
1044
45.8k
        }
1045
72.3k
        tok_backup(tok, c);
1046
72.3k
        p_start = tok->start;
1047
72.3k
        p_end = tok->cur;
1048
72.3k
        return MAKE_TOKEN(NUMBER);
1049
70.5k
    }
1050
1051
576k
  f_string_quote:
1052
576k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
16.1k
        && (c == '\'' || c == '"'))) {
1054
1055
16.1k
        int quote = c;
1056
16.1k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
16.1k
        tok->first_lineno = tok->lineno;
1063
16.1k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
16.1k
        int after_quote = tok_nextc(tok);
1067
16.1k
        if (after_quote == quote) {
1068
2.40k
            int after_after_quote = tok_nextc(tok);
1069
2.40k
            if (after_after_quote == quote) {
1070
503
                quote_size = 3;
1071
503
            }
1072
1.90k
            else {
1073
                // TODO: Check this
1074
1.90k
                tok_backup(tok, after_after_quote);
1075
1.90k
                tok_backup(tok, after_quote);
1076
1.90k
            }
1077
2.40k
        }
1078
16.1k
        if (after_quote != quote) {
1079
13.7k
            tok_backup(tok, after_quote);
1080
13.7k
        }
1081
1082
1083
16.1k
        p_start = tok->start;
1084
16.1k
        p_end = tok->cur;
1085
16.1k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
16.1k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
16.1k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
16.1k
        the_current_tok->quote = quote;
1091
16.1k
        the_current_tok->quote_size = quote_size;
1092
16.1k
        the_current_tok->start = tok->start;
1093
16.1k
        the_current_tok->multi_line_start = tok->line_start;
1094
16.1k
        the_current_tok->first_line = tok->lineno;
1095
16.1k
        the_current_tok->start_offset = -1;
1096
16.1k
        the_current_tok->multi_line_start_offset = -1;
1097
16.1k
        the_current_tok->last_expr_buffer = NULL;
1098
16.1k
        the_current_tok->last_expr_size = 0;
1099
16.1k
        the_current_tok->last_expr_end = -1;
1100
16.1k
        the_current_tok->in_format_spec = 0;
1101
16.1k
        the_current_tok->in_debug = 0;
1102
1103
16.1k
        enum string_kind_t string_kind = FSTRING;
1104
16.1k
        switch (*tok->start) {
1105
764
            case 'T':
1106
4.27k
            case 't':
1107
4.27k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.27k
                string_kind = TSTRING;
1109
4.27k
                break;
1110
1.52k
            case 'F':
1111
11.5k
            case 'f':
1112
11.5k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
11.5k
                break;
1114
211
            case 'R':
1115
358
            case 'r':
1116
358
                the_current_tok->raw = 1;
1117
358
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
42
                    string_kind = TSTRING;
1119
42
                }
1120
358
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
16.1k
        }
1124
1125
16.1k
        the_current_tok->string_kind = string_kind;
1126
16.1k
        the_current_tok->curly_bracket_depth = 0;
1127
16.1k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
16.1k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
16.1k
    }
1130
1131
563k
  letter_quote:
1132
    /* String */
1133
563k
    if (c == '\'' || c == '"') {
1134
36.6k
        int quote = c;
1135
36.6k
        int quote_size = 1;             /* 1 or 3 */
1136
36.6k
        int end_quote_size = 0;
1137
36.6k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
36.6k
        tok->first_lineno = tok->lineno;
1144
36.6k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
36.6k
        c = tok_nextc(tok);
1148
36.6k
        if (c == quote) {
1149
6.20k
            c = tok_nextc(tok);
1150
6.20k
            if (c == quote) {
1151
1.22k
                quote_size = 3;
1152
1.22k
            }
1153
4.98k
            else {
1154
4.98k
                end_quote_size = 1;     /* empty string found */
1155
4.98k
            }
1156
6.20k
        }
1157
36.6k
        if (c != quote) {
1158
35.3k
            tok_backup(tok, c);
1159
35.3k
        }
1160
1161
        /* Get rest of string */
1162
541k
        while (end_quote_size != quote_size) {
1163
505k
            c = tok_nextc(tok);
1164
505k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
505k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
505k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
280
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
280
                tok->cur = (char *)tok->start;
1176
280
                tok->cur++;
1177
280
                tok->line_start = tok->multi_line_start;
1178
280
                int start = tok->lineno;
1179
280
                tok->lineno = tok->first_lineno;
1180
1181
280
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * does the initial quote matches with f-strings quotes
1185
                     * and if it is, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
32
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
32
                    if (the_current_tok->quote == quote &&
1189
25
                        the_current_tok->quote_size == quote_size) {
1190
18
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
18
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
18
                    }
1193
32
                }
1194
1195
262
                if (quote_size == 3) {
1196
29
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
29
                                     " (detected at line %d)", start);
1198
29
                    if (c != '\n') {
1199
29
                        tok->done = E_EOFS;
1200
29
                    }
1201
29
                    return MAKE_TOKEN(ERRORTOKEN);
1202
29
                }
1203
233
                else {
1204
233
                    if (has_escaped_quote) {
1205
9
                        _PyTokenizer_syntaxerror(
1206
9
                            tok,
1207
9
                            "unterminated string literal (detected at line %d); "
1208
9
                            "perhaps you escaped the end quote?",
1209
9
                            start
1210
9
                        );
1211
224
                    } else {
1212
224
                        _PyTokenizer_syntaxerror(
1213
224
                            tok, "unterminated string literal (detected at line %d)", start
1214
224
                        );
1215
224
                    }
1216
233
                    if (c != '\n') {
1217
16
                        tok->done = E_EOLS;
1218
16
                    }
1219
233
                    return MAKE_TOKEN(ERRORTOKEN);
1220
233
                }
1221
262
            }
1222
504k
            if (c == quote) {
1223
34.9k
                end_quote_size += 1;
1224
34.9k
            }
1225
469k
            else {
1226
469k
                end_quote_size = 0;
1227
469k
                if (c == '\\') {
1228
25.2k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
25.2k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
1.25k
                        has_escaped_quote = 1;
1231
1.25k
                    }
1232
25.2k
                    if (c == '\r') {
1233
21
                        c = tok_nextc(tok);
1234
21
                    }
1235
25.2k
                }
1236
469k
            }
1237
504k
        }
1238
1239
36.3k
        p_start = tok->start;
1240
36.3k
        p_end = tok->cur;
1241
36.3k
        return MAKE_TOKEN(STRING);
1242
36.6k
    }
1243
1244
    /* Line continuation */
1245
526k
    if (c == '\\') {
1246
178
        if ((c = tok_continuation_line(tok)) == -1) {
1247
41
            return MAKE_TOKEN(ERRORTOKEN);
1248
41
        }
1249
137
        tok->cont_line = 1;
1250
137
        goto again; /* Read next line */
1251
178
    }
1252
1253
    /* Punctuation character */
1254
526k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
526k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
58.2k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
58.2k
        int in_format_spec = current_tok->in_format_spec;
1261
58.2k
         int cursor_in_format_with_debug =
1262
58.2k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
58.2k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
58.2k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
58.2k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
58.2k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
3.61k
            current_tok->kind = TOK_FSTRING_MODE;
1273
3.61k
            current_tok->in_format_spec = 1;
1274
3.61k
            p_start = tok->start;
1275
3.61k
            p_end = tok->cur;
1276
3.61k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
3.61k
        }
1278
58.2k
    }
1279
1280
    /* Check for two-character token */
1281
522k
    {
1282
522k
        int c2 = tok_nextc(tok);
1283
522k
        int current_token = _PyToken_TwoChars(c, c2);
1284
522k
        if (current_token != OP) {
1285
21.1k
            int c3 = tok_nextc(tok);
1286
21.1k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
21.1k
            if (current_token3 != OP) {
1288
1.50k
                current_token = current_token3;
1289
1.50k
            }
1290
19.6k
            else {
1291
19.6k
                tok_backup(tok, c3);
1292
19.6k
            }
1293
21.1k
            p_start = tok->start;
1294
21.1k
            p_end = tok->cur;
1295
21.1k
            return MAKE_TOKEN(current_token);
1296
21.1k
        }
1297
501k
        tok_backup(tok, c2);
1298
501k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
60.8k
    case '(':
1303
77.9k
    case '[':
1304
119k
    case '{':
1305
119k
        if (tok->level >= MAXLEVEL) {
1306
18
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
18
        }
1308
119k
        tok->parenstack[tok->level] = c;
1309
119k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
119k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
119k
        tok->level++;
1312
119k
        if (INSIDE_FSTRING(tok)) {
1313
30.7k
            current_tok->curly_bracket_depth++;
1314
30.7k
        }
1315
119k
        break;
1316
36.1k
    case ')':
1317
42.1k
    case ']':
1318
69.3k
    case '}':
1319
69.3k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
53
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
53
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
53
        }
1323
69.2k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
163
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
163
        }
1326
69.1k
        if (tok->level > 0) {
1327
69.1k
            tok->level--;
1328
69.1k
            int opening = tok->parenstack[tok->level];
1329
69.1k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
33.0k
                                            (opening == '[' && c == ']') ||
1331
27.0k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
35
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
5
                    assert(current_tok->curly_bracket_depth >= 0);
1339
5
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
5
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
3
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
3
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
3
                    }
1344
5
                }
1345
32
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
6
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
6
                            "closing parenthesis '%c' does not match "
1348
6
                            "opening parenthesis '%c' on line %d",
1349
6
                            c, opening, tok->parenlinenostack[tok->level]));
1350
6
                }
1351
26
                else {
1352
26
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
26
                            "closing parenthesis '%c' does not match "
1354
26
                            "opening parenthesis '%c'",
1355
26
                            c, opening));
1356
26
                }
1357
32
            }
1358
69.1k
        }
1359
1360
69.1k
        if (INSIDE_FSTRING(tok)) {
1361
24.0k
            current_tok->curly_bracket_depth--;
1362
24.0k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
24.0k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
22.7k
                current_tok->curly_bracket_expr_start_depth--;
1368
22.7k
                current_tok->kind = TOK_FSTRING_MODE;
1369
22.7k
                current_tok->in_format_spec = 0;
1370
22.7k
                current_tok->in_debug = 0;
1371
22.7k
            }
1372
24.0k
        }
1373
69.1k
        break;
1374
313k
    default:
1375
313k
        break;
1376
501k
    }
1377
1378
501k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
361
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
361
    }
1381
1382
501k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.12k
        current_tok->in_debug = 1;
1384
5.12k
    }
1385
1386
    /* Punctuation character */
1387
501k
    p_start = tok->start;
1388
501k
    p_end = tok->cur;
1389
501k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
501k
}
1391
1392
/* Tokenize the literal ("middle") portion of an f-string or t-string.
 *
 * Called while the top of tok->tok_mode_stack is in TOK_FSTRING_MODE.
 * Emits one of:
 *   - FSTRING_END/TSTRING_END when the closing quote(s) are found,
 *   - FSTRING_MIDDLE/TSTRING_MIDDLE for a literal chunk (switching the
 *     mode back to TOK_REGULAR_MODE when a '{' opens a replacement field),
 *   - ERRORTOKEN / a syntax error for unterminated strings, decode errors,
 *     or over-deep expression nesting.
 *
 * Returns the token type (via MAKE_TOKEN, which also fills *token with the
 * p_start/p_end span).
 */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;       // consecutive quote chars matched so far
    int unicode_escape = 0;       // inside a \N{...} named escape

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        // Peek one more char to distinguish '{{' (escaped literal brace,
        // handled below as FSTRING_MIDDLE text) from a replacement field.
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            // Hand the replacement-field expression to the normal tokenizer.
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string: try to match quote_size
    // (1 or 3) closing quote characters in a row.
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // Matched the closing quote(s): release the cached last-expression
    // buffer used for '=' debug specs before popping the mode stack.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;  // pop this f/t-string mode
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        // True only when we are inside the format-spec part (after ':')
        // of an open replacement field.
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

        // End of input, or end of line in a single-quoted string, means
        // the literal is unterminated (with one format-spec exception).
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                // NOTE(review): unreachable under the current loop guard
                // (quote_size == 1 here), kept for safety — confirm upstream.
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        // Count consecutive quote chars; quote_size in a row terminates
        // the while loop above (the closing quotes are backed up below).
        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            if (peek != '{' || in_format_spec) {
                // Start of a replacement field ('{{' is only an escape
                // outside a format spec): back up, bump nesting depth,
                // and switch to regular mode for the expression.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // '{{' escape: emit the chunk up to (and including) one
                // of the two braces.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // Closing brace of a \N{...} escape is literal text.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // '}}' escape: emit text including a single '}'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // Field-closing '}': back up and let regular mode emit it.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.19M
{
1618
1.19M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.19M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.14M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.14M
    } else {
1622
51.4k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
51.4k
    }
1624
1.19M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.19M
{
1629
1.19M
    int result = tok_get(tok, token);
1630
1.19M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.19M
    return result;
1635
1.19M
}