Coverage Report

Created: 2026-05-16 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
#define ALTTABSIZE 1

/* True when `c` may begin an identifier: an ASCII letter, '_' or any value
   >= 128.  Values >= 128 are accepted without further inspection here.
   The argument is parenthesized so that arbitrary expressions can be
   passed, but it is still evaluated several times and therefore must not
   have side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))

/* True when `c` may continue an identifier: everything accepted by
   is_potential_identifier_start() plus the ASCII digits.  The same
   multiple-evaluation caveat applies. */
#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))
24
25
/* Accessors for the tokenizer mode stack.
   TOK_GET_MODE yields a pointer to the entry at tok_mode_stack_index.
   TOK_NEXT_MODE increments tok_mode_stack_index first and then yields a
   pointer to the new top entry.
   Debug builds use functions so that the index can be checked against
   MAXFSTRINGLEVEL with assert(); other builds use unchecked macros.  The
   macro forms parenthesize `tok` so they accept the same argument
   expressions as the functions; note that they mention `tok` twice. */
#ifdef Py_DEBUG
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
#endif
40
41
/* Select the MIDDLE / END token type matching the string kind of
   `tok_mode`: the TSTRING_* variant when string_kind is TSTRING, the
   FSTRING_* variant otherwise. */
#define FTSTRING_MIDDLE(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
#define FTSTRING_END(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
/* Prefix letter ('t' or 'f') of the string kind of the current mode. */
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
/* Token construction shorthands.  Both macros rely on locals named `tok`,
   `token`, `p_start` and `p_end` being in scope at the expansion site. */
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))

/* Spaces in this constant are treated as "zero or more spaces or tabs" when
   tokenizing. */
static const char *type_comment_prefix = "# type: ";
51
52
/* Report whether the first `size` bytes starting at `str` include a NUL
   byte.  Returns 1 if one is found and 0 otherwise. */
static inline int
contains_null_bytes(const char* str, size_t size)
{
    const void *first_nul = memchr(str, 0, size);
    return first_nul != NULL;
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.5M
{
62
11.5M
    int rc;
63
11.7M
    for (;;) {
64
11.7M
        if (tok->cur != tok->inp) {
65
11.2M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.2M
            tok->col_offset++;
70
11.2M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.2M
        }
72
560k
        if (tok->done != E_OK) {
73
194k
            return EOF;
74
194k
        }
75
366k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
366k
        if (!rc) {
84
97.1k
            tok->cur = tok->inp;
85
97.1k
            return EOF;
86
97.1k
        }
87
269k
        tok->line_start = tok->cur;
88
89
269k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
269k
    }
95
11.5M
    Py_UNREACHABLE();
96
11.5M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.48M
{
102
4.48M
    if (c != EOF) {
103
4.29M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.29M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.29M
        tok->col_offset--;
110
4.29M
    }
111
4.48M
}
112
113
static int
114
26.7k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
26.7k
    assert(token != NULL);
116
26.7k
    assert(c == '}' || c == ':' || c == '!');
117
26.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
26.7k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
15.9k
        return 0;
121
15.9k
    }
122
10.7k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
10.7k
    int hash_detected = 0;
126
10.7k
    int in_string = 0;
127
10.7k
    char quote_char = 0;
128
129
1.60M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.59M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.59M
        if (ch == '\\') {
134
29.6k
            i++;
135
29.6k
            continue;
136
29.6k
        }
137
138
        // Handle quotes
139
1.56M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works becase there is an off number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
294k
            if (!in_string) {
148
110k
                in_string = 1;
149
110k
                quote_char = ch;
150
110k
            }
151
184k
            else if (ch == quote_char) {
152
108k
                in_string = 0;
153
108k
            }
154
294k
            continue;
155
294k
        }
156
157
        // Check for # outside strings
158
1.27M
        if (ch == '#' && !in_string) {
159
964
            hash_detected = 1;
160
964
            break;
161
964
        }
162
1.27M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
10.7k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
964
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
964
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
964
        Py_ssize_t i = 0;  // Input position
172
964
        Py_ssize_t j = 0;  // Output position
173
964
        in_string = 0;     // Whether we're in a string
174
964
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
189k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
188k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
188k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
24.3k
                if (!in_string) {
184
8.03k
                    in_string = 1;
185
8.03k
                    quote_char = ch;
186
16.3k
                } else if (ch == quote_char) {
187
7.99k
                    in_string = 0;
188
7.99k
                }
189
24.3k
                result[j++] = ch;
190
24.3k
            }
191
            // Skip comments
192
164k
            else if (ch == '#' && !in_string) {
193
72.4k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
71.5k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
71.2k
                    i++;
196
71.2k
                }
197
1.14k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
282
                    result[j++] = '\n';
199
282
                }
200
1.14k
            }
201
            // Copy other chars
202
163k
            else {
203
163k
                result[j++] = ch;
204
163k
            }
205
188k
            i++;
206
188k
        }
207
208
964
        result[j] = '\0';  // Null-terminate the result string
209
964
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
964
        PyMem_Free(result);
211
9.78k
    } else {
212
9.78k
        res = PyUnicode_DecodeUTF8(
213
9.78k
            tok_mode->last_expr_buffer,
214
9.78k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
9.78k
            NULL
216
9.78k
        );
217
9.78k
    }
218
219
10.7k
    if (!res) {
220
0
        return -1;
221
0
    }
222
10.7k
    token->metadata = res;
223
10.7k
    return 0;
224
10.7k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
70.6k
{
229
70.6k
    assert(tok->cur != NULL);
230
231
70.6k
    Py_ssize_t size = strlen(tok->cur);
232
70.6k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
70.6k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
43.9k
        case '{':
252
43.9k
            if (tok_mode->last_expr_buffer != NULL) {
253
31.0k
                PyMem_Free(tok_mode->last_expr_buffer);
254
31.0k
            }
255
43.9k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
43.9k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
43.9k
            tok_mode->last_expr_size = size;
260
43.9k
            tok_mode->last_expr_end = -1;
261
43.9k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
43.9k
            break;
263
21.0k
        case '}':
264
23.3k
        case '!':
265
23.3k
            tok_mode->last_expr_end = strlen(tok->start);
266
23.3k
            break;
267
3.40k
        case ':':
268
3.40k
            if (tok_mode->last_expr_end == -1) {
269
3.17k
               tok_mode->last_expr_end = strlen(tok->start);
270
3.17k
            }
271
3.40k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
70.6k
    }
275
70.6k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
70.6k
}
280
281
/* Check, without consuming any input, whether the upcoming characters are
   exactly `test` followed by a character that cannot be part of an
   identifier.  Returns 1 if so and 0 otherwise.  Every character read
   while probing is pushed back before returning. */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *cursor = test;
    int c = tok_nextc(tok);

    /* Consume input for as long as it keeps matching `test`. */
    while (*cursor != 0 && c == *cursor) {
        cursor++;
        c = tok_nextc(tok);
    }

    /* A full match additionally requires that the character after it does
       not continue an identifier. */
    int matched = 0;
    if (*cursor == 0) {
        matched = !is_potential_identifier_char(c);
    }

    /* Undo every read, most recent first. */
    tok_backup(tok, c);
    while (cursor != test) {
        tok_backup(tok, *--cursor);
    }
    return matched;
}
303
304
static int
305
82.9k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
82.9k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
82.9k
    int r = 0;
322
82.9k
    if (c == 'a') {
323
951
        r = lookahead(tok, "nd");
324
951
    }
325
81.9k
    else if (c == 'e') {
326
573
        r = lookahead(tok, "lse");
327
573
    }
328
81.4k
    else if (c == 'f') {
329
2.36k
        r = lookahead(tok, "or");
330
2.36k
    }
331
79.0k
    else if (c == 'i') {
332
1.18k
        int c2 = tok_nextc(tok);
333
1.18k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.17k
            r = 1;
335
1.17k
        }
336
1.18k
        tok_backup(tok, c2);
337
1.18k
    }
338
77.8k
    else if (c == 'o') {
339
3.86k
        r = lookahead(tok, "r");
340
3.86k
    }
341
73.9k
    else if (c == 'n') {
342
154
        r = lookahead(tok, "ot");
343
154
    }
344
82.9k
    if (r) {
345
8.97k
        tok_backup(tok, c);
346
8.97k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
8.97k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
8.97k
        tok_nextc(tok);
352
8.97k
    }
353
73.9k
    else /* In future releases, only error will remain. */
354
73.9k
    if (c < 128 && is_potential_identifier_char(c)) {
355
228
        tok_backup(tok, c);
356
228
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
228
        return 0;
358
228
    }
359
82.7k
    return 1;
360
82.9k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.0k
{
366
11.0k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.0k
    PyObject *s;
370
11.0k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.0k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.0k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
11.0k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.0k
    assert(invalid >= 0);
384
11.0k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.0k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
516
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
516
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
330
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
330
            if (s != NULL) {
391
330
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
330
            }
393
330
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
330
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
330
        }
399
516
        Py_DECREF(s);
400
516
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
286
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
286
        }
403
230
        else {
404
230
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
230
        }
406
516
        return 0;
407
516
    }
408
10.5k
    Py_DECREF(s);
409
10.5k
    return 1;
410
11.0k
}
411
412
/* Consume the rest of a run of decimal digits in which single underscores
   may separate digit groups.  Returns the first character after the run
   (already consumed by tok_nextc), or 0 after raising a syntax error when
   an underscore is not followed by a digit. */
static int
tok_decimal_tail(struct tok_state *tok)
{
    for (;;) {
        /* Read one character, then keep reading past any digits. */
        int c = tok_nextc(tok);
        while (Py_ISDIGIT(c)) {
            c = tok_nextc(tok);
        }
        if (c != '_') {
            return c;
        }
        /* An underscore must be followed by at least one more digit. */
        c = tok_nextc(tok);
        if (!Py_ISDIGIT(c)) {
            tok_backup(tok, c);
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
            return 0;
        }
    }
}
433
434
static inline int
435
849
tok_continuation_line(struct tok_state *tok) {
436
849
    int c = tok_nextc(tok);
437
849
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
849
    if (c != '\n') {
441
68
        tok->done = E_LINECONT;
442
68
        return -1;
443
68
    }
444
781
    c = tok_nextc(tok);
445
781
    if (c == EOF) {
446
49
        tok->done = E_EOF;
447
49
        tok->cur = tok->inp;
448
49
        return -1;
449
732
    } else {
450
732
        tok_backup(tok, c);
451
732
    }
452
732
    return c;
453
781
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
20.8k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
20.8k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
20.8k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
20.8k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
20.8k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
20.8k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
20.8k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
20.8k
    if (saw_b && saw_f) {
485
2
        RETURN_SYNTAX_ERROR("b", "f");
486
2
    }
487
20.8k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
20.8k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
20.8k
#undef RETURN_SYNTAX_ERROR
496
497
20.8k
    return 0;
498
20.8k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.08M
{
503
2.08M
    int c;
504
2.08M
    int blankline, nonascii;
505
506
2.08M
    const char *p_start = NULL;
507
2.08M
    const char *p_end = NULL;
508
2.18M
  nextline:
509
2.18M
    tok->start = NULL;
510
2.18M
    tok->starting_col_offset = -1;
511
2.18M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.18M
    if (tok->atbol) {
516
358k
        int col = 0;
517
358k
        int altcol = 0;
518
358k
        tok->atbol = 0;
519
358k
        int cont_line_col = 0;
520
717k
        for (;;) {
521
717k
            c = tok_nextc(tok);
522
717k
            if (c == ' ') {
523
357k
                col++, altcol++;
524
357k
            }
525
360k
            else if (c == '\t') {
526
697
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
697
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
697
            }
529
359k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
629
                col = altcol = 0; /* For Emacs users */
531
629
            }
532
358k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
505
                cont_line_col = cont_line_col ? cont_line_col : col;
538
505
                if ((c = tok_continuation_line(tok)) == -1) {
539
26
                    return MAKE_TOKEN(ERRORTOKEN);
540
26
                }
541
505
            }
542
358k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
358k
            else {
546
358k
                break;
547
358k
            }
548
717k
        }
549
358k
        tok_backup(tok, c);
550
358k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
58.7k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
58.7k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
58.7k
            else {
566
58.7k
                blankline = 1; /* Ignore completely */
567
58.7k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
58.7k
        }
571
358k
        if (!blankline && tok->level == 0) {
572
264k
            col = cont_line_col ? cont_line_col : col;
573
264k
            altcol = cont_line_col ? cont_line_col : altcol;
574
264k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
242k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
242k
            }
580
21.6k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.0k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.0k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.0k
                tok->pendin++;
591
12.0k
                tok->indstack[++tok->indent] = col;
592
12.0k
                tok->altindstack[tok->indent] = altcol;
593
12.0k
            }
594
9.56k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
21.0k
                while (tok->indent > 0 &&
597
17.8k
                    col < tok->indstack[tok->indent]) {
598
11.5k
                    tok->pendin--;
599
11.5k
                    tok->indent--;
600
11.5k
                }
601
9.56k
                if (col != tok->indstack[tok->indent]) {
602
9
                    tok->done = E_DEDENT;
603
9
                    tok->cur = tok->inp;
604
9
                    return MAKE_TOKEN(ERRORTOKEN);
605
9
                }
606
9.55k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.55k
            }
610
264k
        }
611
358k
    }
612
613
2.18M
    tok->start = tok->cur;
614
2.18M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.18M
    if (tok->pendin != 0) {
618
23.5k
        if (tok->pendin < 0) {
619
11.5k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
11.5k
            tok->pendin++;
624
11.5k
            return MAKE_TOKEN(DEDENT);
625
11.5k
        }
626
12.0k
        else {
627
12.0k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.0k
            tok->pendin--;
632
12.0k
            return MAKE_TOKEN(INDENT);
633
12.0k
        }
634
23.5k
    }
635
636
    /* Peek ahead at the next character */
637
2.15M
    c = tok_nextc(tok);
638
2.15M
    tok_backup(tok, c);
639
640
2.15M
 again:
641
2.15M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.59M
    do {
644
2.59M
        c = tok_nextc(tok);
645
2.59M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.15M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.15M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.15M
    if (c == '#') {
653
654
33.1k
        const char* p = NULL;
655
33.1k
        const char *prefix, *type_start;
656
33.1k
        int current_starting_col_offset;
657
658
1.06M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.02M
            c = tok_nextc(tok);
660
1.02M
        }
661
662
33.1k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
33.1k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
33.1k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
33.1k
    }
728
729
2.15M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.15M
    if (c == EOF) {
735
97.0k
        if (tok->level) {
736
3.89k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.89k
        }
738
93.1k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
97.0k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.06M
    nonascii = 0;
743
2.06M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
686k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
874k
        while (1) {
747
874k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
18.6k
                saw_b = 1;
749
18.6k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
856k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
88.0k
                saw_u = 1;
754
88.0k
            }
755
            /* ur"" and ru"" are not supported */
756
767k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
26.4k
                saw_r = 1;
758
26.4k
            }
759
741k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
42.8k
                saw_f = 1;
761
42.8k
            }
762
698k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
32.8k
                saw_t = 1;
764
32.8k
            }
765
665k
            else {
766
665k
                break;
767
665k
            }
768
208k
            c = tok_nextc(tok);
769
208k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
20.8k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
20.8k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
20.8k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
20.8k
                if (saw_f || saw_t) {
779
17.6k
                    goto f_string_quote;
780
17.6k
                }
781
3.23k
                goto letter_quote;
782
20.8k
            }
783
208k
        }
784
3.11M
        while (is_potential_identifier_char(c)) {
785
2.45M
            if (c >= 128) {
786
124k
                nonascii = 1;
787
124k
            }
788
2.45M
            c = tok_nextc(tok);
789
2.45M
        }
790
665k
        tok_backup(tok, c);
791
665k
        if (nonascii && !verify_identifier(tok)) {
792
516
            return MAKE_TOKEN(ERRORTOKEN);
793
516
        }
794
795
665k
        p_start = tok->start;
796
665k
        p_end = tok->cur;
797
798
665k
        return MAKE_TOKEN(NAME);
799
665k
    }
800
801
1.37M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.37M
    if (c == '\n') {
807
258k
        tok->atbol = 1;
808
258k
        if (blankline || tok->level > 0) {
809
94.2k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
94.1k
            goto nextline;
818
94.2k
        }
819
164k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
164k
        p_start = tok->start;
826
164k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
164k
        tok->cont_line = 0;
828
164k
        return MAKE_TOKEN(NEWLINE);
829
164k
    }
830
831
    /* Period or number starting with period? */
832
1.11M
    if (c == '.') {
833
29.1k
        c = tok_nextc(tok);
834
29.1k
        if (Py_ISDIGIT(c)) {
835
3.36k
            goto fraction;
836
25.8k
        } else if (c == '.') {
837
2.26k
            c = tok_nextc(tok);
838
2.26k
            if (c == '.') {
839
1.60k
                p_start = tok->start;
840
1.60k
                p_end = tok->cur;
841
1.60k
                return MAKE_TOKEN(ELLIPSIS);
842
1.60k
            }
843
665
            else {
844
665
                tok_backup(tok, c);
845
665
            }
846
665
            tok_backup(tok, '.');
847
665
        }
848
23.5k
        else {
849
23.5k
            tok_backup(tok, c);
850
23.5k
        }
851
24.2k
        p_start = tok->start;
852
24.2k
        p_end = tok->cur;
853
24.2k
        return MAKE_TOKEN(DOT);
854
29.1k
    }
855
856
    /* Number */
857
1.08M
    if (Py_ISDIGIT(c)) {
858
79.7k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
29.2k
            c = tok_nextc(tok);
861
29.2k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
14.4k
                c = tok_nextc(tok);
864
16.1k
                do {
865
16.1k
                    if (c == '_') {
866
1.72k
                        c = tok_nextc(tok);
867
1.72k
                    }
868
16.1k
                    if (!Py_ISXDIGIT(c)) {
869
15
                        tok_backup(tok, c);
870
15
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
15
                    }
872
76.2k
                    do {
873
76.2k
                        c = tok_nextc(tok);
874
76.2k
                    } while (Py_ISXDIGIT(c));
875
16.1k
                } while (c == '_');
876
14.4k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
14.4k
            }
880
14.7k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
660
                c = tok_nextc(tok);
883
1.20k
                do {
884
1.20k
                    if (c == '_') {
885
543
                        c = tok_nextc(tok);
886
543
                    }
887
1.20k
                    if (c < '0' || c >= '8') {
888
20
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
19
                        else {
893
19
                            tok_backup(tok, c);
894
19
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
19
                        }
896
20
                    }
897
5.91k
                    do {
898
5.91k
                        c = tok_nextc(tok);
899
5.91k
                    } while ('0' <= c && c < '8');
900
1.18k
                } while (c == '_');
901
640
                if (Py_ISDIGIT(c)) {
902
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
2
                            "invalid digit '%c' in octal literal", c));
904
2
                }
905
638
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
638
            }
909
14.1k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
399
                c = tok_nextc(tok);
912
648
                do {
913
648
                    if (c == '_') {
914
255
                        c = tok_nextc(tok);
915
255
                    }
916
648
                    if (c != '0' && c != '1') {
917
18
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
17
                        else {
921
17
                            tok_backup(tok, c);
922
17
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
17
                        }
924
18
                    }
925
2.84k
                    do {
926
2.84k
                        c = tok_nextc(tok);
927
2.84k
                    } while (c == '0' || c == '1');
928
630
                } while (c == '_');
929
381
                if (Py_ISDIGIT(c)) {
930
4
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
4
                }
932
377
                if (!verify_end_of_number(tok, c, "binary")) {
933
2
                    return MAKE_TOKEN(ERRORTOKEN);
934
2
                }
935
377
            }
936
13.7k
            else {
937
13.7k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
15.7k
                while (1) {
941
15.7k
                    if (c == '_') {
942
214
                        c = tok_nextc(tok);
943
214
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
214
                    }
948
15.7k
                    if (c != '0') {
949
13.7k
                        break;
950
13.7k
                    }
951
2.03k
                    c = tok_nextc(tok);
952
2.03k
                }
953
13.7k
                char* zeros_end = tok->cur;
954
13.7k
                if (Py_ISDIGIT(c)) {
955
367
                    nonzero = 1;
956
367
                    c = tok_decimal_tail(tok);
957
367
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
367
                }
961
13.7k
                if (c == '.') {
962
704
                    c = tok_nextc(tok);
963
704
                    goto fraction;
964
704
                }
965
13.0k
                else if (c == 'e' || c == 'E') {
966
994
                    goto exponent;
967
994
                }
968
12.0k
                else if (c == 'j' || c == 'J') {
969
605
                    goto imaginary;
970
605
                }
971
11.4k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
21
                    tok_backup(tok, c);
974
21
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
21
                            tok, (int)(tok->start + 1 - tok->line_start),
976
21
                            (int)(zeros_end - tok->line_start),
977
21
                            "leading zeros in decimal integer "
978
21
                            "literals are not permitted; "
979
21
                            "use an 0o prefix for octal integers"));
980
21
                }
981
11.4k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
32
                    return MAKE_TOKEN(ERRORTOKEN);
983
32
                }
984
11.4k
            }
985
29.2k
        }
986
50.5k
        else {
987
            /* Decimal */
988
50.5k
            c = tok_decimal_tail(tok);
989
50.5k
            if (c == 0) {
990
13
                return MAKE_TOKEN(ERRORTOKEN);
991
13
            }
992
50.4k
            {
993
                /* Accept floating-point numbers. */
994
50.4k
                if (c == '.') {
995
3.27k
                    c = tok_nextc(tok);
996
7.34k
        fraction:
997
                    /* Fraction */
998
7.34k
                    if (Py_ISDIGIT(c)) {
999
5.65k
                        c = tok_decimal_tail(tok);
1000
5.65k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
5.65k
                    }
1004
7.34k
                }
1005
54.5k
                if (c == 'e' || c == 'E') {
1006
7.73k
                    int e;
1007
8.72k
                  exponent:
1008
8.72k
                    e = c;
1009
                    /* Exponent part */
1010
8.72k
                    c = tok_nextc(tok);
1011
8.72k
                    if (c == '+' || c == '-') {
1012
3.87k
                        c = tok_nextc(tok);
1013
3.87k
                        if (!Py_ISDIGIT(c)) {
1014
16
                            tok_backup(tok, c);
1015
16
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
16
                        }
1017
4.84k
                    } else if (!Py_ISDIGIT(c)) {
1018
566
                        tok_backup(tok, c);
1019
566
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
40
                            return MAKE_TOKEN(ERRORTOKEN);
1021
40
                        }
1022
526
                        tok_backup(tok, e);
1023
526
                        p_start = tok->start;
1024
526
                        p_end = tok->cur;
1025
526
                        return MAKE_TOKEN(NUMBER);
1026
566
                    }
1027
8.14k
                    c = tok_decimal_tail(tok);
1028
8.14k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
8.14k
                }
1032
54.9k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.73k
        imaginary:
1035
3.73k
                    c = tok_nextc(tok);
1036
3.73k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
13
                        return MAKE_TOKEN(ERRORTOKEN);
1038
13
                    }
1039
3.73k
                }
1040
51.8k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
136
                    return MAKE_TOKEN(ERRORTOKEN);
1042
136
                }
1043
54.9k
            }
1044
54.9k
        }
1045
82.2k
        tok_backup(tok, c);
1046
82.2k
        p_start = tok->start;
1047
82.2k
        p_end = tok->cur;
1048
82.2k
        return MAKE_TOKEN(NUMBER);
1049
79.7k
    }
1050
1051
1.02M
  f_string_quote:
1052
1.02M
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
17.6k
        && (c == '\'' || c == '"'))) {
1054
1055
17.6k
        int quote = c;
1056
17.6k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
17.6k
        tok->first_lineno = tok->lineno;
1063
17.6k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
17.6k
        int after_quote = tok_nextc(tok);
1067
17.6k
        if (after_quote == quote) {
1068
2.79k
            int after_after_quote = tok_nextc(tok);
1069
2.79k
            if (after_after_quote == quote) {
1070
861
                quote_size = 3;
1071
861
            }
1072
1.93k
            else {
1073
                // TODO: Check this
1074
1.93k
                tok_backup(tok, after_after_quote);
1075
1.93k
                tok_backup(tok, after_quote);
1076
1.93k
            }
1077
2.79k
        }
1078
17.6k
        if (after_quote != quote) {
1079
14.8k
            tok_backup(tok, after_quote);
1080
14.8k
        }
1081
1082
1083
17.6k
        p_start = tok->start;
1084
17.6k
        p_end = tok->cur;
1085
17.6k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
17.6k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
17.6k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
17.6k
        the_current_tok->quote = quote;
1091
17.6k
        the_current_tok->quote_size = quote_size;
1092
17.6k
        the_current_tok->start = tok->start;
1093
17.6k
        the_current_tok->multi_line_start = tok->line_start;
1094
17.6k
        the_current_tok->first_line = tok->lineno;
1095
17.6k
        the_current_tok->start_offset = -1;
1096
17.6k
        the_current_tok->multi_line_start_offset = -1;
1097
17.6k
        the_current_tok->last_expr_buffer = NULL;
1098
17.6k
        the_current_tok->last_expr_size = 0;
1099
17.6k
        the_current_tok->last_expr_end = -1;
1100
17.6k
        the_current_tok->in_format_spec = 0;
1101
17.6k
        the_current_tok->in_debug = 0;
1102
1103
17.6k
        enum string_kind_t string_kind = FSTRING;
1104
17.6k
        switch (*tok->start) {
1105
996
            case 'T':
1106
4.90k
            case 't':
1107
4.90k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.90k
                string_kind = TSTRING;
1109
4.90k
                break;
1110
1.54k
            case 'F':
1111
12.1k
            case 'f':
1112
12.1k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
12.1k
                break;
1114
288
            case 'R':
1115
541
            case 'r':
1116
541
                the_current_tok->raw = 1;
1117
541
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
48
                    string_kind = TSTRING;
1119
48
                }
1120
541
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
17.6k
        }
1124
1125
17.6k
        the_current_tok->string_kind = string_kind;
1126
17.6k
        the_current_tok->curly_bracket_depth = 0;
1127
17.6k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
17.6k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
17.6k
    }
1130
1131
1.01M
  letter_quote:
1132
    /* String */
1133
1.01M
    if (c == '\'' || c == '"') {
1134
38.4k
        int quote = c;
1135
38.4k
        int quote_size = 1;             /* 1 or 3 */
1136
38.4k
        int end_quote_size = 0;
1137
38.4k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
38.4k
        tok->first_lineno = tok->lineno;
1144
38.4k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
38.4k
        c = tok_nextc(tok);
1148
38.4k
        if (c == quote) {
1149
7.27k
            c = tok_nextc(tok);
1150
7.27k
            if (c == quote) {
1151
1.62k
                quote_size = 3;
1152
1.62k
            }
1153
5.65k
            else {
1154
5.65k
                end_quote_size = 1;     /* empty string found */
1155
5.65k
            }
1156
7.27k
        }
1157
38.4k
        if (c != quote) {
1158
36.8k
            tok_backup(tok, c);
1159
36.8k
        }
1160
1161
        /* Get rest of string */
1162
544k
        while (end_quote_size != quote_size) {
1163
506k
            c = tok_nextc(tok);
1164
506k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
506k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
506k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
400
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
400
                tok->cur = (char *)tok->start;
1176
400
                tok->cur++;
1177
400
                tok->line_start = tok->multi_line_start;
1178
400
                int start = tok->lineno;
1179
400
                tok->lineno = tok->first_lineno;
1180
1181
400
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * does the initial quote matches with f-strings quotes
1185
                     * and if it is, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
42
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
42
                    if (the_current_tok->quote == quote &&
1189
27
                        the_current_tok->quote_size == quote_size) {
1190
25
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
25
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
25
                    }
1193
42
                }
1194
1195
375
                if (quote_size == 3) {
1196
32
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
32
                                     " (detected at line %d)", start);
1198
32
                    if (c != '\n') {
1199
32
                        tok->done = E_EOFS;
1200
32
                    }
1201
32
                    return MAKE_TOKEN(ERRORTOKEN);
1202
32
                }
1203
343
                else {
1204
343
                    if (has_escaped_quote) {
1205
8
                        _PyTokenizer_syntaxerror(
1206
8
                            tok,
1207
8
                            "unterminated string literal (detected at line %d); "
1208
8
                            "perhaps you escaped the end quote?",
1209
8
                            start
1210
8
                        );
1211
335
                    } else {
1212
335
                        _PyTokenizer_syntaxerror(
1213
335
                            tok, "unterminated string literal (detected at line %d)", start
1214
335
                        );
1215
335
                    }
1216
343
                    if (c != '\n') {
1217
6
                        tok->done = E_EOLS;
1218
6
                    }
1219
343
                    return MAKE_TOKEN(ERRORTOKEN);
1220
343
                }
1221
375
            }
1222
505k
            if (c == quote) {
1223
37.0k
                end_quote_size += 1;
1224
37.0k
            }
1225
468k
            else {
1226
468k
                end_quote_size = 0;
1227
468k
                if (c == '\\') {
1228
22.9k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
22.9k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
678
                        has_escaped_quote = 1;
1231
678
                    }
1232
22.9k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
22.9k
                }
1236
468k
            }
1237
505k
        }
1238
1239
38.0k
        p_start = tok->start;
1240
38.0k
        p_end = tok->cur;
1241
38.0k
        return MAKE_TOKEN(STRING);
1242
38.4k
    }
1243
1244
    /* Line continuation */
1245
971k
    if (c == '\\') {
1246
344
        if ((c = tok_continuation_line(tok)) == -1) {
1247
91
            return MAKE_TOKEN(ERRORTOKEN);
1248
91
        }
1249
253
        tok->cont_line = 1;
1250
253
        goto again; /* Read next line */
1251
344
    }
1252
1253
    /* Punctuation character */
1254
971k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
971k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
61.0k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
61.0k
        int in_format_spec = current_tok->in_format_spec;
1261
61.0k
         int cursor_in_format_with_debug =
1262
61.0k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
61.0k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
61.0k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
61.0k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
61.0k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.98k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.98k
            current_tok->in_format_spec = 1;
1274
4.98k
            p_start = tok->start;
1275
4.98k
            p_end = tok->cur;
1276
4.98k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.98k
        }
1278
61.0k
    }
1279
1280
    /* Check for two-character token */
1281
966k
    {
1282
966k
        int c2 = tok_nextc(tok);
1283
966k
        int current_token = _PyToken_TwoChars(c, c2);
1284
966k
        if (current_token != OP) {
1285
23.1k
            int c3 = tok_nextc(tok);
1286
23.1k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
23.1k
            if (current_token3 != OP) {
1288
1.19k
                current_token = current_token3;
1289
1.19k
            }
1290
21.9k
            else {
1291
21.9k
                tok_backup(tok, c3);
1292
21.9k
            }
1293
23.1k
            p_start = tok->start;
1294
23.1k
            p_end = tok->cur;
1295
23.1k
            return MAKE_TOKEN(current_token);
1296
23.1k
        }
1297
943k
        tok_backup(tok, c2);
1298
943k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
69.7k
    case '(':
1303
107k
    case '[':
1304
153k
    case '{':
1305
153k
        if (tok->level >= MAXLEVEL) {
1306
19
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
19
        }
1308
153k
        tok->parenstack[tok->level] = c;
1309
153k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
153k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
153k
        tok->level++;
1312
153k
        if (INSIDE_FSTRING(tok)) {
1313
34.8k
            current_tok->curly_bracket_depth++;
1314
34.8k
        }
1315
153k
        break;
1316
44.3k
    case ')':
1317
51.4k
    case ']':
1318
79.1k
    case '}':
1319
79.1k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
43
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
43
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
43
        }
1323
79.1k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
222
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
222
        }
1326
78.9k
        if (tok->level > 0) {
1327
78.9k
            tok->level--;
1328
78.9k
            int opening = tok->parenstack[tok->level];
1329
78.9k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
34.6k
                                            (opening == '[' && c == ']') ||
1331
27.6k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
40
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
2
                    assert(current_tok->curly_bracket_depth >= 0);
1339
2
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
2
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
1
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
1
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
1
                    }
1344
2
                }
1345
39
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
5
                            "closing parenthesis '%c' does not match "
1348
5
                            "opening parenthesis '%c' on line %d",
1349
5
                            c, opening, tok->parenlinenostack[tok->level]));
1350
5
                }
1351
34
                else {
1352
34
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
34
                            "closing parenthesis '%c' does not match "
1354
34
                            "opening parenthesis '%c'",
1355
34
                            c, opening));
1356
34
                }
1357
39
            }
1358
78.9k
        }
1359
1360
78.8k
        if (INSIDE_FSTRING(tok)) {
1361
26.6k
            current_tok->curly_bracket_depth--;
1362
26.6k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
26.6k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
23.1k
                current_tok->curly_bracket_expr_start_depth--;
1368
23.1k
                current_tok->kind = TOK_FSTRING_MODE;
1369
23.1k
                current_tok->in_format_spec = 0;
1370
23.1k
                current_tok->in_debug = 0;
1371
23.1k
            }
1372
26.6k
        }
1373
78.8k
        break;
1374
710k
    default:
1375
710k
        break;
1376
943k
    }
1377
1378
943k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
420
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
420
    }
1381
1382
942k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
6.09k
        current_tok->in_debug = 1;
1384
6.09k
    }
1385
1386
    /* Punctuation character */
1387
942k
    p_start = tok->start;
1388
942k
    p_end = tok->cur;
1389
942k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
943k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
55.5k
{
1395
55.5k
    const char *p_start = NULL;
1396
55.5k
    const char *p_end = NULL;
1397
55.5k
    int end_quote_size = 0;
1398
55.5k
    int unicode_escape = 0;
1399
1400
55.5k
    tok->start = tok->cur;
1401
55.5k
    tok->first_lineno = tok->lineno;
1402
55.5k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
55.5k
    int start_char = tok_nextc(tok);
1407
55.5k
    if (start_char == '{') {
1408
16.1k
        int peek1 = tok_nextc(tok);
1409
16.1k
        tok_backup(tok, peek1);
1410
16.1k
        tok_backup(tok, start_char);
1411
16.1k
        if (peek1 != '{') {
1412
13.3k
            current_tok->curly_bracket_expr_start_depth++;
1413
13.3k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
2
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
2
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
2
            }
1417
13.3k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
13.3k
            return tok_get_normal_mode(tok, current_tok, token);
1419
13.3k
        }
1420
16.1k
    }
1421
39.3k
    else {
1422
39.3k
        tok_backup(tok, start_char);
1423
39.3k
    }
1424
1425
    // Check if we are at the end of the string
1426
59.7k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
46.5k
        int quote = tok_nextc(tok);
1428
46.5k
        if (quote != current_tok->quote) {
1429
28.9k
            tok_backup(tok, quote);
1430
28.9k
            goto f_string_middle;
1431
28.9k
        }
1432
46.5k
    }
1433
1434
13.2k
    if (current_tok->last_expr_buffer != NULL) {
1435
8.66k
        PyMem_Free(current_tok->last_expr_buffer);
1436
8.66k
        current_tok->last_expr_buffer = NULL;
1437
8.66k
        current_tok->last_expr_size = 0;
1438
8.66k
        current_tok->last_expr_end = -1;
1439
8.66k
    }
1440
1441
13.2k
    p_start = tok->start;
1442
13.2k
    p_end = tok->cur;
1443
13.2k
    tok->tok_mode_stack_index--;
1444
13.2k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
1446
28.9k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
28.9k
    tok->multi_line_start = tok->line_start;
1451
230k
    while (end_quote_size != current_tok->quote_size) {
1452
225k
        int c = tok_nextc(tok);
1453
225k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
225k
        int in_format_spec = (
1457
225k
                current_tok->in_format_spec
1458
13.3k
                &&
1459
13.3k
                INSIDE_FSTRING_EXPR(current_tok)
1460
225k
        );
1461
1462
225k
       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
370
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
370
            if (in_format_spec && c == '\n') {
1471
44
                if (current_tok->quote_size == 1) {
1472
44
                    return MAKE_TOKEN(
1473
44
                        _PyTokenizer_syntaxerror(
1474
44
                            tok,
1475
44
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
44
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
44
                        )
1478
44
                    );
1479
44
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
44
            }
1487
1488
370
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
1492
326
            tok->cur = (char *)current_tok->start;
1493
326
            tok->cur++;
1494
326
            tok->line_start = current_tok->multi_line_start;
1495
326
            int start = tok->lineno;
1496
1497
326
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
326
            tok->lineno = the_current_tok->first_line;
1499
1500
326
            if (current_tok->quote_size == 3) {
1501
26
                _PyTokenizer_syntaxerror(tok,
1502
26
                                    "unterminated triple-quoted %c-string literal"
1503
26
                                    " (detected at line %d)",
1504
26
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
26
                if (c != '\n') {
1506
26
                    tok->done = E_EOFS;
1507
26
                }
1508
26
                return MAKE_TOKEN(ERRORTOKEN);
1509
26
            }
1510
300
            else {
1511
300
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
300
                                    "unterminated %c-string literal (detected at"
1513
300
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
300
            }
1515
326
        }
1516
1517
225k
        if (c == current_tok->quote) {
1518
9.90k
            end_quote_size += 1;
1519
9.90k
            continue;
1520
215k
        } else {
1521
215k
            end_quote_size = 0;
1522
215k
        }
1523
1524
215k
        if (c == '{') {
1525
18.2k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
18.2k
            int peek = tok_nextc(tok);
1529
18.2k
            if (peek != '{' || in_format_spec) {
1530
15.0k
                tok_backup(tok, peek);
1531
15.0k
                tok_backup(tok, c);
1532
15.0k
                current_tok->curly_bracket_expr_start_depth++;
1533
15.0k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
6
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
6
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
6
                }
1537
15.0k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
15.0k
                current_tok->in_format_spec = 0;
1539
15.0k
                p_start = tok->start;
1540
15.0k
                p_end = tok->cur;
1541
15.0k
            } else {
1542
3.21k
                p_start = tok->start;
1543
3.21k
                p_end = tok->cur - 1;
1544
3.21k
            }
1545
18.2k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
196k
        } else if (c == '}') {
1547
5.02k
            if (unicode_escape) {
1548
200
                p_start = tok->start;
1549
200
                p_end = tok->cur;
1550
200
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
200
            }
1552
4.82k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass it here.
1558
4.82k
            int cursor = current_tok->curly_bracket_depth;
1559
4.82k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.12k
                p_start = tok->start;
1561
1.12k
                p_end = tok->cur - 1;
1562
3.70k
            } else {
1563
3.70k
                tok_backup(tok, peek);
1564
3.70k
                tok_backup(tok, c);
1565
3.70k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
3.70k
                current_tok->in_format_spec = 0;
1567
3.70k
                p_start = tok->start;
1568
3.70k
                p_end = tok->cur;
1569
3.70k
            }
1570
4.82k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
191k
        } else if (c == '\\') {
1572
5.43k
            int peek = tok_nextc(tok);
1573
5.43k
            if (peek == '\r') {
1574
0
                peek = tok_nextc(tok);
1575
0
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore and return the control back
1578
            // to the loop for the next iteration.
1579
5.43k
            if (peek == '{' || peek == '}') {
1580
891
                if (!current_tok->raw) {
1581
825
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
0
                        return MAKE_TOKEN(ERRORTOKEN);
1583
0
                    }
1584
825
                }
1585
891
                tok_backup(tok, peek);
1586
891
                continue;
1587
891
            }
1588
1589
4.54k
            if (!current_tok->raw) {
1590
4.31k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
410
                    peek = tok_nextc(tok);
1593
410
                    if (peek == '{') {
1594
206
                        unicode_escape = 1;
1595
206
                    } else {
1596
204
                        tok_backup(tok, peek);
1597
204
                    }
1598
410
                }
1599
4.31k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
4.54k
        }
1603
215k
    }
1604
1605
    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1607
11.6k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
6.40k
        tok_backup(tok, current_tok->quote);
1609
6.40k
    }
1610
5.26k
    p_start = tok->start;
1611
5.26k
    p_end = tok->cur;
1612
5.26k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
28.9k
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.12M
{
1618
2.12M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.12M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.07M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.07M
    } else {
1622
55.5k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
55.5k
    }
1624
2.12M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.12M
{
1629
2.12M
    int result = tok_get(tok, token);
1630
2.12M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.12M
    return result;
1635
2.12M
}