Coverage Report

Created: 2025-07-18 06:09

/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.70k
#define ALTTABSIZE 1
11
12
1.67M
#define is_potential_identifier_start(c) (\
13
1.67M
              (c >= 'a' && c <= 'z')\
14
1.67M
               || (c >= 'A' && c <= 'Z')\
15
1.67M
               || c == '_'\
16
1.67M
               || (c >= 128))
17
18
2.23M
#define is_potential_identifier_char(c) (\
19
2.23M
              (c >= 'a' && c <= 'z')\
20
2.23M
               || (c >= 'A' && c <= 'Z')\
21
2.23M
               || (c >= '0' && c <= '9')\
22
2.23M
               || c == '_'\
23
2.23M
               || (c >= 128))
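
These two macros classify raw bytes with plain range checks, so the hot path never consults Unicode tables: any byte >= 128 is provisionally accepted as identifier material and only validated later by verify_identifier(). A minimal standalone sketch of how they delimit a NAME token, with the macros copied verbatim from above:

    #include <stdio.h>

    #define is_potential_identifier_start(c) (\
                  (c >= 'a' && c <= 'z')\
                   || (c >= 'A' && c <= 'Z')\
                   || c == '_'\
                   || (c >= 128))

    #define is_potential_identifier_char(c) (\
                  (c >= 'a' && c <= 'z')\
                   || (c >= 'A' && c <= 'Z')\
                   || (c >= '0' && c <= '9')\
                   || c == '_'\
                   || (c >= 128))

    int main(void) {
        const unsigned char *s = (const unsigned char *)"_name42 = 1";
        if (is_potential_identifier_start(s[0])) {
            size_t n = 1;
            while (is_potential_identifier_char(s[n])) {
                n++;
            }
            printf("NAME of length %zu: %.*s\n", n, (int)n, (const char *)s);  /* _name42 */
        }
        return 0;
    }
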
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.78M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
14.7k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
28
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.68M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
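
The "zero or more spaces or tabs" rule is applied later, in the comment-scanning path of tok_get_normal_mode(). A minimal standalone sketch of that elastic matching; matches_elastic_prefix() is a hypothetical helper, not part of the lexer:

    #include <stdio.h>

    /* Returns 1 if `line` matches `prefix`, where each space in `prefix`
     * stands for zero or more spaces or tabs in `line`. */
    static int matches_elastic_prefix(const char *line, const char *prefix) {
        while (*prefix) {
            if (*prefix == ' ') {
                while (*line == ' ' || *line == '\t') {
                    line++;
                }
            }
            else if (*prefix == *line) {
                line++;
            }
            else {
                return 0;
            }
            prefix++;
        }
        return 1;
    }

    int main(void) {
        printf("%d\n", matches_elastic_prefix("#  type:\tint", "# type: "));  /* 1 */
        printf("%d\n", matches_elastic_prefix("# typo: int", "# type: "));    /* 0 */
        return 0;
    }
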
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
211k
{
55
211k
    return memchr(str, 0, size) != NULL;
56
211k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
10.3M
{
62
10.3M
    int rc;
63
10.5M
    for (;;) {
64
10.5M
        if (tok->cur != tok->inp) {
65
10.2M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
10.2M
            tok->col_offset++;
70
10.2M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
10.2M
        }
72
257k
        if (tok->done != E_OK) {
73
30.0k
            return EOF;
74
30.0k
        }
75
227k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
227k
        if (!rc) {
84
15.1k
            tok->cur = tok->inp;
85
15.1k
            return EOF;
86
15.1k
        }
87
211k
        tok->line_start = tok->cur;
88
89
211k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
211k
    }
95
10.3M
    Py_UNREACHABLE();
96
10.3M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.53M
{
102
3.53M
    if (c != EOF) {
103
3.50M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.50M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.50M
        tok->col_offset--;
110
3.50M
    }
111
3.53M
}
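
Together, tok_nextc() and tok_backup() form a one-character pushback reader: backing up just rewinds tok->cur, and the fatal checks guarantee a caller only pushes back the character it actually read. A self-contained sketch of the same pattern over a fixed buffer (the underflow/refill path is omitted here):

    #include <assert.h>
    #include <stdio.h>

    struct reader {
        const char *cur;   /* next character to hand out */
        const char *inp;   /* end of buffered input */
    };

    static int next_char(struct reader *r) {
        if (r->cur != r->inp) {
            return (unsigned char)*r->cur++;   /* fast path */
        }
        return EOF;                            /* no underflow source in this sketch */
    }

    static void back_up(struct reader *r, int c) {
        if (c != EOF) {
            r->cur--;
            assert((unsigned char)*r->cur == (unsigned char)c);  /* same invariant as tok_backup */
        }
    }

    int main(void) {
        const char *text = "ab";
        struct reader r = { text, text + 2 };
        int a = next_char(&r);   /* 'a' */
        back_up(&r, a);          /* rewind one character */
        a = next_char(&r);       /* 'a' again */
        int b = next_char(&r);   /* 'b' */
        printf("%c%c\n", a, b);  /* prints "ab" */
        return 0;
    }
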
112
113
static int
114
18.5k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
18.5k
    assert(token != NULL);
116
18.5k
    assert(c == '}' || c == ':' || c == '!');
117
18.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
18.5k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
11.1k
        return 0;
121
11.1k
    }
122
7.44k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
7.44k
    int hash_detected = 0;
126
7.44k
    int in_string = 0;
127
7.44k
    char quote_char = 0;
128
129
1.86M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.85M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.85M
        if (ch == '\\') {
134
44.6k
            i++;
135
44.6k
            continue;
136
44.6k
        }
137
138
        // Handle quotes
139
1.80M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
255k
            if (!in_string) {
148
87.1k
                in_string = 1;
149
87.1k
                quote_char = ch;
150
87.1k
            }
151
168k
            else if (ch == quote_char) {
152
86.0k
                in_string = 0;
153
86.0k
            }
154
255k
            continue;
155
255k
        }
156
157
        // Check for # outside strings
158
1.55M
        if (ch == '#' && !in_string) {
159
188
            hash_detected = 1;
160
188
            break;
161
188
        }
162
1.55M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
7.44k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
188
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
188
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
188
        Py_ssize_t i = 0;  // Input position
172
188
        Py_ssize_t j = 0;  // Output position
173
188
        in_string = 0;     // Whether we're in a string
174
188
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
30.4k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
30.2k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
30.2k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
4.03k
                if (!in_string) {
184
1.46k
                    in_string = 1;
185
1.46k
                    quote_char = ch;
186
2.57k
                } else if (ch == quote_char) {
187
1.44k
                    in_string = 0;
188
1.44k
                }
189
4.03k
                result[j++] = ch;
190
4.03k
            }
191
            // Skip comments
192
26.2k
            else if (ch == '#' && !in_string) {
193
7.27k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
7.27k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
7.02k
                    i++;
196
7.02k
                }
197
250
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
126
                    result[j++] = '\n';
199
126
                }
200
250
            }
201
            // Copy other chars
202
25.9k
            else {
203
25.9k
                result[j++] = ch;
204
25.9k
            }
205
30.2k
            i++;
206
30.2k
        }
207
208
188
        result[j] = '\0';  // Null-terminate the result string
209
188
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
188
        PyMem_Free(result);
211
7.25k
    } else {
212
7.25k
        res = PyUnicode_DecodeUTF8(
213
7.25k
            tok_mode->last_expr_buffer,
214
7.25k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
7.25k
            NULL
216
7.25k
        );
217
7.25k
    }
218
219
7.44k
    if (!res) {
220
12
        return -1;
221
12
    }
222
7.43k
    token->metadata = res;
223
7.43k
    return 0;
224
7.44k
}
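
The second pass above rewrites the saved expression so that '#' comments outside string literals collapse to at most a newline, keeping quoted '#' characters intact. A standalone, slightly simplified sketch of that filtering loop (unlike the copy pass above, this version also copies escaped characters verbatim):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Strip '#' comments that appear outside string literals. */
    static char *strip_comments(const char *src) {
        size_t n = strlen(src);
        char *out = malloc(n + 1);
        if (!out) {
            return NULL;
        }
        size_t i = 0, j = 0;
        int in_string = 0;
        char quote_char = 0;
        while (i < n) {
            char ch = src[i];
            if (ch == '\\') {                    /* copy escapes verbatim */
                out[j++] = src[i++];
                if (i < n) {
                    out[j++] = src[i];
                }
            }
            else if (ch == '"' || ch == '\'') {  /* same odd/even quote logic */
                if (!in_string) { in_string = 1; quote_char = ch; }
                else if (ch == quote_char) { in_string = 0; }
                out[j++] = ch;
            }
            else if (ch == '#' && !in_string) {
                while (i < n && src[i] != '\n') {
                    i++;                         /* drop the comment body */
                }
                continue;                        /* the '\n' (if any) is copied next round */
            }
            else {
                out[j++] = ch;
            }
            i++;
        }
        out[j] = '\0';
        return out;
    }

    int main(void) {
        char *s = strip_comments("x + 1  # add one\n'# not a comment'");
        printf("%s\n", s);   /* x + 1  (newline) '# not a comment' */
        free(s);
        return 0;
    }
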
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
54.5k
{
229
54.5k
    assert(tok->cur != NULL);
230
231
54.5k
    Py_ssize_t size = strlen(tok->cur);
232
54.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
54.5k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
35.9k
        case '{':
252
35.9k
            if (tok_mode->last_expr_buffer != NULL) {
253
26.5k
                PyMem_Free(tok_mode->last_expr_buffer);
254
26.5k
            }
255
35.9k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
35.9k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
35.9k
            tok_mode->last_expr_size = size;
260
35.9k
            tok_mode->last_expr_end = -1;
261
35.9k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
35.9k
            break;
263
14.9k
        case '}':
264
15.8k
        case '!':
265
15.8k
            tok_mode->last_expr_end = strlen(tok->start);
266
15.8k
            break;
267
2.71k
        case ':':
268
2.71k
            if (tok_mode->last_expr_end == -1) {
269
2.44k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.44k
            }
271
2.71k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
54.5k
    }
275
54.5k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
54.5k
}
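
The offset arithmetic here is easy to miss: last_expr_size is strlen(tok->cur) taken right after '{' (everything left in the input), and last_expr_end is strlen(tok->start) taken at '}', '!' or ':' (everything from the closing delimiter on), so the expression text is the first last_expr_size - last_expr_end bytes of the snapshot. A tiny demonstration of the same arithmetic over a plain string:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const char *line = "f'{a + b}'";
        const char *after_open = strchr(line, '{') + 1;  /* tok->cur after '{' */
        const char *at_close = strchr(line, '}');        /* tok->start at '}' */

        size_t last_expr_size = strlen(after_open);      /* case '{' */
        size_t last_expr_end = strlen(at_close);         /* case '}' */

        /* Prints "a + b" -- the snapshot minus the tail past the delimiter. */
        printf("%.*s\n", (int)(last_expr_size - last_expr_end), after_open);
        return 0;
    }
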
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
7.53k
{
284
7.53k
    const char *s = test;
285
7.53k
    int res = 0;
286
19.7k
    while (1) {
287
19.7k
        int c = tok_nextc(tok);
288
19.7k
        if (*s == 0) {
289
7.42k
            res = !is_potential_identifier_char(c);
290
7.42k
        }
291
12.2k
        else if (c == *s) {
292
12.1k
            s++;
293
12.1k
            continue;
294
12.1k
        }
295
296
7.53k
        tok_backup(tok, c);
297
19.7k
        while (s != test) {
298
12.1k
            tok_backup(tok, *--s);
299
12.1k
        }
300
7.53k
        return res;
301
19.7k
    }
302
7.53k
}
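
lookahead() consumes characters while they match `test`, then unconditionally backs everything out, so the stream position is unchanged whichever way the answer goes. A standalone sketch of the question it answers, using a plain string so no pushback is needed (lookahead_str() is a hypothetical helper; identifier characters are approximated with isalnum):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    /* Returns 1 if `s` starts with `test` followed by a non-identifier char. */
    static int lookahead_str(const char *s, const char *test) {
        size_t n = strlen(test);
        if (strncmp(s, test, n) != 0) {
            return 0;
        }
        unsigned char next = (unsigned char)s[n];
        return !(isalnum(next) || next == '_' || next >= 128);
    }

    int main(void) {
        /* After reading "1" and then 'a' in "1and x", the lexer asks: "nd"? */
        printf("%d\n", lookahead_str("nd x", "nd"));  /* 1 -> treat as keyword "and" */
        printf("%d\n", lookahead_str("ndx", "nd"));   /* 0 -> "andx" is an identifier */
        return 0;
    }
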
303
304
static int
305
89.7k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
89.7k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of the keywords that can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is", "not", and "or".
314
     * This allows gradually deprecating existing valid code without emitting
315
     * a warning before an error in most cases of an invalid numeric literal
316
     * (which would be confusing and break existing tests).
317
     * Raise a syntax error with a slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * another keyword or identifier.
320
     */
321
89.7k
    int r = 0;
322
89.7k
    if (c == 'a') {
323
885
        r = lookahead(tok, "nd");
324
885
    }
325
88.8k
    else if (c == 'e') {
326
333
        r = lookahead(tok, "lse");
327
333
    }
328
88.5k
    else if (c == 'f') {
329
3.02k
        r = lookahead(tok, "or");
330
3.02k
    }
331
85.4k
    else if (c == 'i') {
332
2.06k
        int c2 = tok_nextc(tok);
333
2.06k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
2.05k
            r = 1;
335
2.05k
        }
336
2.06k
        tok_backup(tok, c2);
337
2.06k
    }
338
83.4k
    else if (c == 'o') {
339
2.99k
        r = lookahead(tok, "r");
340
2.99k
    }
341
80.4k
    else if (c == 'n') {
342
296
        r = lookahead(tok, "ot");
343
296
    }
344
89.7k
    if (r) {
345
9.47k
        tok_backup(tok, c);
346
9.47k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
9.47k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
9.47k
        tok_nextc(tok);
352
9.47k
    }
353
80.2k
    else /* In future releases, only error will remain. */
354
80.2k
    if (c < 128 && is_potential_identifier_char(c)) {
355
247
        tok_backup(tok, c);
356
247
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
247
        return 0;
358
247
    }
359
89.4k
    return 1;
360
89.7k
}
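
Concretely: a numeric literal directly followed by one of those keywords (e.g. 1if x else y) currently gets a SyntaxWarning, while any other trailing identifier character (e.g. 1abc) is an immediate SyntaxError. A simplified sketch of that decision; note the real code is looser for "if"/"in"/"is", where it does not check what follows the second letter:

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    enum after_number { AN_OK, AN_WARN, AN_ERROR };

    static enum after_number classify(const char *rest) {
        static const char *kw[] = { "and", "else", "for", "if", "in", "is", "or", "not" };
        for (size_t k = 0; k < sizeof(kw) / sizeof(kw[0]); k++) {
            size_t n = strlen(kw[k]);
            if (strncmp(rest, kw[k], n) == 0 &&
                !(isalnum((unsigned char)rest[n]) || rest[n] == '_')) {
                return AN_WARN;          /* e.g. "1if x else y" */
            }
        }
        unsigned char c = (unsigned char)rest[0];
        if (c < 128 && (isalnum(c) || c == '_')) {
            return AN_ERROR;             /* e.g. "1abc" */
        }
        return AN_OK;                    /* e.g. "1 + 2" */
    }

    int main(void) {
        printf("%d %d %d\n",             /* prints "1 2 0" */
               classify("if x else y"), classify("abc"), classify(" + 2"));
        return 0;
    }
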
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
14.2k
{
366
14.2k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
14.2k
    PyObject *s;
370
14.2k
    if (tok->decoding_erred)
371
0
        return 0;
372
14.2k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
14.2k
    if (s == NULL) {
374
1.15k
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
1.15k
            tok->done = E_DECODE;
376
1.15k
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
1.15k
        return 0;
381
1.15k
    }
382
13.1k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
13.1k
    assert(invalid >= 0);
384
13.1k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
13.1k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
618
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
618
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
426
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
426
            if (s != NULL) {
391
426
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
426
            }
393
426
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
426
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
426
        }
399
618
        Py_DECREF(s);
400
618
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
349
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
349
        }
403
269
        else {
404
269
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
269
        }
406
618
        return 0;
407
618
    }
408
12.5k
    Py_DECREF(s);
409
12.5k
    return 1;
410
13.1k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
71.8k
{
415
71.8k
    int c;
416
417
72.3k
    while (1) {
418
223k
        do {
419
223k
            c = tok_nextc(tok);
420
223k
        } while (Py_ISDIGIT(c));
421
72.3k
        if (c != '_') {
422
71.8k
            break;
423
71.8k
        }
424
506
        c = tok_nextc(tok);
425
506
        if (!Py_ISDIGIT(c)) {
426
13
            tok_backup(tok, c);
427
13
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
13
            return 0;
429
13
        }
430
506
    }
431
71.8k
    return c;
432
71.8k
}
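
tok_decimal_tail() enforces the PEP 515 rule that an underscore in a numeric literal must sit between digits. The same scan over a plain string, assuming (as in the caller) that the first digit has already been consumed:

    #include <ctype.h>
    #include <stdio.h>

    /* Returns characters consumed, or -1 for a dangling underscore. */
    static int decimal_tail(const char *s) {
        int i = 0;
        for (;;) {
            while (isdigit((unsigned char)s[i])) {
                i++;
            }
            if (s[i] != '_') {
                return i;
            }
            i++;                                  /* consume '_' */
            if (!isdigit((unsigned char)s[i])) {
                return -1;                        /* invalid decimal literal */
            }
        }
    }

    int main(void) {
        printf("%d\n", decimal_tail("000_0 "));   /* 5  -> "1000_0" is fine */
        printf("%d\n", decimal_tail("_ "));       /* -1 -> "1_" is invalid */
        printf("%d\n", decimal_tail("_0_ "));     /* -1 -> "1_0_" is invalid */
        return 0;
    }
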
433
434
static inline int
435
1.11k
tok_continuation_line(struct tok_state *tok) {
436
1.11k
    int c = tok_nextc(tok);
437
1.11k
    if (c == '\r') {
438
69
        c = tok_nextc(tok);
439
69
    }
440
1.11k
    if (c != '\n') {
441
89
        tok->done = E_LINECONT;
442
89
        return -1;
443
89
    }
444
1.02k
    c = tok_nextc(tok);
445
1.02k
    if (c == EOF) {
446
40
        tok->done = E_EOF;
447
40
        tok->cur = tok->inp;
448
40
        return -1;
449
987
    } else {
450
987
        tok_backup(tok, c);
451
987
    }
452
987
    return c;
453
1.02k
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
19.7k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
19.7k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
19.7k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
19.7k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
19.7k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
19.7k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
19.7k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
19.7k
    if (saw_b && saw_f) {
485
2
        RETURN_SYNTAX_ERROR("b", "f");
486
2
    }
487
19.7k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
19.7k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
19.7k
#undef RETURN_SYNTAX_ERROR
496
497
19.7k
    return 0;
498
19.7k
}
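
The checks encode a small compatibility matrix: 'r' may combine with 'b', 'f', or 't'; 'u' combines with nothing; and 'b', 'f', 't' never combine with each other. The same matrix as a standalone helper (incompatible_pair() is hypothetical, mirroring the macro above minus the error reporting):

    #include <stdio.h>

    static const char *incompatible_pair(int saw_b, int saw_r, int saw_u,
                                         int saw_f, int saw_t) {
        if (saw_u && saw_b) return "u/b";
        if (saw_u && saw_r) return "u/r";
        if (saw_u && saw_f) return "u/f";
        if (saw_u && saw_t) return "u/t";
        if (saw_b && saw_f) return "b/f";
        if (saw_b && saw_t) return "b/t";
        if (saw_f && saw_t) return "f/t";
        return NULL;  /* rb, rf, rt (in any order) are fine */
    }

    int main(void) {
        const char *bad = incompatible_pair(/*b*/1, /*r*/0, /*u*/0, /*f*/1, /*t*/0);
        printf("%s\n", bad ? bad : "ok");  /* prints "b/f", as for bf"..." */
        return 0;
    }
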
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.64M
{
503
1.64M
    int c;
504
1.64M
    int blankline, nonascii;
505
506
1.64M
    const char *p_start = NULL;
507
1.64M
    const char *p_end = NULL;
508
1.72M
  nextline:
509
1.72M
    tok->start = NULL;
510
1.72M
    tok->starting_col_offset = -1;
511
1.72M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.72M
    if (tok->atbol) {
516
211k
        int col = 0;
517
211k
        int altcol = 0;
518
211k
        tok->atbol = 0;
519
211k
        int cont_line_col = 0;
520
819k
        for (;;) {
521
819k
            c = tok_nextc(tok);
522
819k
            if (c == ' ') {
523
605k
                col++, altcol++;
524
605k
            }
525
214k
            else if (c == '\t') {
526
851
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
851
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
851
            }
529
213k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.61k
                col = altcol = 0; /* For Emacs users */
531
1.61k
            }
532
211k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
653
                cont_line_col = cont_line_col ? cont_line_col : col;
538
653
                if ((c = tok_continuation_line(tok)) == -1) {
539
33
                    return MAKE_TOKEN(ERRORTOKEN);
540
33
                }
541
653
            }
542
211k
            else {
543
211k
                break;
544
211k
            }
545
819k
        }
546
211k
        tok_backup(tok, c);
547
211k
        if (c == '#' || c == '\n' || c == '\r') {
548
            /* Lines with only whitespace and/or comments
549
               shouldn't affect the indentation and are
550
               not passed to the parser as NEWLINE tokens,
551
               except *totally* empty lines in interactive
552
               mode, which signal the end of a command group. */
553
41.6k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
554
0
                blankline = 0; /* Let it through */
555
0
            }
556
41.6k
            else if (tok->prompt != NULL && tok->lineno == 1) {
557
                /* In interactive mode, if the first line contains
558
                   only spaces and/or a comment, let it through. */
559
0
                blankline = 0;
560
0
                col = altcol = 0;
561
0
            }
562
41.6k
            else {
563
41.6k
                blankline = 1; /* Ignore completely */
564
41.6k
            }
565
            /* We can't jump back right here since we still
566
               may need to skip to the end of a comment */
567
41.6k
        }
568
211k
        if (!blankline && tok->level == 0) {
569
130k
            col = cont_line_col ? cont_line_col : col;
570
130k
            altcol = cont_line_col ? cont_line_col : altcol;
571
130k
            if (col == tok->indstack[tok->indent]) {
572
                /* No change */
573
96.4k
                if (altcol != tok->altindstack[tok->indent]) {
574
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
575
1
                }
576
96.4k
            }
577
33.7k
            else if (col > tok->indstack[tok->indent]) {
578
                /* Indent -- always one */
579
18.9k
                if (tok->indent+1 >= MAXINDENT) {
580
0
                    tok->done = E_TOODEEP;
581
0
                    tok->cur = tok->inp;
582
0
                    return MAKE_TOKEN(ERRORTOKEN);
583
0
                }
584
18.9k
                if (altcol <= tok->altindstack[tok->indent]) {
585
3
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
586
3
                }
587
18.9k
                tok->pendin++;
588
18.9k
                tok->indstack[++tok->indent] = col;
589
18.9k
                tok->altindstack[tok->indent] = altcol;
590
18.9k
            }
591
14.8k
            else /* col < tok->indstack[tok->indent] */ {
592
                /* Dedent -- any number, must be consistent */
593
33.0k
                while (tok->indent > 0 &&
594
33.0k
                    col < tok->indstack[tok->indent]) {
595
18.2k
                    tok->pendin--;
596
18.2k
                    tok->indent--;
597
18.2k
                }
598
14.8k
                if (col != tok->indstack[tok->indent]) {
599
11
                    tok->done = E_DEDENT;
600
11
                    tok->cur = tok->inp;
601
11
                    return MAKE_TOKEN(ERRORTOKEN);
602
11
                }
603
14.8k
                if (altcol != tok->altindstack[tok->indent]) {
604
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
605
1
                }
606
14.8k
            }
607
130k
        }
608
211k
    }
609
610
1.72M
    tok->start = tok->cur;
611
1.72M
    tok->starting_col_offset = tok->col_offset;
612
613
    /* Return pending indents/dedents */
614
1.72M
    if (tok->pendin != 0) {
615
37.1k
        if (tok->pendin < 0) {
616
18.2k
            if (tok->tok_extra_tokens) {
617
0
                p_start = tok->cur;
618
0
                p_end = tok->cur;
619
0
            }
620
18.2k
            tok->pendin++;
621
18.2k
            return MAKE_TOKEN(DEDENT);
622
18.2k
        }
623
18.9k
        else {
624
18.9k
            if (tok->tok_extra_tokens) {
625
0
                p_start = tok->buf;
626
0
                p_end = tok->cur;
627
0
            }
628
18.9k
            tok->pendin--;
629
18.9k
            return MAKE_TOKEN(INDENT);
630
18.9k
        }
631
37.1k
    }
632
633
    /* Peek ahead at the next character */
634
1.69M
    c = tok_nextc(tok);
635
1.69M
    tok_backup(tok, c);
636
637
1.69M
 again:
638
1.69M
    tok->start = NULL;
639
    /* Skip spaces */
640
2.02M
    do {
641
2.02M
        c = tok_nextc(tok);
642
2.02M
    } while (c == ' ' || c == '\t' || c == '\014');
643
644
    /* Set start of current token */
645
1.69M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
646
1.69M
    tok->starting_col_offset = tok->col_offset - 1;
647
648
    /* Skip comment, unless it's a type comment */
649
1.69M
    if (c == '#') {
650
651
41.6k
        const char* p = NULL;
652
41.6k
        const char *prefix, *type_start;
653
41.6k
        int current_starting_col_offset;
654
655
1.27M
        while (c != EOF && c != '\n' && c != '\r') {
656
1.23M
            c = tok_nextc(tok);
657
1.23M
        }
658
659
41.6k
        if (tok->tok_extra_tokens) {
660
0
            p = tok->start;
661
0
        }
662
663
41.6k
        if (tok->type_comments) {
664
0
            p = tok->start;
665
0
            current_starting_col_offset = tok->starting_col_offset;
666
0
            prefix = type_comment_prefix;
667
0
            while (*prefix && p < tok->cur) {
668
0
                if (*prefix == ' ') {
669
0
                    while (*p == ' ' || *p == '\t') {
670
0
                        p++;
671
0
                        current_starting_col_offset++;
672
0
                    }
673
0
                } else if (*prefix == *p) {
674
0
                    p++;
675
0
                    current_starting_col_offset++;
676
0
                } else {
677
0
                    break;
678
0
                }
679
680
0
                prefix++;
681
0
            }
682
683
            /* This is a type comment if we matched all of type_comment_prefix. */
684
0
            if (!*prefix) {
685
0
                int is_type_ignore = 1;
686
                // +6 in order to skip the word 'ignore'
687
0
                const char *ignore_end = p + 6;
688
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
689
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
690
691
0
                type_start = p;
692
693
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
694
                 * or anything ASCII and non-alphanumeric. */
695
0
                is_type_ignore = (
696
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
697
0
                    && !(tok->cur > ignore_end
698
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
699
700
0
                if (is_type_ignore) {
701
0
                    p_start = ignore_end;
702
0
                    p_end = tok->cur;
703
704
                    /* If this type ignore is the only thing on the line, consume the newline also. */
705
0
                    if (blankline) {
706
0
                        tok_nextc(tok);
707
0
                        tok->atbol = 1;
708
0
                    }
709
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
710
0
                } else {
711
0
                    p_start = type_start;
712
0
                    p_end = tok->cur;
713
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
714
0
                }
715
0
            }
716
0
        }
717
41.6k
        if (tok->tok_extra_tokens) {
718
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
719
0
            p_start = p;
720
0
            p_end = tok->cur;
721
0
            tok->comment_newline = blankline;
722
0
            return MAKE_TOKEN(COMMENT);
723
0
        }
724
41.6k
    }
725
726
1.69M
    if (tok->done == E_INTERACT_STOP) {
727
0
        return MAKE_TOKEN(ENDMARKER);
728
0
    }
729
730
    /* Check for EOF and errors now */
731
1.69M
    if (c == EOF) {
732
15.0k
        if (tok->level) {
733
3.59k
            return MAKE_TOKEN(ERRORTOKEN);
734
3.59k
        }
735
11.4k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
736
15.0k
    }
737
738
    /* Identifier (most frequent token!) */
739
1.67M
    nonascii = 0;
740
1.67M
    if (is_potential_identifier_start(c)) {
741
        /* Process the various legal combinations of b"", r"", u"", and f"". */
742
492k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
743
605k
        while (1) {
744
605k
            if (!saw_b && (c == 'b' || c == 'B')) {
745
20.0k
                saw_b = 1;
746
20.0k
            }
747
            /* Since u"" is only a backwards-compatibility literal, we don't
748
               want to support it in arbitrary order like byte literals. */
749
585k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
750
6.25k
                saw_u = 1;
751
6.25k
            }
752
            /* ur"" and ru"" are not supported */
753
579k
            else if (!saw_r && (c == 'r' || c == 'R')) {
754
35.5k
                saw_r = 1;
755
35.5k
            }
756
543k
            else if (!saw_f && (c == 'f' || c == 'F')) {
757
39.7k
                saw_f = 1;
758
39.7k
            }
759
504k
            else if (!saw_t && (c == 't' || c == 'T')) {
760
31.1k
                saw_t = 1;
761
31.1k
            }
762
472k
            else {
763
472k
                break;
764
472k
            }
765
132k
            c = tok_nextc(tok);
766
132k
            if (c == '"' || c == '\'') {
767
                // Raise error on incompatible string prefixes:
768
19.7k
                int status = maybe_raise_syntax_error_for_string_prefixes(
769
19.7k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
770
19.7k
                if (status < 0) {
771
8
                    return MAKE_TOKEN(ERRORTOKEN);
772
8
                }
773
774
                // Handle valid f or t string creation:
775
19.7k
                if (saw_f || saw_t) {
776
14.8k
                    goto f_string_quote;
777
14.8k
                }
778
4.93k
                goto letter_quote;
779
19.7k
            }
780
132k
        }
781
2.14M
        while (is_potential_identifier_char(c)) {
782
1.67M
            if (c >= 128) {
783
192k
                nonascii = 1;
784
192k
            }
785
1.67M
            c = tok_nextc(tok);
786
1.67M
        }
787
472k
        tok_backup(tok, c);
788
472k
        if (nonascii && !verify_identifier(tok)) {
789
1.77k
            return MAKE_TOKEN(ERRORTOKEN);
790
1.77k
        }
791
792
471k
        p_start = tok->start;
793
471k
        p_end = tok->cur;
794
795
471k
        return MAKE_TOKEN(NAME);
796
472k
    }
797
798
1.18M
    if (c == '\r') {
799
408
        c = tok_nextc(tok);
800
408
    }
801
802
    /* Newline */
803
1.18M
    if (c == '\n') {
804
192k
        tok->atbol = 1;
805
192k
        if (blankline || tok->level > 0) {
806
80.9k
            if (tok->tok_extra_tokens) {
807
0
                if (tok->comment_newline) {
808
0
                    tok->comment_newline = 0;
809
0
                }
810
0
                p_start = tok->start;
811
0
                p_end = tok->cur;
812
0
                return MAKE_TOKEN(NL);
813
0
            }
814
80.9k
            goto nextline;
815
80.9k
        }
816
111k
        if (tok->comment_newline && tok->tok_extra_tokens) {
817
0
            tok->comment_newline = 0;
818
0
            p_start = tok->start;
819
0
            p_end = tok->cur;
820
0
            return MAKE_TOKEN(NL);
821
0
        }
822
111k
        p_start = tok->start;
823
111k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
824
111k
        tok->cont_line = 0;
825
111k
        return MAKE_TOKEN(NEWLINE);
826
111k
    }
827
828
    /* Period or number starting with period? */
829
992k
    if (c == '.') {
830
32.2k
        c = tok_nextc(tok);
831
32.2k
        if (Py_ISDIGIT(c)) {
832
3.11k
            goto fraction;
833
29.1k
        } else if (c == '.') {
834
3.54k
            c = tok_nextc(tok);
835
3.54k
            if (c == '.') {
836
2.90k
                p_start = tok->start;
837
2.90k
                p_end = tok->cur;
838
2.90k
                return MAKE_TOKEN(ELLIPSIS);
839
2.90k
            }
840
644
            else {
841
644
                tok_backup(tok, c);
842
644
            }
843
644
            tok_backup(tok, '.');
844
644
        }
845
25.5k
        else {
846
25.5k
            tok_backup(tok, c);
847
25.5k
        }
848
26.2k
        p_start = tok->start;
849
26.2k
        p_end = tok->cur;
850
26.2k
        return MAKE_TOKEN(DOT);
851
32.2k
    }
852
853
    /* Number */
854
959k
    if (Py_ISDIGIT(c)) {
855
86.7k
        if (c == '0') {
856
            /* Hex, octal or binary -- maybe. */
857
30.5k
            c = tok_nextc(tok);
858
30.5k
            if (c == 'x' || c == 'X') {
859
                /* Hex */
860
15.7k
                c = tok_nextc(tok);
861
15.9k
                do {
862
15.9k
                    if (c == '_') {
863
260
                        c = tok_nextc(tok);
864
260
                    }
865
15.9k
                    if (!Py_ISXDIGIT(c)) {
866
20
                        tok_backup(tok, c);
867
20
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
868
20
                    }
869
69.0k
                    do {
870
69.0k
                        c = tok_nextc(tok);
871
69.0k
                    } while (Py_ISXDIGIT(c));
872
15.9k
                } while (c == '_');
873
15.7k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
874
1
                    return MAKE_TOKEN(ERRORTOKEN);
875
1
                }
876
15.7k
            }
877
14.7k
            else if (c == 'o' || c == 'O') {
878
                /* Octal */
879
622
                c = tok_nextc(tok);
880
1.23k
                do {
881
1.23k
                    if (c == '_') {
882
616
                        c = tok_nextc(tok);
883
616
                    }
884
1.23k
                    if (c < '0' || c >= '8') {
885
22
                        if (Py_ISDIGIT(c)) {
886
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
887
1
                                    "invalid digit '%c' in octal literal", c));
888
1
                        }
889
21
                        else {
890
21
                            tok_backup(tok, c);
891
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
892
21
                        }
893
22
                    }
894
3.39k
                    do {
895
3.39k
                        c = tok_nextc(tok);
896
3.39k
                    } while ('0' <= c && c < '8');
897
1.21k
                } while (c == '_');
898
600
                if (Py_ISDIGIT(c)) {
899
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
900
1
                            "invalid digit '%c' in octal literal", c));
901
1
                }
902
599
                if (!verify_end_of_number(tok, c, "octal")) {
903
4
                    return MAKE_TOKEN(ERRORTOKEN);
904
4
                }
905
599
            }
906
14.1k
            else if (c == 'b' || c == 'B') {
907
                /* Binary */
908
552
                c = tok_nextc(tok);
909
645
                do {
910
645
                    if (c == '_') {
911
99
                        c = tok_nextc(tok);
912
99
                    }
913
645
                    if (c != '0' && c != '1') {
914
21
                        if (Py_ISDIGIT(c)) {
915
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
916
1
                        }
917
20
                        else {
918
20
                            tok_backup(tok, c);
919
20
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
920
20
                        }
921
21
                    }
922
3.94k
                    do {
923
3.94k
                        c = tok_nextc(tok);
924
3.94k
                    } while (c == '0' || c == '1');
925
624
                } while (c == '_');
926
531
                if (Py_ISDIGIT(c)) {
927
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
928
1
                }
929
530
                if (!verify_end_of_number(tok, c, "binary")) {
930
1
                    return MAKE_TOKEN(ERRORTOKEN);
931
1
                }
932
530
            }
933
13.6k
            else {
934
13.6k
                int nonzero = 0;
935
                /* maybe old-style octal; c is first char of it */
936
                /* in any case, allow '0' as a literal */
937
14.9k
                while (1) {
938
14.9k
                    if (c == '_') {
939
102
                        c = tok_nextc(tok);
940
102
                        if (!Py_ISDIGIT(c)) {
941
6
                            tok_backup(tok, c);
942
6
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
943
6
                        }
944
102
                    }
945
14.9k
                    if (c != '0') {
946
13.6k
                        break;
947
13.6k
                    }
948
1.29k
                    c = tok_nextc(tok);
949
1.29k
                }
950
13.6k
                char* zeros_end = tok->cur;
951
13.6k
                if (Py_ISDIGIT(c)) {
952
547
                    nonzero = 1;
953
547
                    c = tok_decimal_tail(tok);
954
547
                    if (c == 0) {
955
1
                        return MAKE_TOKEN(ERRORTOKEN);
956
1
                    }
957
547
                }
958
13.6k
                if (c == '.') {
959
715
                    c = tok_nextc(tok);
960
715
                    goto fraction;
961
715
                }
962
12.8k
                else if (c == 'e' || c == 'E') {
963
690
                    goto exponent;
964
690
                }
965
12.2k
                else if (c == 'j' || c == 'J') {
966
1.01k
                    goto imaginary;
967
1.01k
                }
968
11.1k
                else if (nonzero && !tok->tok_extra_tokens) {
969
                    /* Old-style octal: now disallowed. */
970
26
                    tok_backup(tok, c);
971
26
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
972
26
                            tok, (int)(tok->start + 1 - tok->line_start),
973
26
                            (int)(zeros_end - tok->line_start),
974
26
                            "leading zeros in decimal integer "
975
26
                            "literals are not permitted; "
976
26
                            "use an 0o prefix for octal integers"));
977
26
                }
978
11.1k
                if (!verify_end_of_number(tok, c, "decimal")) {
979
35
                    return MAKE_TOKEN(ERRORTOKEN);
980
35
                }
981
11.1k
            }
982
30.5k
        }
983
56.2k
        else {
984
            /* Decimal */
985
56.2k
            c = tok_decimal_tail(tok);
986
56.2k
            if (c == 0) {
987
10
                return MAKE_TOKEN(ERRORTOKEN);
988
10
            }
989
56.2k
            {
990
                /* Accept floating-point numbers. */
991
56.2k
                if (c == '.') {
992
3.76k
                    c = tok_nextc(tok);
993
7.59k
        fraction:
994
                    /* Fraction */
995
7.59k
                    if (Py_ISDIGIT(c)) {
996
5.87k
                        c = tok_decimal_tail(tok);
997
5.87k
                        if (c == 0) {
998
1
                            return MAKE_TOKEN(ERRORTOKEN);
999
1
                        }
1000
5.87k
                    }
1001
7.59k
                }
1002
60.0k
                if (c == 'e' || c == 'E') {
1003
8.89k
                    int e;
1004
9.58k
                  exponent:
1005
9.58k
                    e = c;
1006
                    /* Exponent part */
1007
9.58k
                    c = tok_nextc(tok);
1008
9.58k
                    if (c == '+' || c == '-') {
1009
3.28k
                        c = tok_nextc(tok);
1010
3.28k
                        if (!Py_ISDIGIT(c)) {
1011
14
                            tok_backup(tok, c);
1012
14
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1013
14
                        }
1014
6.30k
                    } else if (!Py_ISDIGIT(c)) {
1015
336
                        tok_backup(tok, c);
1016
336
                        if (!verify_end_of_number(tok, e, "decimal")) {
1017
48
                            return MAKE_TOKEN(ERRORTOKEN);
1018
48
                        }
1019
288
                        tok_backup(tok, e);
1020
288
                        p_start = tok->start;
1021
288
                        p_end = tok->cur;
1022
288
                        return MAKE_TOKEN(NUMBER);
1023
336
                    }
1024
9.23k
                    c = tok_decimal_tail(tok);
1025
9.23k
                    if (c == 0) {
1026
1
                        return MAKE_TOKEN(ERRORTOKEN);
1027
1
                    }
1028
9.23k
                }
1029
60.3k
                if (c == 'j' || c == 'J') {
1030
                    /* Imaginary part */
1031
4.67k
        imaginary:
1032
4.67k
                    c = tok_nextc(tok);
1033
4.67k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1034
12
                        return MAKE_TOKEN(ERRORTOKEN);
1035
12
                    }
1036
4.67k
                }
1037
56.7k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1038
146
                    return MAKE_TOKEN(ERRORTOKEN);
1039
146
                }
1040
60.3k
            }
1041
60.3k
        }
1042
89.1k
        tok_backup(tok, c);
1043
89.1k
        p_start = tok->start;
1044
89.1k
        p_end = tok->cur;
1045
89.1k
        return MAKE_TOKEN(NUMBER);
1046
86.7k
    }
1047
1048
888k
  f_string_quote:
1049
888k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1050
888k
        && (c == '\'' || c == '"'))) {
1051
1052
14.8k
        int quote = c;
1053
14.8k
        int quote_size = 1;             /* 1 or 3 */
1054
1055
        /* Nodes of type STRING, especially multi-line strings
1056
           must be handled differently in order to get both
1057
           the starting line number and the column offset right.
1058
           (cf. issue 16806) */
1059
14.8k
        tok->first_lineno = tok->lineno;
1060
14.8k
        tok->multi_line_start = tok->line_start;
1061
1062
        /* Find the quote size and start of string */
1063
14.8k
        int after_quote = tok_nextc(tok);
1064
14.8k
        if (after_quote == quote) {
1065
2.28k
            int after_after_quote = tok_nextc(tok);
1066
2.28k
            if (after_after_quote == quote) {
1067
775
                quote_size = 3;
1068
775
            }
1069
1.50k
            else {
1070
                // TODO: Check this
1071
1.50k
                tok_backup(tok, after_after_quote);
1072
1.50k
                tok_backup(tok, after_quote);
1073
1.50k
            }
1074
2.28k
        }
1075
14.8k
        if (after_quote != quote) {
1076
12.5k
            tok_backup(tok, after_quote);
1077
12.5k
        }
1078
1079
1080
14.8k
        p_start = tok->start;
1081
14.8k
        p_end = tok->cur;
1082
14.8k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1083
2
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1084
2
        }
1085
14.7k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1086
14.7k
        the_current_tok->kind = TOK_FSTRING_MODE;
1087
14.7k
        the_current_tok->quote = quote;
1088
14.7k
        the_current_tok->quote_size = quote_size;
1089
14.7k
        the_current_tok->start = tok->start;
1090
14.7k
        the_current_tok->multi_line_start = tok->line_start;
1091
14.7k
        the_current_tok->first_line = tok->lineno;
1092
14.7k
        the_current_tok->start_offset = -1;
1093
14.7k
        the_current_tok->multi_line_start_offset = -1;
1094
14.7k
        the_current_tok->last_expr_buffer = NULL;
1095
14.7k
        the_current_tok->last_expr_size = 0;
1096
14.7k
        the_current_tok->last_expr_end = -1;
1097
14.7k
        the_current_tok->in_format_spec = 0;
1098
14.7k
        the_current_tok->in_debug = 0;
1099
1100
14.7k
        enum string_kind_t string_kind = FSTRING;
1101
14.7k
        switch (*tok->start) {
1102
907
            case 'T':
1103
3.09k
            case 't':
1104
3.09k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1105
3.09k
                string_kind = TSTRING;
1106
3.09k
                break;
1107
1.74k
            case 'F':
1108
11.3k
            case 'f':
1109
11.3k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1110
11.3k
                break;
1111
111
            case 'R':
1112
384
            case 'r':
1113
384
                the_current_tok->raw = 1;
1114
384
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1115
205
                    string_kind = TSTRING;
1116
205
                }
1117
384
                break;
1118
0
            default:
1119
0
                Py_UNREACHABLE();
1120
14.7k
        }
1121
1122
14.7k
        the_current_tok->string_kind = string_kind;
1123
14.7k
        the_current_tok->curly_bracket_depth = 0;
1124
14.7k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1125
14.7k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1126
14.7k
    }
1127
1128
878k
  letter_quote:
1129
    /* String */
1130
878k
    if (c == '\'' || c == '"') {
1131
56.7k
        int quote = c;
1132
56.7k
        int quote_size = 1;             /* 1 or 3 */
1133
56.7k
        int end_quote_size = 0;
1134
56.7k
        int has_escaped_quote = 0;
1135
1136
        /* Nodes of type STRING, especially multi-line strings
1137
           must be handled differently in order to get both
1138
           the starting line number and the column offset right.
1139
           (cf. issue 16806) */
1140
56.7k
        tok->first_lineno = tok->lineno;
1141
56.7k
        tok->multi_line_start = tok->line_start;
1142
1143
        /* Find the quote size and start of string */
1144
56.7k
        c = tok_nextc(tok);
1145
56.7k
        if (c == quote) {
1146
9.56k
            c = tok_nextc(tok);
1147
9.56k
            if (c == quote) {
1148
2.22k
                quote_size = 3;
1149
2.22k
            }
1150
7.34k
            else {
1151
7.34k
                end_quote_size = 1;     /* empty string found */
1152
7.34k
            }
1153
9.56k
        }
1154
56.7k
        if (c != quote) {
1155
54.5k
            tok_backup(tok, c);
1156
54.5k
        }
1157
1158
        /* Get rest of string */
1159
1.13M
        while (end_quote_size != quote_size) {
1160
1.07M
            c = tok_nextc(tok);
1161
1.07M
            if (tok->done == E_ERROR) {
1162
0
                return MAKE_TOKEN(ERRORTOKEN);
1163
0
            }
1164
1.07M
            if (tok->done == E_DECODE) {
1165
0
                break;
1166
0
            }
1167
1.07M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1168
442
                assert(tok->multi_line_start != NULL);
1169
                // shift the tok_state's location into
1170
                // the start of string, and report the error
1171
                // from the initial quote character
1172
442
                tok->cur = (char *)tok->start;
1173
442
                tok->cur++;
1174
442
                tok->line_start = tok->multi_line_start;
1175
442
                int start = tok->lineno;
1176
442
                tok->lineno = tok->first_lineno;
1177
1178
442
                if (INSIDE_FSTRING(tok)) {
1179
                    /* When we are in an f-string, before raising the
1180
                     * unterminated string literal error, check whether
1181
                     * the initial quote matches the f-string's quotes;
1182
                     * if it does, this must be a missing '}' token,
1183
                     * so raise the proper error */
1184
34
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1185
34
                    if (the_current_tok->quote == quote &&
1186
34
                        the_current_tok->quote_size == quote_size) {
1187
21
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1188
21
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1189
21
                    }
1190
34
                }
1191
1192
421
                if (quote_size == 3) {
1193
19
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1194
19
                                     " (detected at line %d)", start);
1195
19
                    if (c != '\n') {
1196
19
                        tok->done = E_EOFS;
1197
19
                    }
1198
19
                    return MAKE_TOKEN(ERRORTOKEN);
1199
19
                }
1200
402
                else {
1201
402
                    if (has_escaped_quote) {
1202
12
                        _PyTokenizer_syntaxerror(
1203
12
                            tok,
1204
12
                            "unterminated string literal (detected at line %d); "
1205
12
                            "perhaps you escaped the end quote?",
1206
12
                            start
1207
12
                        );
1208
390
                    } else {
1209
390
                        _PyTokenizer_syntaxerror(
1210
390
                            tok, "unterminated string literal (detected at line %d)", start
1211
390
                        );
1212
390
                    }
1213
402
                    if (c != '\n') {
1214
13
                        tok->done = E_EOLS;
1215
13
                    }
1216
402
                    return MAKE_TOKEN(ERRORTOKEN);
1217
402
                }
1218
421
            }
1219
1.07M
            if (c == quote) {
1220
54.9k
                end_quote_size += 1;
1221
54.9k
            }
1222
1.02M
            else {
1223
1.02M
                end_quote_size = 0;
1224
1.02M
                if (c == '\\') {
1225
33.6k
                    c = tok_nextc(tok);  /* skip escaped char */
1226
33.6k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1227
1.61k
                        has_escaped_quote = 1;
1228
1.61k
                    }
1229
33.6k
                    if (c == '\r') {
1230
218
                        c = tok_nextc(tok);
1231
218
                    }
1232
33.6k
                }
1233
1.02M
            }
1234
1.07M
        }
1235
1236
56.3k
        p_start = tok->start;
1237
56.3k
        p_end = tok->cur;
1238
56.3k
        return MAKE_TOKEN(STRING);
1239
56.7k
    }
1240
1241
    /* Line continuation */
1242
821k
    if (c == '\\') {
1243
463
        if ((c = tok_continuation_line(tok)) == -1) {
1244
96
            return MAKE_TOKEN(ERRORTOKEN);
1245
96
        }
1246
367
        tok->cont_line = 1;
1247
367
        goto again; /* Read next line */
1248
463
    }
1249
1250
    /* Punctuation character */
1251
820k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1252
820k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1253
        /* This code block gets executed before the curly_bracket_depth is incremented
1254
         * by the `{` case, so to ensure that we are on the 0th level, we need
1255
         * to adjust it manually */
1256
45.8k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1257
45.8k
        int in_format_spec = current_tok->in_format_spec;
1258
45.8k
        int cursor_in_format_with_debug =
1259
45.8k
            cursor == 1 && (current_tok->in_debug || in_format_spec);
1260
45.8k
        int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1261
45.8k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1262
0
            return MAKE_TOKEN(ENDMARKER);
1263
0
        }
1264
45.8k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1265
12
            return MAKE_TOKEN(ERRORTOKEN);
1266
12
        }
1267
1268
45.7k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269
3.80k
            current_tok->kind = TOK_FSTRING_MODE;
1270
3.80k
            current_tok->in_format_spec = 1;
1271
3.80k
            p_start = tok->start;
1272
3.80k
            p_end = tok->cur;
1273
3.80k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1274
3.80k
        }
1275
45.7k
    }
1276
1277
    /* Check for two-character token */
1278
817k
    {
1279
817k
        int c2 = tok_nextc(tok);
1280
817k
        int current_token = _PyToken_TwoChars(c, c2);
1281
817k
        if (current_token != OP) {
1282
20.6k
            int c3 = tok_nextc(tok);
1283
20.6k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1284
20.6k
            if (current_token3 != OP) {
1285
964
                current_token = current_token3;
1286
964
            }
1287
19.6k
            else {
1288
19.6k
                tok_backup(tok, c3);
1289
19.6k
            }
1290
20.6k
            p_start = tok->start;
1291
20.6k
            p_end = tok->cur;
1292
20.6k
            return MAKE_TOKEN(current_token);
1293
20.6k
        }
1294
796k
        tok_backup(tok, c2);
1295
796k
    }
1296
1297
    /* Keep track of parentheses nesting level */
1298
0
    switch (c) {
1299
83.6k
    case '(':
1300
115k
    case '[':
1301
159k
    case '{':
1302
159k
        if (tok->level >= MAXLEVEL) {
1303
8
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1304
8
        }
1305
159k
        tok->parenstack[tok->level] = c;
1306
159k
        tok->parenlinenostack[tok->level] = tok->lineno;
1307
159k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1308
159k
        tok->level++;
1309
159k
        if (INSIDE_FSTRING(tok)) {
1310
25.7k
            current_tok->curly_bracket_depth++;
1311
25.7k
        }
1312
159k
        break;
1313
55.6k
    case ')':
1314
66.4k
    case ']':
1315
89.3k
    case '}':
1316
89.3k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1317
44
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1318
44
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1319
44
        }
1320
89.2k
        if (!tok->tok_extra_tokens && !tok->level) {
1321
219
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1322
219
        }
1323
89.0k
        if (tok->level > 0) {
1324
89.0k
            tok->level--;
1325
89.0k
            int opening = tok->parenstack[tok->level];
1326
89.0k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1327
89.0k
                                            (opening == '[' && c == ']') ||
1328
89.0k
                                            (opening == '{' && c == '}'))) {
1329
                /* If the opening bracket belongs to an f-string's expression
1330
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1331
                nested expression, then instead of matching a different
1332
                syntactical construct with it, we'll throw an unmatched
1333
                parentheses error. */
1334
46
                if (INSIDE_FSTRING(tok) && opening == '{') {
1335
4
                    assert(current_tok->curly_bracket_depth >= 0);
1336
4
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1337
4
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1338
2
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1339
2
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1340
2
                    }
1341
4
                }
1342
44
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1343
3
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1344
3
                            "closing parenthesis '%c' does not match "
1345
3
                            "opening parenthesis '%c' on line %d",
1346
3
                            c, opening, tok->parenlinenostack[tok->level]));
1347
3
                }
1348
41
                else {
1349
41
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1350
41
                            "closing parenthesis '%c' does not match "
1351
41
                            "opening parenthesis '%c'",
1352
41
                            c, opening));
1353
41
                }
1354
44
            }
1355
89.0k
        }
1356
1357
89.0k
        if (INSIDE_FSTRING(tok)) {
1358
18.9k
            current_tok->curly_bracket_depth--;
1359
18.9k
            if (current_tok->curly_bracket_depth < 0) {
1360
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1361
1
                    TOK_GET_STRING_PREFIX(tok), c));
1362
1
            }
1363
18.9k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1364
17.2k
                current_tok->curly_bracket_expr_start_depth--;
1365
17.2k
                current_tok->kind = TOK_FSTRING_MODE;
1366
17.2k
                current_tok->in_format_spec = 0;
1367
17.2k
                current_tok->in_debug = 0;
1368
17.2k
            }
1369
18.9k
        }
1370
89.0k
        break;
1371
547k
    default:
1372
547k
        break;
1373
796k
    }
1374
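/* ---- Editorial sketch, not part of lexer.c ----
   The switch above keeps a bracket stack: every opener records its
   character (plus line and column) at tok->level, and every closer pops
   the stack and verifies the pair matches.  The same bookkeeping in
   isolation; push_open(), pop_close() and STACK_MAX (standing in for
   MAXLEVEL) are hypothetical names for this sketch only. */
#include <stdbool.h>

#define STACK_MAX 200

struct paren_state {
    char stack[STACK_MAX];
    int  level;
};

static bool push_open(struct paren_state *s, char c)
{
    if (s->level >= STACK_MAX) {
        return false;              /* "too many nested parentheses" */
    }
    s->stack[s->level++] = c;
    return true;
}

static bool pop_close(struct paren_state *s, char c)
{
    if (s->level == 0) {
        return false;              /* unmatched closer */
    }
    char opening = s->stack[--s->level];
    return (opening == '(' && c == ')')
        || (opening == '[' && c == ']')
        || (opening == '{' && c == '}');
}
/* ---- end editorial sketch ---- */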
1375
796k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1376
483
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1377
483
    }
1378
1379
795k
    if (c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
1380
39.2k
        current_tok->in_debug = 1;
1381
39.2k
    }
1382
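/* ---- Editorial sketch, not part of lexer.c ----
   The check just above arms the "debug" form of interpolation, f"{expr=}",
   in which the source text of the expression must be preserved verbatim.
   A stripped-down version of the flag update; note_equals() is a
   hypothetical helper for illustration only. */
static void note_equals(int c, int inside_fstring_expr, int *in_debug)
{
    if (c == '=' && inside_fstring_expr) {
        *in_debug = 1;     /* e.g. f"{1+1=}" renders as "1+1=2" */
    }
}
/* ---- end editorial sketch ---- */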
1383
    /* Punctuation character */
1384
795k
    p_start = tok->start;
1385
795k
    p_end = tok->cur;
1386
795k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1387
796k
}
1388
1389
static int
1390
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1391
45.7k
{
1392
45.7k
    const char *p_start = NULL;
1393
45.7k
    const char *p_end = NULL;
1394
45.7k
    int end_quote_size = 0;
1395
45.7k
    int unicode_escape = 0;
1396
1397
45.7k
    tok->start = tok->cur;
1398
45.7k
    tok->first_lineno = tok->lineno;
1399
45.7k
    tok->starting_col_offset = tok->col_offset;
1400
1401
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1402
    // before it.
1403
45.7k
    int start_char = tok_nextc(tok);
1404
45.7k
    if (start_char == '{') {
1405
10.4k
        int peek1 = tok_nextc(tok);
1406
10.4k
        tok_backup(tok, peek1);
1407
10.4k
        tok_backup(tok, start_char);
1408
10.4k
        if (peek1 != '{') {
1409
8.28k
            current_tok->curly_bracket_expr_start_depth++;
1410
8.28k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1411
8
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1412
8
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1413
8
            }
1414
8.28k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1415
8.28k
            return tok_get_normal_mode(tok, current_tok, token);
1416
8.28k
        }
1417
10.4k
    }
1418
35.2k
    else {
1419
35.2k
        tok_backup(tok, start_char);
1420
35.2k
    }
1421
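/* ---- Editorial sketch, not part of lexer.c ----
   The block above peeks one character past a leading '{' to tell an
   interpolation field from the escaped literal brace "{{"; both characters
   are pushed back either way so that normal mode re-reads the '{' itself.
   The decision in isolation, over a plain string cursor;
   starts_expression() is a hypothetical name. */

/* Returns 1 if `cur` points at a '{' that opens an expression,
   0 for the escaped brace "{{" (or no brace at all). */
static int starts_expression(const char *cur)
{
    if (cur[0] != '{') {
        return 0;
    }
    return cur[1] != '{';
}
/* ---- end editorial sketch ---- */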
1422
    // Check if we are at the end of the string
1423
53.3k
    for (int i = 0; i < current_tok->quote_size; i++) {
1424
42.4k
        int quote = tok_nextc(tok);
1425
42.4k
        if (quote != current_tok->quote) {
1426
26.5k
            tok_backup(tok, quote);
1427
26.5k
            goto f_string_middle;
1428
26.5k
        }
1429
42.4k
    }
1430
1431
10.9k
    if (current_tok->last_expr_buffer != NULL) {
1432
5.72k
        PyMem_Free(current_tok->last_expr_buffer);
1433
5.72k
        current_tok->last_expr_buffer = NULL;
1434
5.72k
        current_tok->last_expr_size = 0;
1435
5.72k
        current_tok->last_expr_end = -1;
1436
5.72k
    }
1437
1438
10.9k
    p_start = tok->start;
1439
10.9k
    p_end = tok->cur;
1440
10.9k
    tok->tok_mode_stack_index--;
1441
10.9k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1442
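/* ---- Editorial sketch, not part of lexer.c ----
   The loop above only accepts a terminator that repeats the opening quote
   character quote_size times (1 for f"...", 3 for f"""..."""); on the
   first mismatch it backs up and falls through to f_string_middle.  The
   end-of-string test in isolation; at_string_end() is a hypothetical
   name. */
static int at_string_end(const char *cur, char quote, int quote_size)
{
    for (int i = 0; i < quote_size; i++) {
        if (cur[i] != quote) {
            return 0;
        }
    }
    return 1;
}
/* ---- end editorial sketch ---- */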
1443
26.5k
f_string_middle:
1444
1445
    // TODO: Seeding multi_line_start from line_start here is a workaround;
1446
    // we still need a cleaner way to handle this.
1447
26.5k
    tok->multi_line_start = tok->line_start;
1448
158k
    while (end_quote_size != current_tok->quote_size) {
1449
153k
        int c = tok_nextc(tok);
1450
153k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1451
0
            return MAKE_TOKEN(ERRORTOKEN);
1452
0
        }
1453
153k
        int in_format_spec = (
1454
153k
                current_tok->in_format_spec
1455
153k
                &&
1456
153k
                INSIDE_FSTRING_EXPR(current_tok)
1457
153k
        );
1458
1459
153k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1460
340
            if (tok->decoding_erred) {
1461
0
                return MAKE_TOKEN(ERRORTOKEN);
1462
0
            }
1463
1464
            // If we are in a format spec and we find a newline,
1465
            // it means the format spec ends here and we should
1466
            // return to regular mode.
1467
340
            if (in_format_spec && c == '\n') {
1468
75
                if (current_tok->quote_size == 1) {
1469
75
                    return MAKE_TOKEN(
1470
75
                        _PyTokenizer_syntaxerror(
1471
75
                            tok,
1472
75
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1473
75
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1474
75
                        )
1475
75
                    );
1476
75
                }
1477
0
                tok_backup(tok, c);
1478
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1479
0
                current_tok->in_format_spec = 0;
1480
0
                p_start = tok->start;
1481
0
                p_end = tok->cur;
1482
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1483
75
            }
1484
1485
265
            assert(tok->multi_line_start != NULL);
1486
            // shift the tok_state's location back to
1487
            // the start of the string, and report the error
1488
            // from the initial quote character
1489
265
            tok->cur = (char *)current_tok->start;
1490
265
            tok->cur++;
1491
265
            tok->line_start = current_tok->multi_line_start;
1492
265
            int start = tok->lineno;
1493
1494
265
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1495
265
            tok->lineno = the_current_tok->first_line;
1496
1497
265
            if (current_tok->quote_size == 3) {
1498
28
                _PyTokenizer_syntaxerror(tok,
1499
28
                                    "unterminated triple-quoted %c-string literal"
1500
28
                                    " (detected at line %d)",
1501
28
                                    TOK_GET_STRING_PREFIX(tok), start);
1502
28
                if (c != '\n') {
1503
28
                    tok->done = E_EOFS;
1504
28
                }
1505
28
                return MAKE_TOKEN(ERRORTOKEN);
1506
28
            }
1507
237
            else {
1508
237
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1509
237
                                    "unterminated %c-string literal (detected at"
1510
237
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1511
237
            }
1512
265
        }
1513
1514
152k
        if (c == current_tok->quote) {
1515
8.87k
            end_quote_size += 1;
1516
8.87k
            continue;
1517
143k
        } else {
1518
143k
            end_quote_size = 0;
1519
143k
        }
1520
1521
143k
        if (c == '{') {
1522
16.5k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1523
0
                return MAKE_TOKEN(ENDMARKER);
1524
0
            }
1525
16.5k
            int peek = tok_nextc(tok);
1526
16.5k
            if (peek != '{' || in_format_spec) {
1527
13.9k
                tok_backup(tok, peek);
1528
13.9k
                tok_backup(tok, c);
1529
13.9k
                current_tok->curly_bracket_expr_start_depth++;
1530
13.9k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1531
6
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1532
6
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1533
6
                }
1534
13.8k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1535
13.8k
                current_tok->in_format_spec = 0;
1536
13.8k
                p_start = tok->start;
1537
13.8k
                p_end = tok->cur;
1538
13.8k
            } else {
1539
2.69k
                p_start = tok->start;
1540
2.69k
                p_end = tok->cur - 1;
1541
2.69k
            }
1542
16.5k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1543
127k
        } else if (c == '}') {
1544
4.40k
            if (unicode_escape) {
1545
392
                p_start = tok->start;
1546
392
                p_end = tok->cur;
1547
392
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1548
392
            }
1549
4.01k
            int peek = tok_nextc(tok);
1550
1551
            // The tokenizer can only be in the format spec if we have already finished scanning the
1552
            // expression (indicated by the expression's end being set) and we are not at the top level
1553
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally contain
1554
            // double brackets, we can skip that case here.
1555
4.01k
            int cursor = current_tok->curly_bracket_depth;
1556
4.01k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1557
1.63k
                p_start = tok->start;
1558
1.63k
                p_end = tok->cur - 1;
1559
2.38k
            } else {
1560
2.38k
                tok_backup(tok, peek);
1561
2.38k
                tok_backup(tok, c);
1562
2.38k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1563
2.38k
                current_tok->in_format_spec = 0;
1564
2.38k
                p_start = tok->start;
1565
2.38k
                p_end = tok->cur;
1566
2.38k
            }
1567
4.01k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1568
122k
        } else if (c == '\\') {
1569
6.90k
            int peek = tok_nextc(tok);
1570
6.90k
            if (peek == '\r') {
1571
69
                peek = tok_nextc(tok);
1572
69
            }
1573
            // Special case when the backslash is right before a curly
1574
            // brace. We have to back up and return control
1575
            // to the loop for the next iteration.
1576
6.90k
            if (peek == '{' || peek == '}') {
1577
1.13k
                if (!current_tok->raw) {
1578
933
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1579
0
                        return MAKE_TOKEN(ERRORTOKEN);
1580
0
                    }
1581
933
                }
1582
1.13k
                tok_backup(tok, peek);
1583
1.13k
                continue;
1584
1.13k
            }
1585
1586
5.77k
            if (!current_tok->raw) {
1587
5.52k
                if (peek == 'N') {
1588
                    /* Handle named unicode escapes (\N{BULLET}); see the editorial sketch after this loop */
1589
499
                    peek = tok_nextc(tok);
1590
499
                    if (peek == '{') {
1591
405
                        unicode_escape = 1;
1592
405
                    } else {
1593
94
                        tok_backup(tok, peek);
1594
94
                    }
1595
499
                }
1596
5.52k
            } /* else {
1597
                skip the escaped character
1598
            }*/
1599
5.77k
        }
1600
143k
    }
1601
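/* ---- Editorial sketch, not part of lexer.c ----
   As referenced at the \N escape handling above: inside a non-raw
   f-string, "\N{NAME}" must not be mistaken for an interpolation field,
   so the '}' closing a named escape stays inside FSTRING_MIDDLE.  A
   simplified recognizer for the escape; named_escape_len() is a
   hypothetical name. */
#include <string.h>

/* Returns the length of a named escape at `s` (e.g. "\\N{BULLET}" -> 10),
   or 0 if `s` does not start one. */
static size_t named_escape_len(const char *s)
{
    if (s[0] != '\\' || s[1] != 'N' || s[2] != '{') {
        return 0;
    }
    const char *close = strchr(s + 3, '}');
    return close != NULL ? (size_t)(close - s) + 1 : 0;
}
/* ---- end editorial sketch ---- */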
1602
    // Back up over the f-string quotes so a final FSTRING_MIDDLE is emitted
1603
    // here and the quotes become part of the FSTRING_END on the next call.
1604
11.4k
    for (int i = 0; i < current_tok->quote_size; i++) {
1605
6.26k
        tok_backup(tok, current_tok->quote);
1606
6.26k
    }
1607
5.16k
    p_start = tok->start;
1608
5.16k
    p_end = tok->cur;
1609
5.16k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1610
26.5k
}
1611
1612
static int
1613
tok_get(struct tok_state *tok, struct token *token)
1614
1.68M
{
1615
1.68M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1616
1.68M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1617
1.63M
        return tok_get_normal_mode(tok, current_tok, token);
1618
1.63M
    } else {
1619
45.7k
        return tok_get_fstring_mode(tok, current_tok, token);
1620
45.7k
    }
1621
1.68M
}
1622
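/* ---- Editorial sketch, not part of lexer.c ----
   tok_get() above is a pure dispatcher: the tokenizer keeps a stack of
   modes so that f-strings nested inside f-string expressions each carry
   their own state, and the kind at the top of the stack picks the
   scanner.  A skeletal version of that dispatch; the type names, the
   scan_* stubs, and the depth of 64 are all illustrative. */
enum mode_kind { REGULAR_MODE, FSTRING_MODE };

struct mode  { enum mode_kind kind; };
struct state { struct mode stack[64]; int top; };

static int scan_regular(struct state *s) { (void)s; return 0; }
static int scan_fstring(struct state *s) { (void)s; return 0; }

static int get_token(struct state *s)
{
    return s->stack[s->top].kind == REGULAR_MODE
               ? scan_regular(s)
               : scan_fstring(s);
}
/* ---- end editorial sketch ---- */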
1623
int
1624
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1625
1.68M
{
1626
1.68M
    int result = tok_get(tok, token);
1627
1.68M
    if (tok->decoding_erred) {
1628
0
        result = ERRORTOKEN;
1629
0
        tok->done = E_DECODE;
1630
0
    }
1631
1.68M
    return result;
1632
1.68M
}
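/* ---- Editorial sketch, not part of lexer.c ----
   _PyTokenizer_Get() above applies an error-precedence rule at the public
   boundary: whatever token tok_get() produced, a pending decoding error
   downgrades the result to ERRORTOKEN and records E_DECODE.  The same
   pattern in isolation; the struct and SK_* constants are illustrative
   stand-ins, not the real CPython definitions. */
struct tok_sketch {
    int decoding_erred;   /* set by the input decoder on bad bytes */
    int done;             /* last error code */
};

enum { SK_OK = 0, SK_ERRORTOKEN = -1, SK_E_DECODE = 2 };

static int scan_one(struct tok_sketch *t) { (void)t; return SK_OK; }

static int get_token_checked(struct tok_sketch *t)
{
    int result = scan_one(t);
    if (t->decoding_erred) {
        result = SK_ERRORTOKEN;   /* decode errors override the token */
        t->done = SK_E_DECODE;
    }
    return result;
}
/* ---- end editorial sketch ---- */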