Coverage Report

Created: 2026-03-08 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.40k
#define ALTTABSIZE 1
11
12
2.04M
#define is_potential_identifier_start(c) (\
13
2.04M
              (c >= 'a' && c <= 'z')\
14
2.04M
               || (c >= 'A' && c <= 'Z')\
15
2.04M
               || c == '_'\
16
2.04M
               || (c >= 128))
17
18
3.41M
#define is_potential_identifier_char(c) (\
19
3.41M
              (c >= 'a' && c <= 'z')\
20
3.41M
               || (c >= 'A' && c <= 'Z')\
21
3.41M
               || (c >= '0' && c <= '9')\
22
3.41M
               || c == '_'\
23
3.41M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
2.27M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
18.4k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
27
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
2.14M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
253k
{
55
253k
    return memchr(str, 0, size) != NULL;
56
253k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.6M
{
62
11.6M
    int rc;
63
11.8M
    for (;;) {
64
11.8M
        if (tok->cur != tok->inp) {
65
11.3M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.3M
            tok->col_offset++;
70
11.3M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.3M
        }
72
572k
        if (tok->done != E_OK) {
73
213k
            return EOF;
74
213k
        }
75
359k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
359k
        if (!rc) {
84
106k
            tok->cur = tok->inp;
85
106k
            return EOF;
86
106k
        }
87
253k
        tok->line_start = tok->cur;
88
89
253k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
253k
    }
95
11.6M
    Py_UNREACHABLE();
96
11.6M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.47M
{
102
4.47M
    if (c != EOF) {
103
4.25M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.25M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.25M
        tok->col_offset--;
110
4.25M
    }
111
4.47M
}
112
113
static int
114
27.1k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
27.1k
    assert(token != NULL);
116
27.1k
    assert(c == '}' || c == ':' || c == '!');
117
27.1k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
27.1k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
17.0k
        return 0;
121
17.0k
    }
122
10.0k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
10.0k
    int hash_detected = 0;
126
10.0k
    int in_string = 0;
127
10.0k
    char quote_char = 0;
128
129
1.15M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.14M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.14M
        if (ch == '\\') {
134
20.8k
            i++;
135
20.8k
            continue;
136
20.8k
        }
137
138
        // Handle quotes
139
1.12M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
184k
            if (!in_string) {
148
67.0k
                in_string = 1;
149
67.0k
                quote_char = ch;
150
67.0k
            }
151
117k
            else if (ch == quote_char) {
152
66.2k
                in_string = 0;
153
66.2k
            }
154
184k
            continue;
155
184k
        }
156
157
        // Check for # outside strings
158
943k
        if (ch == '#' && !in_string) {
159
841
            hash_detected = 1;
160
841
            break;
161
841
        }
162
943k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
10.0k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
841
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
841
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
841
        Py_ssize_t i = 0;  // Input position
172
841
        Py_ssize_t j = 0;  // Output position
173
841
        in_string = 0;     // Whether we're in a string
174
841
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
40.6k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
39.8k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
39.8k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
6.08k
                if (!in_string) {
184
2.61k
                    in_string = 1;
185
2.61k
                    quote_char = ch;
186
3.47k
                } else if (ch == quote_char) {
187
2.60k
                    in_string = 0;
188
2.60k
                }
189
6.08k
                result[j++] = ch;
190
6.08k
            }
191
            // Skip comments
192
33.7k
            else if (ch == '#' && !in_string) {
193
26.7k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
25.9k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
25.7k
                    i++;
196
25.7k
                }
197
1.04k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
283
                    result[j++] = '\n';
199
283
                }
200
1.04k
            }
201
            // Copy other chars
202
32.7k
            else {
203
32.7k
                result[j++] = ch;
204
32.7k
            }
205
39.8k
            i++;
206
39.8k
        }
207
208
841
        result[j] = '\0';  // Null-terminate the result string
209
841
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
841
        PyMem_Free(result);
211
9.23k
    } else {
212
9.23k
        res = PyUnicode_DecodeUTF8(
213
9.23k
            tok_mode->last_expr_buffer,
214
9.23k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
9.23k
            NULL
216
9.23k
        );
217
9.23k
    }
218
219
10.0k
    if (!res) {
220
0
        return -1;
221
0
    }
222
10.0k
    token->metadata = res;
223
10.0k
    return 0;
224
10.0k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
71.9k
{
229
71.9k
    assert(tok->cur != NULL);
230
231
71.9k
    Py_ssize_t size = strlen(tok->cur);
232
71.9k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
71.9k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
44.8k
        case '{':
252
44.8k
            if (tok_mode->last_expr_buffer != NULL) {
253
31.6k
                PyMem_Free(tok_mode->last_expr_buffer);
254
31.6k
            }
255
44.8k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
44.8k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
44.8k
            tok_mode->last_expr_size = size;
260
44.8k
            tok_mode->last_expr_end = -1;
261
44.8k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
44.8k
            break;
263
21.2k
        case '}':
264
23.7k
        case '!':
265
23.7k
            tok_mode->last_expr_end = strlen(tok->start);
266
23.7k
            break;
267
3.37k
        case ':':
268
3.37k
            if (tok_mode->last_expr_end == -1) {
269
2.93k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.93k
            }
271
3.37k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
71.9k
    }
275
71.9k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
71.9k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
6.98k
{
284
6.98k
    const char *s = test;
285
6.98k
    int res = 0;
286
18.8k
    while (1) {
287
18.8k
        int c = tok_nextc(tok);
288
18.8k
        if (*s == 0) {
289
6.90k
            res = !is_potential_identifier_char(c);
290
6.90k
        }
291
11.9k
        else if (c == *s) {
292
11.8k
            s++;
293
11.8k
            continue;
294
11.8k
        }
295
296
6.98k
        tok_backup(tok, c);
297
18.8k
        while (s != test) {
298
11.8k
            tok_backup(tok, *--s);
299
11.8k
        }
300
6.98k
        return res;
301
18.8k
    }
302
6.98k
}
303
304
static int
305
81.5k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
81.5k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
81.4k
    int r = 0;
322
81.4k
    if (c == 'a') {
323
848
        r = lookahead(tok, "nd");
324
848
    }
325
80.6k
    else if (c == 'e') {
326
337
        r = lookahead(tok, "lse");
327
337
    }
328
80.3k
    else if (c == 'f') {
329
3.19k
        r = lookahead(tok, "or");
330
3.19k
    }
331
77.1k
    else if (c == 'i') {
332
1.07k
        int c2 = tok_nextc(tok);
333
1.07k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.06k
            r = 1;
335
1.06k
        }
336
1.07k
        tok_backup(tok, c2);
337
1.07k
    }
338
76.0k
    else if (c == 'o') {
339
2.29k
        r = lookahead(tok, "r");
340
2.29k
    }
341
73.7k
    else if (c == 'n') {
342
308
        r = lookahead(tok, "ot");
343
308
    }
344
81.4k
    if (r) {
345
7.95k
        tok_backup(tok, c);
346
7.95k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
7.95k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
7.95k
        tok_nextc(tok);
352
7.95k
    }
353
73.5k
    else /* In future releases, only error will remain. */
354
73.5k
    if (c < 128 && is_potential_identifier_char(c)) {
355
197
        tok_backup(tok, c);
356
197
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
197
        return 0;
358
197
    }
359
81.3k
    return 1;
360
81.4k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.4k
{
366
11.4k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.4k
    PyObject *s;
370
11.4k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.4k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.4k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
11.4k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.4k
    assert(invalid >= 0);
384
11.4k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.4k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
542
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
542
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
370
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
370
            if (s != NULL) {
391
370
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
370
            }
393
370
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
370
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
370
        }
399
542
        Py_DECREF(s);
400
542
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
282
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
282
        }
403
260
        else {
404
260
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
260
        }
406
542
        return 0;
407
542
    }
408
10.8k
    Py_DECREF(s);
409
10.8k
    return 1;
410
11.4k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
61.1k
{
415
61.1k
    int c;
416
417
61.4k
    while (1) {
418
194k
        do {
419
194k
            c = tok_nextc(tok);
420
194k
        } while (Py_ISDIGIT(c));
421
61.4k
        if (c != '_') {
422
61.1k
            break;
423
61.1k
        }
424
351
        c = tok_nextc(tok);
425
351
        if (!Py_ISDIGIT(c)) {
426
15
            tok_backup(tok, c);
427
15
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
15
            return 0;
429
15
        }
430
351
    }
431
61.1k
    return c;
432
61.1k
}
433
434
static inline int
435
879
tok_continuation_line(struct tok_state *tok) {
436
879
    int c = tok_nextc(tok);
437
879
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
879
    if (c != '\n') {
441
54
        tok->done = E_LINECONT;
442
54
        return -1;
443
54
    }
444
825
    c = tok_nextc(tok);
445
825
    if (c == EOF) {
446
39
        tok->done = E_EOF;
447
39
        tok->cur = tok->inp;
448
39
        return -1;
449
786
    } else {
450
786
        tok_backup(tok, c);
451
786
    }
452
786
    return c;
453
825
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.9k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.9k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.9k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
21.9k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.9k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.9k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.9k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
21.9k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
21.9k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
21.9k
    if (saw_f && saw_t) {
492
2
        RETURN_SYNTAX_ERROR("f", "t");
493
2
    }
494
495
21.9k
#undef RETURN_SYNTAX_ERROR
496
497
21.9k
    return 0;
498
21.9k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.10M
{
503
2.10M
    int c;
504
2.10M
    int blankline, nonascii;
505
506
2.10M
    const char *p_start = NULL;
507
2.10M
    const char *p_end = NULL;
508
2.17M
  nextline:
509
2.17M
    tok->start = NULL;
510
2.17M
    tok->starting_col_offset = -1;
511
2.17M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.17M
    if (tok->atbol) {
516
350k
        int col = 0;
517
350k
        int altcol = 0;
518
350k
        tok->atbol = 0;
519
350k
        int cont_line_col = 0;
520
716k
        for (;;) {
521
716k
            c = tok_nextc(tok);
522
716k
            if (c == ' ') {
523
364k
                col++, altcol++;
524
364k
            }
525
352k
            else if (c == '\t') {
526
700
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
700
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
700
            }
529
351k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
777
                col = altcol = 0; /* For Emacs users */
531
777
            }
532
350k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
558
                cont_line_col = cont_line_col ? cont_line_col : col;
538
558
                if ((c = tok_continuation_line(tok)) == -1) {
539
33
                    return MAKE_TOKEN(ERRORTOKEN);
540
33
                }
541
558
            }
542
350k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
350k
            else {
546
350k
                break;
547
350k
            }
548
716k
        }
549
350k
        tok_backup(tok, c);
550
350k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
38.3k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
38.3k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
38.3k
            else {
566
38.3k
                blankline = 1; /* Ignore completely */
567
38.3k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
38.3k
        }
571
350k
        if (!blankline && tok->level == 0) {
572
277k
            col = cont_line_col ? cont_line_col : col;
573
277k
            altcol = cont_line_col ? cont_line_col : altcol;
574
277k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
255k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
255k
            }
580
22.6k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.6k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.6k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.6k
                tok->pendin++;
591
12.6k
                tok->indstack[++tok->indent] = col;
592
12.6k
                tok->altindstack[tok->indent] = altcol;
593
12.6k
            }
594
9.94k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
22.0k
                while (tok->indent > 0 &&
597
18.8k
                    col < tok->indstack[tok->indent]) {
598
12.0k
                    tok->pendin--;
599
12.0k
                    tok->indent--;
600
12.0k
                }
601
9.94k
                if (col != tok->indstack[tok->indent]) {
602
7
                    tok->done = E_DEDENT;
603
7
                    tok->cur = tok->inp;
604
7
                    return MAKE_TOKEN(ERRORTOKEN);
605
7
                }
606
9.94k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.94k
            }
610
277k
        }
611
350k
    }
612
613
2.17M
    tok->start = tok->cur;
614
2.17M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.17M
    if (tok->pendin != 0) {
618
24.7k
        if (tok->pendin < 0) {
619
12.0k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
12.0k
            tok->pendin++;
624
12.0k
            return MAKE_TOKEN(DEDENT);
625
12.0k
        }
626
12.6k
        else {
627
12.6k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.6k
            tok->pendin--;
632
12.6k
            return MAKE_TOKEN(INDENT);
633
12.6k
        }
634
24.7k
    }
635
636
    /* Peek ahead at the next character */
637
2.15M
    c = tok_nextc(tok);
638
2.15M
    tok_backup(tok, c);
639
640
2.15M
 again:
641
2.15M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.60M
    do {
644
2.60M
        c = tok_nextc(tok);
645
2.60M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.15M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.15M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.15M
    if (c == '#') {
653
654
31.4k
        const char* p = NULL;
655
31.4k
        const char *prefix, *type_start;
656
31.4k
        int current_starting_col_offset;
657
658
996k
        while (c != EOF && c != '\n' && c != '\r') {
659
965k
            c = tok_nextc(tok);
660
965k
        }
661
662
31.4k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
31.4k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
31.4k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
31.4k
    }
728
729
2.15M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.15M
    if (c == EOF) {
735
106k
        if (tok->level) {
736
3.57k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.57k
        }
738
102k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
106k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.04M
    nonascii = 0;
743
2.04M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
715k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
911k
        while (1) {
747
911k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
17.8k
                saw_b = 1;
749
17.8k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
894k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
98.5k
                saw_u = 1;
754
98.5k
            }
755
            /* ur"" and ru"" are not supported */
756
795k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
26.0k
                saw_r = 1;
758
26.0k
            }
759
769k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
42.9k
                saw_f = 1;
761
42.9k
            }
762
726k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
32.7k
                saw_t = 1;
764
32.7k
            }
765
693k
            else {
766
693k
                break;
767
693k
            }
768
218k
            c = tok_nextc(tok);
769
218k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
21.9k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
21.9k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
21.9k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
21.9k
                if (saw_f || saw_t) {
779
18.4k
                    goto f_string_quote;
780
18.4k
                }
781
3.53k
                goto letter_quote;
782
21.9k
            }
783
218k
        }
784
3.33M
        while (is_potential_identifier_char(c)) {
785
2.64M
            if (c >= 128) {
786
128k
                nonascii = 1;
787
128k
            }
788
2.64M
            c = tok_nextc(tok);
789
2.64M
        }
790
693k
        tok_backup(tok, c);
791
693k
        if (nonascii && !verify_identifier(tok)) {
792
542
            return MAKE_TOKEN(ERRORTOKEN);
793
542
        }
794
795
693k
        p_start = tok->start;
796
693k
        p_end = tok->cur;
797
798
693k
        return MAKE_TOKEN(NAME);
799
693k
    }
800
801
1.32M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.32M
    if (c == '\n') {
807
241k
        tok->atbol = 1;
808
241k
        if (blankline || tok->level > 0) {
809
72.3k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
72.2k
            goto nextline;
818
72.3k
        }
819
168k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
168k
        p_start = tok->start;
826
168k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
168k
        tok->cont_line = 0;
828
168k
        return MAKE_TOKEN(NEWLINE);
829
168k
    }
830
831
    /* Period or number starting with period? */
832
1.08M
    if (c == '.') {
833
29.2k
        c = tok_nextc(tok);
834
29.2k
        if (Py_ISDIGIT(c)) {
835
2.77k
            goto fraction;
836
26.4k
        } else if (c == '.') {
837
1.32k
            c = tok_nextc(tok);
838
1.32k
            if (c == '.') {
839
692
                p_start = tok->start;
840
692
                p_end = tok->cur;
841
692
                return MAKE_TOKEN(ELLIPSIS);
842
692
            }
843
637
            else {
844
637
                tok_backup(tok, c);
845
637
            }
846
637
            tok_backup(tok, '.');
847
637
        }
848
25.1k
        else {
849
25.1k
            tok_backup(tok, c);
850
25.1k
        }
851
25.7k
        p_start = tok->start;
852
25.7k
        p_end = tok->cur;
853
25.7k
        return MAKE_TOKEN(DOT);
854
29.2k
    }
855
856
    /* Number */
857
1.05M
    if (Py_ISDIGIT(c)) {
858
78.8k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
31.6k
            c = tok_nextc(tok);
861
31.6k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
14.5k
                c = tok_nextc(tok);
864
14.6k
                do {
865
14.6k
                    if (c == '_') {
866
74
                        c = tok_nextc(tok);
867
74
                    }
868
14.6k
                    if (!Py_ISXDIGIT(c)) {
869
15
                        tok_backup(tok, c);
870
15
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
15
                    }
872
73.2k
                    do {
873
73.2k
                        c = tok_nextc(tok);
874
73.2k
                    } while (Py_ISXDIGIT(c));
875
14.6k
                } while (c == '_');
876
14.5k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
2
                    return MAKE_TOKEN(ERRORTOKEN);
878
2
                }
879
14.5k
            }
880
17.0k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
481
                c = tok_nextc(tok);
883
669
                do {
884
669
                    if (c == '_') {
885
189
                        c = tok_nextc(tok);
886
189
                    }
887
669
                    if (c < '0' || c >= '8') {
888
19
                        if (Py_ISDIGIT(c)) {
889
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
2
                                    "invalid digit '%c' in octal literal", c));
891
2
                        }
892
17
                        else {
893
17
                            tok_backup(tok, c);
894
17
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
17
                        }
896
19
                    }
897
1.67k
                    do {
898
1.67k
                        c = tok_nextc(tok);
899
1.67k
                    } while ('0' <= c && c < '8');
900
650
                } while (c == '_');
901
462
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
461
                if (!verify_end_of_number(tok, c, "octal")) {
906
3
                    return MAKE_TOKEN(ERRORTOKEN);
907
3
                }
908
461
            }
909
16.5k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
388
                c = tok_nextc(tok);
912
525
                do {
913
525
                    if (c == '_') {
914
144
                        c = tok_nextc(tok);
915
144
                    }
916
525
                    if (c != '0' && c != '1') {
917
23
                        if (Py_ISDIGIT(c)) {
918
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
2
                        }
920
21
                        else {
921
21
                            tok_backup(tok, c);
922
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
21
                        }
924
23
                    }
925
3.12k
                    do {
926
3.12k
                        c = tok_nextc(tok);
927
3.12k
                    } while (c == '0' || c == '1');
928
502
                } while (c == '_');
929
365
                if (Py_ISDIGIT(c)) {
930
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
1
                }
932
364
                if (!verify_end_of_number(tok, c, "binary")) {
933
3
                    return MAKE_TOKEN(ERRORTOKEN);
934
3
                }
935
364
            }
936
16.1k
            else {
937
16.1k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
17.8k
                while (1) {
941
17.8k
                    if (c == '_') {
942
205
                        c = tok_nextc(tok);
943
205
                        if (!Py_ISDIGIT(c)) {
944
2
                            tok_backup(tok, c);
945
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
2
                        }
947
205
                    }
948
17.8k
                    if (c != '0') {
949
16.1k
                        break;
950
16.1k
                    }
951
1.72k
                    c = tok_nextc(tok);
952
1.72k
                }
953
16.1k
                char* zeros_end = tok->cur;
954
16.1k
                if (Py_ISDIGIT(c)) {
955
371
                    nonzero = 1;
956
371
                    c = tok_decimal_tail(tok);
957
371
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
371
                }
961
16.1k
                if (c == '.') {
962
738
                    c = tok_nextc(tok);
963
738
                    goto fraction;
964
738
                }
965
15.4k
                else if (c == 'e' || c == 'E') {
966
843
                    goto exponent;
967
843
                }
968
14.5k
                else if (c == 'j' || c == 'J') {
969
565
                    goto imaginary;
970
565
                }
971
14.0k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
28
                    tok_backup(tok, c);
974
28
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
28
                            tok, (int)(tok->start + 1 - tok->line_start),
976
28
                            (int)(zeros_end - tok->line_start),
977
28
                            "leading zeros in decimal integer "
978
28
                            "literals are not permitted; "
979
28
                            "use an 0o prefix for octal integers"));
980
28
                }
981
13.9k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
26
                    return MAKE_TOKEN(ERRORTOKEN);
983
26
                }
984
13.9k
            }
985
31.6k
        }
986
47.2k
        else {
987
            /* Decimal */
988
47.2k
            c = tok_decimal_tail(tok);
989
47.2k
            if (c == 0) {
990
10
                return MAKE_TOKEN(ERRORTOKEN);
991
10
            }
992
47.2k
            {
993
                /* Accept floating-point numbers. */
994
47.2k
                if (c == '.') {
995
2.92k
                    c = tok_nextc(tok);
996
6.43k
        fraction:
997
                    /* Fraction */
998
6.43k
                    if (Py_ISDIGIT(c)) {
999
5.09k
                        c = tok_decimal_tail(tok);
1000
5.09k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
5.09k
                    }
1004
6.43k
                }
1005
50.7k
                if (c == 'e' || c == 'E') {
1006
7.89k
                    int e;
1007
8.73k
                  exponent:
1008
8.73k
                    e = c;
1009
                    /* Exponent part */
1010
8.73k
                    c = tok_nextc(tok);
1011
8.73k
                    if (c == '+' || c == '-') {
1012
3.43k
                        c = tok_nextc(tok);
1013
3.43k
                        if (!Py_ISDIGIT(c)) {
1014
10
                            tok_backup(tok, c);
1015
10
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
10
                        }
1017
5.30k
                    } else if (!Py_ISDIGIT(c)) {
1018
341
                        tok_backup(tok, c);
1019
341
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
35
                            return MAKE_TOKEN(ERRORTOKEN);
1021
35
                        }
1022
306
                        tok_backup(tok, e);
1023
306
                        p_start = tok->start;
1024
306
                        p_end = tok->cur;
1025
306
                        return MAKE_TOKEN(NUMBER);
1026
341
                    }
1027
8.38k
                    c = tok_decimal_tail(tok);
1028
8.38k
                    if (c == 0) {
1029
2
                        return MAKE_TOKEN(ERRORTOKEN);
1030
2
                    }
1031
8.38k
                }
1032
51.2k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.75k
        imaginary:
1035
3.75k
                    c = tok_nextc(tok);
1036
3.75k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
9
                        return MAKE_TOKEN(ERRORTOKEN);
1038
9
                    }
1039
3.75k
                }
1040
48.0k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
119
                    return MAKE_TOKEN(ERRORTOKEN);
1042
119
                }
1043
51.2k
            }
1044
51.2k
        }
1045
81.0k
        tok_backup(tok, c);
1046
81.0k
        p_start = tok->start;
1047
81.0k
        p_end = tok->cur;
1048
81.0k
        return MAKE_TOKEN(NUMBER);
1049
78.8k
    }
1050
1051
998k
  f_string_quote:
1052
998k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
18.4k
        && (c == '\'' || c == '"'))) {
1054
1055
18.4k
        int quote = c;
1056
18.4k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
18.4k
        tok->first_lineno = tok->lineno;
1063
18.4k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
18.4k
        int after_quote = tok_nextc(tok);
1067
18.4k
        if (after_quote == quote) {
1068
2.59k
            int after_after_quote = tok_nextc(tok);
1069
2.59k
            if (after_after_quote == quote) {
1070
657
                quote_size = 3;
1071
657
            }
1072
1.93k
            else {
1073
                // TODO: Check this
1074
1.93k
                tok_backup(tok, after_after_quote);
1075
1.93k
                tok_backup(tok, after_quote);
1076
1.93k
            }
1077
2.59k
        }
1078
18.4k
        if (after_quote != quote) {
1079
15.8k
            tok_backup(tok, after_quote);
1080
15.8k
        }
1081
1082
1083
18.4k
        p_start = tok->start;
1084
18.4k
        p_end = tok->cur;
1085
18.4k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
18.4k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
18.4k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
18.4k
        the_current_tok->quote = quote;
1091
18.4k
        the_current_tok->quote_size = quote_size;
1092
18.4k
        the_current_tok->start = tok->start;
1093
18.4k
        the_current_tok->multi_line_start = tok->line_start;
1094
18.4k
        the_current_tok->first_line = tok->lineno;
1095
18.4k
        the_current_tok->start_offset = -1;
1096
18.4k
        the_current_tok->multi_line_start_offset = -1;
1097
18.4k
        the_current_tok->last_expr_buffer = NULL;
1098
18.4k
        the_current_tok->last_expr_size = 0;
1099
18.4k
        the_current_tok->last_expr_end = -1;
1100
18.4k
        the_current_tok->in_format_spec = 0;
1101
18.4k
        the_current_tok->in_debug = 0;
1102
1103
18.4k
        enum string_kind_t string_kind = FSTRING;
1104
18.4k
        switch (*tok->start) {
1105
1.11k
            case 'T':
1106
4.95k
            case 't':
1107
4.95k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
4.95k
                string_kind = TSTRING;
1109
4.95k
                break;
1110
3.76k
            case 'F':
1111
13.1k
            case 'f':
1112
13.1k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
13.1k
                break;
1114
218
            case 'R':
1115
348
            case 'r':
1116
348
                the_current_tok->raw = 1;
1117
348
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
42
                    string_kind = TSTRING;
1119
42
                }
1120
348
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
18.4k
        }
1124
1125
18.4k
        the_current_tok->string_kind = string_kind;
1126
18.4k
        the_current_tok->curly_bracket_depth = 0;
1127
18.4k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
18.4k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
18.4k
    }
1130
1131
983k
  letter_quote:
1132
    /* String */
1133
983k
    if (c == '\'' || c == '"') {
1134
38.3k
        int quote = c;
1135
38.3k
        int quote_size = 1;             /* 1 or 3 */
1136
38.3k
        int end_quote_size = 0;
1137
38.3k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
38.3k
        tok->first_lineno = tok->lineno;
1144
38.3k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
38.3k
        c = tok_nextc(tok);
1148
38.3k
        if (c == quote) {
1149
7.01k
            c = tok_nextc(tok);
1150
7.01k
            if (c == quote) {
1151
1.22k
                quote_size = 3;
1152
1.22k
            }
1153
5.79k
            else {
1154
5.79k
                end_quote_size = 1;     /* empty string found */
1155
5.79k
            }
1156
7.01k
        }
1157
38.3k
        if (c != quote) {
1158
37.1k
            tok_backup(tok, c);
1159
37.1k
        }
1160
1161
        /* Get rest of string */
1162
570k
        while (end_quote_size != quote_size) {
1163
532k
            c = tok_nextc(tok);
1164
532k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
532k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
532k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
310
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
310
                tok->cur = (char *)tok->start;
1176
310
                tok->cur++;
1177
310
                tok->line_start = tok->multi_line_start;
1178
310
                int start = tok->lineno;
1179
310
                tok->lineno = tok->first_lineno;
1180
1181
310
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * does the initial quote matches with f-strings quotes
1185
                     * and if it is, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
35
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
35
                    if (the_current_tok->quote == quote &&
1189
26
                        the_current_tok->quote_size == quote_size) {
1190
21
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
21
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
21
                    }
1193
35
                }
1194
1195
289
                if (quote_size == 3) {
1196
24
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
24
                                     " (detected at line %d)", start);
1198
24
                    if (c != '\n') {
1199
24
                        tok->done = E_EOFS;
1200
24
                    }
1201
24
                    return MAKE_TOKEN(ERRORTOKEN);
1202
24
                }
1203
265
                else {
1204
265
                    if (has_escaped_quote) {
1205
9
                        _PyTokenizer_syntaxerror(
1206
9
                            tok,
1207
9
                            "unterminated string literal (detected at line %d); "
1208
9
                            "perhaps you escaped the end quote?",
1209
9
                            start
1210
9
                        );
1211
256
                    } else {
1212
256
                        _PyTokenizer_syntaxerror(
1213
256
                            tok, "unterminated string literal (detected at line %d)", start
1214
256
                        );
1215
256
                    }
1216
265
                    if (c != '\n') {
1217
7
                        tok->done = E_EOLS;
1218
7
                    }
1219
265
                    return MAKE_TOKEN(ERRORTOKEN);
1220
265
                }
1221
289
            }
1222
532k
            if (c == quote) {
1223
35.4k
                end_quote_size += 1;
1224
35.4k
            }
1225
496k
            else {
1226
496k
                end_quote_size = 0;
1227
496k
                if (c == '\\') {
1228
25.8k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
25.8k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
963
                        has_escaped_quote = 1;
1231
963
                    }
1232
25.8k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
25.8k
                }
1236
496k
            }
1237
532k
        }
1238
1239
38.0k
        p_start = tok->start;
1240
38.0k
        p_end = tok->cur;
1241
38.0k
        return MAKE_TOKEN(STRING);
1242
38.3k
    }
1243
1244
    /* Line continuation */
1245
944k
    if (c == '\\') {
1246
321
        if ((c = tok_continuation_line(tok)) == -1) {
1247
60
            return MAKE_TOKEN(ERRORTOKEN);
1248
60
        }
1249
261
        tok->cont_line = 1;
1250
261
        goto again; /* Read next line */
1251
321
    }
1252
1253
    /* Punctuation character */
1254
944k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
944k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
61.5k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
61.5k
        int in_format_spec = current_tok->in_format_spec;
1261
61.5k
         int cursor_in_format_with_debug =
1262
61.5k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
61.5k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
61.5k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
61.5k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
61.5k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
4.02k
            current_tok->kind = TOK_FSTRING_MODE;
1273
4.02k
            current_tok->in_format_spec = 1;
1274
4.02k
            p_start = tok->start;
1275
4.02k
            p_end = tok->cur;
1276
4.02k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
4.02k
        }
1278
61.5k
    }
1279
1280
    /* Check for two-character token */
1281
940k
    {
1282
940k
        int c2 = tok_nextc(tok);
1283
940k
        int current_token = _PyToken_TwoChars(c, c2);
1284
940k
        if (current_token != OP) {
1285
21.7k
            int c3 = tok_nextc(tok);
1286
21.7k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
21.7k
            if (current_token3 != OP) {
1288
1.18k
                current_token = current_token3;
1289
1.18k
            }
1290
20.5k
            else {
1291
20.5k
                tok_backup(tok, c3);
1292
20.5k
            }
1293
21.7k
            p_start = tok->start;
1294
21.7k
            p_end = tok->cur;
1295
21.7k
            return MAKE_TOKEN(current_token);
1296
21.7k
        }
1297
918k
        tok_backup(tok, c2);
1298
918k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
66.6k
    case '(':
1303
92.7k
    case '[':
1304
139k
    case '{':
1305
139k
        if (tok->level >= MAXLEVEL) {
1306
18
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
18
        }
1308
139k
        tok->parenstack[tok->level] = c;
1309
139k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
139k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
139k
        tok->level++;
1312
139k
        if (INSIDE_FSTRING(tok)) {
1313
33.8k
            current_tok->curly_bracket_depth++;
1314
33.8k
        }
1315
139k
        break;
1316
39.0k
    case ')':
1317
44.9k
    case ']':
1318
73.1k
    case '}':
1319
73.1k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
50
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
50
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
50
        }
1323
73.0k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
191
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
191
        }
1326
72.8k
        if (tok->level > 0) {
1327
72.8k
            tok->level--;
1328
72.8k
            int opening = tok->parenstack[tok->level];
1329
72.8k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
33.9k
                                            (opening == '[' && c == ']') ||
1331
28.1k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
51
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
2
                    assert(current_tok->curly_bracket_depth >= 0);
1339
2
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
2
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
1
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
1
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
1
                    }
1344
2
                }
1345
50
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
7
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
7
                            "closing parenthesis '%c' does not match "
1348
7
                            "opening parenthesis '%c' on line %d",
1349
7
                            c, opening, tok->parenlinenostack[tok->level]));
1350
7
                }
1351
43
                else {
1352
43
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
43
                            "closing parenthesis '%c' does not match "
1354
43
                            "opening parenthesis '%c'",
1355
43
                            c, opening));
1356
43
                }
1357
50
            }
1358
72.8k
        }
1359
1360
72.8k
        if (INSIDE_FSTRING(tok)) {
1361
24.0k
            current_tok->curly_bracket_depth--;
1362
24.0k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
24.0k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
22.8k
                current_tok->curly_bracket_expr_start_depth--;
1368
22.8k
                current_tok->kind = TOK_FSTRING_MODE;
1369
22.8k
                current_tok->in_format_spec = 0;
1370
22.8k
                current_tok->in_debug = 0;
1371
22.8k
            }
1372
24.0k
        }
1373
72.8k
        break;
1374
706k
    default:
1375
706k
        break;
1376
918k
    }
1377
1378
918k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
391
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
391
    }
1381
1382
918k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
5.42k
        current_tok->in_debug = 1;
1384
5.42k
    }
1385
1386
    /* Punctuation character */
1387
918k
    p_start = tok->start;
1388
918k
    p_end = tok->cur;
1389
918k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
918k
}
1391
1392
/* Tokenize the literal portion of an f-string or t-string (the text between
 * the opening quote, '{'/'}' markers and the closing quote).
 *
 * Emits one of: FSTRING_MIDDLE/TSTRING_MIDDLE (a literal chunk),
 * FSTRING_END/TSTRING_END (closing quote reached, pops the mode stack),
 * or hands control back to tok_get_normal_mode when an expression part
 * ('{' not doubled) begins.  Returns the token type, with the token's
 * bounds written through `token` via MAKE_TOKEN.
 */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;     // consecutive quote chars seen; close needs quote_size of them
    int unicode_escape = 0;     // inside a \N{...} named escape: '}' is literal, not a marker

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            // A real expression start (not the '{{' escape): switch to
            // regular mode so the expression is tokenized as Python code.
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // Closing quote(s) consumed: release the buffered expression text used
    // for '=' debug specifiers before popping the f-string mode.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        // Only honor in_format_spec while still inside the replacement field;
        // outside an expression the flag is stale.
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        // Count consecutive quote characters; any other char resets the run.
        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            if (peek != '{' || in_format_spec) {
                // Expression start ('{{' is only an escape outside a
                // format spec): back up and switch to regular mode.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // '{{' escape: emit the chunk up to (and including) one '{'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // Closing brace of a \N{...} escape is literal text.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // '}}' escape: emit the chunk with a single '}'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // End of a replacement field: hand the '}' back to regular mode.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.14M
{
1618
2.14M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.14M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.08M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.08M
    } else {
1622
54.9k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
54.9k
    }
1624
2.14M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.14M
{
1629
2.14M
    int result = tok_get(tok, token);
1630
2.14M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.14M
    return result;
1635
2.14M
}