Coverage Report

Created: 2026-02-09 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.46k
#define ALTTABSIZE 1
11
12
1.18M
#define is_potential_identifier_start(c) (\
13
1.18M
              (c >= 'a' && c <= 'z')\
14
1.18M
               || (c >= 'A' && c <= 'Z')\
15
1.18M
               || c == '_'\
16
1.18M
               || (c >= 128))
17
18
1.74M
#define is_potential_identifier_char(c) (\
19
1.74M
              (c >= 'a' && c <= 'z')\
20
1.74M
               || (c >= 'A' && c <= 'Z')\
21
1.74M
               || (c >= '0' && c <= '9')\
22
1.74M
               || c == '_'\
23
1.74M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.28M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
14.7k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
27
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.18M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
156k
{
55
156k
    return memchr(str, 0, size) != NULL;
56
156k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
7.23M
{
62
7.23M
    int rc;
63
7.39M
    for (;;) {
64
7.39M
        if (tok->cur != tok->inp) {
65
7.19M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
7.19M
            tok->col_offset++;
70
7.19M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
7.19M
        }
72
197k
        if (tok->done != E_OK) {
73
27.2k
            return EOF;
74
27.2k
        }
75
170k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
170k
        if (!rc) {
84
13.7k
            tok->cur = tok->inp;
85
13.7k
            return EOF;
86
13.7k
        }
87
156k
        tok->line_start = tok->cur;
88
89
156k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
156k
    }
95
7.23M
    Py_UNREACHABLE();
96
7.23M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
2.52M
{
102
2.52M
    if (c != EOF) {
103
2.50M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
2.50M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
2.50M
        tok->col_offset--;
110
2.50M
    }
111
2.52M
}
112
113
static int
114
21.0k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
21.0k
    assert(token != NULL);
116
21.0k
    assert(c == '}' || c == ':' || c == '!');
117
21.0k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
21.0k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
12.2k
        return 0;
121
12.2k
    }
122
8.77k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
8.77k
    int hash_detected = 0;
126
8.77k
    int in_string = 0;
127
8.77k
    char quote_char = 0;
128
129
860k
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
852k
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
852k
        if (ch == '\\') {
134
16.3k
            i++;
135
16.3k
            continue;
136
16.3k
        }
137
138
        // Handle quotes
139
836k
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
145k
            if (!in_string) {
148
53.0k
                in_string = 1;
149
53.0k
                quote_char = ch;
150
53.0k
            }
151
92.6k
            else if (ch == quote_char) {
152
52.4k
                in_string = 0;
153
52.4k
            }
154
145k
            continue;
155
145k
        }
156
157
        // Check for # outside strings
158
690k
        if (ch == '#' && !in_string) {
159
731
            hash_detected = 1;
160
731
            break;
161
731
        }
162
690k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
8.77k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
731
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
731
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
731
        Py_ssize_t i = 0;  // Input position
172
731
        Py_ssize_t j = 0;  // Output position
173
731
        in_string = 0;     // Whether we're in a string
174
731
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
44.7k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
44.0k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
44.0k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
6.91k
                if (!in_string) {
184
2.88k
                    in_string = 1;
185
2.88k
                    quote_char = ch;
186
4.03k
                } else if (ch == quote_char) {
187
2.87k
                    in_string = 0;
188
2.87k
                }
189
6.91k
                result[j++] = ch;
190
6.91k
            }
191
            // Skip comments
192
37.1k
            else if (ch == '#' && !in_string) {
193
24.4k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
23.7k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
23.4k
                    i++;
196
23.4k
                }
197
973
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
320
                    result[j++] = '\n';
199
320
                }
200
973
            }
201
            // Copy other chars
202
36.1k
            else {
203
36.1k
                result[j++] = ch;
204
36.1k
            }
205
44.0k
            i++;
206
44.0k
        }
207
208
731
        result[j] = '\0';  // Null-terminate the result string
209
731
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
731
        PyMem_Free(result);
211
8.04k
    } else {
212
8.04k
        res = PyUnicode_DecodeUTF8(
213
8.04k
            tok_mode->last_expr_buffer,
214
8.04k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
8.04k
            NULL
216
8.04k
        );
217
8.04k
    }
218
219
8.77k
    if (!res) {
220
0
        return -1;
221
0
    }
222
8.77k
    token->metadata = res;
223
8.77k
    return 0;
224
8.77k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
54.7k
{
229
54.7k
    assert(tok->cur != NULL);
230
231
54.7k
    Py_ssize_t size = strlen(tok->cur);
232
54.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
54.7k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
33.6k
        case '{':
252
33.6k
            if (tok_mode->last_expr_buffer != NULL) {
253
23.3k
                PyMem_Free(tok_mode->last_expr_buffer);
254
23.3k
            }
255
33.6k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
33.6k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
33.6k
            tok_mode->last_expr_size = size;
260
33.6k
            tok_mode->last_expr_end = -1;
261
33.6k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
33.6k
            break;
263
16.7k
        case '}':
264
18.4k
        case '!':
265
18.4k
            tok_mode->last_expr_end = strlen(tok->start);
266
18.4k
            break;
267
2.57k
        case ':':
268
2.57k
            if (tok_mode->last_expr_end == -1) {
269
2.21k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.21k
            }
271
2.57k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
54.7k
    }
275
54.7k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
54.7k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
6.64k
{
284
6.64k
    const char *s = test;
285
6.64k
    int res = 0;
286
17.7k
    while (1) {
287
17.7k
        int c = tok_nextc(tok);
288
17.7k
        if (*s == 0) {
289
6.56k
            res = !is_potential_identifier_char(c);
290
6.56k
        }
291
11.2k
        else if (c == *s) {
292
11.1k
            s++;
293
11.1k
            continue;
294
11.1k
        }
295
296
6.64k
        tok_backup(tok, c);
297
17.7k
        while (s != test) {
298
11.1k
            tok_backup(tok, *--s);
299
11.1k
        }
300
6.64k
        return res;
301
17.7k
    }
302
6.64k
}
303
304
static int
305
71.7k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
71.7k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
24
        return 1;
310
24
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
71.6k
    int r = 0;
322
71.6k
    if (c == 'a') {
323
789
        r = lookahead(tok, "nd");
324
789
    }
325
70.9k
    else if (c == 'e') {
326
432
        r = lookahead(tok, "lse");
327
432
    }
328
70.4k
    else if (c == 'f') {
329
2.66k
        r = lookahead(tok, "or");
330
2.66k
    }
331
67.8k
    else if (c == 'i') {
332
897
        int c2 = tok_nextc(tok);
333
897
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
886
            r = 1;
335
886
        }
336
897
        tok_backup(tok, c2);
337
897
    }
338
66.9k
    else if (c == 'o') {
339
2.43k
        r = lookahead(tok, "r");
340
2.43k
    }
341
64.4k
    else if (c == 'n') {
342
323
        r = lookahead(tok, "ot");
343
323
    }
344
71.6k
    if (r) {
345
7.43k
        tok_backup(tok, c);
346
7.43k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
7.43k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
7.43k
        tok_nextc(tok);
352
7.43k
    }
353
64.2k
    else /* In future releases, only error will remain. */
354
64.2k
    if (c < 128 && is_potential_identifier_char(c)) {
355
198
        tok_backup(tok, c);
356
198
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
198
        return 0;
358
198
    }
359
71.4k
    return 1;
360
71.6k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
10.4k
{
366
10.4k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
10.4k
    PyObject *s;
370
10.4k
    if (tok->decoding_erred)
371
0
        return 0;
372
10.4k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
10.4k
    if (s == NULL) {
374
2
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
2
            tok->done = E_DECODE;
376
2
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
2
        return 0;
381
2
    }
382
10.4k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
10.4k
    assert(invalid >= 0);
384
10.4k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
10.4k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
587
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
587
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
389
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
389
            if (s != NULL) {
391
389
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
389
            }
393
389
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
389
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
389
        }
399
587
        Py_DECREF(s);
400
587
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
280
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
280
        }
403
307
        else {
404
307
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
307
        }
406
587
        return 0;
407
587
    }
408
9.88k
    Py_DECREF(s);
409
9.88k
    return 1;
410
10.4k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
56.4k
{
415
56.4k
    int c;
416
417
56.6k
    while (1) {
418
177k
        do {
419
177k
            c = tok_nextc(tok);
420
177k
        } while (Py_ISDIGIT(c));
421
56.6k
        if (c != '_') {
422
56.4k
            break;
423
56.4k
        }
424
160
        c = tok_nextc(tok);
425
160
        if (!Py_ISDIGIT(c)) {
426
11
            tok_backup(tok, c);
427
11
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
11
            return 0;
429
11
        }
430
160
    }
431
56.4k
    return c;
432
56.4k
}
433
434
static inline int
435
729
tok_continuation_line(struct tok_state *tok) {
436
729
    int c = tok_nextc(tok);
437
729
    if (c == '\r') {
438
52
        c = tok_nextc(tok);
439
52
    }
440
729
    if (c != '\n') {
441
54
        tok->done = E_LINECONT;
442
54
        return -1;
443
54
    }
444
675
    c = tok_nextc(tok);
445
675
    if (c == EOF) {
446
45
        tok->done = E_EOF;
447
45
        tok->cur = tok->inp;
448
45
        return -1;
449
630
    } else {
450
630
        tok_backup(tok, c);
451
630
    }
452
630
    return c;
453
675
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
18.5k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
18.5k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
18.5k
    do {                                                                  \
464
9
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
9
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
9
            (int)(tok->cur - tok->line_start),                            \
467
9
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
9
        return -1;                                                        \
469
9
    } while (0)
470
471
18.5k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
18.5k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
18.5k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
18.5k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
18.5k
    if (saw_b && saw_f) {
485
3
        RETURN_SYNTAX_ERROR("b", "f");
486
3
    }
487
18.5k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
18.5k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
18.5k
#undef RETURN_SYNTAX_ERROR
496
497
18.5k
    return 0;
498
18.5k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.14M
{
503
1.14M
    int c;
504
1.14M
    int blankline, nonascii;
505
506
1.14M
    const char *p_start = NULL;
507
1.14M
    const char *p_end = NULL;
508
1.21M
  nextline:
509
1.21M
    tok->start = NULL;
510
1.21M
    tok->starting_col_offset = -1;
511
1.21M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.21M
    if (tok->atbol) {
516
159k
        int col = 0;
517
159k
        int altcol = 0;
518
159k
        tok->atbol = 0;
519
159k
        int cont_line_col = 0;
520
520k
        for (;;) {
521
520k
            c = tok_nextc(tok);
522
520k
            if (c == ' ') {
523
358k
                col++, altcol++;
524
358k
            }
525
162k
            else if (c == '\t') {
526
732
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
732
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
732
            }
529
161k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.03k
                col = altcol = 0; /* For Emacs users */
531
1.03k
            }
532
160k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
517
                cont_line_col = cont_line_col ? cont_line_col : col;
538
517
                if ((c = tok_continuation_line(tok)) == -1) {
539
42
                    return MAKE_TOKEN(ERRORTOKEN);
540
42
                }
541
517
            }
542
159k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
159k
            else {
546
159k
                break;
547
159k
            }
548
520k
        }
549
159k
        tok_backup(tok, c);
550
159k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
37.5k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
37.5k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
37.5k
            else {
566
37.5k
                blankline = 1; /* Ignore completely */
567
37.5k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
37.5k
        }
571
159k
        if (!blankline && tok->level == 0) {
572
89.5k
            col = cont_line_col ? cont_line_col : col;
573
89.5k
            altcol = cont_line_col ? cont_line_col : altcol;
574
89.5k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
68.3k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
68.3k
            }
580
21.1k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
11.9k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
11.9k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
11.9k
                tok->pendin++;
591
11.9k
                tok->indstack[++tok->indent] = col;
592
11.9k
                tok->altindstack[tok->indent] = altcol;
593
11.9k
            }
594
9.20k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
20.4k
                while (tok->indent > 0 &&
597
17.5k
                    col < tok->indstack[tok->indent]) {
598
11.2k
                    tok->pendin--;
599
11.2k
                    tok->indent--;
600
11.2k
                }
601
9.20k
                if (col != tok->indstack[tok->indent]) {
602
4
                    tok->done = E_DEDENT;
603
4
                    tok->cur = tok->inp;
604
4
                    return MAKE_TOKEN(ERRORTOKEN);
605
4
                }
606
9.19k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.19k
            }
610
89.5k
        }
611
159k
    }
612
613
1.21M
    tok->start = tok->cur;
614
1.21M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
1.21M
    if (tok->pendin != 0) {
618
23.2k
        if (tok->pendin < 0) {
619
11.2k
            if (tok->tok_extra_tokens) {
620
24
                p_start = tok->cur;
621
24
                p_end = tok->cur;
622
24
            }
623
11.2k
            tok->pendin++;
624
11.2k
            return MAKE_TOKEN(DEDENT);
625
11.2k
        }
626
11.9k
        else {
627
11.9k
            if (tok->tok_extra_tokens) {
628
24
                p_start = tok->buf;
629
24
                p_end = tok->cur;
630
24
            }
631
11.9k
            tok->pendin--;
632
11.9k
            return MAKE_TOKEN(INDENT);
633
11.9k
        }
634
23.2k
    }
635
636
    /* Peek ahead at the next character */
637
1.19M
    c = tok_nextc(tok);
638
1.19M
    tok_backup(tok, c);
639
640
1.19M
 again:
641
1.19M
    tok->start = NULL;
642
    /* Skip spaces */
643
1.45M
    do {
644
1.45M
        c = tok_nextc(tok);
645
1.45M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
1.19M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
1.19M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
1.19M
    if (c == '#') {
653
654
30.7k
        const char* p = NULL;
655
30.7k
        const char *prefix, *type_start;
656
30.7k
        int current_starting_col_offset;
657
658
966k
        while (c != EOF && c != '\n' && c != '\r') {
659
935k
            c = tok_nextc(tok);
660
935k
        }
661
662
30.7k
        if (tok->tok_extra_tokens) {
663
6
            p = tok->start;
664
6
        }
665
666
30.7k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
30.7k
        if (tok->tok_extra_tokens) {
721
6
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
6
            p_start = p;
723
6
            p_end = tok->cur;
724
6
            tok->comment_newline = blankline;
725
6
            return MAKE_TOKEN(COMMENT);
726
6
        }
727
30.7k
    }
728
729
1.19M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
1.19M
    if (c == EOF) {
735
13.6k
        if (tok->level) {
736
3.40k
            return MAKE_TOKEN(ERRORTOKEN);
737
3.40k
        }
738
10.2k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
13.6k
    }
740
741
    /* Identifier (most frequent token!) */
742
1.18M
    nonascii = 0;
743
1.18M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
390k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
484k
        while (1) {
747
484k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
15.4k
                saw_b = 1;
749
15.4k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
468k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
6.11k
                saw_u = 1;
754
6.11k
            }
755
            /* ur"" and ru"" are not supported */
756
462k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
25.0k
                saw_r = 1;
758
25.0k
            }
759
437k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
37.2k
                saw_f = 1;
761
37.2k
            }
762
400k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
28.7k
                saw_t = 1;
764
28.7k
            }
765
371k
            else {
766
371k
                break;
767
371k
            }
768
112k
            c = tok_nextc(tok);
769
112k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
18.5k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
18.5k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
18.5k
                if (status < 0) {
774
9
                    return MAKE_TOKEN(ERRORTOKEN);
775
9
                }
776
777
                // Handle valid f or t string creation:
778
18.5k
                if (saw_f || saw_t) {
779
14.7k
                    goto f_string_quote;
780
14.7k
                }
781
3.82k
                goto letter_quote;
782
18.5k
            }
783
112k
        }
784
1.67M
        while (is_potential_identifier_char(c)) {
785
1.30M
            if (c >= 128) {
786
135k
                nonascii = 1;
787
135k
            }
788
1.30M
            c = tok_nextc(tok);
789
1.30M
        }
790
371k
        tok_backup(tok, c);
791
371k
        if (nonascii && !verify_identifier(tok)) {
792
589
            return MAKE_TOKEN(ERRORTOKEN);
793
589
        }
794
795
371k
        p_start = tok->start;
796
371k
        p_end = tok->cur;
797
798
371k
        return MAKE_TOKEN(NAME);
799
371k
    }
800
801
791k
    if (c == '\r') {
802
264
        c = tok_nextc(tok);
803
264
    }
804
805
    /* Newline */
806
791k
    if (c == '\n') {
807
143k
        tok->atbol = 1;
808
143k
        if (blankline || tok->level > 0) {
809
70.3k
            if (tok->tok_extra_tokens) {
810
14
                if (tok->comment_newline) {
811
0
                    tok->comment_newline = 0;
812
0
                }
813
14
                p_start = tok->start;
814
14
                p_end = tok->cur;
815
14
                return MAKE_TOKEN(NL);
816
14
            }
817
70.2k
            goto nextline;
818
70.3k
        }
819
73.6k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
6
            tok->comment_newline = 0;
821
6
            p_start = tok->start;
822
6
            p_end = tok->cur;
823
6
            return MAKE_TOKEN(NL);
824
6
        }
825
73.6k
        p_start = tok->start;
826
73.6k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
73.6k
        tok->cont_line = 0;
828
73.6k
        return MAKE_TOKEN(NEWLINE);
829
73.6k
    }
830
831
    /* Period or number starting with period? */
832
647k
    if (c == '.') {
833
25.9k
        c = tok_nextc(tok);
834
25.9k
        if (Py_ISDIGIT(c)) {
835
2.99k
            goto fraction;
836
23.0k
        } else if (c == '.') {
837
963
            c = tok_nextc(tok);
838
963
            if (c == '.') {
839
495
                p_start = tok->start;
840
495
                p_end = tok->cur;
841
495
                return MAKE_TOKEN(ELLIPSIS);
842
495
            }
843
468
            else {
844
468
                tok_backup(tok, c);
845
468
            }
846
468
            tok_backup(tok, '.');
847
468
        }
848
22.0k
        else {
849
22.0k
            tok_backup(tok, c);
850
22.0k
        }
851
22.5k
        p_start = tok->start;
852
22.5k
        p_end = tok->cur;
853
22.5k
        return MAKE_TOKEN(DOT);
854
25.9k
    }
855
856
    /* Number */
857
621k
    if (Py_ISDIGIT(c)) {
858
68.8k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
25.8k
            c = tok_nextc(tok);
861
25.8k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
13.7k
                c = tok_nextc(tok);
864
13.8k
                do {
865
13.8k
                    if (c == '_') {
866
77
                        c = tok_nextc(tok);
867
77
                    }
868
13.8k
                    if (!Py_ISXDIGIT(c)) {
869
16
                        tok_backup(tok, c);
870
16
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
16
                    }
872
72.2k
                    do {
873
72.2k
                        c = tok_nextc(tok);
874
72.2k
                    } while (Py_ISXDIGIT(c));
875
13.8k
                } while (c == '_');
876
13.7k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
13.7k
            }
880
12.0k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
426
                c = tok_nextc(tok);
883
720
                do {
884
720
                    if (c == '_') {
885
295
                        c = tok_nextc(tok);
886
295
                    }
887
720
                    if (c < '0' || c >= '8') {
888
19
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
18
                        else {
893
18
                            tok_backup(tok, c);
894
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
18
                        }
896
19
                    }
897
1.74k
                    do {
898
1.74k
                        c = tok_nextc(tok);
899
1.74k
                    } while ('0' <= c && c < '8');
900
701
                } while (c == '_');
901
407
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
406
                if (!verify_end_of_number(tok, c, "octal")) {
906
4
                    return MAKE_TOKEN(ERRORTOKEN);
907
4
                }
908
406
            }
909
11.6k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
386
                c = tok_nextc(tok);
912
674
                do {
913
674
                    if (c == '_') {
914
298
                        c = tok_nextc(tok);
915
298
                    }
916
674
                    if (c != '0' && c != '1') {
917
23
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
22
                        else {
921
22
                            tok_backup(tok, c);
922
22
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
22
                        }
924
23
                    }
925
3.11k
                    do {
926
3.11k
                        c = tok_nextc(tok);
927
3.11k
                    } while (c == '0' || c == '1');
928
651
                } while (c == '_');
929
363
                if (Py_ISDIGIT(c)) {
930
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
1
                }
932
362
                if (!verify_end_of_number(tok, c, "binary")) {
933
1
                    return MAKE_TOKEN(ERRORTOKEN);
934
1
                }
935
362
            }
936
11.2k
            else {
937
11.2k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
12.2k
                while (1) {
941
12.2k
                    if (c == '_') {
942
111
                        c = tok_nextc(tok);
943
111
                        if (!Py_ISDIGIT(c)) {
944
2
                            tok_backup(tok, c);
945
2
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
2
                        }
947
111
                    }
948
12.2k
                    if (c != '0') {
949
11.2k
                        break;
950
11.2k
                    }
951
1.03k
                    c = tok_nextc(tok);
952
1.03k
                }
953
11.2k
                char* zeros_end = tok->cur;
954
11.2k
                if (Py_ISDIGIT(c)) {
955
346
                    nonzero = 1;
956
346
                    c = tok_decimal_tail(tok);
957
346
                    if (c == 0) {
958
2
                        return MAKE_TOKEN(ERRORTOKEN);
959
2
                    }
960
346
                }
961
11.2k
                if (c == '.') {
962
583
                    c = tok_nextc(tok);
963
583
                    goto fraction;
964
583
                }
965
10.6k
                else if (c == 'e' || c == 'E') {
966
886
                    goto exponent;
967
886
                }
968
9.78k
                else if (c == 'j' || c == 'J') {
969
581
                    goto imaginary;
970
581
                }
971
9.19k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
21
                    tok_backup(tok, c);
974
21
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
21
                            tok, (int)(tok->start + 1 - tok->line_start),
976
21
                            (int)(zeros_end - tok->line_start),
977
21
                            "leading zeros in decimal integer "
978
21
                            "literals are not permitted; "
979
21
                            "use an 0o prefix for octal integers"));
980
21
                }
981
9.17k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
27
                    return MAKE_TOKEN(ERRORTOKEN);
983
27
                }
984
9.17k
            }
985
25.8k
        }
986
42.9k
        else {
987
            /* Decimal */
988
42.9k
            c = tok_decimal_tail(tok);
989
42.9k
            if (c == 0) {
990
7
                return MAKE_TOKEN(ERRORTOKEN);
991
7
            }
992
42.9k
            {
993
                /* Accept floating-point numbers. */
994
42.9k
                if (c == '.') {
995
2.77k
                    c = tok_nextc(tok);
996
6.35k
        fraction:
997
                    /* Fraction */
998
6.35k
                    if (Py_ISDIGIT(c)) {
999
5.10k
                        c = tok_decimal_tail(tok);
1000
5.10k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
5.10k
                    }
1004
6.35k
                }
1005
46.5k
                if (c == 'e' || c == 'E') {
1006
7.61k
                    int e;
1007
8.50k
                  exponent:
1008
8.50k
                    e = c;
1009
                    /* Exponent part */
1010
8.50k
                    c = tok_nextc(tok);
1011
8.50k
                    if (c == '+' || c == '-') {
1012
3.53k
                        c = tok_nextc(tok);
1013
3.53k
                        if (!Py_ISDIGIT(c)) {
1014
11
                            tok_backup(tok, c);
1015
11
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
11
                        }
1017
4.96k
                    } else if (!Py_ISDIGIT(c)) {
1018
433
                        tok_backup(tok, c);
1019
433
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
30
                            return MAKE_TOKEN(ERRORTOKEN);
1021
30
                        }
1022
403
                        tok_backup(tok, e);
1023
403
                        p_start = tok->start;
1024
403
                        p_end = tok->cur;
1025
403
                        return MAKE_TOKEN(NUMBER);
1026
433
                    }
1027
8.06k
                    c = tok_decimal_tail(tok);
1028
8.06k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
8.06k
                }
1032
46.9k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
3.14k
        imaginary:
1035
3.14k
                    c = tok_nextc(tok);
1036
3.14k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
6
                        return MAKE_TOKEN(ERRORTOKEN);
1038
6
                    }
1039
3.14k
                }
1040
44.4k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
129
                    return MAKE_TOKEN(ERRORTOKEN);
1042
129
                }
1043
46.9k
            }
1044
46.9k
        }
1045
71.1k
        tok_backup(tok, c);
1046
71.1k
        p_start = tok->start;
1047
71.1k
        p_end = tok->cur;
1048
71.1k
        return MAKE_TOKEN(NUMBER);
1049
68.8k
    }
1050
1051
567k
  f_string_quote:
1052
567k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
14.7k
        && (c == '\'' || c == '"'))) {
1054
1055
14.7k
        int quote = c;
1056
14.7k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
14.7k
        tok->first_lineno = tok->lineno;
1063
14.7k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
14.7k
        int after_quote = tok_nextc(tok);
1067
14.7k
        if (after_quote == quote) {
1068
2.14k
            int after_after_quote = tok_nextc(tok);
1069
2.14k
            if (after_after_quote == quote) {
1070
499
                quote_size = 3;
1071
499
            }
1072
1.64k
            else {
1073
                // TODO: Check this
1074
1.64k
                tok_backup(tok, after_after_quote);
1075
1.64k
                tok_backup(tok, after_quote);
1076
1.64k
            }
1077
2.14k
        }
1078
14.7k
        if (after_quote != quote) {
1079
12.5k
            tok_backup(tok, after_quote);
1080
12.5k
        }
1081
1082
1083
14.7k
        p_start = tok->start;
1084
14.7k
        p_end = tok->cur;
1085
14.7k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
14.7k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
14.7k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
14.7k
        the_current_tok->quote = quote;
1091
14.7k
        the_current_tok->quote_size = quote_size;
1092
14.7k
        the_current_tok->start = tok->start;
1093
14.7k
        the_current_tok->multi_line_start = tok->line_start;
1094
14.7k
        the_current_tok->first_line = tok->lineno;
1095
14.7k
        the_current_tok->start_offset = -1;
1096
14.7k
        the_current_tok->multi_line_start_offset = -1;
1097
14.7k
        the_current_tok->last_expr_buffer = NULL;
1098
14.7k
        the_current_tok->last_expr_size = 0;
1099
14.7k
        the_current_tok->last_expr_end = -1;
1100
14.7k
        the_current_tok->in_format_spec = 0;
1101
14.7k
        the_current_tok->in_debug = 0;
1102
1103
14.7k
        enum string_kind_t string_kind = FSTRING;
1104
14.7k
        switch (*tok->start) {
1105
590
            case 'T':
1106
3.88k
            case 't':
1107
3.88k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
3.88k
                string_kind = TSTRING;
1109
3.88k
                break;
1110
1.62k
            case 'F':
1111
10.6k
            case 'f':
1112
10.6k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
10.6k
                break;
1114
70
            case 'R':
1115
224
            case 'r':
1116
224
                the_current_tok->raw = 1;
1117
224
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
73
                    string_kind = TSTRING;
1119
73
                }
1120
224
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
14.7k
        }
1124
1125
14.7k
        the_current_tok->string_kind = string_kind;
1126
14.7k
        the_current_tok->curly_bracket_depth = 0;
1127
14.7k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
14.7k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
14.7k
    }
1130
1131
556k
  letter_quote:
1132
    /* String */
1133
556k
    if (c == '\'' || c == '"') {
1134
37.2k
        int quote = c;
1135
37.2k
        int quote_size = 1;             /* 1 or 3 */
1136
37.2k
        int end_quote_size = 0;
1137
37.2k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
37.2k
        tok->first_lineno = tok->lineno;
1144
37.2k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
37.2k
        c = tok_nextc(tok);
1148
37.2k
        if (c == quote) {
1149
7.40k
            c = tok_nextc(tok);
1150
7.40k
            if (c == quote) {
1151
1.23k
                quote_size = 3;
1152
1.23k
            }
1153
6.16k
            else {
1154
6.16k
                end_quote_size = 1;     /* empty string found */
1155
6.16k
            }
1156
7.40k
        }
1157
37.2k
        if (c != quote) {
1158
35.9k
            tok_backup(tok, c);
1159
35.9k
        }
1160
1161
        /* Get rest of string */
1162
511k
        while (end_quote_size != quote_size) {
1163
474k
            c = tok_nextc(tok);
1164
474k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
474k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
474k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
290
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
290
                tok->cur = (char *)tok->start;
1176
290
                tok->cur++;
1177
290
                tok->line_start = tok->multi_line_start;
1178
290
                int start = tok->lineno;
1179
290
                tok->lineno = tok->first_lineno;
1180
1181
290
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * does the initial quote matches with f-strings quotes
1185
                     * and if it is, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
33
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
33
                    if (the_current_tok->quote == quote &&
1189
27
                        the_current_tok->quote_size == quote_size) {
1190
20
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
20
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
20
                    }
1193
33
                }
1194
1195
270
                if (quote_size == 3) {
1196
27
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
27
                                     " (detected at line %d)", start);
1198
27
                    if (c != '\n') {
1199
27
                        tok->done = E_EOFS;
1200
27
                    }
1201
27
                    return MAKE_TOKEN(ERRORTOKEN);
1202
27
                }
1203
243
                else {
1204
243
                    if (has_escaped_quote) {
1205
9
                        _PyTokenizer_syntaxerror(
1206
9
                            tok,
1207
9
                            "unterminated string literal (detected at line %d); "
1208
9
                            "perhaps you escaped the end quote?",
1209
9
                            start
1210
9
                        );
1211
234
                    } else {
1212
234
                        _PyTokenizer_syntaxerror(
1213
234
                            tok, "unterminated string literal (detected at line %d)", start
1214
234
                        );
1215
234
                    }
1216
243
                    if (c != '\n') {
1217
14
                        tok->done = E_EOLS;
1218
14
                    }
1219
243
                    return MAKE_TOKEN(ERRORTOKEN);
1220
243
                }
1221
270
            }
1222
474k
            if (c == quote) {
1223
34.0k
                end_quote_size += 1;
1224
34.0k
            }
1225
440k
            else {
1226
440k
                end_quote_size = 0;
1227
440k
                if (c == '\\') {
1228
21.2k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
21.2k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
750
                        has_escaped_quote = 1;
1231
750
                    }
1232
21.2k
                    if (c == '\r') {
1233
31
                        c = tok_nextc(tok);
1234
31
                    }
1235
21.2k
                }
1236
440k
            }
1237
474k
        }
1238
1239
36.9k
        p_start = tok->start;
1240
36.9k
        p_end = tok->cur;
1241
36.9k
        return MAKE_TOKEN(STRING);
1242
37.2k
    }
1243
1244
    /* Line continuation */
1245
519k
    if (c == '\\') {
1246
212
        if ((c = tok_continuation_line(tok)) == -1) {
1247
57
            return MAKE_TOKEN(ERRORTOKEN);
1248
57
        }
1249
155
        tok->cont_line = 1;
1250
155
        goto again; /* Read next line */
1251
212
    }
1252
1253
    /* Punctuation character */
1254
518k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
518k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
47.1k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
47.1k
        int in_format_spec = current_tok->in_format_spec;
1261
47.1k
         int cursor_in_format_with_debug =
1262
47.1k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
47.1k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
47.1k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
47.1k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
47.1k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
3.17k
            current_tok->kind = TOK_FSTRING_MODE;
1273
3.17k
            current_tok->in_format_spec = 1;
1274
3.17k
            p_start = tok->start;
1275
3.17k
            p_end = tok->cur;
1276
3.17k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
3.17k
        }
1278
47.1k
    }
1279
1280
    /* Check for two-character token */
1281
515k
    {
1282
515k
        int c2 = tok_nextc(tok);
1283
515k
        int current_token = _PyToken_TwoChars(c, c2);
1284
515k
        if (current_token != OP) {
1285
20.9k
            int c3 = tok_nextc(tok);
1286
20.9k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
20.9k
            if (current_token3 != OP) {
1288
2.24k
                current_token = current_token3;
1289
2.24k
            }
1290
18.6k
            else {
1291
18.6k
                tok_backup(tok, c3);
1292
18.6k
            }
1293
20.9k
            p_start = tok->start;
1294
20.9k
            p_end = tok->cur;
1295
20.9k
            return MAKE_TOKEN(current_token);
1296
20.9k
        }
1297
494k
        tok_backup(tok, c2);
1298
494k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
61.1k
    case '(':
1303
77.9k
    case '[':
1304
112k
    case '{':
1305
112k
        if (tok->level >= MAXLEVEL) {
1306
7
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
7
        }
1308
112k
        tok->parenstack[tok->level] = c;
1309
112k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
112k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
112k
        tok->level++;
1312
112k
        if (INSIDE_FSTRING(tok)) {
1313
24.4k
            current_tok->curly_bracket_depth++;
1314
24.4k
        }
1315
112k
        break;
1316
37.6k
    case ')':
1317
43.8k
    case ']':
1318
65.7k
    case '}':
1319
65.7k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
51
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
51
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
51
        }
1323
65.6k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
176
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
176
        }
1326
65.5k
        if (tok->level > 0) {
1327
65.5k
            tok->level--;
1328
65.5k
            int opening = tok->parenstack[tok->level];
1329
65.5k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
27.9k
                                            (opening == '[' && c == ']') ||
1331
21.8k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it; we'll throw an unmatched
1336
                parentheses error. */
1337
36
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
2
                    assert(current_tok->curly_bracket_depth >= 0);
1339
2
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
2
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
1
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
1
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
1
                    }
1344
2
                }
1345
35
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
6
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
6
                            "closing parenthesis '%c' does not match "
1348
6
                            "opening parenthesis '%c' on line %d",
1349
6
                            c, opening, tok->parenlinenostack[tok->level]));
1350
6
                }
1351
29
                else {
1352
29
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
29
                            "closing parenthesis '%c' does not match "
1354
29
                            "opening parenthesis '%c'",
1355
29
                            c, opening));
1356
29
                }
1357
35
            }
1358
65.5k
        }
1359
1360
65.4k
        if (INSIDE_FSTRING(tok)) {
1361
19.1k
            current_tok->curly_bracket_depth--;
1362
19.1k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
19.1k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
18.1k
                current_tok->curly_bracket_expr_start_depth--;
1368
18.1k
                current_tok->kind = TOK_FSTRING_MODE;
1369
18.1k
                current_tok->in_format_spec = 0;
1370
18.1k
                current_tok->in_debug = 0;
1371
18.1k
            }
1372
19.1k
        }
1373
65.4k
        break;
1374
316k
    default:
1375
316k
        break;
1376
494k
    }
1377
1378
494k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
362
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
362
    }
1381
1382
494k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
4.72k
        current_tok->in_debug = 1;
1384
4.72k
    }
1385
1386
    /* Punctuation character */
1387
494k
    p_start = tok->start;
1388
494k
    p_end = tok->cur;
1389
494k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
494k
}
1391
1392
/* Tokenize the literal portion of an f-string or t-string: the text between
 * the opening quote and the first '{', between a closing '}' and the next
 * '{', or up to the terminating quote(s).  Emits FSTRING_MIDDLE /
 * TSTRING_MIDDLE tokens for literal runs and FSTRING_END / TSTRING_END at
 * the closing quote, and hands control back to tok_get_normal_mode() when a
 * replacement-field expression starts.  Returns the token type stored into
 * *token (ERRORTOKEN on failure). */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;      // consecutive quote chars seen; string ends at quote_size
    int unicode_escape = 0;      // inside a \N{...} named escape, so '}' is literal

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        // Peek one char to distinguish a replacement field '{expr}' from the
        // escaped literal brace '{{'; both chars are pushed back either way.
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            // Switch the current mode so the expression is tokenized normally.
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // All closing quotes matched: release the buffered expression text used
    // for '=' debug specifiers before popping this string's mode.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;    // pop this f/t-string's tokenizer mode
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                // NOTE(review): this branch is unreachable here since the
                // enclosing condition already requires quote_size == 1 when
                // c == '\n' — kept as-is; confirm against upstream history.
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            // Record literal text seen so far for '=' debug-expression replay.
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            if (peek != '{' || in_format_spec) {
                // Start of a replacement field: push both chars back and let
                // normal mode tokenize the expression.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // '{{' escape: emit the literal up to (and including) one
                // brace; the second brace is consumed.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // '}' closing a \N{...} escape is part of the literal run.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // '}}' escape: same one-brace emission as the '{{' case.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                // Skip the CR of a CRLF line ending after a backslash.
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
1.18M
{
1618
1.18M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
1.18M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
1.13M
        return tok_get_normal_mode(tok, current_tok, token);
1621
1.13M
    } else {
1622
44.7k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
44.7k
    }
1624
1.18M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
1.18M
{
1629
1.18M
    int result = tok_get(tok, token);
1630
1.18M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
1.18M
    return result;
1635
1.18M
}