Coverage Report

Created: 2025-10-12 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.23k
#define ALTTABSIZE 1
11
12
1.63M
#define is_potential_identifier_start(c) (\
13
1.63M
              (c >= 'a' && c <= 'z')\
14
1.63M
               || (c >= 'A' && c <= 'Z')\
15
1.63M
               || c == '_'\
16
1.63M
               || (c >= 128))
17
18
2.36M
#define is_potential_identifier_char(c) (\
19
2.36M
              (c >= 'a' && c <= 'z')\
20
2.36M
               || (c >= 'A' && c <= 'Z')\
21
2.36M
               || (c >= '0' && c <= '9')\
22
2.36M
               || c == '_'\
23
2.36M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.76M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
15.4k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
29
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.64M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
227k
{
55
227k
    return memchr(str, 0, size) != NULL;
56
227k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
10.4M
{
62
10.4M
    int rc;
63
10.6M
    for (;;) {
64
10.6M
        if (tok->cur != tok->inp) {
65
10.3M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
10.3M
            tok->col_offset++;
70
10.3M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
10.3M
        }
72
276k
        if (tok->done != E_OK) {
73
32.6k
            return EOF;
74
32.6k
        }
75
243k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
243k
        if (!rc) {
84
16.4k
            tok->cur = tok->inp;
85
16.4k
            return EOF;
86
16.4k
        }
87
227k
        tok->line_start = tok->cur;
88
89
227k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
227k
    }
95
10.4M
    Py_UNREACHABLE();
96
10.4M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.47M
{
102
3.47M
    if (c != EOF) {
103
3.44M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.44M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.44M
        tok->col_offset--;
110
3.44M
    }
111
3.47M
}
112
113
static int
114
23.6k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
23.6k
    assert(token != NULL);
116
23.6k
    assert(c == '}' || c == ':' || c == '!');
117
23.6k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
23.6k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
14.4k
        return 0;
121
14.4k
    }
122
9.23k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
9.23k
    int hash_detected = 0;
126
9.23k
    int in_string = 0;
127
9.23k
    char quote_char = 0;
128
129
1.07M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.06M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.06M
        if (ch == '\\') {
134
21.9k
            i++;
135
21.9k
            continue;
136
21.9k
        }
137
138
        // Handle quotes
139
1.04M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
157k
            if (!in_string) {
148
57.5k
                in_string = 1;
149
57.5k
                quote_char = ch;
150
57.5k
            }
151
99.8k
            else if (ch == quote_char) {
152
56.7k
                in_string = 0;
153
56.7k
            }
154
157k
            continue;
155
157k
        }
156
157
        // Check for # outside strings
158
883k
        if (ch == '#' && !in_string) {
159
837
            hash_detected = 1;
160
837
            break;
161
837
        }
162
883k
    }
163
    // If we found a # character in the expression, we need to handle comments
164
9.23k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
837
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
837
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
837
        Py_ssize_t i = 0;  // Input position
172
837
        Py_ssize_t j = 0;  // Output position
173
837
        in_string = 0;     // Whether we're in a string
174
837
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
70.5k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
69.7k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
69.7k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
10.1k
                if (!in_string) {
184
3.89k
                    in_string = 1;
185
3.89k
                    quote_char = ch;
186
6.25k
                } else if (ch == quote_char) {
187
3.87k
                    in_string = 0;
188
3.87k
                }
189
10.1k
                result[j++] = ch;
190
10.1k
            }
191
            // Skip comments
192
59.5k
            else if (ch == '#' && !in_string) {
193
44.8k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
44.1k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
43.8k
                    i++;
196
43.8k
                }
197
978
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
279
                    result[j++] = '\n';
199
279
                }
200
978
            }
201
            // Copy other chars
202
58.6k
            else {
203
58.6k
                result[j++] = ch;
204
58.6k
            }
205
69.7k
            i++;
206
69.7k
        }
207
208
837
        result[j] = '\0';  // Null-terminate the result string
209
837
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
837
        PyMem_Free(result);
211
8.40k
    } else {
212
8.40k
        res = PyUnicode_DecodeUTF8(
213
8.40k
            tok_mode->last_expr_buffer,
214
8.40k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
8.40k
            NULL
216
8.40k
        );
217
8.40k
    }
218
219
9.23k
    if (!res) {
220
0
        return -1;
221
0
    }
222
9.23k
    token->metadata = res;
223
9.23k
    return 0;
224
9.23k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
65.9k
{
229
65.9k
    assert(tok->cur != NULL);
230
231
65.9k
    Py_ssize_t size = strlen(tok->cur);
232
65.9k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
65.9k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
42.3k
        case '{':
252
42.3k
            if (tok_mode->last_expr_buffer != NULL) {
253
31.8k
                PyMem_Free(tok_mode->last_expr_buffer);
254
31.8k
            }
255
42.3k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
42.3k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
42.3k
            tok_mode->last_expr_size = size;
260
42.3k
            tok_mode->last_expr_end = -1;
261
42.3k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
42.3k
            break;
263
19.5k
        case '}':
264
21.0k
        case '!':
265
21.0k
            tok_mode->last_expr_end = strlen(tok->start);
266
21.0k
            break;
267
2.62k
        case ':':
268
2.62k
            if (tok_mode->last_expr_end == -1) {
269
2.57k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.57k
            }
271
2.62k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
65.9k
    }
275
65.9k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
65.9k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
8.79k
{
284
8.79k
    const char *s = test;
285
8.79k
    int res = 0;
286
23.4k
    while (1) {
287
23.4k
        int c = tok_nextc(tok);
288
23.4k
        if (*s == 0) {
289
8.70k
            res = !is_potential_identifier_char(c);
290
8.70k
        }
291
14.7k
        else if (c == *s) {
292
14.6k
            s++;
293
14.6k
            continue;
294
14.6k
        }
295
296
8.79k
        tok_backup(tok, c);
297
23.4k
        while (s != test) {
298
14.6k
            tok_backup(tok, *--s);
299
14.6k
        }
300
8.79k
        return res;
301
23.4k
    }
302
8.79k
}
303
304
static int
305
93.9k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
93.9k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
93.9k
    int r = 0;
322
93.9k
    if (c == 'a') {
323
1.16k
        r = lookahead(tok, "nd");
324
1.16k
    }
325
92.7k
    else if (c == 'e') {
326
592
        r = lookahead(tok, "lse");
327
592
    }
328
92.1k
    else if (c == 'f') {
329
3.41k
        r = lookahead(tok, "or");
330
3.41k
    }
331
88.7k
    else if (c == 'i') {
332
1.84k
        int c2 = tok_nextc(tok);
333
1.84k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.83k
            r = 1;
335
1.83k
        }
336
1.84k
        tok_backup(tok, c2);
337
1.84k
    }
338
86.8k
    else if (c == 'o') {
339
3.31k
        r = lookahead(tok, "r");
340
3.31k
    }
341
83.5k
    else if (c == 'n') {
342
304
        r = lookahead(tok, "ot");
343
304
    }
344
93.9k
    if (r) {
345
10.5k
        tok_backup(tok, c);
346
10.5k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.5k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.5k
        tok_nextc(tok);
352
10.5k
    }
353
83.3k
    else /* In future releases, only error will remain. */
354
83.3k
    if (c < 128 && is_potential_identifier_char(c)) {
355
194
        tok_backup(tok, c);
356
194
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
194
        return 0;
358
194
    }
359
93.7k
    return 1;
360
93.9k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
13.5k
{
366
13.5k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
13.5k
    PyObject *s;
370
13.5k
    if (tok->decoding_erred)
371
0
        return 0;
372
13.5k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
13.5k
    if (s == NULL) {
374
1
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
1
            tok->done = E_DECODE;
376
1
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
1
        return 0;
381
1
    }
382
13.5k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
13.5k
    assert(invalid >= 0);
384
13.5k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
13.5k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
747
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
747
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
543
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
543
            if (s != NULL) {
391
543
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
543
            }
393
543
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
543
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
543
        }
399
747
        Py_DECREF(s);
400
747
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
418
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
418
        }
403
329
        else {
404
329
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
329
        }
406
747
        return 0;
407
747
    }
408
12.7k
    Py_DECREF(s);
409
12.7k
    return 1;
410
13.5k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
75.3k
{
415
75.3k
    int c;
416
417
75.8k
    while (1) {
418
215k
        do {
419
215k
            c = tok_nextc(tok);
420
215k
        } while (Py_ISDIGIT(c));
421
75.8k
        if (c != '_') {
422
75.3k
            break;
423
75.3k
        }
424
486
        c = tok_nextc(tok);
425
486
        if (!Py_ISDIGIT(c)) {
426
13
            tok_backup(tok, c);
427
13
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
13
            return 0;
429
13
        }
430
486
    }
431
75.3k
    return c;
432
75.3k
}
433
434
static inline int
435
1.10k
tok_continuation_line(struct tok_state *tok) {
436
1.10k
    int c = tok_nextc(tok);
437
1.10k
    if (c == '\r') {
438
69
        c = tok_nextc(tok);
439
69
    }
440
1.10k
    if (c != '\n') {
441
68
        tok->done = E_LINECONT;
442
68
        return -1;
443
68
    }
444
1.03k
    c = tok_nextc(tok);
445
1.03k
    if (c == EOF) {
446
49
        tok->done = E_EOF;
447
49
        tok->cur = tok->inp;
448
49
        return -1;
449
983
    } else {
450
983
        tok_backup(tok, c);
451
983
    }
452
983
    return c;
453
1.03k
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
20.0k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
20.0k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
20.0k
    do {                                                                  \
464
7
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
7
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
7
            (int)(tok->cur - tok->line_start),                            \
467
7
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
7
        return -1;                                                        \
469
7
    } while (0)
470
471
20.0k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
19.9k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
19.9k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
19.9k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
19.9k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
19.9k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
19.9k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
19.9k
#undef RETURN_SYNTAX_ERROR
496
497
19.9k
    return 0;
498
19.9k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.60M
{
503
1.60M
    int c;
504
1.60M
    int blankline, nonascii;
505
506
1.60M
    const char *p_start = NULL;
507
1.60M
    const char *p_end = NULL;
508
1.69M
  nextline:
509
1.69M
    tok->start = NULL;
510
1.69M
    tok->starting_col_offset = -1;
511
1.69M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.69M
    if (tok->atbol) {
516
225k
        int col = 0;
517
225k
        int altcol = 0;
518
225k
        tok->atbol = 0;
519
225k
        int cont_line_col = 0;
520
955k
        for (;;) {
521
955k
            c = tok_nextc(tok);
522
955k
            if (c == ' ') {
523
727k
                col++, altcol++;
524
727k
            }
525
227k
            else if (c == '\t') {
526
616
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
616
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
616
            }
529
227k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.07k
                col = altcol = 0; /* For Emacs users */
531
1.07k
            }
532
226k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
657
                cont_line_col = cont_line_col ? cont_line_col : col;
538
657
                if ((c = tok_continuation_line(tok)) == -1) {
539
42
                    return MAKE_TOKEN(ERRORTOKEN);
540
42
                }
541
657
            }
542
225k
            else {
543
225k
                break;
544
225k
            }
545
955k
        }
546
225k
        tok_backup(tok, c);
547
225k
        if (c == '#' || c == '\n' || c == '\r') {
548
            /* Lines with only whitespace and/or comments
549
               shouldn't affect the indentation and are
550
               not passed to the parser as NEWLINE tokens,
551
               except *totally* empty lines in interactive
552
               mode, which signal the end of a command group. */
553
49.7k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
554
0
                blankline = 0; /* Let it through */
555
0
            }
556
49.7k
            else if (tok->prompt != NULL && tok->lineno == 1) {
557
                /* In interactive mode, if the first line contains
558
                   only spaces and/or a comment, let it through. */
559
0
                blankline = 0;
560
0
                col = altcol = 0;
561
0
            }
562
49.7k
            else {
563
49.7k
                blankline = 1; /* Ignore completely */
564
49.7k
            }
565
            /* We can't jump back right here since we still
566
               may need to skip to the end of a comment */
567
49.7k
        }
568
225k
        if (!blankline && tok->level == 0) {
569
134k
            col = cont_line_col ? cont_line_col : col;
570
134k
            altcol = cont_line_col ? cont_line_col : altcol;
571
134k
            if (col == tok->indstack[tok->indent]) {
572
                /* No change */
573
95.0k
                if (altcol != tok->altindstack[tok->indent]) {
574
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
575
1
                }
576
95.0k
            }
577
39.7k
            else if (col > tok->indstack[tok->indent]) {
578
                /* Indent -- always one */
579
22.2k
                if (tok->indent+1 >= MAXINDENT) {
580
0
                    tok->done = E_TOODEEP;
581
0
                    tok->cur = tok->inp;
582
0
                    return MAKE_TOKEN(ERRORTOKEN);
583
0
                }
584
22.2k
                if (altcol <= tok->altindstack[tok->indent]) {
585
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
586
2
                }
587
22.2k
                tok->pendin++;
588
22.2k
                tok->indstack[++tok->indent] = col;
589
22.2k
                tok->altindstack[tok->indent] = altcol;
590
22.2k
            }
591
17.4k
            else /* col < tok->indstack[tok->indent] */ {
592
                /* Dedent -- any number, must be consistent */
593
39.0k
                while (tok->indent > 0 &&
594
34.5k
                    col < tok->indstack[tok->indent]) {
595
21.5k
                    tok->pendin--;
596
21.5k
                    tok->indent--;
597
21.5k
                }
598
17.4k
                if (col != tok->indstack[tok->indent]) {
599
3
                    tok->done = E_DEDENT;
600
3
                    tok->cur = tok->inp;
601
3
                    return MAKE_TOKEN(ERRORTOKEN);
602
3
                }
603
17.4k
                if (altcol != tok->altindstack[tok->indent]) {
604
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
605
1
                }
606
17.4k
            }
607
134k
        }
608
225k
    }
609
610
1.69M
    tok->start = tok->cur;
611
1.69M
    tok->starting_col_offset = tok->col_offset;
612
613
    /* Return pending indents/dedents */
614
1.69M
    if (tok->pendin != 0) {
615
43.7k
        if (tok->pendin < 0) {
616
21.5k
            if (tok->tok_extra_tokens) {
617
0
                p_start = tok->cur;
618
0
                p_end = tok->cur;
619
0
            }
620
21.5k
            tok->pendin++;
621
21.5k
            return MAKE_TOKEN(DEDENT);
622
21.5k
        }
623
22.2k
        else {
624
22.2k
            if (tok->tok_extra_tokens) {
625
0
                p_start = tok->buf;
626
0
                p_end = tok->cur;
627
0
            }
628
22.2k
            tok->pendin--;
629
22.2k
            return MAKE_TOKEN(INDENT);
630
22.2k
        }
631
43.7k
    }
632
633
    /* Peek ahead at the next character */
634
1.65M
    c = tok_nextc(tok);
635
1.65M
    tok_backup(tok, c);
636
637
1.65M
 again:
638
1.65M
    tok->start = NULL;
639
    /* Skip spaces */
640
1.99M
    do {
641
1.99M
        c = tok_nextc(tok);
642
1.99M
    } while (c == ' ' || c == '\t' || c == '\014');
643
644
    /* Set start of current token */
645
1.65M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
646
1.65M
    tok->starting_col_offset = tok->col_offset - 1;
647
648
    /* Skip comment, unless it's a type comment */
649
1.65M
    if (c == '#') {
650
651
39.4k
        const char* p = NULL;
652
39.4k
        const char *prefix, *type_start;
653
39.4k
        int current_starting_col_offset;
654
655
1.29M
        while (c != EOF && c != '\n' && c != '\r') {
656
1.25M
            c = tok_nextc(tok);
657
1.25M
        }
658
659
39.4k
        if (tok->tok_extra_tokens) {
660
0
            p = tok->start;
661
0
        }
662
663
39.4k
        if (tok->type_comments) {
664
0
            p = tok->start;
665
0
            current_starting_col_offset = tok->starting_col_offset;
666
0
            prefix = type_comment_prefix;
667
0
            while (*prefix && p < tok->cur) {
668
0
                if (*prefix == ' ') {
669
0
                    while (*p == ' ' || *p == '\t') {
670
0
                        p++;
671
0
                        current_starting_col_offset++;
672
0
                    }
673
0
                } else if (*prefix == *p) {
674
0
                    p++;
675
0
                    current_starting_col_offset++;
676
0
                } else {
677
0
                    break;
678
0
                }
679
680
0
                prefix++;
681
0
            }
682
683
            /* This is a type comment if we matched all of type_comment_prefix. */
684
0
            if (!*prefix) {
685
0
                int is_type_ignore = 1;
686
                // +6 in order to skip the word 'ignore'
687
0
                const char *ignore_end = p + 6;
688
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
689
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
690
691
0
                type_start = p;
692
693
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
694
                 * or anything ASCII and non-alphanumeric. */
695
0
                is_type_ignore = (
696
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
697
0
                    && !(tok->cur > ignore_end
698
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
699
700
0
                if (is_type_ignore) {
701
0
                    p_start = ignore_end;
702
0
                    p_end = tok->cur;
703
704
                    /* If this type ignore is the only thing on the line, consume the newline also. */
705
0
                    if (blankline) {
706
0
                        tok_nextc(tok);
707
0
                        tok->atbol = 1;
708
0
                    }
709
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
710
0
                } else {
711
0
                    p_start = type_start;
712
0
                    p_end = tok->cur;
713
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
714
0
                }
715
0
            }
716
0
        }
717
39.4k
        if (tok->tok_extra_tokens) {
718
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
719
0
            p_start = p;
720
0
            p_end = tok->cur;
721
0
            tok->comment_newline = blankline;
722
0
            return MAKE_TOKEN(COMMENT);
723
0
        }
724
39.4k
    }
725
726
1.65M
    if (tok->done == E_INTERACT_STOP) {
727
0
        return MAKE_TOKEN(ENDMARKER);
728
0
    }
729
730
    /* Check for EOF and errors now */
731
1.65M
    if (c == EOF) {
732
16.3k
        if (tok->level) {
733
4.01k
            return MAKE_TOKEN(ERRORTOKEN);
734
4.01k
        }
735
12.3k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
736
16.3k
    }
737
738
    /* Identifier (most frequent token!) */
739
1.63M
    nonascii = 0;
740
1.63M
    if (is_potential_identifier_start(c)) {
741
        /* Process the various legal combinations of b"", r"", u"", and f"". */
742
513k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
743
631k
        while (1) {
744
631k
            if (!saw_b && (c == 'b' || c == 'B')) {
745
20.1k
                saw_b = 1;
746
20.1k
            }
747
            /* Since this is a backwards compatibility support literal we don't
748
               want to support it in arbitrary order like byte literals. */
749
611k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
750
6.91k
                saw_u = 1;
751
6.91k
            }
752
            /* ur"" and ru"" are not supported */
753
604k
            else if (!saw_r && (c == 'r' || c == 'R')) {
754
34.8k
                saw_r = 1;
755
34.8k
            }
756
569k
            else if (!saw_f && (c == 'f' || c == 'F')) {
757
44.3k
                saw_f = 1;
758
44.3k
            }
759
525k
            else if (!saw_t && (c == 't' || c == 'T')) {
760
31.8k
                saw_t = 1;
761
31.8k
            }
762
493k
            else {
763
493k
                break;
764
493k
            }
765
138k
            c = tok_nextc(tok);
766
138k
            if (c == '"' || c == '\'') {
767
                // Raise error on incompatible string prefixes:
768
20.0k
                int status = maybe_raise_syntax_error_for_string_prefixes(
769
20.0k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
770
20.0k
                if (status < 0) {
771
7
                    return MAKE_TOKEN(ERRORTOKEN);
772
7
                }
773
774
                // Handle valid f or t string creation:
775
19.9k
                if (saw_f || saw_t) {
776
15.4k
                    goto f_string_quote;
777
15.4k
                }
778
4.54k
                goto letter_quote;
779
19.9k
            }
780
138k
        }
781
2.27M
        while (is_potential_identifier_char(c)) {
782
1.77M
            if (c >= 128) {
783
140k
                nonascii = 1;
784
140k
            }
785
1.77M
            c = tok_nextc(tok);
786
1.77M
        }
787
493k
        tok_backup(tok, c);
788
493k
        if (nonascii && !verify_identifier(tok)) {
789
748
            return MAKE_TOKEN(ERRORTOKEN);
790
748
        }
791
792
492k
        p_start = tok->start;
793
492k
        p_end = tok->cur;
794
795
492k
        return MAKE_TOKEN(NAME);
796
493k
    }
797
798
1.12M
    if (c == '\r') {
799
416
        c = tok_nextc(tok);
800
416
    }
801
802
    /* Newline */
803
1.12M
    if (c == '\n') {
804
206k
        tok->atbol = 1;
805
206k
        if (blankline || tok->level > 0) {
806
90.7k
            if (tok->tok_extra_tokens) {
807
0
                if (tok->comment_newline) {
808
0
                    tok->comment_newline = 0;
809
0
                }
810
0
                p_start = tok->start;
811
0
                p_end = tok->cur;
812
0
                return MAKE_TOKEN(NL);
813
0
            }
814
90.7k
            goto nextline;
815
90.7k
        }
816
115k
        if (tok->comment_newline && tok->tok_extra_tokens) {
817
0
            tok->comment_newline = 0;
818
0
            p_start = tok->start;
819
0
            p_end = tok->cur;
820
0
            return MAKE_TOKEN(NL);
821
0
        }
822
115k
        p_start = tok->start;
823
115k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
824
115k
        tok->cont_line = 0;
825
115k
        return MAKE_TOKEN(NEWLINE);
826
115k
    }
827
828
    /* Period or number starting with period? */
829
916k
    if (c == '.') {
830
33.8k
        c = tok_nextc(tok);
831
33.8k
        if (Py_ISDIGIT(c)) {
832
3.40k
            goto fraction;
833
30.4k
        } else if (c == '.') {
834
1.56k
            c = tok_nextc(tok);
835
1.56k
            if (c == '.') {
836
978
                p_start = tok->start;
837
978
                p_end = tok->cur;
838
978
                return MAKE_TOKEN(ELLIPSIS);
839
978
            }
840
589
            else {
841
589
                tok_backup(tok, c);
842
589
            }
843
589
            tok_backup(tok, '.');
844
589
        }
845
28.8k
        else {
846
28.8k
            tok_backup(tok, c);
847
28.8k
        }
848
29.4k
        p_start = tok->start;
849
29.4k
        p_end = tok->cur;
850
29.4k
        return MAKE_TOKEN(DOT);
851
33.8k
    }
852
853
    /* Number */
854
882k
    if (Py_ISDIGIT(c)) {
855
90.6k
        if (c == '0') {
856
            /* Hex, octal or binary -- maybe. */
857
32.0k
            c = tok_nextc(tok);
858
32.0k
            if (c == 'x' || c == 'X') {
859
                /* Hex */
860
15.7k
                c = tok_nextc(tok);
861
15.9k
                do {
862
15.9k
                    if (c == '_') {
863
207
                        c = tok_nextc(tok);
864
207
                    }
865
15.9k
                    if (!Py_ISXDIGIT(c)) {
866
18
                        tok_backup(tok, c);
867
18
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
868
18
                    }
869
76.6k
                    do {
870
76.6k
                        c = tok_nextc(tok);
871
76.6k
                    } while (Py_ISXDIGIT(c));
872
15.9k
                } while (c == '_');
873
15.7k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
874
3
                    return MAKE_TOKEN(ERRORTOKEN);
875
3
                }
876
15.7k
            }
877
16.3k
            else if (c == 'o' || c == 'O') {
878
                /* Octal */
879
572
                c = tok_nextc(tok);
880
985
                do {
881
985
                    if (c == '_') {
882
419
                        c = tok_nextc(tok);
883
419
                    }
884
985
                    if (c < '0' || c >= '8') {
885
19
                        if (Py_ISDIGIT(c)) {
886
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
887
1
                                    "invalid digit '%c' in octal literal", c));
888
1
                        }
889
18
                        else {
890
18
                            tok_backup(tok, c);
891
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
892
18
                        }
893
19
                    }
894
2.66k
                    do {
895
2.66k
                        c = tok_nextc(tok);
896
2.66k
                    } while ('0' <= c && c < '8');
897
966
                } while (c == '_');
898
553
                if (Py_ISDIGIT(c)) {
899
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
900
1
                            "invalid digit '%c' in octal literal", c));
901
1
                }
902
552
                if (!verify_end_of_number(tok, c, "octal")) {
903
3
                    return MAKE_TOKEN(ERRORTOKEN);
904
3
                }
905
552
            }
906
15.7k
            else if (c == 'b' || c == 'B') {
907
                /* Binary */
908
561
                c = tok_nextc(tok);
909
912
                do {
910
912
                    if (c == '_') {
911
360
                        c = tok_nextc(tok);
912
360
                    }
913
912
                    if (c != '0' && c != '1') {
914
22
                        if (Py_ISDIGIT(c)) {
915
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
916
1
                        }
917
21
                        else {
918
21
                            tok_backup(tok, c);
919
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
920
21
                        }
921
22
                    }
922
3.86k
                    do {
923
3.86k
                        c = tok_nextc(tok);
924
3.86k
                    } while (c == '0' || c == '1');
925
890
                } while (c == '_');
926
539
                if (Py_ISDIGIT(c)) {
927
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
928
2
                }
929
537
                if (!verify_end_of_number(tok, c, "binary")) {
930
2
                    return MAKE_TOKEN(ERRORTOKEN);
931
2
                }
932
537
            }
933
15.1k
            else {
934
15.1k
                int nonzero = 0;
935
                /* maybe old-style octal; c is first char of it */
936
                /* in any case, allow '0' as a literal */
937
16.3k
                while (1) {
938
16.3k
                    if (c == '_') {
939
91
                        c = tok_nextc(tok);
940
91
                        if (!Py_ISDIGIT(c)) {
941
3
                            tok_backup(tok, c);
942
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
943
3
                        }
944
91
                    }
945
16.3k
                    if (c != '0') {
946
15.1k
                        break;
947
15.1k
                    }
948
1.22k
                    c = tok_nextc(tok);
949
1.22k
                }
950
15.1k
                char* zeros_end = tok->cur;
951
15.1k
                if (Py_ISDIGIT(c)) {
952
436
                    nonzero = 1;
953
436
                    c = tok_decimal_tail(tok);
954
436
                    if (c == 0) {
955
1
                        return MAKE_TOKEN(ERRORTOKEN);
956
1
                    }
957
436
                }
958
15.1k
                if (c == '.') {
959
873
                    c = tok_nextc(tok);
960
873
                    goto fraction;
961
873
                }
962
14.2k
                else if (c == 'e' || c == 'E') {
963
846
                    goto exponent;
964
846
                }
965
13.4k
                else if (c == 'j' || c == 'J') {
966
803
                    goto imaginary;
967
803
                }
968
12.6k
                else if (nonzero && !tok->tok_extra_tokens) {
969
                    /* Old-style octal: now disallowed. */
970
20
                    tok_backup(tok, c);
971
20
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
972
20
                            tok, (int)(tok->start + 1 - tok->line_start),
973
20
                            (int)(zeros_end - tok->line_start),
974
20
                            "leading zeros in decimal integer "
975
20
                            "literals are not permitted; "
976
20
                            "use an 0o prefix for octal integers"));
977
20
                }
978
12.6k
                if (!verify_end_of_number(tok, c, "decimal")) {
979
22
                    return MAKE_TOKEN(ERRORTOKEN);
980
22
                }
981
12.6k
            }
982
32.0k
        }
983
58.5k
        else {
984
            /* Decimal */
985
58.5k
            c = tok_decimal_tail(tok);
986
58.5k
            if (c == 0) {
987
10
                return MAKE_TOKEN(ERRORTOKEN);
988
10
            }
989
58.5k
            {
990
                /* Accept floating-point numbers. */
991
58.5k
                if (c == '.') {
992
3.84k
                    c = tok_nextc(tok);
993
8.12k
        fraction:
994
                    /* Fraction */
995
8.12k
                    if (Py_ISDIGIT(c)) {
996
6.24k
                        c = tok_decimal_tail(tok);
997
6.24k
                        if (c == 0) {
998
1
                            return MAKE_TOKEN(ERRORTOKEN);
999
1
                        }
1000
6.24k
                    }
1001
8.12k
                }
1002
62.7k
                if (c == 'e' || c == 'E') {
1003
9.91k
                    int e;
1004
10.7k
                  exponent:
1005
10.7k
                    e = c;
1006
                    /* Exponent part */
1007
10.7k
                    c = tok_nextc(tok);
1008
10.7k
                    if (c == '+' || c == '-') {
1009
4.02k
                        c = tok_nextc(tok);
1010
4.02k
                        if (!Py_ISDIGIT(c)) {
1011
11
                            tok_backup(tok, c);
1012
11
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1013
11
                        }
1014
6.74k
                    } else if (!Py_ISDIGIT(c)) {
1015
590
                        tok_backup(tok, c);
1016
590
                        if (!verify_end_of_number(tok, e, "decimal")) {
1017
37
                            return MAKE_TOKEN(ERRORTOKEN);
1018
37
                        }
1019
553
                        tok_backup(tok, e);
1020
553
                        p_start = tok->start;
1021
553
                        p_end = tok->cur;
1022
553
                        return MAKE_TOKEN(NUMBER);
1023
590
                    }
1024
10.1k
                    c = tok_decimal_tail(tok);
1025
10.1k
                    if (c == 0) {
1026
1
                        return MAKE_TOKEN(ERRORTOKEN);
1027
1
                    }
1028
10.1k
                }
1029
63.0k
                if (c == 'j' || c == 'J') {
1030
                    /* Imaginary part */
1031
3.75k
        imaginary:
1032
3.75k
                    c = tok_nextc(tok);
1033
3.75k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1034
8
                        return MAKE_TOKEN(ERRORTOKEN);
1035
8
                    }
1036
3.75k
                }
1037
60.0k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1038
119
                    return MAKE_TOKEN(ERRORTOKEN);
1039
119
                }
1040
63.0k
            }
1041
63.0k
        }
1042
93.1k
        tok_backup(tok, c);
1043
93.1k
        p_start = tok->start;
1044
93.1k
        p_end = tok->cur;
1045
93.1k
        return MAKE_TOKEN(NUMBER);
1046
90.6k
    }
1047
1048
807k
  f_string_quote:
1049
807k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1050
15.4k
        && (c == '\'' || c == '"'))) {
1051
1052
15.4k
        int quote = c;
1053
15.4k
        int quote_size = 1;             /* 1 or 3 */
1054
1055
        /* Nodes of type STRING, especially multi line strings
1056
           must be handled differently in order to get both
1057
           the starting line number and the column offset right.
1058
           (cf. issue 16806) */
1059
15.4k
        tok->first_lineno = tok->lineno;
1060
15.4k
        tok->multi_line_start = tok->line_start;
1061
1062
        /* Find the quote size and start of string */
1063
15.4k
        int after_quote = tok_nextc(tok);
1064
15.4k
        if (after_quote == quote) {
1065
2.25k
            int after_after_quote = tok_nextc(tok);
1066
2.25k
            if (after_after_quote == quote) {
1067
626
                quote_size = 3;
1068
626
            }
1069
1.62k
            else {
1070
                // TODO: Check this
1071
1.62k
                tok_backup(tok, after_after_quote);
1072
1.62k
                tok_backup(tok, after_quote);
1073
1.62k
            }
1074
2.25k
        }
1075
15.4k
        if (after_quote != quote) {
1076
13.1k
            tok_backup(tok, after_quote);
1077
13.1k
        }
1078
1079
1080
15.4k
        p_start = tok->start;
1081
15.4k
        p_end = tok->cur;
1082
15.4k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1083
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1084
1
        }
1085
15.4k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1086
15.4k
        the_current_tok->kind = TOK_FSTRING_MODE;
1087
15.4k
        the_current_tok->quote = quote;
1088
15.4k
        the_current_tok->quote_size = quote_size;
1089
15.4k
        the_current_tok->start = tok->start;
1090
15.4k
        the_current_tok->multi_line_start = tok->line_start;
1091
15.4k
        the_current_tok->first_line = tok->lineno;
1092
15.4k
        the_current_tok->start_offset = -1;
1093
15.4k
        the_current_tok->multi_line_start_offset = -1;
1094
15.4k
        the_current_tok->last_expr_buffer = NULL;
1095
15.4k
        the_current_tok->last_expr_size = 0;
1096
15.4k
        the_current_tok->last_expr_end = -1;
1097
15.4k
        the_current_tok->in_format_spec = 0;
1098
15.4k
        the_current_tok->in_debug = 0;
1099
1100
15.4k
        enum string_kind_t string_kind = FSTRING;
1101
15.4k
        switch (*tok->start) {
1102
414
            case 'T':
1103
3.88k
            case 't':
1104
3.88k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1105
3.88k
                string_kind = TSTRING;
1106
3.88k
                break;
1107
1.68k
            case 'F':
1108
11.2k
            case 'f':
1109
11.2k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1110
11.2k
                break;
1111
119
            case 'R':
1112
287
            case 'r':
1113
287
                the_current_tok->raw = 1;
1114
287
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1115
72
                    string_kind = TSTRING;
1116
72
                }
1117
287
                break;
1118
0
            default:
1119
0
                Py_UNREACHABLE();
1120
15.4k
        }
1121
1122
15.4k
        the_current_tok->string_kind = string_kind;
1123
15.4k
        the_current_tok->curly_bracket_depth = 0;
1124
15.4k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1125
15.4k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1126
15.4k
    }
1127
1128
796k
  letter_quote:
1129
    /* String */
1130
796k
    if (c == '\'' || c == '"') {
1131
54.6k
        int quote = c;
1132
54.6k
        int quote_size = 1;             /* 1 or 3 */
1133
54.6k
        int end_quote_size = 0;
1134
54.6k
        int has_escaped_quote = 0;
1135
1136
        /* Nodes of type STRING, especially multi line strings
1137
           must be handled differently in order to get both
1138
           the starting line number and the column offset right.
1139
           (cf. issue 16806) */
1140
54.6k
        tok->first_lineno = tok->lineno;
1141
54.6k
        tok->multi_line_start = tok->line_start;
1142
1143
        /* Find the quote size and start of string */
1144
54.6k
        c = tok_nextc(tok);
1145
54.6k
        if (c == quote) {
1146
8.97k
            c = tok_nextc(tok);
1147
8.97k
            if (c == quote) {
1148
2.49k
                quote_size = 3;
1149
2.49k
            }
1150
6.47k
            else {
1151
6.47k
                end_quote_size = 1;     /* empty string found */
1152
6.47k
            }
1153
8.97k
        }
1154
54.6k
        if (c != quote) {
1155
52.1k
            tok_backup(tok, c);
1156
52.1k
        }
1157
1158
        /* Get rest of string */
1159
1.11M
        while (end_quote_size != quote_size) {
1160
1.06M
            c = tok_nextc(tok);
1161
1.06M
            if (tok->done == E_ERROR) {
1162
0
                return MAKE_TOKEN(ERRORTOKEN);
1163
0
            }
1164
1.06M
            if (tok->done == E_DECODE) {
1165
0
                break;
1166
0
            }
1167
1.06M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1168
283
                assert(tok->multi_line_start != NULL);
1169
                // shift the tok_state's location into
1170
                // the start of string, and report the error
1171
                // from the initial quote character
1172
283
                tok->cur = (char *)tok->start;
1173
283
                tok->cur++;
1174
283
                tok->line_start = tok->multi_line_start;
1175
283
                int start = tok->lineno;
1176
283
                tok->lineno = tok->first_lineno;
1177
1178
283
                if (INSIDE_FSTRING(tok)) {
1179
                    /* When we are in an f-string, before raising the
1180
                     * unterminated string literal error, check whether
1181
                     * does the initial quote matches with f-strings quotes
1182
                     * and if it is, then this must be a missing '}' token
1183
                     * so raise the proper error */
1184
27
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1185
27
                    if (the_current_tok->quote == quote &&
1186
23
                        the_current_tok->quote_size == quote_size) {
1187
20
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1188
20
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1189
20
                    }
1190
27
                }
1191
1192
263
                if (quote_size == 3) {
1193
13
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1194
13
                                     " (detected at line %d)", start);
1195
13
                    if (c != '\n') {
1196
13
                        tok->done = E_EOFS;
1197
13
                    }
1198
13
                    return MAKE_TOKEN(ERRORTOKEN);
1199
13
                }
1200
250
                else {
1201
250
                    if (has_escaped_quote) {
1202
10
                        _PyTokenizer_syntaxerror(
1203
10
                            tok,
1204
10
                            "unterminated string literal (detected at line %d); "
1205
10
                            "perhaps you escaped the end quote?",
1206
10
                            start
1207
10
                        );
1208
240
                    } else {
1209
240
                        _PyTokenizer_syntaxerror(
1210
240
                            tok, "unterminated string literal (detected at line %d)", start
1211
240
                        );
1212
240
                    }
1213
250
                    if (c != '\n') {
1214
13
                        tok->done = E_EOLS;
1215
13
                    }
1216
250
                    return MAKE_TOKEN(ERRORTOKEN);
1217
250
                }
1218
263
            }
1219
1.06M
            if (c == quote) {
1220
54.4k
                end_quote_size += 1;
1221
54.4k
            }
1222
1.00M
            else {
1223
1.00M
                end_quote_size = 0;
1224
1.00M
                if (c == '\\') {
1225
26.8k
                    c = tok_nextc(tok);  /* skip escaped char */
1226
26.8k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1227
989
                        has_escaped_quote = 1;
1228
989
                    }
1229
26.8k
                    if (c == '\r') {
1230
195
                        c = tok_nextc(tok);
1231
195
                    }
1232
26.8k
                }
1233
1.00M
            }
1234
1.06M
        }
1235
1236
54.3k
        p_start = tok->start;
1237
54.3k
        p_end = tok->cur;
1238
54.3k
        return MAKE_TOKEN(STRING);
1239
54.6k
    }
1240
1241
    /* Line continuation */
1242
741k
    if (c == '\\') {
1243
443
        if ((c = tok_continuation_line(tok)) == -1) {
1244
75
            return MAKE_TOKEN(ERRORTOKEN);
1245
75
        }
1246
368
        tok->cont_line = 1;
1247
368
        goto again; /* Read next line */
1248
443
    }
1249
1250
    /* Punctuation character */
1251
741k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1252
741k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1253
        /* This code block gets executed before the curly_bracket_depth is incremented
1254
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1255
         * to adjust it manually */
1256
54.8k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1257
54.8k
        int in_format_spec = current_tok->in_format_spec;
1258
54.8k
         int cursor_in_format_with_debug =
1259
54.8k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1260
54.8k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1261
54.8k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1262
0
            return MAKE_TOKEN(ENDMARKER);
1263
0
        }
1264
54.8k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1265
0
            return MAKE_TOKEN(ERRORTOKEN);
1266
0
        }
1267
1268
54.8k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269
3.87k
            current_tok->kind = TOK_FSTRING_MODE;
1270
3.87k
            current_tok->in_format_spec = 1;
1271
3.87k
            p_start = tok->start;
1272
3.87k
            p_end = tok->cur;
1273
3.87k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1274
3.87k
        }
1275
54.8k
    }
1276
1277
    /* Check for two-character token */
1278
737k
    {
1279
737k
        int c2 = tok_nextc(tok);
1280
737k
        int current_token = _PyToken_TwoChars(c, c2);
1281
737k
        if (current_token != OP) {
1282
22.4k
            int c3 = tok_nextc(tok);
1283
22.4k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1284
22.4k
            if (current_token3 != OP) {
1285
880
                current_token = current_token3;
1286
880
            }
1287
21.6k
            else {
1288
21.6k
                tok_backup(tok, c3);
1289
21.6k
            }
1290
22.4k
            p_start = tok->start;
1291
22.4k
            p_end = tok->cur;
1292
22.4k
            return MAKE_TOKEN(current_token);
1293
22.4k
        }
1294
715k
        tok_backup(tok, c2);
1295
715k
    }
1296
1297
    /* Keep track of parentheses nesting level */
1298
0
    switch (c) {
1299
84.4k
    case '(':
1300
115k
    case '[':
1301
159k
    case '{':
1302
159k
        if (tok->level >= MAXLEVEL) {
1303
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1304
3
        }
1305
159k
        tok->parenstack[tok->level] = c;
1306
159k
        tok->parenlinenostack[tok->level] = tok->lineno;
1307
159k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1308
159k
        tok->level++;
1309
159k
        if (INSIDE_FSTRING(tok)) {
1310
29.8k
            current_tok->curly_bracket_depth++;
1311
29.8k
        }
1312
159k
        break;
1313
58.9k
    case ')':
1314
70.0k
    case ']':
1315
96.1k
    case '}':
1316
96.1k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1317
43
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1318
43
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1319
43
        }
1320
96.1k
        if (!tok->tok_extra_tokens && !tok->level) {
1321
162
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1322
162
        }
1323
95.9k
        if (tok->level > 0) {
1324
95.9k
            tok->level--;
1325
95.9k
            int opening = tok->parenstack[tok->level];
1326
95.9k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1327
37.0k
                                            (opening == '[' && c == ']') ||
1328
25.9k
                                            (opening == '{' && c == '}'))) {
1329
                /* If the opening bracket belongs to an f-string's expression
1330
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1331
                nested expression, then instead of matching a different
1332
                syntactical construct with it; we'll throw an unmatched
1333
                parentheses error. */
1334
43
                if (INSIDE_FSTRING(tok) && opening == '{') {
1335
6
                    assert(current_tok->curly_bracket_depth >= 0);
1336
6
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1337
6
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1338
4
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1339
4
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1340
4
                    }
1341
6
                }
1342
39
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1343
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1344
2
                            "closing parenthesis '%c' does not match "
1345
2
                            "opening parenthesis '%c' on line %d",
1346
2
                            c, opening, tok->parenlinenostack[tok->level]));
1347
2
                }
1348
37
                else {
1349
37
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1350
37
                            "closing parenthesis '%c' does not match "
1351
37
                            "opening parenthesis '%c'",
1352
37
                            c, opening));
1353
37
                }
1354
39
            }
1355
95.9k
        }
1356
1357
95.8k
        if (INSIDE_FSTRING(tok)) {
1358
22.9k
            current_tok->curly_bracket_depth--;
1359
22.9k
            if (current_tok->curly_bracket_depth < 0) {
1360
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1361
1
                    TOK_GET_STRING_PREFIX(tok), c));
1362
1
            }
1363
22.9k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1364
21.4k
                current_tok->curly_bracket_expr_start_depth--;
1365
21.4k
                current_tok->kind = TOK_FSTRING_MODE;
1366
21.4k
                current_tok->in_format_spec = 0;
1367
21.4k
                current_tok->in_debug = 0;
1368
21.4k
            }
1369
22.9k
        }
1370
95.8k
        break;
1371
459k
    default:
1372
459k
        break;
1373
715k
    }
1374
1375
714k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1376
426
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1377
426
    }
1378
1379
714k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1380
4.43k
        current_tok->in_debug = 1;
1381
4.43k
    }
1382
1383
    /* Punctuation character */
1384
714k
    p_start = tok->start;
1385
714k
    p_end = tok->cur;
1386
714k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1387
714k
}
1388
1389
/* Produce the next token while the tokenizer is inside an f-string or
 * t-string literal (TOK_FSTRING_MODE).  Emits FSTRING_MIDDLE/TSTRING_MIDDLE
 * for literal text, FSTRING_END/TSTRING_END at the closing quote(s), and
 * hands control back to tok_get_normal_mode() when a '{' opens a replacement
 * expression.  `current_tok` is the active entry on tok->tok_mode_stack.
 * Returns a token type; error tokens are produced via
 * _PyTokenizer_syntaxerror()/ERRORTOKEN. */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;   /* consecutive closing-quote chars matched so far */
    int unicode_escape = 0;   /* inside a \N{...} named escape, so '}' is literal */

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        // Peek one char to distinguish '{{' (escaped brace, literal text)
        // from '{' (start of a replacement expression).
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            // Switch the current mode-stack entry back to regular tokenization
            // for the replacement expression.
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // All closing quotes matched: free the buffered expression text for this
    // literal and pop the f/t-string mode off the stack.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        // In a format spec only while the enclosing expression scan is active.
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                // NOTE(review): with quote_size == 1 ruled out above, this
                // path appears unreachable here (triple-quoted strings don't
                // take the c == '\n' branch) — confirm before relying on it.
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        // Count a run of closing-quote characters; any other char resets it.
        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            // '{{' is a literal brace unless we're in a format spec, where
            // double brackets have no escape meaning.
            if (peek != '{' || in_format_spec) {
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // Escaped '{{': emit the text up to (and including) one '{'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            // Inside \N{...} the '}' terminates the named escape, not an
            // expression — emit it as literal text.
            if (unicode_escape) {
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // Escaped '}}': emit the text up to (and including) one '}'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // Single '}': hand it back to regular mode to close the
                // replacement field.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                // Collapse CRLF after a backslash so '\n' handling applies.
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1611
1612
static int
1613
tok_get(struct tok_state *tok, struct token *token)
1614
1.64M
{
1615
1.64M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1616
1.64M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1617
1.59M
        return tok_get_normal_mode(tok, current_tok, token);
1618
1.59M
    } else {
1619
51.5k
        return tok_get_fstring_mode(tok, current_tok, token);
1620
51.5k
    }
1621
1.64M
}
1622
1623
int
1624
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1625
1.64M
{
1626
1.64M
    int result = tok_get(tok, token);
1627
1.64M
    if (tok->decoding_erred) {
1628
0
        result = ERRORTOKEN;
1629
0
        tok->done = E_DECODE;
1630
0
    }
1631
1.64M
    return result;
1632
1.64M
}