Coverage Report

Created: 2025-08-24 07:03

/src/cpython/Parser/lexer/lexer.c

Line | Count  | Source
   1 |        | #include "Python.h"
   2 |        | #include "pycore_token.h"
   3 |        | #include "pycore_unicodeobject.h"
   4 |        | #include "errcode.h"
   5 |        |
   6 |        | #include "state.h"
   7 |        | #include "../tokenizer/helpers.h"
   8 |        |
   9 |        | /* Alternate tab spacing */
  10 |  1.62k | #define ALTTABSIZE 1
  11 |        |
  12 |  1.77M | #define is_potential_identifier_start(c) (\
  13 |  1.77M |               (c >= 'a' && c <= 'z')\
  14 |  1.77M |                || (c >= 'A' && c <= 'Z')\
  15 |  1.77M |                || c == '_'\
  16 |  1.77M |                || (c >= 128))
  17 |        |
  18 |  2.40M | #define is_potential_identifier_char(c) (\
  19 |  2.40M |               (c >= 'a' && c <= 'z')\
  20 |  2.40M |                || (c >= 'A' && c <= 'Z')\
  21 |  2.40M |                || (c >= '0' && c <= '9')\
  22 |  2.40M |                || c == '_'\
  23 |  2.40M |                || (c >= 128))
  24 |        |
  25 |        | #ifdef Py_DEBUG
  26 |        | static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
  27 |        |     assert(tok->tok_mode_stack_index >= 0);
  28 |        |     assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
  29 |        |     return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
  30 |        | }
  31 |        | static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
  32 |        |     assert(tok->tok_mode_stack_index >= 0);
  33 |        |     assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
  34 |        |     return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
  35 |        | }
  36 |        | #else
  37 |  1.90M | #define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
  38 |  16.7k | #define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
  39 |        | #endif
  40 |        |
  41 |        | #define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
  42 |        | #define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
  43 |     36 | #define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
  44 |  1.78M | #define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
  45 |      0 | #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
  46 |      0 |                 _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
  47 |        |
  48 |        | /* Spaces in this constant are treated as "zero or more spaces or tabs" when
  49 |        |    tokenizing. */
  50 |        | static const char* type_comment_prefix = "# type: ";
  51 |        |
  52 |        | static inline int
  53 |        | contains_null_bytes(const char* str, size_t size)
  54 |   229k | {
  55 |   229k |     return memchr(str, 0, size) != NULL;
  56 |   229k | }
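
Note: the two classifier macros above operate on raw bytes: any byte >= 128 is
treated as a *potential* identifier character, and the actual PEP 3131
validation is deferred to verify_identifier() further down. A minimal
standalone sketch of that behavior (illustrative only, not part of lexer.c):

    #include <stdio.h>

    /* Same definition as line 12 above, repeated so this sketch compiles
       on its own. */
    #define is_potential_identifier_start(c) (\
                  (c >= 'a' && c <= 'z')\
                   || (c >= 'A' && c <= 'Z')\
                   || c == '_'\
                   || (c >= 128))

    int main(void) {
        printf("%d\n", is_potential_identifier_start('a'));   /* 1 */
        printf("%d\n", is_potential_identifier_start('_'));   /* 1 */
        printf("%d\n", is_potential_identifier_start('1'));   /* 0 */
        printf("%d\n", is_potential_identifier_start(0xC3));  /* 1: lead byte of a UTF-8 sequence */
        return 0;
    }
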
  57 |        |
  58 |        | /* Get next char, updating state; error code goes into tok->done */
  59 |        | static int
  60 |        | tok_nextc(struct tok_state *tok)
  61 |  10.8M | {
  62 |  10.8M |     int rc;
  63 |  11.1M |     for (;;) {
  64 |  11.1M |         if (tok->cur != tok->inp) {
  65 |  10.8M |             if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
  66 |      0 |                 tok->done = E_COLUMNOVERFLOW;
  67 |      0 |                 return EOF;
  68 |      0 |             }
  69 |  10.8M |             tok->col_offset++;
  70 |  10.8M |             return Py_CHARMASK(*tok->cur++); /* Fast path */
  71 |  10.8M |         }
  72 |   281k |         if (tok->done != E_OK) {
  73 |  34.0k |             return EOF;
  74 |  34.0k |         }
  75 |   247k |         rc = tok->underflow(tok);
  76 |        | #if defined(Py_DEBUG)
  77 |        |         if (tok->debug) {
  78 |        |             fprintf(stderr, "line[%d] = ", tok->lineno);
  79 |        |             _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
  80 |        |             fprintf(stderr, "  tok->done = %d\n", tok->done);
  81 |        |         }
  82 |        | #endif
  83 |   247k |         if (!rc) {
  84 |  17.1k |             tok->cur = tok->inp;
  85 |  17.1k |             return EOF;
  86 |  17.1k |         }
  87 |   229k |         tok->line_start = tok->cur;
  88 |        |
  89 |   229k |         if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
  90 |      0 |             _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
  91 |      0 |             tok->cur = tok->inp;
  92 |      0 |             return EOF;
  93 |      0 |         }
  94 |   229k |     }
  95 |  10.8M |     Py_UNREACHABLE();
  96 |  10.8M | }
  97 |        |
  98 |        | /* Back-up one character */
  99 |        | static void
 100 |        | tok_backup(struct tok_state *tok, int c)
 101 |  3.74M | {
 102 |  3.74M |     if (c != EOF) {
 103 |  3.71M |         if (--tok->cur < tok->buf) {
 104 |      0 |             Py_FatalError("tokenizer beginning of buffer");
 105 |      0 |         }
 106 |  3.71M |         if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
 107 |      0 |             Py_FatalError("tok_backup: wrong character");
 108 |      0 |         }
 109 |  3.71M |         tok->col_offset--;
 110 |  3.71M |     }
 111 |  3.74M | }
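
Note: tok_nextc() and tok_backup() together form the lexer's one-character
pushback stream: every character obtained from tok_nextc() may be returned
with tok_backup(), and tok_backup() fatally errors if the pushed-back
character does not match what is in the buffer. A hedged sketch of the
contract over a plain string (hypothetical names, not CPython API):

    #include <stdio.h>
    #include <string.h>

    /* Miniature of the tok_nextc()/tok_backup() pattern. */
    struct mini_stream { const char *cur, *end; };

    static int mini_nextc(struct mini_stream *s) {
        return (s->cur == s->end) ? EOF : (unsigned char)*s->cur++;
    }

    static void mini_backup(struct mini_stream *s, int c) {
        if (c != EOF) {
            s->cur--;  /* like tok_backup(): only valid right after mini_nextc() */
        }
    }

    int main(void) {
        const char *src = "if x";
        struct mini_stream s = { src, src + strlen(src) };
        int c = mini_nextc(&s);          /* reads 'i' */
        mini_backup(&s, c);              /* one-character lookahead, position restored */
        printf("%c\n", mini_nextc(&s));  /* prints 'i' again */
        return 0;
    }
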
 112 |        |
 113 |        | static int
 114 |  22.5k | set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
 115 |  22.5k |     assert(token != NULL);
 116 |  22.5k |     assert(c == '}' || c == ':' || c == '!');
 117 |  22.5k |     tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
 118 |        |
 119 |  22.5k |     if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
 120 |  12.9k |         return 0;
 121 |  12.9k |     }
 122 |  9.63k |     PyObject *res = NULL;
 123 |        |
 124 |        |     // Look for a # character outside of string literals
 125 |  9.63k |     int hash_detected = 0;
 126 |  9.63k |     int in_string = 0;
 127 |  9.63k |     char quote_char = 0;
 128 |        |
 129 |  1.66M |     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
 130 |  1.65M |         char ch = tok_mode->last_expr_buffer[i];
 131 |        |
 132 |        |         // Skip escaped characters
 133 |  1.65M |         if (ch == '\\') {
 134 |  33.3k |             i++;
 135 |  33.3k |             continue;
 136 |  33.3k |         }
 137 |        |
 138 |        |         // Handle quotes
 139 |  1.62M |         if (ch == '"' || ch == '\'') {
 140 |        |             // The following if/else block works because there is an odd number
 141 |        |             // of quotes in STRING tokens and the lexer only ever reaches this
 142 |        |             // function with valid STRING tokens.
 143 |        |             // For example: """hello"""
 144 |        |             // First quote: in_string = 1
 145 |        |             // Second quote: in_string = 0
 146 |        |             // Third quote: in_string = 1
 147 |   196k |             if (!in_string) {
 148 |  69.0k |                 in_string = 1;
 149 |  69.0k |                 quote_char = ch;
 150 |  69.0k |             }
 151 |   127k |             else if (ch == quote_char) {
 152 |  68.0k |                 in_string = 0;
 153 |  68.0k |             }
 154 |   196k |             continue;
 155 |   196k |         }
 156 |        |
 157 |        |         // Check for # outside strings
 158 |  1.42M |         if (ch == '#' && !in_string) {
 159 |    833 |             hash_detected = 1;
 160 |    833 |             break;
 161 |    833 |         }
 162 |  1.42M |     }
 163 |        |     // If we found a # character in the expression, we need to handle comments
 164 |  9.63k |     if (hash_detected) {
 165 |        |         // Allocate buffer for processed result
 166 |    833 |         char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
 167 |    833 |         if (!result) {
 168 |      0 |             return -1;
 169 |      0 |         }
 170 |        |
 171 |    833 |         Py_ssize_t i = 0;  // Input position
 172 |    833 |         Py_ssize_t j = 0;  // Output position
 173 |    833 |         in_string = 0;     // Whether we're in a string
 174 |    833 |         quote_char = 0;    // Current string quote char
 175 |        |
 176 |        |         // Process each character
 177 |  64.1k |         while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
 178 |  63.3k |             char ch = tok_mode->last_expr_buffer[i];
 179 |        |
 180 |        |             // Handle string quotes
 181 |  63.3k |             if (ch == '"' || ch == '\'') {
 182 |        |                 // See comment above to understand this part
 183 |  9.11k |                 if (!in_string) {
 184 |  3.55k |                     in_string = 1;
 185 |  3.55k |                     quote_char = ch;
 186 |  5.55k |                 } else if (ch == quote_char) {
 187 |  3.53k |                     in_string = 0;
 188 |  3.53k |                 }
 189 |  9.11k |                 result[j++] = ch;
 190 |  9.11k |             }
 191 |        |             // Skip comments
 192 |  54.2k |             else if (ch == '#' && !in_string) {
 193 |  53.5k |                 while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
 194 |  53.5k |                        tok_mode->last_expr_buffer[i] != '\n') {
 195 |  52.5k |                     i++;
 196 |  52.5k |                 }
 197 |  1.03k |                 if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
 198 |    302 |                     result[j++] = '\n';
 199 |    302 |                 }
 200 |  1.03k |             }
 201 |        |             // Copy other chars
 202 |  53.1k |             else {
 203 |  53.1k |                 result[j++] = ch;
 204 |  53.1k |             }
 205 |  63.3k |             i++;
 206 |  63.3k |         }
 207 |        |
 208 |    833 |         result[j] = '\0';  // Null-terminate the result string
 209 |    833 |         res = PyUnicode_DecodeUTF8(result, j, NULL);
 210 |    833 |         PyMem_Free(result);
 211 |  8.80k |     } else {
 212 |  8.80k |         res = PyUnicode_DecodeUTF8(
 213 |  8.80k |             tok_mode->last_expr_buffer,
 214 |  8.80k |             tok_mode->last_expr_size - tok_mode->last_expr_end,
 215 |  8.80k |             NULL
 216 |  8.80k |         );
 217 |  8.80k |     }
 218 |        |
 219 |  9.63k |     if (!res) {
 220 |     11 |         return -1;
 221 |     11 |     }
 222 |  9.62k |     token->metadata = res;
 223 |  9.62k |     return 0;
 224 |  9.63k | }
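
Note: set_ftstring_expr() runs only for `=` debug expressions
(tok_mode->in_debug) and for t-strings, and attaches the captured expression
source text to token->metadata. The hash-scanning passes exist because a
multi-line expression inside an f-string or t-string may contain `#`
comments: comment text must be dropped from the captured source, while a `#`
inside a nested string literal (tracked via in_string/quote_char) must be
kept. The odd-quote argument in the comment at line 140 is what lets the
simple toggle handle both 'x' and """x""" literals.
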
 225 |        |
 226 |        | int
 227 |        | _PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
 228 |  63.5k | {
 229 |  63.5k |     assert(tok->cur != NULL);
 230 |        |
 231 |  63.5k |     Py_ssize_t size = strlen(tok->cur);
 232 |  63.5k |     tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
 233 |        |
 234 |  63.5k |     switch (cur) {
 235 |      0 |        case 0:
 236 |      0 |             if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
 237 |      0 |                 return 1;
 238 |      0 |             }
 239 |      0 |             char *new_buffer = PyMem_Realloc(
 240 |      0 |                 tok_mode->last_expr_buffer,
 241 |      0 |                 tok_mode->last_expr_size + size
 242 |      0 |             );
 243 |      0 |             if (new_buffer == NULL) {
 244 |      0 |                 PyMem_Free(tok_mode->last_expr_buffer);
 245 |      0 |                 goto error;
 246 |      0 |             }
 247 |      0 |             tok_mode->last_expr_buffer = new_buffer;
 248 |      0 |             strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
 249 |      0 |             tok_mode->last_expr_size += size;
 250 |      0 |             break;
 251 |  40.9k |         case '{':
 252 |  40.9k |             if (tok_mode->last_expr_buffer != NULL) {
 253 |  29.3k |                 PyMem_Free(tok_mode->last_expr_buffer);
 254 |  29.3k |             }
 255 |  40.9k |             tok_mode->last_expr_buffer = PyMem_Malloc(size);
 256 |  40.9k |             if (tok_mode->last_expr_buffer == NULL) {
 257 |      0 |                 goto error;
 258 |      0 |             }
 259 |  40.9k |             tok_mode->last_expr_size = size;
 260 |  40.9k |             tok_mode->last_expr_end = -1;
 261 |  40.9k |             strncpy(tok_mode->last_expr_buffer, tok->cur, size);
 262 |  40.9k |             break;
 263 |  18.1k |         case '}':
 264 |  19.8k |         case '!':
 265 |  19.8k |             tok_mode->last_expr_end = strlen(tok->start);
 266 |  19.8k |             break;
 267 |  2.75k |         case ':':
 268 |  2.75k |             if (tok_mode->last_expr_end == -1) {
 269 |  2.48k |                 tok_mode->last_expr_end = strlen(tok->start);
 270 |  2.48k |             }
 271 |  2.75k |             break;
 272 |      0 |         default:
 273 |      0 |             Py_UNREACHABLE();
 274 |  63.5k |     }
 275 |  63.5k |     return 1;
 276 |      0 | error:
 277 |      0 |     tok->done = E_NOMEM;
 278 |      0 |     return 0;
 279 |  63.5k | }
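
Note: this switch drives the lifecycle of the expression-capture buffer. On
'{' the remainder of the current line (tok->cur) is snapshotted into
last_expr_buffer and last_expr_end is reset to -1; on '}' or '!' the end
offset of the expression is recorded; on ':' it is recorded only if not
already set, so a ':' inside a format spec does not clobber the real end;
and case 0 appends a continuation line to a still-open expression. Only the
allocation paths can fail, which is why every other path returns 1.
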
 280 |        |
 281 |        | static int
 282 |        | lookahead(struct tok_state *tok, const char *test)
 283 |  7.68k | {
 284 |  7.68k |     const char *s = test;
 285 |  7.68k |     int res = 0;
 286 |  20.4k |     while (1) {
 287 |  20.4k |         int c = tok_nextc(tok);
 288 |  20.4k |         if (*s == 0) {
 289 |  7.58k |             res = !is_potential_identifier_char(c);
 290 |  7.58k |         }
 291 |  12.8k |         else if (c == *s) {
 292 |  12.7k |             s++;
 293 |  12.7k |             continue;
 294 |  12.7k |         }
 295 |        |
 296 |  7.68k |         tok_backup(tok, c);
 297 |  20.4k |         while (s != test) {
 298 |  12.7k |             tok_backup(tok, *--s);
 299 |  12.7k |         }
 300 |  7.68k |         return res;
 301 |  20.4k |     }
 302 |  7.68k | }
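
Note: lookahead() is a position-preserving predicate. It consumes input only
while it matches `test`, then (once `test` is exhausted) checks that the next
character cannot continue an identifier, and finally pushes everything back
with tok_backup(), one character at a time in reverse. So after
verify_end_of_number() below has seen the 'a' in `1and x`, lookahead(tok,
"nd") answers "is this the keyword `and`?" without moving the tokenizer.
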
 303 |        |
 304 |        | static int
 305 |   102k | verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
 306 |   102k |     if (tok->tok_extra_tokens) {
 307 |        |         // When we are parsing extra tokens, we don't want to emit warnings
 308 |        |         // about invalid literals, because we want to be a bit more liberal.
 309 |      0 |         return 1;
 310 |      0 |     }
 311 |        |     /* Emit a deprecation warning only if the numeric literal is immediately
 312 |        |      * followed by one of the keywords that can occur after a numeric literal
 313 |        |      * in valid code: "and", "else", "for", "if", "in", "is" and "or".
 314 |        |      * This allows existing valid code to be deprecated gradually, without
 315 |        |      * emitting a warning before the error in most cases of an invalid numeric
 316 |        |      * literal (which would be confusing and break existing tests).
 317 |        |      * Raise a syntax error with a slightly better message than plain
 318 |        |      * "invalid syntax" if the numeric literal is immediately followed by
 319 |        |      * some other keyword or identifier.
 320 |        |      */
 321 |   102k |     int r = 0;
 322 |   102k |     if (c == 'a') {
 323 |    787 |         r = lookahead(tok, "nd");
 324 |    787 |     }
 325 |   101k |     else if (c == 'e') {
 326 |    428 |         r = lookahead(tok, "lse");
 327 |    428 |     }
 328 |   101k |     else if (c == 'f') {
 329 |  3.31k |         r = lookahead(tok, "or");
 330 |  3.31k |     }
 331 |  97.8k |     else if (c == 'i') {
 332 |  2.28k |         int c2 = tok_nextc(tok);
 333 |  2.28k |         if (c2 == 'f' || c2 == 'n' || c2 == 's') {
 334 |  2.27k |             r = 1;
 335 |  2.27k |         }
 336 |  2.28k |         tok_backup(tok, c2);
 337 |  2.28k |     }
 338 |  95.6k |     else if (c == 'o') {
 339 |  2.86k |         r = lookahead(tok, "r");
 340 |  2.86k |     }
 341 |  92.7k |     else if (c == 'n') {
 342 |    288 |         r = lookahead(tok, "ot");
 343 |    288 |     }
 344 |   102k |     if (r) {
 345 |  9.84k |         tok_backup(tok, c);
 346 |  9.84k |         if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
 347 |  9.84k |                 "invalid %s literal", kind))
 348 |      0 |         {
 349 |      0 |             return 0;
 350 |      0 |         }
 351 |  9.84k |         tok_nextc(tok);
 352 |  9.84k |     }
 353 |  92.5k |     else /* In future releases, only error will remain. */
 354 |  92.5k |     if (c < 128 && is_potential_identifier_char(c)) {
 355 |    206 |         tok_backup(tok, c);
 356 |    206 |         _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
 357 |    206 |         return 0;
 358 |    206 |     }
 359 |   102k |     return 1;
 360 |   102k | }
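
Note: the net effect is a two-tier policy. A literal followed by one of the
listed keywords, e.g. `1and x` or `1if cond else 2` (which older tokenizers
accepted as `1 and x` / `1 if ...`), currently emits a SyntaxWarning and is
then re-consumed; any other trailing identifier character, e.g. `1xyz`, is
rejected immediately with "invalid decimal literal" rather than a generic
"invalid syntax".
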
 361 |        |
 362 |        | /* Verify that the identifier follows PEP 3131. */
 363 |        | static int
 364 |        | verify_identifier(struct tok_state *tok)
 365 |  13.7k | {
 366 |  13.7k |     if (tok->tok_extra_tokens) {
 367 |      0 |         return 1;
 368 |      0 |     }
 369 |  13.7k |     PyObject *s;
 370 |  13.7k |     if (tok->decoding_erred)
 371 |      0 |         return 0;
 372 |  13.7k |     s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
 373 |  13.7k |     if (s == NULL) {
 374 |  1.02k |         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
 375 |  1.02k |             tok->done = E_DECODE;
 376 |  1.02k |         }
 377 |      0 |         else {
 378 |      0 |             tok->done = E_ERROR;
 379 |      0 |         }
 380 |  1.02k |         return 0;
 381 |  1.02k |     }
 382 |  12.6k |     Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
 383 |  12.6k |     assert(invalid >= 0);
 384 |  12.6k |     assert(PyUnicode_GET_LENGTH(s) > 0);
 385 |  12.6k |     if (invalid < PyUnicode_GET_LENGTH(s)) {
 386 |    691 |         Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
 387 |    691 |         if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
 388 |        |             /* Determine the offset in UTF-8 encoded input */
 389 |    476 |             Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
 390 |    476 |             if (s != NULL) {
 391 |    476 |                 Py_SETREF(s, PyUnicode_AsUTF8String(s));
 392 |    476 |             }
 393 |    476 |             if (s == NULL) {
 394 |      0 |                 tok->done = E_ERROR;
 395 |      0 |                 return 0;
 396 |      0 |             }
 397 |    476 |             tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
 398 |    476 |         }
 399 |    691 |         Py_DECREF(s);
 400 |    691 |         if (Py_UNICODE_ISPRINTABLE(ch)) {
 401 |    387 |             _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
 402 |    387 |         }
 403 |    304 |         else {
 404 |    304 |             _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
 405 |    304 |         }
 406 |    691 |         return 0;
 407 |    691 |     }
 408 |  12.0k |     Py_DECREF(s);
 409 |  12.0k |     return 1;
 410 |  12.6k | }
 411 |        |
 412 |        | static int
 413 |        | tok_decimal_tail(struct tok_state *tok)
 414 |  82.9k | {
 415 |  82.9k |     int c;
 416 |        |
 417 |  83.4k |     while (1) {
 418 |   234k |         do {
 419 |   234k |             c = tok_nextc(tok);
 420 |   234k |         } while (Py_ISDIGIT(c));
 421 |  83.4k |         if (c != '_') {
 422 |  82.9k |             break;
 423 |  82.9k |         }
 424 |    536 |         c = tok_nextc(tok);
 425 |    536 |         if (!Py_ISDIGIT(c)) {
 426 |     17 |             tok_backup(tok, c);
 427 |     17 |             _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
 428 |     17 |             return 0;
 429 |     17 |         }
 430 |    536 |     }
 431 |  82.9k |     return c;
 432 |  82.9k | }
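
Note: tok_decimal_tail() encodes the PEP 515 rule that an underscore may
appear only between digits. A hypothetical standalone re-statement of the
same grammar over a C string (illustrative only, not CPython API):

    #include <stdio.h>
    #include <ctype.h>

    /* Runs of digits, with a single '_' allowed only between digits. */
    static int valid_decimal_tail(const char *s) {
        for (;;) {
            while (isdigit((unsigned char)*s)) {
                s++;
            }
            if (*s != '_') {
                return 1;   /* tail ends at a non-digit, non-underscore */
            }
            s++;            /* consume '_' */
            if (!isdigit((unsigned char)*s)) {
                return 0;   /* '_' must be followed by a digit */
            }
        }
    }

    int main(void) {
        printf("%d\n", valid_decimal_tail("1_000"));   /* 1 */
        printf("%d\n", valid_decimal_tail("1__000"));  /* 0 */
        printf("%d\n", valid_decimal_tail("1_"));      /* 0 */
        return 0;
    }
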
 433 |        |
 434 |        | static inline int
 435 |  1.09k | tok_continuation_line(struct tok_state *tok) {
 436 |  1.09k |     int c = tok_nextc(tok);
 437 |  1.09k |     if (c == '\r') {
 438 |     71 |         c = tok_nextc(tok);
 439 |     71 |     }
 440 |  1.09k |     if (c != '\n') {
 441 |     59 |         tok->done = E_LINECONT;
 442 |     59 |         return -1;
 443 |     59 |     }
 444 |  1.03k |     c = tok_nextc(tok);
 445 |  1.03k |     if (c == EOF) {
 446 |     51 |         tok->done = E_EOF;
 447 |     51 |         tok->cur = tok->inp;
 448 |     51 |         return -1;
 449 |    986 |     } else {
 450 |    986 |         tok_backup(tok, c);
 451 |    986 |     }
 452 |    986 |     return c;
 453 |  1.03k | }
 454 |        |
 455 |        | static int
 456 |        | maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
 457 |        |                                              int saw_b, int saw_r, int saw_u,
 458 |  22.0k |                                              int saw_f, int saw_t) {
 459 |        |     // Supported: rb, rf, rt (in any order)
 460 |        |     // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
 461 |        |
 462 |  22.0k | #define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
 463 |  22.0k |     do {                                                                  \
 464 |      8 |         (void)_PyTokenizer_syntaxerror_known_range(                       \
 465 |      8 |             tok, (int)(tok->start + 1 - tok->line_start),                 \
 466 |      8 |             (int)(tok->cur - tok->line_start),                            \
 467 |      8 |             "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
 468 |      8 |         return -1;                                                        \
 469 |      8 |     } while (0)
 470 |        |
 471 |  22.0k |     if (saw_u && saw_b) {
 472 |      1 |         RETURN_SYNTAX_ERROR("u", "b");
 473 |      1 |     }
 474 |  22.0k |     if (saw_u && saw_r) {
 475 |      1 |         RETURN_SYNTAX_ERROR("u", "r");
 476 |      1 |     }
 477 |  22.0k |     if (saw_u && saw_f) {
 478 |      1 |         RETURN_SYNTAX_ERROR("u", "f");
 479 |      1 |     }
 480 |  22.0k |     if (saw_u && saw_t) {
 481 |      1 |         RETURN_SYNTAX_ERROR("u", "t");
 482 |      1 |     }
 483 |        |
 484 |  22.0k |     if (saw_b && saw_f) {
 485 |      1 |         RETURN_SYNTAX_ERROR("b", "f");
 486 |      1 |     }
 487 |  22.0k |     if (saw_b && saw_t) {
 488 |      1 |         RETURN_SYNTAX_ERROR("b", "t");
 489 |      1 |     }
 490 |        |
 491 |  22.0k |     if (saw_f && saw_t) {
 492 |      2 |         RETURN_SYNTAX_ERROR("f", "t");
 493 |      2 |     }
 494 |        |
 495 |  22.0k | #undef RETURN_SYNTAX_ERROR
 496 |        |
 497 |  22.0k |     return 0;
 498 |  22.0k | }
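
Note: combined with the prefix-scanning loop in tok_get_normal_mode() below,
this enforces the matrix in the comment at the top of the function: `r`
combines freely with `b`, `f`, or `t` in any order and any case (rb, Rb, bR,
fr, tR, ...), while `u` combines with nothing, and `b`/`f`/`t` are mutually
exclusive with one another.
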
 499 |        |
 500 |        | static int
 501 |        | tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
 502 |  1.74M | {
 503 |  1.74M |     int c;
 504 |  1.74M |     int blankline, nonascii;
 505 |        |
 506 |  1.74M |     const char *p_start = NULL;
 507 |  1.74M |     const char *p_end = NULL;
 508 |  1.83M |   nextline:
 509 |  1.83M |     tok->start = NULL;
 510 |  1.83M |     tok->starting_col_offset = -1;
 511 |  1.83M |     blankline = 0;
 512 |        |
 513 |        |
 514 |        |     /* Get indentation level */
 515 |  1.83M |     if (tok->atbol) {
 516 |   229k |         int col = 0;
 517 |   229k |         int altcol = 0;
 518 |   229k |         tok->atbol = 0;
 519 |   229k |         int cont_line_col = 0;
 520 |   913k |         for (;;) {
 521 |   913k |             c = tok_nextc(tok);
 522 |   913k |             if (c == ' ') {
 523 |   680k |                 col++, altcol++;
 524 |   680k |             }
 525 |   232k |             else if (c == '\t') {
 526 |    810 |                 col = (col / tok->tabsize + 1) * tok->tabsize;
 527 |    810 |                 altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
 528 |    810 |             }
 529 |   232k |             else if (c == '\014')  {/* Control-L (formfeed) */
 530 |  1.96k |                 col = altcol = 0; /* For Emacs users */
 531 |  1.96k |             }
 532 |   230k |             else if (c == '\\') {
 533 |        |                 // Indentation cannot be split over multiple physical lines
 534 |        |                 // using backslashes. This means that if we found a backslash
 535 |        |                 // preceded by whitespace, **the first one we find** determines
 536 |        |                 // the level of indentation of whatever comes next.
 537 |    658 |                 cont_line_col = cont_line_col ? cont_line_col : col;
 538 |    658 |                 if ((c = tok_continuation_line(tok)) == -1) {
 539 |     41 |                     return MAKE_TOKEN(ERRORTOKEN);
 540 |     41 |                 }
 541 |    658 |             }
 542 |   229k |             else {
 543 |   229k |                 break;
 544 |   229k |             }
 545 |   913k |         }
 546 |   229k |         tok_backup(tok, c);
 547 |   229k |         if (c == '#' || c == '\n' || c == '\r') {
 548 |        |             /* Lines with only whitespace and/or comments
 549 |        |                shouldn't affect the indentation and are
 550 |        |                not passed to the parser as NEWLINE tokens,
 551 |        |                except *totally* empty lines in interactive
 552 |        |                mode, which signal the end of a command group. */
 553 |  45.5k |             if (col == 0 && c == '\n' && tok->prompt != NULL) {
 554 |      0 |                 blankline = 0; /* Let it through */
 555 |      0 |             }
 556 |  45.5k |             else if (tok->prompt != NULL && tok->lineno == 1) {
 557 |        |                 /* In interactive mode, if the first line contains
 558 |        |                    only spaces and/or a comment, let it through. */
 559 |      0 |                 blankline = 0;
 560 |      0 |                 col = altcol = 0;
 561 |      0 |             }
 562 |  45.5k |             else {
 563 |  45.5k |                 blankline = 1; /* Ignore completely */
 564 |  45.5k |             }
 565 |        |             /* We can't jump back right here since we still
 566 |        |                may need to skip to the end of a comment */
 567 |  45.5k |         }
 568 |   229k |         if (!blankline && tok->level == 0) {
 569 |   143k |             col = cont_line_col ? cont_line_col : col;
 570 |   143k |             altcol = cont_line_col ? cont_line_col : altcol;
 571 |   143k |             if (col == tok->indstack[tok->indent]) {
 572 |        |                 /* No change */
 573 |   104k |                 if (altcol != tok->altindstack[tok->indent]) {
 574 |      1 |                     return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
 575 |      1 |                 }
 576 |   104k |             }
 577 |  38.2k |             else if (col > tok->indstack[tok->indent]) {
 578 |        |                 /* Indent -- always one */
 579 |  21.4k |                 if (tok->indent+1 >= MAXINDENT) {
 580 |      0 |                     tok->done = E_TOODEEP;
 581 |      0 |                     tok->cur = tok->inp;
 582 |      0 |                     return MAKE_TOKEN(ERRORTOKEN);
 583 |      0 |                 }
 584 |  21.4k |                 if (altcol <= tok->altindstack[tok->indent]) {
 585 |      3 |                     return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
 586 |      3 |                 }
 587 |  21.4k |                 tok->pendin++;
 588 |  21.4k |                 tok->indstack[++tok->indent] = col;
 589 |  21.4k |                 tok->altindstack[tok->indent] = altcol;
 590 |  21.4k |             }
 591 |  16.8k |             else /* col < tok->indstack[tok->indent] */ {
 592 |        |                 /* Dedent -- any number, must be consistent */
 593 |  37.4k |                 while (tok->indent > 0 &&
 594 |  37.4k |                     col < tok->indstack[tok->indent]) {
 595 |  20.6k |                     tok->pendin--;
 596 |  20.6k |                     tok->indent--;
 597 |  20.6k |                 }
 598 |  16.8k |                 if (col != tok->indstack[tok->indent]) {
 599 |      9 |                     tok->done = E_DEDENT;
 600 |      9 |                     tok->cur = tok->inp;
 601 |      9 |                     return MAKE_TOKEN(ERRORTOKEN);
 602 |      9 |                 }
 603 |  16.8k |                 if (altcol != tok->altindstack[tok->indent]) {
 604 |      1 |                     return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
 605 |      1 |                 }
 606 |  16.8k |             }
 607 |   143k |         }
 608 |   229k |     }
 609 |        |
 610 |  1.83M |     tok->start = tok->cur;
 611 |  1.83M |     tok->starting_col_offset = tok->col_offset;
 612 |        |
 613 |        |     /* Return pending indents/dedents */
 614 |  1.83M |     if (tok->pendin != 0) {
 615 |  42.0k |         if (tok->pendin < 0) {
 616 |  20.5k |             if (tok->tok_extra_tokens) {
 617 |      0 |                 p_start = tok->cur;
 618 |      0 |                 p_end = tok->cur;
 619 |      0 |             }
 620 |  20.5k |             tok->pendin++;
 621 |  20.5k |             return MAKE_TOKEN(DEDENT);
 622 |  20.5k |         }
 623 |  21.4k |         else {
 624 |  21.4k |             if (tok->tok_extra_tokens) {
 625 |      0 |                 p_start = tok->buf;
 626 |      0 |                 p_end = tok->cur;
 627 |      0 |             }
 628 |  21.4k |             tok->pendin--;
 629 |  21.4k |             return MAKE_TOKEN(INDENT);
 630 |  21.4k |         }
 631 |  42.0k |     }
 632 |        |
 633 |        |     /* Peek ahead at the next character */
 634 |  1.78M |     c = tok_nextc(tok);
 635 |  1.78M |     tok_backup(tok, c);
 636 |        |
 637 |  1.78M |  again:
 638 |  1.78M |     tok->start = NULL;
 639 |        |     /* Skip spaces */
 640 |  2.13M |     do {
 641 |  2.13M |         c = tok_nextc(tok);
 642 |  2.13M |     } while (c == ' ' || c == '\t' || c == '\014');
 643 |        |
 644 |        |     /* Set start of current token */
 645 |  1.78M |     tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
 646 |  1.78M |     tok->starting_col_offset = tok->col_offset - 1;
 647 |        |
 648 |        |     /* Skip comment, unless it's a type comment */
 649 |  1.78M |     if (c == '#') {
 650 |        |
 651 |  42.3k |         const char* p = NULL;
 652 |  42.3k |         const char *prefix, *type_start;
 653 |  42.3k |         int current_starting_col_offset;
 654 |        |
 655 |  1.31M |         while (c != EOF && c != '\n' && c != '\r') {
 656 |  1.26M |             c = tok_nextc(tok);
 657 |  1.26M |         }
 658 |        |
 659 |  42.3k |         if (tok->tok_extra_tokens) {
 660 |      0 |             p = tok->start;
 661 |      0 |         }
 662 |        |
 663 |  42.3k |         if (tok->type_comments) {
 664 |      0 |             p = tok->start;
 665 |      0 |             current_starting_col_offset = tok->starting_col_offset;
 666 |      0 |             prefix = type_comment_prefix;
 667 |      0 |             while (*prefix && p < tok->cur) {
 668 |      0 |                 if (*prefix == ' ') {
 669 |      0 |                     while (*p == ' ' || *p == '\t') {
 670 |      0 |                         p++;
 671 |      0 |                         current_starting_col_offset++;
 672 |      0 |                     }
 673 |      0 |                 } else if (*prefix == *p) {
 674 |      0 |                     p++;
 675 |      0 |                     current_starting_col_offset++;
 676 |      0 |                 } else {
 677 |      0 |                     break;
 678 |      0 |                 }
 679 |        |
 680 |      0 |                 prefix++;
 681 |      0 |             }
 682 |        |
 683 |        |             /* This is a type comment if we matched all of type_comment_prefix. */
 684 |      0 |             if (!*prefix) {
 685 |      0 |                 int is_type_ignore = 1;
 686 |        |                 // +6 in order to skip the word 'ignore'
 687 |      0 |                 const char *ignore_end = p + 6;
 688 |      0 |                 const int ignore_end_col_offset = current_starting_col_offset + 6;
 689 |      0 |                 tok_backup(tok, c);  /* don't eat the newline or EOF */
 690 |        |
 691 |      0 |                 type_start = p;
 692 |        |
 693 |        |                 /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
 694 |        |                  * or anything ASCII and non-alphanumeric. */
 695 |      0 |                 is_type_ignore = (
 696 |      0 |                     tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
 697 |      0 |                     && !(tok->cur > ignore_end
 698 |      0 |                          && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
 699 |        |
 700 |      0 |                 if (is_type_ignore) {
 701 |      0 |                     p_start = ignore_end;
 702 |      0 |                     p_end = tok->cur;
 703 |        |
 704 |        |                     /* If this type ignore is the only thing on the line, consume the newline also. */
 705 |      0 |                     if (blankline) {
 706 |      0 |                         tok_nextc(tok);
 707 |      0 |                         tok->atbol = 1;
 708 |      0 |                     }
 709 |      0 |                     return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
 710 |      0 |                 } else {
 711 |      0 |                     p_start = type_start;
 712 |      0 |                     p_end = tok->cur;
 713 |      0 |                     return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
 714 |      0 |                 }
 715 |      0 |             }
 716 |      0 |         }
 717 |  42.3k |         if (tok->tok_extra_tokens) {
 718 |      0 |             tok_backup(tok, c);  /* don't eat the newline or EOF */
 719 |      0 |             p_start = p;
 720 |      0 |             p_end = tok->cur;
 721 |      0 |             tok->comment_newline = blankline;
 722 |      0 |             return MAKE_TOKEN(COMMENT);
 723 |      0 |         }
 724 |  42.3k |     }
 725 |        |
 726 |  1.78M |     if (tok->done == E_INTERACT_STOP) {
 727 |      0 |         return MAKE_TOKEN(ENDMARKER);
 728 |      0 |     }
 729 |        |
 730 |        |     /* Check for EOF and errors now */
 731 |  1.78M |     if (c == EOF) {
 732 |  17.0k |         if (tok->level) {
 733 |  4.08k |             return MAKE_TOKEN(ERRORTOKEN);
 734 |  4.08k |         }
 735 |  12.9k |         return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
 736 |  17.0k |     }
 737 |        |
 738 |        |     /* Identifier (most frequent token!) */
 739 |  1.77M |     nonascii = 0;
 740 |  1.77M |     if (is_potential_identifier_start(c)) {
 741 |        |         /* Process the various legal combinations of b"", r"", u"", and f"". */
 742 |   529k |         int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
 743 |   646k |         while (1) {
 744 |   646k |             if (!saw_b && (c == 'b' || c == 'B')) {
 745 |  20.6k |                 saw_b = 1;
 746 |  20.6k |             }
 747 |        |             /* Since this is a backwards compatibility support literal we don't
 748 |        |                want to support it in arbitrary order like byte literals. */
 749 |   626k |             else if (!saw_u && (c == 'u' || c == 'U')) {
 750 |  6.79k |                 saw_u = 1;
 751 |  6.79k |             }
 752 |        |             /* ur"" and ru"" are not supported */
 753 |   619k |             else if (!saw_r && (c == 'r' || c == 'R')) {
 754 |  37.7k |                 saw_r = 1;
 755 |  37.7k |             }
 756 |   581k |             else if (!saw_f && (c == 'f' || c == 'F')) {
 757 |  43.7k |                 saw_f = 1;
 758 |  43.7k |             }
 759 |   537k |             else if (!saw_t && (c == 't' || c == 'T')) {
 760 |  30.7k |                 saw_t = 1;
 761 |  30.7k |             }
 762 |   507k |             else {
 763 |   507k |                 break;
 764 |   507k |             }
 765 |   139k |             c = tok_nextc(tok);
 766 |   139k |             if (c == '"' || c == '\'') {
 767 |        |                 // Raise error on incompatible string prefixes:
 768 |  22.0k |                 int status = maybe_raise_syntax_error_for_string_prefixes(
 769 |  22.0k |                     tok, saw_b, saw_r, saw_u, saw_f, saw_t);
 770 |  22.0k |                 if (status < 0) {
 771 |      8 |                     return MAKE_TOKEN(ERRORTOKEN);
 772 |      8 |                 }
 773 |        |
 774 |        |                 // Handle valid f or t string creation:
 775 |  22.0k |                 if (saw_f || saw_t) {
 776 |  16.7k |                     goto f_string_quote;
 777 |  16.7k |                 }
 778 |  5.23k |                 goto letter_quote;
 779 |  22.0k |             }
 780 |   139k |         }
 781 |  2.30M |         while (is_potential_identifier_char(c)) {
 782 |  1.79M |             if (c >= 128) {
 783 |   146k |                 nonascii = 1;
 784 |   146k |             }
 785 |  1.79M |             c = tok_nextc(tok);
 786 |  1.79M |         }
 787 |   507k |         tok_backup(tok, c);
 788 |   507k |         if (nonascii && !verify_identifier(tok)) {
 789 |  1.71k |             return MAKE_TOKEN(ERRORTOKEN);
 790 |  1.71k |         }
 791 |        |
 792 |   505k |         p_start = tok->start;
 793 |   505k |         p_end = tok->cur;
 794 |        |
 795 |   505k |         return MAKE_TOKEN(NAME);
 796 |   507k |     }
 797 |        |
 798 |  1.24M |     if (c == '\r') {
 799 |    437 |         c = tok_nextc(tok);
 800 |    437 |     }
 801 |        |
 802 |        |     /* Newline */
 803 |  1.24M |     if (c == '\n') {
 804 |   208k |         tok->atbol = 1;
 805 |   208k |         if (blankline || tok->level > 0) {
 806 |  86.3k |             if (tok->tok_extra_tokens) {
 807 |      0 |                 if (tok->comment_newline) {
 808 |      0 |                     tok->comment_newline = 0;
 809 |      0 |                 }
 810 |      0 |                 p_start = tok->start;
 811 |      0 |                 p_end = tok->cur;
 812 |      0 |                 return MAKE_TOKEN(NL);
 813 |      0 |             }
 814 |  86.3k |             goto nextline;
 815 |  86.3k |         }
 816 |   122k |         if (tok->comment_newline && tok->tok_extra_tokens) {
 817 |      0 |             tok->comment_newline = 0;
 818 |      0 |             p_start = tok->start;
 819 |      0 |             p_end = tok->cur;
 820 |      0 |             return MAKE_TOKEN(NL);
 821 |      0 |         }
 822 |   122k |         p_start = tok->start;
 823 |   122k |         p_end = tok->cur - 1; /* Leave '\n' out of the string */
 824 |   122k |         tok->cont_line = 0;
 825 |   122k |         return MAKE_TOKEN(NEWLINE);
 826 |   122k |     }
 827 |        |
 828 |        |     /* Period or number starting with period? */
 829 |  1.03M |     if (c == '.') {
 830 |  34.3k |         c = tok_nextc(tok);
 831 |  34.3k |         if (Py_ISDIGIT(c)) {
 832 |  2.85k |             goto fraction;
 833 |  31.4k |         } else if (c == '.') {
 834 |  3.27k |             c = tok_nextc(tok);
 835 |  3.27k |             if (c == '.') {
 836 |  2.50k |                 p_start = tok->start;
 837 |  2.50k |                 p_end = tok->cur;
 838 |  2.50k |                 return MAKE_TOKEN(ELLIPSIS);
 839 |  2.50k |             }
 840 |    772 |             else {
 841 |    772 |                 tok_backup(tok, c);
 842 |    772 |             }
 843 |    772 |             tok_backup(tok, '.');
 844 |    772 |         }
 845 |  28.2k |         else {
 846 |  28.2k |             tok_backup(tok, c);
 847 |  28.2k |         }
 848 |  28.9k |         p_start = tok->start;
 849 |  28.9k |         p_end = tok->cur;
 850 |  28.9k |         return MAKE_TOKEN(DOT);
 851 |  34.3k |     }
 852 |        |
 853 |        |     /* Number */
 854 |  1.00M |     if (Py_ISDIGIT(c)) {
 855 |  99.6k |         if (c == '0') {
 856 |        |             /* Hex, octal or binary -- maybe. */
 857 |  33.9k |             c = tok_nextc(tok);
 858 |  33.9k |             if (c == 'x' || c == 'X') {
 859 |        |                 /* Hex */
 860 |  15.9k |                 c = tok_nextc(tok);
 861 |  16.1k |                 do {
 862 |  16.1k |                     if (c == '_') {
 863 |    214 |                         c = tok_nextc(tok);
 864 |    214 |                     }
 865 |  16.1k |                     if (!Py_ISXDIGIT(c)) {
 866 |     22 |                         tok_backup(tok, c);
 867 |     22 |                         return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
 868 |     22 |                     }
 869 |  78.9k |                     do {
 870 |  78.9k |                         c = tok_nextc(tok);
 871 |  78.9k |                     } while (Py_ISXDIGIT(c));
 872 |  16.1k |                 } while (c == '_');
 873 |  15.9k |                 if (!verify_end_of_number(tok, c, "hexadecimal")) {
 874 |      1 |                     return MAKE_TOKEN(ERRORTOKEN);
 875 |      1 |                 }
 876 |  15.9k |             }
 877 |  17.9k |             else if (c == 'o' || c == 'O') {
 878 |        |                 /* Octal */
 879 |    637 |                 c = tok_nextc(tok);
 880 |  1.16k |                 do {
 881 |  1.16k |                     if (c == '_') {
 882 |    532 |                         c = tok_nextc(tok);
 883 |    532 |                     }
 884 |  1.16k |                     if (c < '0' || c >= '8') {
 885 |     23 |                         if (Py_ISDIGIT(c)) {
 886 |      1 |                             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
 887 |      1 |                                     "invalid digit '%c' in octal literal", c));
 888 |      1 |                         }
 889 |     22 |                         else {
 890 |     22 |                             tok_backup(tok, c);
 891 |     22 |                             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
 892 |     22 |                         }
 893 |     23 |                     }
 894 |  3.27k |                     do {
 895 |  3.27k |                         c = tok_nextc(tok);
 896 |  3.27k |                     } while ('0' <= c && c < '8');
 897 |  1.14k |                 } while (c == '_');
 898 |    614 |                 if (Py_ISDIGIT(c)) {
 899 |      1 |                     return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
 900 |      1 |                             "invalid digit '%c' in octal literal", c));
 901 |      1 |                 }
 902 |    613 |                 if (!verify_end_of_number(tok, c, "octal")) {
 903 |      2 |                     return MAKE_TOKEN(ERRORTOKEN);
 904 |      2 |                 }
 905 |    613 |             }
 906 |  17.2k |             else if (c == 'b' || c == 'B') {
 907 |        |                 /* Binary */
 908 |    567 |                 c = tok_nextc(tok);
 909 |    929 |                 do {
 910 |    929 |                     if (c == '_') {
 911 |    371 |                         c = tok_nextc(tok);
 912 |    371 |                     }
 913 |    929 |                     if (c != '0' && c != '1') {
 914 |     21 |                         if (Py_ISDIGIT(c)) {
 915 |      1 |                             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
 916 |      1 |                         }
 917 |     20 |                         else {
 918 |     20 |                             tok_backup(tok, c);
 919 |     20 |                             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
 920 |     20 |                         }
 921 |     21 |                     }
 922 |  4.15k |                     do {
 923 |  4.15k |                         c = tok_nextc(tok);
 924 |  4.15k |                     } while (c == '0' || c == '1');
 925 |    908 |                 } while (c == '_');
 926 |    546 |                 if (Py_ISDIGIT(c)) {
 927 |      2 |                     return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
 928 |      2 |                 }
 929 |    544 |                 if (!verify_end_of_number(tok, c, "binary")) {
 930 |      2 |                     return MAKE_TOKEN(ERRORTOKEN);
 931 |      2 |                 }
 932 |    544 |             }
 933 |  16.7k |             else {
 934 |  16.7k |                 int nonzero = 0;
 935 |        |                 /* maybe old-style octal; c is first char of it */
 936 |        |                 /* in any case, allow '0' as a literal */
 937 |  18.1k |                 while (1) {
 938 |  18.1k |                     if (c == '_') {
 939 |     91 |                         c = tok_nextc(tok);
 940 |     91 |                         if (!Py_ISDIGIT(c)) {
 941 |      3 |                             tok_backup(tok, c);
 942 |      3 |                             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
 943 |      3 |                         }
 944 |     91 |                     }
 945 |  18.1k |                     if (c != '0') {
 946 |  16.7k |                         break;
 947 |  16.7k |                     }
 948 |  1.39k |                     c = tok_nextc(tok);
 949 |  1.39k |                 }
 950 |  16.7k |                 char* zeros_end = tok->cur;
 951 |  16.7k |                 if (Py_ISDIGIT(c)) {
 952 |    624 |                     nonzero = 1;
 953 |    624 |                     c = tok_decimal_tail(tok);
 954 |    624 |                     if (c == 0) {
 955 |      1 |                         return MAKE_TOKEN(ERRORTOKEN);
 956 |      1 |                     }
 957 |    624 |                 }
 958 |  16.7k |                 if (c == '.') {
 959 |    893 |                     c = tok_nextc(tok);
 960 |    893 |                     goto fraction;
 961 |    893 |                 }
 962 |  15.8k |                 else if (c == 'e' || c == 'E') {
 963 |    838 |                     goto exponent;
 964 |    838 |                 }
 965 |  14.9k |                 else if (c == 'j' || c == 'J') {
 966 |    794 |                     goto imaginary;
 967 |    794 |                 }
 968 |  14.2k |                 else if (nonzero && !tok->tok_extra_tokens) {
 969 |        |                     /* Old-style octal: now disallowed. */
 970 |     22 |                     tok_backup(tok, c);
 971 |     22 |                     return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
 972 |     22 |                             tok, (int)(tok->start + 1 - tok->line_start),
 973 |     22 |                             (int)(zeros_end - tok->line_start),
 974 |     22 |                             "leading zeros in decimal integer "
 975 |     22 |                             "literals are not permitted; "
 976 |     22 |                             "use an 0o prefix for octal integers"));
 977 |     22 |                 }
 978 |  14.1k |                 if (!verify_end_of_number(tok, c, "decimal")) {
 979 |     28 |                     return MAKE_TOKEN(ERRORTOKEN);
 980 |     28 |                 }
 981 |  14.1k |             }
 982 |  33.9k |         }
 983 |  65.7k |         else {
 984 |        |             /* Decimal */
 985 |  65.7k |             c = tok_decimal_tail(tok);
 986 |  65.7k |             if (c == 0) {
 987 |     13 |                 return MAKE_TOKEN(ERRORTOKEN);
 988 |     13 |             }
 989 |  65.7k |             {
 990 |        |                 /* Accept floating-point numbers. */
 991 |  65.7k |                 if (c == '.') {
 992 |  4.01k |                     c = tok_nextc(tok);
 993 |  7.76k |         fraction:
 994 |        |                     /* Fraction */
 995 |  7.76k |                     if (Py_ISDIGIT(c)) {
 996 |  5.79k |                         c = tok_decimal_tail(tok);
 997 |  5.79k |                         if (c == 0) {
 998 |      2 |                             return MAKE_TOKEN(ERRORTOKEN);
 999 |      2 |                         }
1000 |  5.79k |                     }
1001 |  7.76k |                 }
1002 |  69.5k |                 if (c == 'e' || c == 'E') {
1003 |  10.3k |                     int e;
1004 |  11.2k |                   exponent:
1005 |  11.2k |                     e = c;
1006 |        |                     /* Exponent part */
1007 |  11.2k |                     c = tok_nextc(tok);
1008 |  11.2k |                     if (c == '+' || c == '-') {
1009 |  3.92k |                         c = tok_nextc(tok);
1010 |  3.92k |                         if (!Py_ISDIGIT(c)) {
1011 |     12 |                             tok_backup(tok, c);
1012 |     12 |                             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1013 |     12 |                         }
1014 |  7.28k |                     } else if (!Py_ISDIGIT(c)) {
1015 |    434 |                         tok_backup(tok, c);
1016 |    434 |                         if (!verify_end_of_number(tok, e, "decimal")) {
1017 |     43 |                             return MAKE_TOKEN(ERRORTOKEN);
1018 |     43 |                         }
1019 |    391 |                         tok_backup(tok, e);
1020 |    391 |                         p_start = tok->start;
1021 |    391 |                         p_end = tok->cur;
1022 |    391 |                         return MAKE_TOKEN(NUMBER);
1023 |    434 |                     }
1024 |  10.7k |                     c = tok_decimal_tail(tok);
1025 |  10.7k |                     if (c == 0) {
1026 |      1 |                         return MAKE_TOKEN(ERRORTOKEN);
1027 |      1 |                     }
1028 |  10.7k |                 }
1029 |  69.9k |                 if (c == 'j' || c == 'J') {
1030 |        |                     /* Imaginary part */
1031 |  4.58k |         imaginary:
1032 |  4.58k |                     c = tok_nextc(tok);
1033 |  4.58k |                     if (!verify_end_of_number(tok, c, "imaginary")) {
1034 |      8 |                         return MAKE_TOKEN(ERRORTOKEN);
1035 |      8 |                     }
1036 |  4.58k |                 }
1037 |  66.1k |                 else if (!verify_end_of_number(tok, c, "decimal")) {
1038 |    122 |                     return MAKE_TOKEN(ERRORTOKEN);
1039 |    122 |                 }
1040 |  69.9k |             }
1041 |  69.9k |         }
1042 |   101k |         tok_backup(tok, c);
1043 |   101k |         p_start = tok->start;
1044 |   101k |         p_end = tok->cur;
1045 |   101k |         return MAKE_TOKEN(NUMBER);
1046 |  99.6k |     }
1047 |        |
1048 |   917k |   f_string_quote:
1049 |   917k |     if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1050 |   917k |         && (c == '\'' || c == '"'))) {
1051 |        |
1052 |  16.7k |         int quote = c;
1053 |  16.7k |         int quote_size = 1;             /* 1 or 3 */
1054 |        |
1055 |        |         /* Nodes of type STRING, especially multi line strings
1056 |        |            must be handled differently in order to get both
1057 |        |            the starting line number and the column offset right.
1058 |        |            (cf. issue 16806) */
1059 |  16.7k |         tok->first_lineno = tok->lineno;
1060 |  16.7k |         tok->multi_line_start = tok->line_start;
1061 |        |
1062 |        |         /* Find the quote size and start of string */
1063 |  16.7k |         int after_quote = tok_nextc(tok);
1064 |  16.7k |         if (after_quote == quote) {
1065 |  2.38k |             int after_after_quote = tok_nextc(tok);
1066 |  2.38k |             if (after_after_quote == quote) {
1067 |    813 |                 quote_size = 3;
1068 |    813 |             }
1069 |  1.57k |             else {
1070 |        |                 // TODO: Check this
1071 |  1.57k |                 tok_backup(tok, after_after_quote);
1072 |  1.57k |                 tok_backup(tok, after_quote);
1073 |  1.57k |             }
1074 |  2.38k |         }
1075 |  16.7k |         if (after_quote != quote) {
1076 |  14.4k |             tok_backup(tok, after_quote);
1077 |  14.4k |         }
1078 |        |
1079 |        |
1080 |  16.7k |         p_start = tok->start;
1081 |  16.7k |         p_end = tok->cur;
1082 |  16.7k |         if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1083 |      2 |             return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1084 |      2 |         }
1085 |  16.7k |         tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1086 |  16.7k |         the_current_tok->kind = TOK_FSTRING_MODE;
1087 |  16.7k |         the_current_tok->quote = quote;
1088 |  16.7k |         the_current_tok->quote_size = quote_size;
1089 |  16.7k |         the_current_tok->start = tok->start;
1090 |  16.7k |         the_current_tok->multi_line_start = tok->line_start;
1091 |  16.7k |         the_current_tok->first_line = tok->lineno;
1092 |  16.7k |         the_current_tok->start_offset = -1;
1093 |  16.7k |         the_current_tok->multi_line_start_offset = -1;
1094 |  16.7k |         the_current_tok->last_expr_buffer = NULL;
1095 |  16.7k |         the_current_tok->last_expr_size = 0;
1096 |  16.7k |         the_current_tok->last_expr_end = -1;
1097 |  16.7k |         the_current_tok->in_format_spec = 0;
1098 |  16.7k |         the_current_tok->in_debug = 0;
1099 |        |
1100 |  16.7k |         enum string_kind_t string_kind = FSTRING;
1101 |  16.7k |         switch (*tok->start) {
1102 |    664 |             case 'T':
1103 |  4.40k |             case 't':
1104 |  4.40k |                 the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1105 |  4.40k |                 string_kind = TSTRING;
1106 |  4.40k |                 break;
1107 |  1.58k |             case 'F':
1108 |  11.8k |             case 'f':
1109 |  11.8k |                 the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1110 |  11.8k |                 break;
1111 |     92 |             case 'R':
1112 |    497 |             case 'r':
1113 |    497 |                 the_current_tok->raw = 1;
1114 |    497 |                 if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1115 |    206 |                     string_kind = TSTRING;
1116 |    206 |                 }
1117 |    497 |                 break;
1118 |      0 |             default:
1119 |      0 |                 Py_UNREACHABLE();
1120 |  16.7k |         }
1121 |        |
1122 |  16.7k |         the_current_tok->string_kind = string_kind;
1123 |  16.7k |         the_current_tok->curly_bracket_depth = 0;
1124 |  16.7k |         the_current_tok->curly_bracket_expr_start_depth = -1;
1125 |  16.7k |         return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1126 |  16.7k |     }
1127 |        |
1128 |   905k |   letter_quote:
1129 |        |     /* String */
1130 |   905k |     if (c == '\'' || c == '"') {
1131 |  59.0k |         int quote = c;
1132 |  59.0k |         int quote_size = 1;             /* 1 or 3 */
1133 |  59.0k |         int end_quote_size = 0;
1134 |  59.0k |         int has_escaped_quote = 0;
1135 |        |
1136 |        |         /* Nodes of type STRING, especially multi line strings
1137 |        |            must be handled differently in order to get both
1138 |        |            the starting line number and the column offset right.
1139 |        |            (cf. issue 16806) */
1140 |  59.0k |         tok->first_lineno = tok->lineno;
1141 |  59.0k |         tok->multi_line_start = tok->line_start;
1142 |        |
1143 |        |         /* Find the quote size and start of string */
1144 |  59.0k |         c = tok_nextc(tok);
1145 |  59.0k |         if (c == quote) {
1146 |  10.4k |             c = tok_nextc(tok);
1147 |  10.4k |             if (c == quote) {
1148 |  2.59k |                 quote_size = 3;
1149 |  2.59k |             }
1150 |  7.81k |             else {
1151 |  7.81k |                 end_quote_size = 1;     /* empty string found */
1152 |  7.81k |             }
1153 |  10.4k |         }
1154 |  59.0k |         if (c != quote) {
1155 |  56.4k |             tok_backup(tok, c);
1156 |  56.4k |         }
1157 |        |
1158 |        |         /* Get rest of string */
1159 |  1.16M |         while (end_quote_size != quote_size) {
1160 |  1.10M |             c = tok_nextc(tok);
1161 |  1.10M |             if (tok->done == E_ERROR) {
1162 |      0 |                 return MAKE_TOKEN(ERRORTOKEN);
1163 |      0 |             }
1164 |  1.10M |             if (tok->done == E_DECODE) {
1165 |      0 |                 break;
1166 |      0 |             }
1167 |  1.10M |             if (c == EOF || (quote_size == 1 && c == '\n')) {
1168 |    411 |                 assert(tok->multi_line_start != NULL);
1169 |        |                 // shift the tok_state's location into
1170 |        |                 // the start of string, and report the error
1171 |        |                 // from the initial quote character
1172 |    411 |                 tok->cur = (char *)tok->start;
1173 |    411 |                 tok->cur++;
1174 |    411 |                 tok->line_start = tok->multi_line_start;
1175 |    411 |                 int start = tok->lineno;
1176 |    411 |                 tok->lineno = tok->first_lineno;
1177 |        |
1178 |    411 |                 if (INSIDE_FSTRING(tok)) {
1179 |        |                     /* When we are in an f-string, before raising the
1180 |        |                      * unterminated string literal error, check whether
1181 |        |                      * the initial quote matches the f-string's quotes;
1182 |        |                      * if it does, this must be a missing '}' token,
1183 |        |                      * so raise the proper error. */
1184 |     25 |                     tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1185 |     25 |                     if (the_current_tok->quote == quote &&
1186 |     25 |                         the_current_tok->quote_size == quote_size) {
1187 |     18 |                         return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1188 |     18 |                             "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1189 |     18 |                     }
1190 |     25 |                 }
1191 |        |
1192 |    393 |                 if (quote_size == 3) {
1193 |     19 |                     _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1194 |     19 |                                      " (detected at line %d)", start);
1195 |     19 |                     if (c != '\n') {
1196 |     19 |                         tok->done = E_EOFS;
1197 |     19 |                     }
1198 |     19 |                     return MAKE_TOKEN(ERRORTOKEN);
1199 |     19 |                 }
1200 |    374 |                 else {
1201 |    374 |                     if (has_escaped_quote) {
1202 |     10 |                         _PyTokenizer_syntaxerror(
1203 |     10 |                             tok,
1204 |     10 |                             "unterminated string literal (detected at line %d); "
1205 |     10 |                             "perhaps you escaped the end quote?",
1206 |     10 |                             start
1207 |     10 |                         );
1208 |    364 |                     } else {
1209 |    364 |                         _PyTokenizer_syntaxerror(
1210 |    364 |                             tok, "unterminated string literal (detected at line %d)", start
1211 |    364 |                         );
1212 |    364 |                     }
1213 |    374 |                     if (c != '\n') {
1214 |     16 |                         tok->done = E_EOLS;
1215 |     16 |                     }
1216 |    374 |                     return MAKE_TOKEN(ERRORTOKEN);
1217 |    374 |                 }
1218 |    393 |             }
1219 |  1.10M |             if (c == quote) {
1220 |  57.7k |                 end_quote_size += 1;
1221 |  57.7k |             }
1222 |  1.04M |             else {
1223 |  1.04M |                 end_quote_size = 0;
1224 |  1.04M |                 if (c == '\\') {
1225 |  29.2k |                     c = tok_nextc(tok);  /* skip escaped char */
1226 |  29.2k |                     if (c == quote) {  /* but record whether the escaped char was a quote */
1227 |  1.39k |                         has_escaped_quote = 1;
1228 |  1.39k |                     }
1229 |  29.2k |                     if (c == '\r') {
1230 |    205 |                         c = tok_nextc(tok);
1231 |    205 |                     }
1232 |  29.2k |                 }
1233 |  1.04M |             }
1234 |  1.10M |         }
1235 |        |
1236 |  58.5k |         p_start = tok->start;
1237 |  58.5k |         p_end = tok->cur;
1238 |  58.5k |         return MAKE_TOKEN(STRING);
1239 |  59.0k |     }
1240
1241
    /* Line continuation */
1242
846k
    if (c == '\\') {
1243
438
        if ((c = tok_continuation_line(tok)) == -1) {
1244
69
            return MAKE_TOKEN(ERRORTOKEN);
1245
69
        }
1246
369
        tok->cont_line = 1;
1247
369
        goto again; /* Read next line */
1248
438
    }
1249
1250
    /* Punctuation character */
1251
846k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1252
846k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1253
        /* This code block gets executed before the curly_bracket_depth is incremented
1254
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1255
         * to adjust it manually */
1256
53.0k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1257
53.0k
        int in_format_spec = current_tok->in_format_spec;
1258
53.0k
         int cursor_in_format_with_debug =
1259
53.0k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1260
53.0k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1261
53.0k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1262
0
            return MAKE_TOKEN(ENDMARKER);
1263
0
        }
1264
53.0k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1265
11
            return MAKE_TOKEN(ERRORTOKEN);
1266
11
        }
1267
1268
53.0k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269
3.79k
            current_tok->kind = TOK_FSTRING_MODE;
1270
3.79k
            current_tok->in_format_spec = 1;
1271
3.79k
            p_start = tok->start;
1272
3.79k
            p_end = tok->cur;
1273
3.79k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1274
3.79k
        }
1275
53.0k
    }
1276
1277
    /* Check for two-character token */
1278
842k
    {
1279
842k
        int c2 = tok_nextc(tok);
1280
842k
        int current_token = _PyToken_TwoChars(c, c2);
1281
842k
        if (current_token != OP) {
1282
22.9k
            int c3 = tok_nextc(tok);
1283
22.9k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1284
22.9k
            if (current_token3 != OP) {
1285
992
                current_token = current_token3;
1286
992
            }
1287
21.9k
            else {
1288
21.9k
                tok_backup(tok, c3);
1289
21.9k
            }
1290
22.9k
            p_start = tok->start;
1291
22.9k
            p_end = tok->cur;
1292
22.9k
            return MAKE_TOKEN(current_token);
1293
22.9k
        }
1294
819k
        tok_backup(tok, c2);
1295
819k
    }
1296
1297
    /* Keep track of parentheses nesting level */
1298
0
    switch (c) {
1299
87.4k
    case '(':
1300
123k
    case '[':
1301
170k
    case '{':
1302
170k
        if (tok->level >= MAXLEVEL) {
1303
4
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1304
4
        }
1305
170k
        tok->parenstack[tok->level] = c;
1306
170k
        tok->parenlinenostack[tok->level] = tok->lineno;
1307
170k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1308
170k
        tok->level++;
1309
170k
        if (INSIDE_FSTRING(tok)) {
1310
29.4k
            current_tok->curly_bracket_depth++;
1311
29.4k
        }
1312
170k
        break;
1313
59.4k
    case ')':
1314
70.9k
    case ']':
1315
96.4k
    case '}':
1316
96.4k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1317
53
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1318
53
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1319
53
        }
1320
96.4k
        if (!tok->tok_extra_tokens && !tok->level) {
1321
228
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1322
228
        }
1323
96.1k
        if (tok->level > 0) {
1324
96.1k
            tok->level--;
1325
96.1k
            int opening = tok->parenstack[tok->level];
1326
96.1k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1327
96.1k
                                            (opening == '[' && c == ']') ||
1328
96.1k
                                            (opening == '{' && c == '}'))) {
1329
                /* If the opening bracket belongs to an f-string's expression
1330
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1331
                nested expression, then instead of matching a different
1332
                syntactical construct with it; we'll throw an unmatched
1333
                parentheses error. */
1334
50
                if (INSIDE_FSTRING(tok) && opening == '{') {
1335
9
                    assert(current_tok->curly_bracket_depth >= 0);
1336
9
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1337
9
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1338
5
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1339
5
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1340
5
                    }
1341
9
                }
1342
45
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1343
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1344
2
                            "closing parenthesis '%c' does not match "
1345
2
                            "opening parenthesis '%c' on line %d",
1346
2
                            c, opening, tok->parenlinenostack[tok->level]));
1347
2
                }
1348
43
                else {
1349
43
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1350
43
                            "closing parenthesis '%c' does not match "
1351
43
                            "opening parenthesis '%c'",
1352
43
                            c, opening));
1353
43
                }
1354
45
            }
1355
96.1k
        }
1356
1357
96.1k
        if (INSIDE_FSTRING(tok)) {
1358
21.8k
            current_tok->curly_bracket_depth--;
1359
21.8k
            if (current_tok->curly_bracket_depth < 0) {
1360
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1361
1
                    TOK_GET_STRING_PREFIX(tok), c));
1362
1
            }
1363
21.8k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1364
20.0k
                current_tok->curly_bracket_expr_start_depth--;
1365
20.0k
                current_tok->kind = TOK_FSTRING_MODE;
1366
20.0k
                current_tok->in_format_spec = 0;
1367
20.0k
                current_tok->in_debug = 0;
1368
20.0k
            }
1369
21.8k
        }
1370
96.1k
        break;
1371
552k
    default:
1372
552k
        break;
1373
819k
    }
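
This switch is the source of CPython's bracket diagnostics: exceeding MAXLEVEL, closing a bracket at the top level, mismatching an opener, and a stray '}' inside an f-string each take a distinct early return above. A small hand-written sketch that triggers those paths from Python (messages as they appear in this file; exact wording may vary across versions):

    cases = [
        "(" * 300 + ")" * 300,  # too many nested parentheses (level >= MAXLEVEL)
        "}",                    # unmatched '}' with no open bracket
        "(]",                   # closing bracket does not match the opener
        "f'{)}'",               # f-string: unmatched ')'
    ]
    for src in cases:
        try:
            compile(src, "<demo>", "eval")
        except SyntaxError as exc:
            print(f"{src[:12]!r}: {exc.msg}")
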
1374
1375
819k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1376
499
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1377
499
    }
1378
1379
818k
    if (c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
1380
42.3k
        current_tok->in_debug = 1;
1381
42.3k
    }
1382
1383
    /* Punctuation character */
1384
818k
    p_start = tok->start;
1385
818k
    p_end = tok->cur;
1386
818k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1387
819k
}
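
The in_debug flag set when a bare '=' appears inside an f-string expression is what drives self-documenting expressions like f"{x=}": the lexer remembers that the expression's source text must be echoed before its value. A hand-written demonstration (not part of this file) of both the runtime behavior and the token stream on CPython 3.12+:

    import io
    import tokenize

    x = 42
    print(f"{x=}")  # prints: x=42  (the literal "x=" text plus repr(x))

    # The '=' inside the replacement field is lexed as a plain OP token;
    # it is the in_debug flag that tells later stages to replay "x=".
    for tok in tokenize.generate_tokens(io.StringIO('f"{x=}"\n').readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
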
1388
1389
static int
1390
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1391
51.9k
{
1392
51.9k
    const char *p_start = NULL;
1393
51.9k
    const char *p_end = NULL;
1394
51.9k
    int end_quote_size = 0;
1395
51.9k
    int unicode_escape = 0;
1396
1397
51.9k
    tok->start = tok->cur;
1398
51.9k
    tok->first_lineno = tok->lineno;
1399
51.9k
    tok->starting_col_offset = tok->col_offset;
1400
1401
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1402
    // before it.
1403
51.9k
    int start_char = tok_nextc(tok);
1404
51.9k
    if (start_char == '{') {
1405
13.8k
        int peek1 = tok_nextc(tok);
1406
13.8k
        tok_backup(tok, peek1);
1407
13.8k
        tok_backup(tok, start_char);
1408
13.8k
        if (peek1 != '{') {
1409
10.9k
            current_tok->curly_bracket_expr_start_depth++;
1410
10.9k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1411
6
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1412
6
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1413
6
            }
1414
10.9k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1415
10.9k
            return tok_get_normal_mode(tok, current_tok, token);
1416
10.9k
        }
1417
13.8k
    }
1418
38.1k
    else {
1419
38.1k
        tok_backup(tok, start_char);
1420
38.1k
    }
1421
1422
    // Check if we are at the end of the string
1423
59.1k
    for (int i = 0; i < current_tok->quote_size; i++) {
1424
47.0k
        int quote = tok_nextc(tok);
1425
47.0k
        if (quote != current_tok->quote) {
1426
28.8k
            tok_backup(tok, quote);
1427
28.8k
            goto f_string_middle;
1428
28.8k
        }
1429
47.0k
    }
1430
1431
12.1k
    if (current_tok->last_expr_buffer != NULL) {
1432
7.13k
        PyMem_Free(current_tok->last_expr_buffer);
1433
7.13k
        current_tok->last_expr_buffer = NULL;
1434
7.13k
        current_tok->last_expr_size = 0;
1435
7.13k
        current_tok->last_expr_end = -1;
1436
7.13k
    }
1437
1438
12.1k
    p_start = tok->start;
1439
12.1k
    p_end = tok->cur;
1440
12.1k
    tok->tok_mode_stack_index--;
1441
12.1k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1442
1443
28.8k
f_string_middle:
1444
1445
    // TODO: Setting multi_line_start here is a bit of a hack, but it works
1446
    // for now; we need a better way to handle this.
1447
28.8k
    tok->multi_line_start = tok->line_start;
1448
156k
    while (end_quote_size != current_tok->quote_size) {
1449
150k
        int c = tok_nextc(tok);
1450
150k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1451
0
            return MAKE_TOKEN(ERRORTOKEN);
1452
0
        }
1453
150k
        int in_format_spec = (
1454
150k
                current_tok->in_format_spec
1455
150k
                &&
1456
150k
                INSIDE_FSTRING_EXPR(current_tok)
1457
150k
        );
1458
1459
150k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1460
476
            if (tok->decoding_erred) {
1461
0
                return MAKE_TOKEN(ERRORTOKEN);
1462
0
            }
1463
1464
            // If we are in a format spec and we find a newline,
1465
            // the format spec ends here and we should return
1466
            // to regular mode.
1467
476
            if (in_format_spec && c == '\n') {
1468
87
                if (current_tok->quote_size == 1) {
1469
87
                    return MAKE_TOKEN(
1470
87
                        _PyTokenizer_syntaxerror(
1471
87
                            tok,
1472
87
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1473
87
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1474
87
                        )
1475
87
                    );
1476
87
                }
1477
0
                tok_backup(tok, c);
1478
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1479
0
                current_tok->in_format_spec = 0;
1480
0
                p_start = tok->start;
1481
0
                p_end = tok->cur;
1482
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1483
87
            }
1484
1485
389
            assert(tok->multi_line_start != NULL);
1486
            // Shift the tok_state's location back to
1487
            // the start of the string and report the error
1488
            // from the initial quote character.
1489
389
            tok->cur = (char *)current_tok->start;
1490
389
            tok->cur++;
1491
389
            tok->line_start = current_tok->multi_line_start;
1492
389
            int start = tok->lineno;
1493
1494
389
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1495
389
            tok->lineno = the_current_tok->first_line;
1496
1497
389
            if (current_tok->quote_size == 3) {
1498
36
                _PyTokenizer_syntaxerror(tok,
1499
36
                                    "unterminated triple-quoted %c-string literal"
1500
36
                                    " (detected at line %d)",
1501
36
                                    TOK_GET_STRING_PREFIX(tok), start);
1502
36
                if (c != '\n') {
1503
36
                    tok->done = E_EOFS;
1504
36
                }
1505
36
                return MAKE_TOKEN(ERRORTOKEN);
1506
36
            }
1507
353
            else {
1508
353
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1509
353
                                    "unterminated %c-string literal (detected at"
1510
353
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1511
353
            }
1512
389
        }
1513
1514
150k
        if (c == current_tok->quote) {
1515
8.37k
            end_quote_size += 1;
1516
8.37k
            continue;
1517
141k
        } else {
1518
141k
            end_quote_size = 0;
1519
141k
        }
1520
1521
141k
        if (c == '{') {
1522
17.9k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1523
0
                return MAKE_TOKEN(ENDMARKER);
1524
0
            }
1525
17.9k
            int peek = tok_nextc(tok);
1526
17.9k
            if (peek != '{' || in_format_spec) {
1527
14.4k
                tok_backup(tok, peek);
1528
14.4k
                tok_backup(tok, c);
1529
14.4k
                current_tok->curly_bracket_expr_start_depth++;
1530
14.4k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1531
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1532
5
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1533
5
                }
1534
14.4k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1535
14.4k
                current_tok->in_format_spec = 0;
1536
14.4k
                p_start = tok->start;
1537
14.4k
                p_end = tok->cur;
1538
14.4k
            } else {
1539
3.43k
                p_start = tok->start;
1540
3.43k
                p_end = tok->cur - 1;
1541
3.43k
            }
1542
17.8k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1543
123k
        } else if (c == '}') {
1544
4.84k
            if (unicode_escape) {
1545
494
                p_start = tok->start;
1546
494
                p_end = tok->cur;
1547
494
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1548
494
            }
1549
4.34k
            int peek = tok_nextc(tok);
1550
1551
            // The tokenizer can only be in the format spec if we have already completed the expression
1552
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1553
            // of the bracket stack (-1 is the top level). Since format specifiers cannot legally contain
1554
            // doubled braces, we can skip the double-brace check here.
1555
4.34k
            int cursor = current_tok->curly_bracket_depth;
1556
4.34k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1557
1.73k
                p_start = tok->start;
1558
1.73k
                p_end = tok->cur - 1;
1559
2.61k
            } else {
1560
2.61k
                tok_backup(tok, peek);
1561
2.61k
                tok_backup(tok, c);
1562
2.61k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1563
2.61k
                current_tok->in_format_spec = 0;
1564
2.61k
                p_start = tok->start;
1565
2.61k
                p_end = tok->cur;
1566
2.61k
            }
1567
4.34k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1568
119k
        } else if (c == '\\') {
1569
6.16k
            int peek = tok_nextc(tok);
1570
6.16k
            if (peek == '\r') {
1571
69
                peek = tok_nextc(tok);
1572
69
            }
1573
            // Special case when the backslash is right before a curly
1574
            // brace: restore the peeked character and return control
1575
            // to the loop for the next iteration.
1576
6.16k
            if (peek == '{' || peek == '}') {
1577
1.32k
                if (!current_tok->raw) {
1578
1.12k
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1579
0
                        return MAKE_TOKEN(ERRORTOKEN);
1580
0
                    }
1581
1.12k
                }
1582
1.32k
                tok_backup(tok, peek);
1583
1.32k
                continue;
1584
1.32k
            }
1585
1586
4.84k
            if (!current_tok->raw) {
1587
4.57k
                if (peek == 'N') {
1588
                    /* Handle named unicode escapes (\N{BULLET}) */
1589
758
                    peek = tok_nextc(tok);
1590
758
                    if (peek == '{') {
1591
513
                        unicode_escape = 1;
1592
513
                    } else {
1593
245
                        tok_backup(tok, peek);
1594
245
                    }
1595
758
                }
1596
4.57k
            } /* else {
1597
                skip the escaped character
1598
            }*/
1599
4.84k
        }
1600
141k
    }
1601
1602
    // Back up over the f-string quotes to emit a final FSTRING_MIDDLE and
1603
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1604
12.2k
    for (int i = 0; i < current_tok->quote_size; i++) {
1605
6.55k
        tok_backup(tok, current_tok->quote);
1606
6.55k
    }
1607
5.67k
    p_start = tok->start;
1608
5.67k
    p_end = tok->cur;
1609
5.67k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1610
28.8k
}
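
tok_get_fstring_mode is what chops an f-string (or t-string) literal into FSTRING_START / FSTRING_MIDDLE / FSTRING_END pieces, handing each '{...}' expression back to regular mode and folding '{{' and '}}' into literal braces. A hand-written sketch of the resulting stream on CPython 3.12+ (TSTRING_* tokens are the analogous t-string variants):

    import io
    import tokenize

    # Roughly: f"a{b:>4}c" splits into FSTRING_START 'f"', FSTRING_MIDDLE 'a',
    # the expression tokens OP '{' NAME 'b' OP ':', the format spec as
    # FSTRING_MIDDLE '>4', then OP '}', FSTRING_MIDDLE 'c', FSTRING_END '"'.
    for tok in tokenize.generate_tokens(io.StringIO('f"a{b:>4}c"\n').readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
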
1611
1612
static int
1613
tok_get(struct tok_state *tok, struct token *token)
1614
1.78M
{
1615
1.78M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1616
1.78M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1617
1.73M
        return tok_get_normal_mode(tok, current_tok, token);
1618
1.73M
    } else {
1619
51.9k
        return tok_get_fstring_mode(tok, current_tok, token);
1620
51.9k
    }
1621
1.78M
}
1622
1623
int
1624
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1625
1.78M
{
1626
1.78M
    int result = tok_get(tok, token);
1627
1.78M
    if (tok->decoding_erred) {
1628
0
        result = ERRORTOKEN;
1629
0
        tok->done = E_DECODE;
1630
0
    }
1631
1.78M
    return result;
1632
1.78M
}
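
For completeness: the unterminated-literal branches in tok_get_fstring_mode above rewind tok->cur and tok->lineno to the opening quote, so the error location points at the line that started the literal while the "detected at line N" suffix records where scanning stopped. A quick hand-written check:

    for src in ('f"abc', 'f"""abc'):
        try:
            compile(src, "<demo>", "eval")
        except SyntaxError as exc:
            print(f"{src!r}: {exc.msg}")
    # f"abc    -> unterminated f-string literal (detected at line 1)
    # f"""abc  -> unterminated triple-quoted f-string literal (detected at line 1)
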