Coverage Report

Created: 2026-05-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
#define ALTTABSIZE 1

/* True when the byte value `c` may start an identifier: an ASCII letter,
   an underscore, or any value >= 128 (such values are validated later by
   verify_identifier()).
   The argument is parenthesized so that expressions such as `x & 0xFF` are
   grouped correctly; it is still evaluated several times, so it must not
   have side effects. */
#define is_potential_identifier_start(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || (c) == '_'\
               || ((c) >= 128))

/* True when the byte value `c` may continue an identifier: everything
   accepted by is_potential_identifier_start() plus the ASCII digits.
   The same multiple-evaluation caveat applies. */
#define is_potential_identifier_char(c) (\
              ((c) >= 'a' && (c) <= 'z')\
               || ((c) >= 'A' && (c) <= 'Z')\
               || ((c) >= '0' && (c) <= '9')\
               || (c) == '_'\
               || ((c) >= 128))
24
25
/* Accessors for the tokenizer mode stack.
   TOK_GET_MODE(tok) yields a pointer to the entry at the current stack index.
   TOK_NEXT_MODE(tok) increments the stack index and yields a pointer to the
   new entry.
   Debug builds use inline functions that assert the index stays within
   [0, MAXFSTRINGLEVEL); other builds use macros with no bounds checks.  The
   macro forms evaluate `tok` more than once, so the argument must be free of
   side effects. */
#ifdef Py_DEBUG
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
}
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
    assert(tok->tok_mode_stack_index >= 0);
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
}
#else
#define TOK_GET_MODE(tok) (&((tok)->tok_mode_stack[(tok)->tok_mode_stack_index]))
#define TOK_NEXT_MODE(tok) (&((tok)->tok_mode_stack[++(tok)->tok_mode_stack_index]))
#endif
40
41
/* Select the t-string or f-string flavour of a token type, depending on the
   string_kind of the given tokenizer mode.  Arguments are parenthesized so
   that pointer expressions such as `base + 1` can be passed safely. */
#define FTSTRING_MIDDLE(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
#define FTSTRING_END(tok_mode) ((tok_mode)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
/* Prefix letter ('t' or 'f') for the string kind of the current mode. */
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
/* Token construction helpers.  Both rely on locals named `tok`, `token`,
   `p_start` and `p_end` being in scope at the point of expansion. */
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, (token_type), p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
                _PyLexer_type_comment_token_setup(tok, token, (token_type), (col_offset), (end_col_offset), p_start, p_end))
47
48
/* Prefix that introduces a type comment.  Each space in this constant is
   treated as "zero or more spaces or tabs" when tokenizing.  The pointer
   itself is const so the prefix cannot be re-pointed by accident. */
static const char *const type_comment_prefix = "# type: ";
51
52
/* Return 1 if any of the first `size` bytes at `str` is a NUL byte,
   otherwise 0. */
static inline int
contains_null_bytes(const char* str, size_t size)
{
    const void *first_nul = memchr(str, '\0', size);
    return first_nul != NULL;
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.6M
{
62
11.6M
    int rc;
63
11.9M
    for (;;) {
64
11.9M
        if (tok->cur != tok->inp) {
65
11.3M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.3M
            tok->col_offset++;
70
11.3M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.3M
        }
72
566k
        if (tok->done != E_OK) {
73
189k
            return EOF;
74
189k
        }
75
377k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
377k
        if (!rc) {
84
94.8k
            tok->cur = tok->inp;
85
94.8k
            return EOF;
86
94.8k
        }
87
282k
        tok->line_start = tok->cur;
88
89
282k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
282k
    }
95
11.6M
    Py_UNREACHABLE();
96
11.6M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
4.56M
{
102
4.56M
    if (c != EOF) {
103
4.37M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
4.37M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
4.37M
        tok->col_offset--;
110
4.37M
    }
111
4.56M
}
112
113
static int
114
27.5k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
27.5k
    assert(token != NULL);
116
27.5k
    assert(c == '}' || c == ':' || c == '!');
117
27.5k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
27.5k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
15.1k
        return 0;
121
15.1k
    }
122
12.3k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
12.3k
    int hash_detected = 0;
126
12.3k
    int in_string = 0;
127
12.3k
    char quote_char = 0;
128
129
1.74M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.72M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.72M
        if (ch == '\\') {
134
24.1k
            i++;
135
24.1k
            continue;
136
24.1k
        }
137
138
        // Handle quotes
139
1.70M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works becase there is an off number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
314k
            if (!in_string) {
148
117k
                in_string = 1;
149
117k
                quote_char = ch;
150
117k
            }
151
197k
            else if (ch == quote_char) {
152
116k
                in_string = 0;
153
116k
            }
154
314k
            continue;
155
314k
        }
156
157
        // Check for # outside strings
158
1.39M
        if (ch == '#' && !in_string) {
159
944
            hash_detected = 1;
160
944
            break;
161
944
        }
162
1.39M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
12.3k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
944
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
944
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
944
        Py_ssize_t i = 0;  // Input position
172
944
        Py_ssize_t j = 0;  // Output position
173
944
        in_string = 0;     // Whether we're in a string
174
944
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
217k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
216k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
216k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
23.9k
                if (!in_string) {
184
8.07k
                    in_string = 1;
185
8.07k
                    quote_char = ch;
186
15.8k
                } else if (ch == quote_char) {
187
8.03k
                    in_string = 0;
188
8.03k
                }
189
23.9k
                result[j++] = ch;
190
23.9k
            }
191
            // Skip comments
192
192k
            else if (ch == '#' && !in_string) {
193
237k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
236k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
235k
                    i++;
196
235k
                }
197
1.13k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
294
                    result[j++] = '\n';
199
294
                }
200
1.13k
            }
201
            // Copy other chars
202
191k
            else {
203
191k
                result[j++] = ch;
204
191k
            }
205
216k
            i++;
206
216k
        }
207
208
944
        result[j] = '\0';  // Null-terminate the result string
209
944
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
944
        PyMem_Free(result);
211
11.4k
    } else {
212
11.4k
        res = PyUnicode_DecodeUTF8(
213
11.4k
            tok_mode->last_expr_buffer,
214
11.4k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
11.4k
            NULL
216
11.4k
        );
217
11.4k
    }
218
219
12.3k
    if (!res) {
220
0
        return -1;
221
0
    }
222
12.3k
    token->metadata = res;
223
12.3k
    return 0;
224
12.3k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
71.7k
{
229
71.7k
    assert(tok->cur != NULL);
230
231
71.7k
    Py_ssize_t size = strlen(tok->cur);
232
71.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
71.7k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
44.2k
        case '{':
252
44.2k
            if (tok_mode->last_expr_buffer != NULL) {
253
30.9k
                PyMem_Free(tok_mode->last_expr_buffer);
254
30.9k
            }
255
44.2k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
44.2k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
44.2k
            tok_mode->last_expr_size = size;
260
44.2k
            tok_mode->last_expr_end = -1;
261
44.2k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
44.2k
            break;
263
21.2k
        case '}':
264
23.5k
        case '!':
265
23.5k
            tok_mode->last_expr_end = strlen(tok->start);
266
23.5k
            break;
267
3.97k
        case ':':
268
3.97k
            if (tok_mode->last_expr_end == -1) {
269
3.74k
               tok_mode->last_expr_end = strlen(tok->start);
270
3.74k
            }
271
3.97k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
71.7k
    }
275
71.7k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
71.7k
}
280
281
/* Check whether the upcoming input is exactly `test` and is not followed by
   a potential identifier character.  Every character read is pushed back
   again, so the input position is unchanged on return.  Returns 1 on a
   match, 0 otherwise. */
static int
lookahead(struct tok_state *tok, const char *test)
{
    const char *cursor = test;
    int c = tok_nextc(tok);

    /* Consume input for as long as it agrees with `test`. */
    while (*cursor != 0 && c == *cursor) {
        cursor++;
        c = tok_nextc(tok);
    }

    /* A match needs all of `test` consumed and a non-identifier follower. */
    int matched = 0;
    if (*cursor == 0) {
        matched = !is_potential_identifier_char(c);
    }

    /* Undo every read, most recent first. */
    tok_backup(tok, c);
    while (cursor != test) {
        tok_backup(tok, *--cursor);
    }
    return matched;
}
303
304
static int
305
91.9k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
91.9k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
56
        return 1;
310
56
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
91.8k
    int r = 0;
322
91.8k
    if (c == 'a') {
323
908
        r = lookahead(tok, "nd");
324
908
    }
325
90.9k
    else if (c == 'e') {
326
744
        r = lookahead(tok, "lse");
327
744
    }
328
90.2k
    else if (c == 'f') {
329
2.65k
        r = lookahead(tok, "or");
330
2.65k
    }
331
87.5k
    else if (c == 'i') {
332
1.26k
        int c2 = tok_nextc(tok);
333
1.26k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.25k
            r = 1;
335
1.25k
        }
336
1.26k
        tok_backup(tok, c2);
337
1.26k
    }
338
86.3k
    else if (c == 'o') {
339
4.28k
        r = lookahead(tok, "r");
340
4.28k
    }
341
82.0k
    else if (c == 'n') {
342
310
        r = lookahead(tok, "ot");
343
310
    }
344
91.8k
    if (r) {
345
10.0k
        tok_backup(tok, c);
346
10.0k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.0k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.0k
        tok_nextc(tok);
352
10.0k
    }
353
81.8k
    else /* In future releases, only error will remain. */
354
81.8k
    if (c < 128 && is_potential_identifier_char(c)) {
355
240
        tok_backup(tok, c);
356
240
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
240
        return 0;
358
240
    }
359
91.6k
    return 1;
360
91.8k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
11.1k
{
366
11.1k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
11.1k
    PyObject *s;
370
11.1k
    if (tok->decoding_erred)
371
0
        return 0;
372
11.1k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
11.1k
    if (s == NULL) {
374
0
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
0
            tok->done = E_DECODE;
376
0
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
0
        return 0;
381
0
    }
382
11.1k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
11.1k
    assert(invalid >= 0);
384
11.1k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
11.1k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
502
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
502
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
324
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
324
            if (s != NULL) {
391
324
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
324
            }
393
324
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
324
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
324
        }
399
502
        Py_DECREF(s);
400
502
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
283
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
283
        }
403
219
        else {
404
219
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
219
        }
406
502
        return 0;
407
502
    }
408
10.6k
    Py_DECREF(s);
409
10.6k
    return 1;
410
11.1k
}
411
412
/* Consume the rest of a run of decimal digits in which single underscores
   may separate digit groups.  Returns the first character after the run
   (it has been read, not pushed back).  If an underscore is not followed by
   a digit, that character is pushed back, a syntax error is raised and 0 is
   returned. */
static int
tok_decimal_tail(struct tok_state *tok)
{
    for (;;) {
        /* Read at least one character, then keep going through digits. */
        int c = tok_nextc(tok);
        while (Py_ISDIGIT(c)) {
            c = tok_nextc(tok);
        }
        if (c != '_') {
            return c;
        }

        /* An underscore must be followed directly by another digit. */
        c = tok_nextc(tok);
        if (!Py_ISDIGIT(c)) {
            tok_backup(tok, c);
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
            return 0;
        }
    }
}
433
434
static inline int
435
821
tok_continuation_line(struct tok_state *tok) {
436
821
    int c = tok_nextc(tok);
437
821
    if (c == '\r') {
438
0
        c = tok_nextc(tok);
439
0
    }
440
821
    if (c != '\n') {
441
62
        tok->done = E_LINECONT;
442
62
        return -1;
443
62
    }
444
759
    c = tok_nextc(tok);
445
759
    if (c == EOF) {
446
45
        tok->done = E_EOF;
447
45
        tok->cur = tok->inp;
448
45
        return -1;
449
714
    } else {
450
714
        tok_backup(tok, c);
451
714
    }
452
714
    return c;
453
759
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
22.5k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
22.5k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
22.5k
    do {                                                                  \
464
8
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
8
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
8
            (int)(tok->cur - tok->line_start),                            \
467
8
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
8
        return -1;                                                        \
469
8
    } while (0)
470
471
22.5k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
22.5k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
22.5k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
22.5k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
22.5k
    if (saw_b && saw_f) {
485
2
        RETURN_SYNTAX_ERROR("b", "f");
486
2
    }
487
22.5k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
22.5k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
22.5k
#undef RETURN_SYNTAX_ERROR
496
497
22.5k
    return 0;
498
22.5k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
2.10M
{
503
2.10M
    int c;
504
2.10M
    int blankline, nonascii;
505
506
2.10M
    const char *p_start = NULL;
507
2.10M
    const char *p_end = NULL;
508
2.21M
  nextline:
509
2.21M
    tok->start = NULL;
510
2.21M
    tok->starting_col_offset = -1;
511
2.21M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
2.21M
    if (tok->atbol) {
516
368k
        int col = 0;
517
368k
        int altcol = 0;
518
368k
        tok->atbol = 0;
519
368k
        int cont_line_col = 0;
520
727k
        for (;;) {
521
727k
            c = tok_nextc(tok);
522
727k
            if (c == ' ') {
523
357k
                col++, altcol++;
524
357k
            }
525
370k
            else if (c == '\t') {
526
654
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
654
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
654
            }
529
369k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
764
                col = altcol = 0; /* For Emacs users */
531
764
            }
532
368k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
485
                cont_line_col = cont_line_col ? cont_line_col : col;
538
485
                if ((c = tok_continuation_line(tok)) == -1) {
539
24
                    return MAKE_TOKEN(ERRORTOKEN);
540
24
                }
541
485
            }
542
368k
            else if (c == EOF && PyErr_Occurred()) {
543
0
                return MAKE_TOKEN(ERRORTOKEN);
544
0
            }
545
368k
            else {
546
368k
                break;
547
368k
            }
548
727k
        }
549
368k
        tok_backup(tok, c);
550
368k
        if (c == '#' || c == '\n' || c == '\r') {
551
            /* Lines with only whitespace and/or comments
552
               shouldn't affect the indentation and are
553
               not passed to the parser as NEWLINE tokens,
554
               except *totally* empty lines in interactive
555
               mode, which signal the end of a command group. */
556
72.7k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
557
0
                blankline = 0; /* Let it through */
558
0
            }
559
72.7k
            else if (tok->prompt != NULL && tok->lineno == 1) {
560
                /* In interactive mode, if the first line contains
561
                   only spaces and/or a comment, let it through. */
562
0
                blankline = 0;
563
0
                col = altcol = 0;
564
0
            }
565
72.7k
            else {
566
72.7k
                blankline = 1; /* Ignore completely */
567
72.7k
            }
568
            /* We can't jump back right here since we still
569
               may need to skip to the end of a comment */
570
72.7k
        }
571
368k
        if (!blankline && tok->level == 0) {
572
260k
            col = cont_line_col ? cont_line_col : col;
573
260k
            altcol = cont_line_col ? cont_line_col : altcol;
574
260k
            if (col == tok->indstack[tok->indent]) {
575
                /* No change */
576
238k
                if (altcol != tok->altindstack[tok->indent]) {
577
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
578
1
                }
579
238k
            }
580
21.9k
            else if (col > tok->indstack[tok->indent]) {
581
                /* Indent -- always one */
582
12.2k
                if (tok->indent+1 >= MAXINDENT) {
583
0
                    tok->done = E_TOODEEP;
584
0
                    tok->cur = tok->inp;
585
0
                    return MAKE_TOKEN(ERRORTOKEN);
586
0
                }
587
12.2k
                if (altcol <= tok->altindstack[tok->indent]) {
588
2
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
589
2
                }
590
12.2k
                tok->pendin++;
591
12.2k
                tok->indstack[++tok->indent] = col;
592
12.2k
                tok->altindstack[tok->indent] = altcol;
593
12.2k
            }
594
9.67k
            else /* col < tok->indstack[tok->indent] */ {
595
                /* Dedent -- any number, must be consistent */
596
21.3k
                while (tok->indent > 0 &&
597
18.0k
                    col < tok->indstack[tok->indent]) {
598
11.6k
                    tok->pendin--;
599
11.6k
                    tok->indent--;
600
11.6k
                }
601
9.67k
                if (col != tok->indstack[tok->indent]) {
602
8
                    tok->done = E_DEDENT;
603
8
                    tok->cur = tok->inp;
604
8
                    return MAKE_TOKEN(ERRORTOKEN);
605
8
                }
606
9.66k
                if (altcol != tok->altindstack[tok->indent]) {
607
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
608
1
                }
609
9.66k
            }
610
260k
        }
611
368k
    }
612
613
2.21M
    tok->start = tok->cur;
614
2.21M
    tok->starting_col_offset = tok->col_offset;
615
616
    /* Return pending indents/dedents */
617
2.21M
    if (tok->pendin != 0) {
618
23.8k
        if (tok->pendin < 0) {
619
11.6k
            if (tok->tok_extra_tokens) {
620
60
                p_start = tok->cur;
621
60
                p_end = tok->cur;
622
60
            }
623
11.6k
            tok->pendin++;
624
11.6k
            return MAKE_TOKEN(DEDENT);
625
11.6k
        }
626
12.2k
        else {
627
12.2k
            if (tok->tok_extra_tokens) {
628
64
                p_start = tok->buf;
629
64
                p_end = tok->cur;
630
64
            }
631
12.2k
            tok->pendin--;
632
12.2k
            return MAKE_TOKEN(INDENT);
633
12.2k
        }
634
23.8k
    }
635
636
    /* Peek ahead at the next character */
637
2.19M
    c = tok_nextc(tok);
638
2.19M
    tok_backup(tok, c);
639
640
2.19M
 again:
641
2.19M
    tok->start = NULL;
642
    /* Skip spaces */
643
2.62M
    do {
644
2.62M
        c = tok_nextc(tok);
645
2.62M
    } while (c == ' ' || c == '\t' || c == '\014');
646
647
    /* Set start of current token */
648
2.19M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
649
2.19M
    tok->starting_col_offset = tok->col_offset - 1;
650
651
    /* Skip comment, unless it's a type comment */
652
2.19M
    if (c == '#') {
653
654
32.6k
        const char* p = NULL;
655
32.6k
        const char *prefix, *type_start;
656
32.6k
        int current_starting_col_offset;
657
658
1.04M
        while (c != EOF && c != '\n' && c != '\r') {
659
1.01M
            c = tok_nextc(tok);
660
1.01M
        }
661
662
32.6k
        if (tok->tok_extra_tokens) {
663
44
            p = tok->start;
664
44
        }
665
666
32.6k
        if (tok->type_comments) {
667
0
            p = tok->start;
668
0
            current_starting_col_offset = tok->starting_col_offset;
669
0
            prefix = type_comment_prefix;
670
0
            while (*prefix && p < tok->cur) {
671
0
                if (*prefix == ' ') {
672
0
                    while (*p == ' ' || *p == '\t') {
673
0
                        p++;
674
0
                        current_starting_col_offset++;
675
0
                    }
676
0
                } else if (*prefix == *p) {
677
0
                    p++;
678
0
                    current_starting_col_offset++;
679
0
                } else {
680
0
                    break;
681
0
                }
682
683
0
                prefix++;
684
0
            }
685
686
            /* This is a type comment if we matched all of type_comment_prefix. */
687
0
            if (!*prefix) {
688
0
                int is_type_ignore = 1;
689
                // +6 in order to skip the word 'ignore'
690
0
                const char *ignore_end = p + 6;
691
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
692
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
693
694
0
                type_start = p;
695
696
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
697
                 * or anything ASCII and non-alphanumeric. */
698
0
                is_type_ignore = (
699
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
700
0
                    && !(tok->cur > ignore_end
701
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
702
703
0
                if (is_type_ignore) {
704
0
                    p_start = ignore_end;
705
0
                    p_end = tok->cur;
706
707
                    /* If this type ignore is the only thing on the line, consume the newline also. */
708
0
                    if (blankline) {
709
0
                        tok_nextc(tok);
710
0
                        tok->atbol = 1;
711
0
                    }
712
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
713
0
                } else {
714
0
                    p_start = type_start;
715
0
                    p_end = tok->cur;
716
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
717
0
                }
718
0
            }
719
0
        }
720
32.6k
        if (tok->tok_extra_tokens) {
721
44
            tok_backup(tok, c);  /* don't eat the newline or EOF */
722
44
            p_start = p;
723
44
            p_end = tok->cur;
724
44
            tok->comment_newline = blankline;
725
44
            return MAKE_TOKEN(COMMENT);
726
44
        }
727
32.6k
    }
728
729
2.19M
    if (tok->done == E_INTERACT_STOP) {
730
0
        return MAKE_TOKEN(ENDMARKER);
731
0
    }
732
733
    /* Check for EOF and errors now */
734
2.19M
    if (c == EOF) {
735
94.7k
        if (tok->level) {
736
4.02k
            return MAKE_TOKEN(ERRORTOKEN);
737
4.02k
        }
738
90.7k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
739
94.7k
    }
740
741
    /* Identifier (most frequent token!) */
742
2.09M
    nonascii = 0;
743
2.09M
    if (is_potential_identifier_start(c)) {
744
        /* Process the various legal combinations of b"", r"", u"", and f"". */
745
691k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
746
876k
        while (1) {
747
876k
            if (!saw_b && (c == 'b' || c == 'B')) {
748
19.5k
                saw_b = 1;
749
19.5k
            }
750
            /* Since this is a backwards compatibility support literal we don't
751
               want to support it in arbitrary order like byte literals. */
752
857k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
753
84.8k
                saw_u = 1;
754
84.8k
            }
755
            /* ur"" and ru"" are not supported */
756
772k
            else if (!saw_r && (c == 'r' || c == 'R')) {
757
25.1k
                saw_r = 1;
758
25.1k
            }
759
747k
            else if (!saw_f && (c == 'f' || c == 'F')) {
760
43.2k
                saw_f = 1;
761
43.2k
            }
762
704k
            else if (!saw_t && (c == 't' || c == 'T')) {
763
35.0k
                saw_t = 1;
764
35.0k
            }
765
668k
            else {
766
668k
                break;
767
668k
            }
768
207k
            c = tok_nextc(tok);
769
207k
            if (c == '"' || c == '\'') {
770
                // Raise error on incompatible string prefixes:
771
22.5k
                int status = maybe_raise_syntax_error_for_string_prefixes(
772
22.5k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
773
22.5k
                if (status < 0) {
774
8
                    return MAKE_TOKEN(ERRORTOKEN);
775
8
                }
776
777
                // Handle valid f or t string creation:
778
22.5k
                if (saw_f || saw_t) {
779
18.6k
                    goto f_string_quote;
780
18.6k
                }
781
3.88k
                goto letter_quote;
782
22.5k
            }
783
207k
        }
784
3.10M
        while (is_potential_identifier_char(c)) {
785
2.44M
            if (c >= 128) {
786
124k
                nonascii = 1;
787
124k
            }
788
2.44M
            c = tok_nextc(tok);
789
2.44M
        }
790
668k
        tok_backup(tok, c);
791
668k
        if (nonascii && !verify_identifier(tok)) {
792
502
            return MAKE_TOKEN(ERRORTOKEN);
793
502
        }
794
795
668k
        p_start = tok->start;
796
668k
        p_end = tok->cur;
797
798
668k
        return MAKE_TOKEN(NAME);
799
668k
    }
800
801
1.40M
    if (c == '\r') {
802
0
        c = tok_nextc(tok);
803
0
    }
804
805
    /* Newline */
806
1.40M
    if (c == '\n') {
807
270k
        tok->atbol = 1;
808
270k
        if (blankline || tok->level > 0) {
809
108k
            if (tok->tok_extra_tokens) {
810
128
                if (tok->comment_newline) {
811
24
                    tok->comment_newline = 0;
812
24
                }
813
128
                p_start = tok->start;
814
128
                p_end = tok->cur;
815
128
                return MAKE_TOKEN(NL);
816
128
            }
817
108k
            goto nextline;
818
108k
        }
819
162k
        if (tok->comment_newline && tok->tok_extra_tokens) {
820
12
            tok->comment_newline = 0;
821
12
            p_start = tok->start;
822
12
            p_end = tok->cur;
823
12
            return MAKE_TOKEN(NL);
824
12
        }
825
162k
        p_start = tok->start;
826
162k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
827
162k
        tok->cont_line = 0;
828
162k
        return MAKE_TOKEN(NEWLINE);
829
162k
    }
830
831
    /* Period or number starting with period? */
832
1.13M
    if (c == '.') {
833
30.6k
        c = tok_nextc(tok);
834
30.6k
        if (Py_ISDIGIT(c)) {
835
3.35k
            goto fraction;
836
27.2k
        } else if (c == '.') {
837
1.74k
            c = tok_nextc(tok);
838
1.74k
            if (c == '.') {
839
1.04k
                p_start = tok->start;
840
1.04k
                p_end = tok->cur;
841
1.04k
                return MAKE_TOKEN(ELLIPSIS);
842
1.04k
            }
843
704
            else {
844
704
                tok_backup(tok, c);
845
704
            }
846
704
            tok_backup(tok, '.');
847
704
        }
848
25.5k
        else {
849
25.5k
            tok_backup(tok, c);
850
25.5k
        }
851
26.2k
        p_start = tok->start;
852
26.2k
        p_end = tok->cur;
853
26.2k
        return MAKE_TOKEN(DOT);
854
30.6k
    }
855
856
    /* Number */
857
1.10M
    if (Py_ISDIGIT(c)) {
858
88.7k
        if (c == '0') {
859
            /* Hex, octal or binary -- maybe. */
860
29.3k
            c = tok_nextc(tok);
861
29.3k
            if (c == 'x' || c == 'X') {
862
                /* Hex */
863
14.0k
                c = tok_nextc(tok);
864
14.2k
                do {
865
14.2k
                    if (c == '_') {
866
222
                        c = tok_nextc(tok);
867
222
                    }
868
14.2k
                    if (!Py_ISXDIGIT(c)) {
869
15
                        tok_backup(tok, c);
870
15
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
871
15
                    }
872
73.5k
                    do {
873
73.5k
                        c = tok_nextc(tok);
874
73.5k
                    } while (Py_ISXDIGIT(c));
875
14.2k
                } while (c == '_');
876
13.9k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
877
1
                    return MAKE_TOKEN(ERRORTOKEN);
878
1
                }
879
13.9k
            }
880
15.3k
            else if (c == 'o' || c == 'O') {
881
                /* Octal */
882
715
                c = tok_nextc(tok);
883
968
                do {
884
968
                    if (c == '_') {
885
255
                        c = tok_nextc(tok);
886
255
                    }
887
968
                    if (c < '0' || c >= '8') {
888
18
                        if (Py_ISDIGIT(c)) {
889
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
890
1
                                    "invalid digit '%c' in octal literal", c));
891
1
                        }
892
17
                        else {
893
17
                            tok_backup(tok, c);
894
17
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
895
17
                        }
896
18
                    }
897
4.64k
                    do {
898
4.64k
                        c = tok_nextc(tok);
899
4.64k
                    } while ('0' <= c && c < '8');
900
950
                } while (c == '_');
901
697
                if (Py_ISDIGIT(c)) {
902
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
903
1
                            "invalid digit '%c' in octal literal", c));
904
1
                }
905
696
                if (!verify_end_of_number(tok, c, "octal")) {
906
6
                    return MAKE_TOKEN(ERRORTOKEN);
907
6
                }
908
696
            }
909
14.6k
            else if (c == 'b' || c == 'B') {
910
                /* Binary */
911
483
                c = tok_nextc(tok);
912
606
                do {
913
606
                    if (c == '_') {
914
129
                        c = tok_nextc(tok);
915
129
                    }
916
606
                    if (c != '0' && c != '1') {
917
20
                        if (Py_ISDIGIT(c)) {
918
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
919
1
                        }
920
19
                        else {
921
19
                            tok_backup(tok, c);
922
19
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
923
19
                        }
924
20
                    }
925
3.02k
                    do {
926
3.02k
                        c = tok_nextc(tok);
927
3.02k
                    } while (c == '0' || c == '1');
928
586
                } while (c == '_');
929
463
                if (Py_ISDIGIT(c)) {
930
3
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
931
3
                }
932
460
                if (!verify_end_of_number(tok, c, "binary")) {
933
4
                    return MAKE_TOKEN(ERRORTOKEN);
934
4
                }
935
460
            }
936
14.1k
            else {
937
14.1k
                int nonzero = 0;
938
                /* maybe old-style octal; c is first char of it */
939
                /* in any case, allow '0' as a literal */
940
16.1k
                while (1) {
941
16.1k
                    if (c == '_') {
942
232
                        c = tok_nextc(tok);
943
232
                        if (!Py_ISDIGIT(c)) {
944
3
                            tok_backup(tok, c);
945
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
946
3
                        }
947
232
                    }
948
16.1k
                    if (c != '0') {
949
14.1k
                        break;
950
14.1k
                    }
951
2.03k
                    c = tok_nextc(tok);
952
2.03k
                }
953
14.1k
                char* zeros_end = tok->cur;
954
14.1k
                if (Py_ISDIGIT(c)) {
955
483
                    nonzero = 1;
956
483
                    c = tok_decimal_tail(tok);
957
483
                    if (c == 0) {
958
1
                        return MAKE_TOKEN(ERRORTOKEN);
959
1
                    }
960
483
                }
961
14.1k
                if (c == '.') {
962
997
                    c = tok_nextc(tok);
963
997
                    goto fraction;
964
997
                }
965
13.1k
                else if (c == 'e' || c == 'E') {
966
697
                    goto exponent;
967
697
                }
968
12.4k
                else if (c == 'j' || c == 'J') {
969
772
                    goto imaginary;
970
772
                }
971
11.6k
                else if (nonzero && !tok->tok_extra_tokens) {
972
                    /* Old-style octal: now disallowed. */
973
34
                    tok_backup(tok, c);
974
34
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
975
34
                            tok, (int)(tok->start + 1 - tok->line_start),
976
34
                            (int)(zeros_end - tok->line_start),
977
34
                            "leading zeros in decimal integer "
978
34
                            "literals are not permitted; "
979
34
                            "use an 0o prefix for octal integers"));
980
34
                }
981
11.6k
                if (!verify_end_of_number(tok, c, "decimal")) {
982
27
                    return MAKE_TOKEN(ERRORTOKEN);
983
27
                }
984
11.6k
            }
985
29.3k
        }
986
59.3k
        else {
987
            /* Decimal */
988
59.3k
            c = tok_decimal_tail(tok);
989
59.3k
            if (c == 0) {
990
14
                return MAKE_TOKEN(ERRORTOKEN);
991
14
            }
992
59.3k
            {
993
                /* Accept floating-point numbers. */
994
59.3k
                if (c == '.') {
995
3.60k
                    c = tok_nextc(tok);
996
7.96k
        fraction:
997
                    /* Fraction */
998
7.96k
                    if (Py_ISDIGIT(c)) {
999
6.09k
                        c = tok_decimal_tail(tok);
1000
6.09k
                        if (c == 0) {
1001
1
                            return MAKE_TOKEN(ERRORTOKEN);
1002
1
                        }
1003
6.09k
                    }
1004
7.96k
                }
1005
63.6k
                if (c == 'e' || c == 'E') {
1006
9.36k
                    int e;
1007
10.0k
                  exponent:
1008
10.0k
                    e = c;
1009
                    /* Exponent part */
1010
10.0k
                    c = tok_nextc(tok);
1011
10.0k
                    if (c == '+' || c == '-') {
1012
4.02k
                        c = tok_nextc(tok);
1013
4.02k
                        if (!Py_ISDIGIT(c)) {
1014
14
                            tok_backup(tok, c);
1015
14
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1016
14
                        }
1017
6.04k
                    } else if (!Py_ISDIGIT(c)) {
1018
730
                        tok_backup(tok, c);
1019
730
                        if (!verify_end_of_number(tok, e, "decimal")) {
1020
47
                            return MAKE_TOKEN(ERRORTOKEN);
1021
47
                        }
1022
683
                        tok_backup(tok, e);
1023
683
                        p_start = tok->start;
1024
683
                        p_end = tok->cur;
1025
683
                        return MAKE_TOKEN(NUMBER);
1026
730
                    }
1027
9.31k
                    c = tok_decimal_tail(tok);
1028
9.31k
                    if (c == 0) {
1029
1
                        return MAKE_TOKEN(ERRORTOKEN);
1030
1
                    }
1031
9.31k
                }
1032
63.6k
                if (c == 'j' || c == 'J') {
1033
                    /* Imaginary part */
1034
4.84k
        imaginary:
1035
4.84k
                    c = tok_nextc(tok);
1036
4.84k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1037
13
                        return MAKE_TOKEN(ERRORTOKEN);
1038
13
                    }
1039
4.84k
                }
1040
59.5k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1041
142
                    return MAKE_TOKEN(ERRORTOKEN);
1042
142
                }
1043
63.6k
            }
1044
63.6k
        }
1045
91.0k
        tok_backup(tok, c);
1046
91.0k
        p_start = tok->start;
1047
91.0k
        p_end = tok->cur;
1048
91.0k
        return MAKE_TOKEN(NUMBER);
1049
88.7k
    }
1050
1051
1.03M
  f_string_quote:
1052
1.03M
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1053
18.6k
        && (c == '\'' || c == '"'))) {
1054
1055
18.6k
        int quote = c;
1056
18.6k
        int quote_size = 1;             /* 1 or 3 */
1057
1058
        /* Nodes of type STRING, especially multi line strings
1059
           must be handled differently in order to get both
1060
           the starting line number and the column offset right.
1061
           (cf. issue 16806) */
1062
18.6k
        tok->first_lineno = tok->lineno;
1063
18.6k
        tok->multi_line_start = tok->line_start;
1064
1065
        /* Find the quote size and start of string */
1066
18.6k
        int after_quote = tok_nextc(tok);
1067
18.6k
        if (after_quote == quote) {
1068
3.16k
            int after_after_quote = tok_nextc(tok);
1069
3.16k
            if (after_after_quote == quote) {
1070
1.16k
                quote_size = 3;
1071
1.16k
            }
1072
1.99k
            else {
1073
                // TODO: Check this
1074
1.99k
                tok_backup(tok, after_after_quote);
1075
1.99k
                tok_backup(tok, after_quote);
1076
1.99k
            }
1077
3.16k
        }
1078
18.6k
        if (after_quote != quote) {
1079
15.5k
            tok_backup(tok, after_quote);
1080
15.5k
        }
1081
1082
1083
18.6k
        p_start = tok->start;
1084
18.6k
        p_end = tok->cur;
1085
18.6k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1086
1
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1087
1
        }
1088
18.6k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1089
18.6k
        the_current_tok->kind = TOK_FSTRING_MODE;
1090
18.6k
        the_current_tok->quote = quote;
1091
18.6k
        the_current_tok->quote_size = quote_size;
1092
18.6k
        the_current_tok->start = tok->start;
1093
18.6k
        the_current_tok->multi_line_start = tok->line_start;
1094
18.6k
        the_current_tok->first_line = tok->lineno;
1095
18.6k
        the_current_tok->start_offset = -1;
1096
18.6k
        the_current_tok->multi_line_start_offset = -1;
1097
18.6k
        the_current_tok->last_expr_buffer = NULL;
1098
18.6k
        the_current_tok->last_expr_size = 0;
1099
18.6k
        the_current_tok->last_expr_end = -1;
1100
18.6k
        the_current_tok->in_format_spec = 0;
1101
18.6k
        the_current_tok->in_debug = 0;
1102
1103
18.6k
        enum string_kind_t string_kind = FSTRING;
1104
18.6k
        switch (*tok->start) {
1105
1.00k
            case 'T':
1106
5.01k
            case 't':
1107
5.01k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1108
5.01k
                string_kind = TSTRING;
1109
5.01k
                break;
1110
1.98k
            case 'F':
1111
13.0k
            case 'f':
1112
13.0k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1113
13.0k
                break;
1114
196
            case 'R':
1115
572
            case 'r':
1116
572
                the_current_tok->raw = 1;
1117
572
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1118
166
                    string_kind = TSTRING;
1119
166
                }
1120
572
                break;
1121
0
            default:
1122
0
                Py_UNREACHABLE();
1123
18.6k
        }
1124
1125
18.6k
        the_current_tok->string_kind = string_kind;
1126
18.6k
        the_current_tok->curly_bracket_depth = 0;
1127
18.6k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1128
18.6k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1129
18.6k
    }
1130
1131
1.01M
  letter_quote:
1132
    /* String */
1133
1.01M
    if (c == '\'' || c == '"') {
1134
39.1k
        int quote = c;
1135
39.1k
        int quote_size = 1;             /* 1 or 3 */
1136
39.1k
        int end_quote_size = 0;
1137
39.1k
        int has_escaped_quote = 0;
1138
1139
        /* Nodes of type STRING, especially multi line strings
1140
           must be handled differently in order to get both
1141
           the starting line number and the column offset right.
1142
           (cf. issue 16806) */
1143
39.1k
        tok->first_lineno = tok->lineno;
1144
39.1k
        tok->multi_line_start = tok->line_start;
1145
1146
        /* Find the quote size and start of string */
1147
39.1k
        c = tok_nextc(tok);
1148
39.1k
        if (c == quote) {
1149
7.41k
            c = tok_nextc(tok);
1150
7.41k
            if (c == quote) {
1151
1.50k
                quote_size = 3;
1152
1.50k
            }
1153
5.90k
            else {
1154
5.90k
                end_quote_size = 1;     /* empty string found */
1155
5.90k
            }
1156
7.41k
        }
1157
39.1k
        if (c != quote) {
1158
37.6k
            tok_backup(tok, c);
1159
37.6k
        }
1160
1161
        /* Get rest of string */
1162
552k
        while (end_quote_size != quote_size) {
1163
513k
            c = tok_nextc(tok);
1164
513k
            if (tok->done == E_ERROR) {
1165
0
                return MAKE_TOKEN(ERRORTOKEN);
1166
0
            }
1167
513k
            if (tok->done == E_DECODE) {
1168
0
                break;
1169
0
            }
1170
513k
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1171
382
                assert(tok->multi_line_start != NULL);
1172
                // shift the tok_state's location into
1173
                // the start of string, and report the error
1174
                // from the initial quote character
1175
382
                tok->cur = (char *)tok->start;
1176
382
                tok->cur++;
1177
382
                tok->line_start = tok->multi_line_start;
1178
382
                int start = tok->lineno;
1179
382
                tok->lineno = tok->first_lineno;
1180
1181
382
                if (INSIDE_FSTRING(tok)) {
1182
                    /* When we are in an f-string, before raising the
1183
                     * unterminated string literal error, check whether
1184
                     * the initial quote matches the f-string's quote
1185
                     * and, if it does, then this must be a missing '}' token
1186
                     * so raise the proper error */
1187
43
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1188
43
                    if (the_current_tok->quote == quote &&
1189
31
                        the_current_tok->quote_size == quote_size) {
1190
30
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1191
30
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1192
30
                    }
1193
43
                }
1194
1195
352
                if (quote_size == 3) {
1196
28
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1197
28
                                     " (detected at line %d)", start);
1198
28
                    if (c != '\n') {
1199
28
                        tok->done = E_EOFS;
1200
28
                    }
1201
28
                    return MAKE_TOKEN(ERRORTOKEN);
1202
28
                }
1203
324
                else {
1204
324
                    if (has_escaped_quote) {
1205
9
                        _PyTokenizer_syntaxerror(
1206
9
                            tok,
1207
9
                            "unterminated string literal (detected at line %d); "
1208
9
                            "perhaps you escaped the end quote?",
1209
9
                            start
1210
9
                        );
1211
315
                    } else {
1212
315
                        _PyTokenizer_syntaxerror(
1213
315
                            tok, "unterminated string literal (detected at line %d)", start
1214
315
                        );
1215
315
                    }
1216
324
                    if (c != '\n') {
1217
10
                        tok->done = E_EOLS;
1218
10
                    }
1219
324
                    return MAKE_TOKEN(ERRORTOKEN);
1220
324
                }
1221
352
            }
1222
513k
            if (c == quote) {
1223
36.8k
                end_quote_size += 1;
1224
36.8k
            }
1225
476k
            else {
1226
476k
                end_quote_size = 0;
1227
476k
                if (c == '\\') {
1228
23.9k
                    c = tok_nextc(tok);  /* skip escaped char */
1229
23.9k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1230
671
                        has_escaped_quote = 1;
1231
671
                    }
1232
23.9k
                    if (c == '\r') {
1233
0
                        c = tok_nextc(tok);
1234
0
                    }
1235
23.9k
                }
1236
476k
            }
1237
513k
        }
1238
1239
38.7k
        p_start = tok->start;
1240
38.7k
        p_end = tok->cur;
1241
38.7k
        return MAKE_TOKEN(STRING);
1242
39.1k
    }
1243
1244
    /* Line continuation */
1245
979k
    if (c == '\\') {
1246
336
        if ((c = tok_continuation_line(tok)) == -1) {
1247
83
            return MAKE_TOKEN(ERRORTOKEN);
1248
83
        }
1249
253
        tok->cont_line = 1;
1250
253
        goto again; /* Read next line */
1251
336
    }
1252
1253
    /* Punctuation character */
1254
979k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1255
979k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1256
        /* This code block gets executed before the curly_bracket_depth is incremented
1257
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1258
         * to adjust it manually */
1259
62.8k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1260
62.8k
        int in_format_spec = current_tok->in_format_spec;
1261
62.8k
         int cursor_in_format_with_debug =
1262
62.8k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1263
62.8k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1264
62.8k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1265
0
            return MAKE_TOKEN(ENDMARKER);
1266
0
        }
1267
62.8k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1268
0
            return MAKE_TOKEN(ERRORTOKEN);
1269
0
        }
1270
1271
62.8k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1272
5.65k
            current_tok->kind = TOK_FSTRING_MODE;
1273
5.65k
            current_tok->in_format_spec = 1;
1274
5.65k
            p_start = tok->start;
1275
5.65k
            p_end = tok->cur;
1276
5.65k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1277
5.65k
        }
1278
62.8k
    }
1279
1280
    /* Check for two-character token */
1281
973k
    {
1282
973k
        int c2 = tok_nextc(tok);
1283
973k
        int current_token = _PyToken_TwoChars(c, c2);
1284
973k
        if (current_token != OP) {
1285
23.4k
            int c3 = tok_nextc(tok);
1286
23.4k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1287
23.4k
            if (current_token3 != OP) {
1288
1.30k
                current_token = current_token3;
1289
1.30k
            }
1290
22.1k
            else {
1291
22.1k
                tok_backup(tok, c3);
1292
22.1k
            }
1293
23.4k
            p_start = tok->start;
1294
23.4k
            p_end = tok->cur;
1295
23.4k
            return MAKE_TOKEN(current_token);
1296
23.4k
        }
1297
950k
        tok_backup(tok, c2);
1298
950k
    }
1299
1300
    /* Keep track of parentheses nesting level */
1301
0
    switch (c) {
1302
72.7k
    case '(':
1303
111k
    case '[':
1304
158k
    case '{':
1305
158k
        if (tok->level >= MAXLEVEL) {
1306
12
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1307
12
        }
1308
158k
        tok->parenstack[tok->level] = c;
1309
158k
        tok->parenlinenostack[tok->level] = tok->lineno;
1310
158k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1311
158k
        tok->level++;
1312
158k
        if (INSIDE_FSTRING(tok)) {
1313
34.3k
            current_tok->curly_bracket_depth++;
1314
34.3k
        }
1315
158k
        break;
1316
46.3k
    case ')':
1317
53.0k
    case ']':
1318
81.3k
    case '}':
1319
81.3k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1320
52
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1321
52
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1322
52
        }
1323
81.2k
        if (!tok->tok_extra_tokens && !tok->level) {
1324
231
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1325
231
        }
1326
81.0k
        if (tok->level > 0) {
1327
81.0k
            tok->level--;
1328
81.0k
            int opening = tok->parenstack[tok->level];
1329
81.0k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1330
34.7k
                                            (opening == '[' && c == ']') ||
1331
28.1k
                                            (opening == '{' && c == '}'))) {
1332
                /* If the opening bracket belongs to an f-string's expression
1333
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1334
                nested expression, then instead of matching a different
1335
                syntactical construct with it, we'll throw an unmatched
1336
                parentheses error. */
1337
59
                if (INSIDE_FSTRING(tok) && opening == '{') {
1338
3
                    assert(current_tok->curly_bracket_depth >= 0);
1339
3
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1340
3
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1341
2
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1342
2
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1343
2
                    }
1344
3
                }
1345
57
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1346
6
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1347
6
                            "closing parenthesis '%c' does not match "
1348
6
                            "opening parenthesis '%c' on line %d",
1349
6
                            c, opening, tok->parenlinenostack[tok->level]));
1350
6
                }
1351
51
                else {
1352
51
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1353
51
                            "closing parenthesis '%c' does not match "
1354
51
                            "opening parenthesis '%c'",
1355
51
                            c, opening));
1356
51
                }
1357
57
            }
1358
81.0k
        }
1359
1360
80.9k
        if (INSIDE_FSTRING(tok)) {
1361
25.0k
            current_tok->curly_bracket_depth--;
1362
25.0k
            if (current_tok->curly_bracket_depth < 0) {
1363
0
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1364
0
                    TOK_GET_STRING_PREFIX(tok), c));
1365
0
            }
1366
25.0k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1367
23.5k
                current_tok->curly_bracket_expr_start_depth--;
1368
23.5k
                current_tok->kind = TOK_FSTRING_MODE;
1369
23.5k
                current_tok->in_format_spec = 0;
1370
23.5k
                current_tok->in_debug = 0;
1371
23.5k
            }
1372
25.0k
        }
1373
80.9k
        break;
1374
711k
    default:
1375
711k
        break;
1376
950k
    }
1377
1378
950k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1379
434
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1380
434
    }
1381
1382
949k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1383
6.36k
        current_tok->in_debug = 1;
1384
6.36k
    }
1385
1386
    /* Punctuation character */
1387
949k
    p_start = tok->start;
1388
949k
    p_end = tok->cur;
1389
949k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1390
950k
}
1391
1392
static int
1393
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1394
58.1k
{
1395
58.1k
    const char *p_start = NULL;
1396
58.1k
    const char *p_end = NULL;
1397
58.1k
    int end_quote_size = 0;
1398
58.1k
    int unicode_escape = 0;
1399
1400
58.1k
    tok->start = tok->cur;
1401
58.1k
    tok->first_lineno = tok->lineno;
1402
58.1k
    tok->starting_col_offset = tok->col_offset;
1403
1404
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1405
    // before it.
1406
58.1k
    int start_char = tok_nextc(tok);
1407
58.1k
    if (start_char == '{') {
1408
16.6k
        int peek1 = tok_nextc(tok);
1409
16.6k
        tok_backup(tok, peek1);
1410
16.6k
        tok_backup(tok, start_char);
1411
16.6k
        if (peek1 != '{') {
1412
14.0k
            current_tok->curly_bracket_expr_start_depth++;
1413
14.0k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1414
4
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1415
4
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1416
4
            }
1417
14.0k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1418
14.0k
            return tok_get_normal_mode(tok, current_tok, token);
1419
14.0k
        }
1420
16.6k
    }
1421
41.4k
    else {
1422
41.4k
        tok_backup(tok, start_char);
1423
41.4k
    }
1424
1425
    // Check if we are at the end of the string
1426
62.8k
    for (int i = 0; i < current_tok->quote_size; i++) {
1427
48.7k
        int quote = tok_nextc(tok);
1428
48.7k
        if (quote != current_tok->quote) {
1429
29.9k
            tok_backup(tok, quote);
1430
29.9k
            goto f_string_middle;
1431
29.9k
        }
1432
48.7k
    }
1433
1434
14.1k
    if (current_tok->last_expr_buffer != NULL) {
1435
8.98k
        PyMem_Free(current_tok->last_expr_buffer);
1436
8.98k
        current_tok->last_expr_buffer = NULL;
1437
8.98k
        current_tok->last_expr_size = 0;
1438
8.98k
        current_tok->last_expr_end = -1;
1439
8.98k
    }
1440
1441
14.1k
    p_start = tok->start;
1442
14.1k
    p_end = tok->cur;
1443
14.1k
    tok->tok_mode_stack_index--;
1444
14.1k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1445
1446
29.9k
f_string_middle:
1447
1448
    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
1449
    // this.
1450
29.9k
    tok->multi_line_start = tok->line_start;
1451
217k
    while (end_quote_size != current_tok->quote_size) {
1452
212k
        int c = tok_nextc(tok);
1453
212k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1454
0
            return MAKE_TOKEN(ERRORTOKEN);
1455
0
        }
1456
212k
        int in_format_spec = (
1457
212k
                current_tok->in_format_spec
1458
13.9k
                &&
1459
13.9k
                INSIDE_FSTRING_EXPR(current_tok)
1460
212k
        );
1461
1462
212k
       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1463
373
            if (tok->decoding_erred) {
1464
0
                return MAKE_TOKEN(ERRORTOKEN);
1465
0
            }
1466
1467
            // If we are in a format spec and we found a newline,
1468
            // it means that the format spec ends here and we should
1469
            // return to the regular mode.
1470
373
            if (in_format_spec && c == '\n') {
1471
44
                if (current_tok->quote_size == 1) {
1472
44
                    return MAKE_TOKEN(
1473
44
                        _PyTokenizer_syntaxerror(
1474
44
                            tok,
1475
44
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1476
44
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1477
44
                        )
1478
44
                    );
1479
44
                }
1480
0
                tok_backup(tok, c);
1481
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1482
0
                current_tok->in_format_spec = 0;
1483
0
                p_start = tok->start;
1484
0
                p_end = tok->cur;
1485
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1486
44
            }
1487
1488
373
            assert(tok->multi_line_start != NULL);
1489
            // shift the tok_state's location into
1490
            // the start of string, and report the error
1491
            // from the initial quote character
1492
329
            tok->cur = (char *)current_tok->start;
1493
329
            tok->cur++;
1494
329
            tok->line_start = current_tok->multi_line_start;
1495
329
            int start = tok->lineno;
1496
1497
329
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1498
329
            tok->lineno = the_current_tok->first_line;
1499
1500
329
            if (current_tok->quote_size == 3) {
1501
29
                _PyTokenizer_syntaxerror(tok,
1502
29
                                    "unterminated triple-quoted %c-string literal"
1503
29
                                    " (detected at line %d)",
1504
29
                                    TOK_GET_STRING_PREFIX(tok), start);
1505
29
                if (c != '\n') {
1506
29
                    tok->done = E_EOFS;
1507
29
                }
1508
29
                return MAKE_TOKEN(ERRORTOKEN);
1509
29
            }
1510
300
            else {
1511
300
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1512
300
                                    "unterminated %c-string literal (detected at"
1513
300
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1514
300
            }
1515
329
        }
1516
1517
211k
        if (c == current_tok->quote) {
1518
11.0k
            end_quote_size += 1;
1519
11.0k
            continue;
1520
200k
        } else {
1521
200k
            end_quote_size = 0;
1522
200k
        }
1523
1524
200k
        if (c == '{') {
1525
18.1k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1526
0
                return MAKE_TOKEN(ENDMARKER);
1527
0
            }
1528
18.1k
            int peek = tok_nextc(tok);
1529
18.1k
            if (peek != '{' || in_format_spec) {
1530
14.9k
                tok_backup(tok, peek);
1531
14.9k
                tok_backup(tok, c);
1532
14.9k
                current_tok->curly_bracket_expr_start_depth++;
1533
14.9k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1534
8
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1535
8
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1536
8
                }
1537
14.9k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1538
14.9k
                current_tok->in_format_spec = 0;
1539
14.9k
                p_start = tok->start;
1540
14.9k
                p_end = tok->cur;
1541
14.9k
            } else {
1542
3.16k
                p_start = tok->start;
1543
3.16k
                p_end = tok->cur - 1;
1544
3.16k
            }
1545
18.1k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1546
182k
        } else if (c == '}') {
1547
5.73k
            if (unicode_escape) {
1548
314
                p_start = tok->start;
1549
314
                p_end = tok->cur;
1550
314
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1551
314
            }
1552
5.41k
            int peek = tok_nextc(tok);
1553
1554
            // The tokenizer can only be in the format spec if we have already completed the expression
1555
            // scanning (indicated by the end of the expression being set) and we are not at the top level
1556
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
1557
            // brackets, we can bypass it here.
1558
5.41k
            int cursor = current_tok->curly_bracket_depth;
1559
5.41k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1560
1.14k
                p_start = tok->start;
1561
1.14k
                p_end = tok->cur - 1;
1562
4.27k
            } else {
1563
4.27k
                tok_backup(tok, peek);
1564
4.27k
                tok_backup(tok, c);
1565
4.27k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1566
4.27k
                current_tok->in_format_spec = 0;
1567
4.27k
                p_start = tok->start;
1568
4.27k
                p_end = tok->cur;
1569
4.27k
            }
1570
5.41k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1571
176k
        } else if (c == '\\') {
1572
6.05k
            int peek = tok_nextc(tok);
1573
6.05k
            if (peek == '\r') {
1574
0
                peek = tok_nextc(tok);
1575
0
            }
1576
            // Special case when the backslash is right before a curly
1577
            // brace. We have to restore and return the control back
1578
            // to the loop for the next iteration.
1579
6.05k
            if (peek == '{' || peek == '}') {
1580
917
                if (!current_tok->raw) {
1581
722
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1582
0
                        return MAKE_TOKEN(ERRORTOKEN);
1583
0
                    }
1584
722
                }
1585
917
                tok_backup(tok, peek);
1586
917
                continue;
1587
917
            }
1588
1589
5.13k
            if (!current_tok->raw) {
1590
4.88k
                if (peek == 'N') {
1591
                    /* Handle named unicode escapes (\N{BULLET}) */
1592
535
                    peek = tok_nextc(tok);
1593
535
                    if (peek == '{') {
1594
326
                        unicode_escape = 1;
1595
326
                    } else {
1596
209
                        tok_backup(tok, peek);
1597
209
                    }
1598
535
                }
1599
4.88k
            } /* else {
1600
                skip the escaped character
1601
            }*/
1602
5.13k
        }
1603
200k
    }
1604
1605
    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
1606
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1607
13.1k
    for (int i = 0; i < current_tok->quote_size; i++) {
1608
7.48k
        tok_backup(tok, current_tok->quote);
1609
7.48k
    }
1610
5.66k
    p_start = tok->start;
1611
5.66k
    p_end = tok->cur;
1612
5.66k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1613
29.9k
}
1614
1615
static int
1616
tok_get(struct tok_state *tok, struct token *token)
1617
2.15M
{
1618
2.15M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1619
2.15M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1620
2.09M
        return tok_get_normal_mode(tok, current_tok, token);
1621
2.09M
    } else {
1622
58.1k
        return tok_get_fstring_mode(tok, current_tok, token);
1623
58.1k
    }
1624
2.15M
}
1625
1626
int
1627
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1628
2.15M
{
1629
2.15M
    int result = tok_get(tok, token);
1630
2.15M
    if (tok->decoding_erred) {
1631
0
        result = ERRORTOKEN;
1632
0
        tok->done = E_DECODE;
1633
0
    }
1634
2.15M
    return result;
1635
2.15M
}