Coverage Report

Created: 2025-08-26 06:26

/src/cpython/Parser/lexer/lexer.c
 Line |  Count | Source
------+--------+------------------------------------------------------------------
    1 |        | #include "Python.h"
    2 |        | #include "pycore_token.h"
    3 |        | #include "pycore_unicodeobject.h"
    4 |        | #include "errcode.h"
    5 |        |
    6 |        | #include "state.h"
    7 |        | #include "../tokenizer/helpers.h"
    8 |        |
    9 |        | /* Alternate tab spacing */
   10 |  1.62k | #define ALTTABSIZE 1
   11 |        |
   12 |  1.78M | #define is_potential_identifier_start(c) (\
   13 |  1.78M |               (c >= 'a' && c <= 'z')\
   14 |  1.78M |                || (c >= 'A' && c <= 'Z')\
   15 |  1.78M |                || c == '_'\
   16 |  1.78M |                || (c >= 128))
   17 |        |
   18 |  2.40M | #define is_potential_identifier_char(c) (\
   19 |  2.40M |               (c >= 'a' && c <= 'z')\
   20 |  2.40M |                || (c >= 'A' && c <= 'Z')\
   21 |  2.40M |                || (c >= '0' && c <= '9')\
   22 |  2.40M |                || c == '_'\
   23 |  2.40M |                || (c >= 128))
   24 |        |
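An illustrative aside on the two macros above: they accept any byte >= 128 as a "potential" identifier character so that UTF-8 sequences flow through the hot path; real PEP 3131 validation happens later in verify_identifier(). A minimal standalone sketch (hypothetical demo, not part of the measured file):

    #include <stdio.h>

    /* Same shape as the macros above: ASCII letters, '_', and any byte
       >= 128 may start an identifier; digits may only continue one. */
    static int is_ident_start(int c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
               || c == '_' || c >= 128;
    }
    static int is_ident_char(int c) {
        return is_ident_start(c) || (c >= '0' && c <= '9');
    }

    int main(void) {
        /* 0xC3 is the first byte of UTF-8 'é': accepted here, checked later. */
        printf("%d %d %d %d\n", is_ident_start('a'), is_ident_start('1'),
               is_ident_char('1'), is_ident_start(0xC3));  /* 1 0 1 1 */
        return 0;
    }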
   25 |        | #ifdef Py_DEBUG
   26 |        | static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
   27 |        |     assert(tok->tok_mode_stack_index >= 0);
   28 |        |     assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
   29 |        |     return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
   30 |        | }
   31 |        | static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
   32 |        |     assert(tok->tok_mode_stack_index >= 0);
   33 |        |     assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
   34 |        |     return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
   35 |        | }
   36 |        | #else
   37 |  1.91M | #define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
   38 |  17.2k | #define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
   39 |        | #endif
   40 |        |
   41 |        | #define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
   42 |        | #define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
   43 |     35 | #define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
   44 |  1.79M | #define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
   45 |      0 | #define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
   46 |      0 |                 _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
   47 |        |
   48 |        | /* Spaces in this constant are treated as "zero or more spaces or tabs" when
   49 |        |    tokenizing. */
   50 |        | static const char* type_comment_prefix = "# type: ";
   51 |        |
   52 |        | static inline int
   53 |        | contains_null_bytes(const char* str, size_t size)
   54 |   228k | {
   55 |   228k |     return memchr(str, 0, size) != NULL;
   56 |   228k | }
   57 |        |
   58 |        | /* Get next char, updating state; error code goes into tok->done */
   59 |        | static int
   60 |        | tok_nextc(struct tok_state *tok)
   61 |  10.9M | {
   62 |  10.9M |     int rc;
   63 |  11.1M |     for (;;) {
   64 |  11.1M |         if (tok->cur != tok->inp) {
   65 |  10.9M |             if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
   66 |      0 |                 tok->done = E_COLUMNOVERFLOW;
   67 |      0 |                 return EOF;
   68 |      0 |             }
   69 |  10.9M |             tok->col_offset++;
   70 |  10.9M |             return Py_CHARMASK(*tok->cur++); /* Fast path */
   71 |  10.9M |         }
   72 |   280k |         if (tok->done != E_OK) {
   73 |  34.4k |             return EOF;
   74 |  34.4k |         }
   75 |   245k |         rc = tok->underflow(tok);
   76 |        | #if defined(Py_DEBUG)
   77 |        |         if (tok->debug) {
   78 |        |             fprintf(stderr, "line[%d] = ", tok->lineno);
   79 |        |             _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
   80 |        |             fprintf(stderr, "  tok->done = %d\n", tok->done);
   81 |        |         }
   82 |        | #endif
   83 |   245k |         if (!rc) {
   84 |  17.3k |             tok->cur = tok->inp;
   85 |  17.3k |             return EOF;
   86 |  17.3k |         }
   87 |   228k |         tok->line_start = tok->cur;
   88 |        |
   89 |   228k |         if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
   90 |      0 |             _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
   91 |      0 |             tok->cur = tok->inp;
   92 |      0 |             return EOF;
   93 |      0 |         }
   94 |   228k |     }
   95 |  10.9M |     Py_UNREACHABLE();
   96 |  10.9M | }
   97 |        |
   98 |        | /* Back-up one character */
   99 |        | static void
  100 |        | tok_backup(struct tok_state *tok, int c)
  101 |  3.76M | {
  102 |  3.76M |     if (c != EOF) {
  103 |  3.73M |         if (--tok->cur < tok->buf) {
  104 |      0 |             Py_FatalError("tokenizer beginning of buffer");
  105 |      0 |         }
  106 |  3.73M |         if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
  107 |      0 |             Py_FatalError("tok_backup: wrong character");
  108 |      0 |         }
  109 |  3.73M |         tok->col_offset--;
  110 |  3.73M |     }
  111 |  3.76M | }
  112 |        |
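tok_nextc() and tok_backup() form the lexer's one-character lookahead primitive: read a character, inspect it, and push it back if it is not wanted. A hypothetical, simplified reader illustrating the pairing (not the real tok_state):

    #include <stdio.h>

    /* backup() may only push back the character just read; the real lexer
       enforces this with the Py_FatalError checks above. */
    struct reader { const char *cur, *end; };

    static int nextc(struct reader *r) {
        return r->cur == r->end ? EOF : (unsigned char)*r->cur++;
    }
    static void backup(struct reader *r, int c) {
        if (c != EOF) {
            r->cur--;  /* the real lexer asserts *cur actually matches c */
        }
    }

    int main(void) {
        const char *src = "ab";
        struct reader r = { src, src + 2 };
        int c = nextc(&r);   /* peek at 'a' */
        backup(&r, c);       /* un-read it */
        printf("%c %c\n", nextc(&r), nextc(&r));  /* prints: a b */
        return 0;
    }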
  113 |        | static int
  114 |  22.9k | set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
  115 |  22.9k |     assert(token != NULL);
  116 |  22.9k |     assert(c == '}' || c == ':' || c == '!');
  117 |  22.9k |     tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
  118 |        |
  119 |  22.9k |     if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
  120 |  13.1k |         return 0;
  121 |  13.1k |     }
  122 |  9.83k |     PyObject *res = NULL;
  123 |        |
  124 |        |     // Look for a # character outside of string literals
  125 |  9.83k |     int hash_detected = 0;
  126 |  9.83k |     int in_string = 0;
  127 |  9.83k |     char quote_char = 0;
  128 |        |
  129 |  1.91M |     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
  130 |  1.90M |         char ch = tok_mode->last_expr_buffer[i];
  131 |        |
  132 |        |         // Skip escaped characters
  133 |  1.90M |         if (ch == '\\') {
  134 |  38.3k |             i++;
  135 |  38.3k |             continue;
  136 |  38.3k |         }
  137 |        |
  138 |        |         // Handle quotes
  139 |  1.86M |         if (ch == '"' || ch == '\'') {
  140 |        |             // The following if/else block works because there is an odd
  141 |        |             // number of quotes in STRING tokens and the lexer only ever
  142 |        |             // reaches this function with valid STRING tokens.
  143 |        |             // For example: """hello"""
  144 |        |             // First quote: in_string = 1
  145 |        |             // Second quote: in_string = 0
  146 |        |             // Third quote: in_string = 1
  147 |   219k |             if (!in_string) {
  148 |  76.7k |                 in_string = 1;
  149 |  76.7k |                 quote_char = ch;
  150 |  76.7k |             }
  151 |   142k |             else if (ch == quote_char) {
  152 |  75.5k |                 in_string = 0;
  153 |  75.5k |             }
  154 |   219k |             continue;
  155 |   219k |         }
  156 |        |
  157 |        |         // Check for # outside strings
  158 |  1.64M |         if (ch == '#' && !in_string) {
  159 |    841 |             hash_detected = 1;
  160 |    841 |             break;
  161 |    841 |         }
  162 |  1.64M |     }
  163 |        |     // If we found a # character in the expression, we need to handle comments
  164 |  9.83k |     if (hash_detected) {
  165 |        |         // Allocate buffer for processed result
  166 |    841 |         char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
  167 |    841 |         if (!result) {
  168 |      0 |             return -1;
  169 |      0 |         }
  170 |        |
  171 |    841 |         Py_ssize_t i = 0;  // Input position
  172 |    841 |         Py_ssize_t j = 0;  // Output position
  173 |    841 |         in_string = 0;     // Whether we're in a string
  174 |    841 |         quote_char = 0;    // Current string quote char
  175 |        |
  176 |        |         // Process each character
  177 |  61.0k |         while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
  178 |  60.1k |             char ch = tok_mode->last_expr_buffer[i];
  179 |        |
  180 |        |             // Handle string quotes
  181 |  60.1k |             if (ch == '"' || ch == '\'') {
  182 |        |                 // See comment above to understand this part
  183 |  8.79k |                 if (!in_string) {
  184 |  3.48k |                     in_string = 1;
  185 |  3.48k |                     quote_char = ch;
  186 |  5.31k |                 } else if (ch == quote_char) {
  187 |  3.46k |                     in_string = 0;
  188 |  3.46k |                 }
  189 |  8.79k |                 result[j++] = ch;
  190 |  8.79k |             }
  191 |        |             // Skip comments
  192 |  51.3k |             else if (ch == '#' && !in_string) {
  193 |  55.6k |                 while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
  194 |  55.6k |                        tok_mode->last_expr_buffer[i] != '\n') {
  195 |  54.5k |                     i++;
  196 |  54.5k |                 }
  197 |  1.03k |                 if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
  198 |    287 |                     result[j++] = '\n';
  199 |    287 |                 }
  200 |  1.03k |             }
  201 |        |             // Copy other chars
  202 |  50.3k |             else {
  203 |  50.3k |                 result[j++] = ch;
  204 |  50.3k |             }
  205 |  60.1k |             i++;
  206 |  60.1k |         }
  207 |        |
  208 |    841 |         result[j] = '\0';  // Null-terminate the result string
  209 |    841 |         res = PyUnicode_DecodeUTF8(result, j, NULL);
  210 |    841 |         PyMem_Free(result);
  211 |  8.99k |     } else {
  212 |  8.99k |         res = PyUnicode_DecodeUTF8(
  213 |  8.99k |             tok_mode->last_expr_buffer,
  214 |  8.99k |             tok_mode->last_expr_size - tok_mode->last_expr_end,
  215 |  8.99k |             NULL
  216 |  8.99k |         );
  217 |  8.99k |     }
  218 |        |
  219 |  9.83k |     if (!res) {
  220 |     10 |         return -1;
  221 |     10 |     }
  222 |  9.82k |     token->metadata = res;
  223 |  9.82k |     return 0;
  224 |  9.83k | }
  225 |        |
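The scan above relies on quotes pairing up inside a valid expression to tell string contents apart from a trailing comment. A hypothetical standalone demo of the same quote-parity technique:

    #include <stdio.h>
    #include <string.h>

    /* Find a '#' outside any string literal: track whether we are inside a
       string and which quote opened it, skipping escaped characters first. */
    static int find_hash(const char *buf, size_t n) {
        int in_string = 0;
        char quote_char = 0;
        for (size_t i = 0; i < n; i++) {
            char ch = buf[i];
            if (ch == '\\') { i++; continue; }
            if (ch == '"' || ch == '\'') {
                if (!in_string) { in_string = 1; quote_char = ch; }
                else if (ch == quote_char) { in_string = 0; }
                continue;
            }
            if (ch == '#' && !in_string) return (int)i;
        }
        return -1;
    }

    int main(void) {
        const char *expr = "x + '#not a comment' # real comment";
        printf("%d\n", find_hash(expr, strlen(expr)));  /* index of the 2nd '#' */
        return 0;
    }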
  226 |        | int
  227 |        | _PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
  228 |  64.7k | {
  229 |  64.7k |     assert(tok->cur != NULL);
  230 |        |
  231 |  64.7k |     Py_ssize_t size = strlen(tok->cur);
  232 |  64.7k |     tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
  233 |        |
  234 |  64.7k |     switch (cur) {
  235 |      0 |         case 0:
  236 |      0 |             if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
  237 |      0 |                 return 1;
  238 |      0 |             }
  239 |      0 |             char *new_buffer = PyMem_Realloc(
  240 |      0 |                 tok_mode->last_expr_buffer,
  241 |      0 |                 tok_mode->last_expr_size + size
  242 |      0 |             );
  243 |      0 |             if (new_buffer == NULL) {
  244 |      0 |                 PyMem_Free(tok_mode->last_expr_buffer);
  245 |      0 |                 goto error;
  246 |      0 |             }
  247 |      0 |             tok_mode->last_expr_buffer = new_buffer;
  248 |      0 |             strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
  249 |      0 |             tok_mode->last_expr_size += size;
  250 |      0 |             break;
  251 |  41.7k |         case '{':
  252 |  41.7k |             if (tok_mode->last_expr_buffer != NULL) {
  253 |  29.7k |                 PyMem_Free(tok_mode->last_expr_buffer);
  254 |  29.7k |             }
  255 |  41.7k |             tok_mode->last_expr_buffer = PyMem_Malloc(size);
  256 |  41.7k |             if (tok_mode->last_expr_buffer == NULL) {
  257 |      0 |                 goto error;
  258 |      0 |             }
  259 |  41.7k |             tok_mode->last_expr_size = size;
  260 |  41.7k |             tok_mode->last_expr_end = -1;
  261 |  41.7k |             strncpy(tok_mode->last_expr_buffer, tok->cur, size);
  262 |  41.7k |             break;
  263 |  18.4k |         case '}':
  264 |  20.1k |         case '!':
  265 |  20.1k |             tok_mode->last_expr_end = strlen(tok->start);
  266 |  20.1k |             break;
  267 |  2.86k |         case ':':
  268 |  2.86k |             if (tok_mode->last_expr_end == -1) {
  269 |  2.60k |                 tok_mode->last_expr_end = strlen(tok->start);
  270 |  2.60k |             }
  271 |  2.86k |             break;
  272 |      0 |         default:
  273 |      0 |             Py_UNREACHABLE();
  274 |  64.7k |     }
  275 |  64.7k |     return 1;
  276 |      0 | error:
  277 |      0 |     tok->done = E_NOMEM;
  278 |      0 |     return 0;
  279 |  64.7k | }
  280 |        |
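The strlen() calls above implement distance-from-end bookkeeping on the NUL-terminated input: on '{' the function snapshots how far the cursor is from the end of the input, and on '}', '!' or ':' it records the remaining distance, so the difference is the expression's length. A hypothetical sketch of that arithmetic:

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const char *line = "x + y}rest";   /* input as seen just after '{' */
        size_t last_expr_size = strlen(line);        /* snapshot on '{' */
        const char *at_close = strchr(line, '}');    /* lexer stops at '}' */
        size_t last_expr_end = strlen(at_close);     /* snapshot on '}' */
        /* prints "x + y": the expression between '{' and '}' */
        printf("%.*s\n", (int)(last_expr_size - last_expr_end), line);
        return 0;
    }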
  281 |        | static int
  282 |        | lookahead(struct tok_state *tok, const char *test)
  283 |  7.64k | {
  284 |  7.64k |     const char *s = test;
  285 |  7.64k |     int res = 0;
  286 |  20.2k |     while (1) {
  287 |  20.2k |         int c = tok_nextc(tok);
  288 |  20.2k |         if (*s == 0) {
  289 |  7.55k |             res = !is_potential_identifier_char(c);
  290 |  7.55k |         }
  291 |  12.7k |         else if (c == *s) {
  292 |  12.6k |             s++;
  293 |  12.6k |             continue;
  294 |  12.6k |         }
  295 |        |
  296 |  7.64k |         tok_backup(tok, c);
  297 |  20.2k |         while (s != test) {
  298 |  12.6k |             tok_backup(tok, *--s);
  299 |  12.6k |         }
  300 |  7.64k |         return res;
  301 |  20.2k |     }
  302 |  7.64k | }
  303 |        |
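lookahead() checks whether the rest of `test` follows the cursor without being continued by another identifier character, and then rewinds everything it consumed. A hypothetical string-based version of the same check:

    #include <stdio.h>
    #include <string.h>

    static int lookahead_str(const char *cur, const char *test) {
        size_t n = strlen(test);
        if (strncmp(cur, test, n) != 0) {
            return 0;
        }
        char c = cur[n];  /* the match must not extend into an identifier */
        return !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                 || (c >= '0' && c <= '9') || c == '_'
                 || (unsigned char)c >= 128);
    }

    int main(void) {
        printf("%d %d\n", lookahead_str("nd x", "nd"),  /* 1: "1and x" case */
               lookahead_str("ndx", "nd"));             /* 0: identifier continues */
        return 0;
    }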
  304 |        | static int
  305 |   104k | verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
  306 |   104k |     if (tok->tok_extra_tokens) {
  307 |        |         // When we are parsing extra tokens, we don't want to emit warnings
  308 |        |         // about invalid literals, because we want to be a bit more liberal.
  309 |      0 |         return 1;
  310 |      0 |     }
  311 |        |     /* Emit a deprecation warning only if the numeric literal is immediately
  312 |        |      * followed by one of the keywords that can occur after a numeric literal
  313 |        |      * in valid code: "and", "else", "for", "if", "in", "is" and "or".
  314 |        |      * This allows gradually deprecating existing valid code without adding a
  315 |        |      * warning before the error in most cases of invalid numeric literals
  316 |        |      * (which would be confusing and break existing tests).
  317 |        |      * Raise a syntax error with a slightly better message than plain
  318 |        |      * "invalid syntax" if the numeric literal is immediately followed by
  319 |        |      * another keyword or identifier.
  320 |        |      */
  321 |   104k |     int r = 0;
  322 |   104k |     if (c == 'a') {
  323 |    751 |         r = lookahead(tok, "nd");
  324 |    751 |     }
  325 |   103k |     else if (c == 'e') {
  326 |    426 |         r = lookahead(tok, "lse");
  327 |    426 |     }
  328 |   103k |     else if (c == 'f') {
  329 |  3.31k |         r = lookahead(tok, "or");
  330 |  3.31k |     }
  331 |   100k |     else if (c == 'i') {
  332 |  2.34k |         int c2 = tok_nextc(tok);
  333 |  2.34k |         if (c2 == 'f' || c2 == 'n' || c2 == 's') {
  334 |  2.33k |             r = 1;
  335 |  2.33k |         }
  336 |  2.34k |         tok_backup(tok, c2);
  337 |  2.34k |     }
  338 |  97.8k |     else if (c == 'o') {
  339 |  2.86k |         r = lookahead(tok, "r");
  340 |  2.86k |     }
  341 |  94.9k |     else if (c == 'n') {
  342 |    287 |         r = lookahead(tok, "ot");
  343 |    287 |     }
  344 |   104k |     if (r) {
  345 |  9.87k |         tok_backup(tok, c);
  346 |  9.87k |         if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
  347 |  9.87k |                 "invalid %s literal", kind))
  348 |      0 |         {
  349 |      0 |             return 0;
  350 |      0 |         }
  351 |  9.87k |         tok_nextc(tok);
  352 |  9.87k |     }
  353 |  94.7k |     else /* In future releases, only error will remain. */
  354 |  94.7k |     if (c < 128 && is_potential_identifier_char(c)) {
  355 |    201 |         tok_backup(tok, c);
  356 |    201 |         _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
  357 |    201 |         return 0;
  358 |    201 |     }
  359 |   104k |     return 1;
  360 |   104k | }
  361 |        |
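A hypothetical sketch of the policy above: after a number, the keywords "and", "else", "for", "if", "in", "is", "or" and "not" only draw a SyntaxWarning (they can occur in valid code like `0in x`), while any other trailing identifier is an immediate syntax error.

    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>

    enum num_end { OK, WARN, ERROR };

    static enum num_end classify_after_number(const char *rest) {
        static const char *soft[] = {"and", "else", "for", "if", "in", "is",
                                     "or", "not"};
        for (size_t k = 0; k < sizeof(soft) / sizeof(soft[0]); k++) {
            size_t n = strlen(soft[k]);
            if (strncmp(rest, soft[k], n) == 0
                && !isalnum((unsigned char)rest[n]) && rest[n] != '_') {
                return WARN;   /* deprecated but still accepted */
            }
        }
        if (isalpha((unsigned char)rest[0]) || rest[0] == '_') {
            return ERROR;      /* "invalid decimal literal" etc. */
        }
        return OK;
    }

    int main(void) {
        printf("%d %d %d\n", classify_after_number("in x"),  /* 1 = WARN  */
               classify_after_number("xyz"),                 /* 2 = ERROR */
               classify_after_number(" + 2"));               /* 0 = OK    */
        return 0;
    }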
  362 |        | /* Verify that the identifier follows PEP 3131. */
  363 |        | static int
  364 |        | verify_identifier(struct tok_state *tok)
  365 |  14.1k | {
  366 |  14.1k |     if (tok->tok_extra_tokens) {
  367 |      0 |         return 1;
  368 |      0 |     }
  369 |  14.1k |     PyObject *s;
  370 |  14.1k |     if (tok->decoding_erred)
  371 |      0 |         return 0;
  372 |  14.1k |     s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
  373 |  14.1k |     if (s == NULL) {
  374 |  1.07k |         if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
  375 |  1.07k |             tok->done = E_DECODE;
  376 |  1.07k |         }
  377 |      0 |         else {
  378 |      0 |             tok->done = E_ERROR;
  379 |      0 |         }
  380 |  1.07k |         return 0;
  381 |  1.07k |     }
  382 |  13.0k |     Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
  383 |  13.0k |     assert(invalid >= 0);
  384 |  13.0k |     assert(PyUnicode_GET_LENGTH(s) > 0);
  385 |  13.0k |     if (invalid < PyUnicode_GET_LENGTH(s)) {
  386 |    707 |         Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
  387 |    707 |         if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
  388 |        |             /* Determine the offset in UTF-8 encoded input */
  389 |    479 |             Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
  390 |    479 |             if (s != NULL) {
  391 |    479 |                 Py_SETREF(s, PyUnicode_AsUTF8String(s));
  392 |    479 |             }
  393 |    479 |             if (s == NULL) {
  394 |      0 |                 tok->done = E_ERROR;
  395 |      0 |                 return 0;
  396 |      0 |             }
  397 |    479 |             tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
  398 |    479 |         }
  399 |    707 |         Py_DECREF(s);
  400 |    707 |         if (Py_UNICODE_ISPRINTABLE(ch)) {
  401 |    394 |             _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
  402 |    394 |         }
  403 |    313 |         else {
  404 |    313 |             _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
  405 |    313 |         }
  406 |    707 |         return 0;
  407 |    707 |     }
  408 |  12.3k |     Py_DECREF(s);
  409 |  12.3k |     return 1;
  410 |  13.0k | }
  411 |        |
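The scanner reports the index of the first invalid *code point*, but tok->cur is a *byte* pointer, so the function re-encodes the prefix to UTF-8 and uses its byte size as the offset. A hypothetical helper showing the same correspondence by counting UTF-8 lead bytes:

    #include <stdio.h>

    static size_t utf8_offset_of_codepoint(const char *s, size_t codepoints) {
        size_t i = 0, seen = 0;
        while (s[i] && seen < codepoints) {
            i++;
            while (((unsigned char)s[i] & 0xC0) == 0x80) {
                i++;  /* skip UTF-8 continuation bytes */
            }
            seen++;
        }
        return i;
    }

    int main(void) {
        /* "héllo": 'é' is 2 bytes, so code point 2 ('l') starts at byte 3. */
        printf("%zu\n", utf8_offset_of_codepoint("h\xC3\xA9llo", 2));  /* 3 */
        return 0;
    }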
  412 |        | static int
  413 |        | tok_decimal_tail(struct tok_state *tok)
  414 |  83.8k | {
  415 |  83.8k |     int c;
  416 |        |
  417 |  84.4k |     while (1) {
  418 |   234k |         do {
  419 |   234k |             c = tok_nextc(tok);
  420 |   234k |         } while (Py_ISDIGIT(c));
  421 |  84.4k |         if (c != '_') {
  422 |  83.8k |             break;
  423 |  83.8k |         }
  424 |    531 |         c = tok_nextc(tok);
  425 |    531 |         if (!Py_ISDIGIT(c)) {
  426 |     17 |             tok_backup(tok, c);
  427 |     17 |             _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
  428 |     17 |             return 0;
  429 |     17 |         }
  430 |    531 |     }
  431 |  83.8k |     return c;
  432 |  83.8k | }
  433 |        |
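tok_decimal_tail() enforces PEP 515's rule that every underscore in a digit run must be followed by another digit. A hypothetical standalone validator for the same rule:

    #include <stdio.h>
    #include <ctype.h>

    static int valid_decimal_tail(const char *s) {
        while (isdigit((unsigned char)*s)) {
            s++;
            if (*s == '_') {
                s++;
                if (!isdigit((unsigned char)*s)) {
                    return 0;  /* "invalid decimal literal" */
                }
            }
        }
        return 1;
    }

    int main(void) {
        printf("%d %d %d\n", valid_decimal_tail("1_000"),  /* 1 */
               valid_decimal_tail("1000"),                 /* 1 */
               valid_decimal_tail("1_"));                  /* 0 */
        return 0;
    }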
  434 |        | static inline int
  435 |  1.09k | tok_continuation_line(struct tok_state *tok) {
  436 |  1.09k |     int c = tok_nextc(tok);
  437 |  1.09k |     if (c == '\r') {
  438 |     72 |         c = tok_nextc(tok);
  439 |     72 |     }
  440 |  1.09k |     if (c != '\n') {
  441 |     63 |         tok->done = E_LINECONT;
  442 |     63 |         return -1;
  443 |     63 |     }
  444 |  1.03k |     c = tok_nextc(tok);
  445 |  1.03k |     if (c == EOF) {
  446 |     49 |         tok->done = E_EOF;
  447 |     49 |         tok->cur = tok->inp;
  448 |     49 |         return -1;
  449 |    982 |     } else {
  450 |    982 |         tok_backup(tok, c);
  451 |    982 |     }
  452 |    982 |     return c;
  453 |  1.03k | }
  454 |        |
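A hypothetical sketch of tok_continuation_line()'s contract: after a backslash, accept "\n" or "\r\n", fail on anything else (E_LINECONT), and fail on a backslash at end of file (E_EOF).

    #include <stdio.h>

    static int continuation_ok(const char *after_backslash) {
        if (after_backslash[0] == '\r') {
            after_backslash++;
        }
        if (after_backslash[0] != '\n') {
            return 0;  /* E_LINECONT: stray text after the backslash */
        }
        return after_backslash[1] != '\0';  /* E_EOF if nothing follows */
    }

    int main(void) {
        printf("%d %d %d\n", continuation_ok("\nx"),   /* 1 */
               continuation_ok("\r\nx"),               /* 1 */
               continuation_ok(" x"));                 /* 0 */
        return 0;
    }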
  455 |        | static int
  456 |        | maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
  457 |        |                                              int saw_b, int saw_r, int saw_u,
  458 |  22.7k |                                              int saw_f, int saw_t) {
  459 |        |     // Supported: rb, rf, rt (in any order)
  460 |        |     // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
  461 |        |
  462 |  22.7k | #define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
  463 |  22.7k |     do {                                                                  \
  464 |      7 |         (void)_PyTokenizer_syntaxerror_known_range(                       \
  465 |      7 |             tok, (int)(tok->start + 1 - tok->line_start),                 \
  466 |      7 |             (int)(tok->cur - tok->line_start),                            \
  467 |      7 |             "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
  468 |      7 |         return -1;                                                        \
  469 |      7 |     } while (0)
  470 |        |
  471 |  22.7k |     if (saw_u && saw_b) {
  472 |      1 |         RETURN_SYNTAX_ERROR("u", "b");
  473 |      1 |     }
  474 |  22.7k |     if (saw_u && saw_r) {
  475 |      1 |         RETURN_SYNTAX_ERROR("u", "r");
  476 |      1 |     }
  477 |  22.7k |     if (saw_u && saw_f) {
  478 |      1 |         RETURN_SYNTAX_ERROR("u", "f");
  479 |      1 |     }
  480 |  22.7k |     if (saw_u && saw_t) {
  481 |      1 |         RETURN_SYNTAX_ERROR("u", "t");
  482 |      1 |     }
  483 |        |
  484 |  22.7k |     if (saw_b && saw_f) {
  485 |      1 |         RETURN_SYNTAX_ERROR("b", "f");
  486 |      1 |     }
  487 |  22.7k |     if (saw_b && saw_t) {
  488 |      1 |         RETURN_SYNTAX_ERROR("b", "t");
  489 |      1 |     }
  490 |        |
  491 |  22.7k |     if (saw_f && saw_t) {
  492 |      1 |         RETURN_SYNTAX_ERROR("f", "t");
  493 |      1 |     }
  494 |        |
  495 |  22.7k | #undef RETURN_SYNTAX_ERROR
  496 |        |
  497 |  22.7k |     return 0;
  498 |  22.7k | }
  499 |        |
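A hypothetical check mirroring the rules above: 'u' combines with nothing, 'b' only with 'r', and 'f'/'t' combine with 'r' but not with 'b' or each other.

    #include <stdio.h>

    static int prefixes_compatible(int saw_b, int saw_r, int saw_u,
                                   int saw_f, int saw_t) {
        if (saw_u && (saw_b || saw_r || saw_f || saw_t)) return 0;
        if (saw_b && (saw_f || saw_t)) return 0;
        if (saw_f && saw_t) return 0;
        return 1;
    }

    int main(void) {
        printf("%d %d %d\n",
               prefixes_compatible(1, 1, 0, 0, 0),   /* rb"" -> 1 */
               prefixes_compatible(0, 1, 0, 1, 0),   /* rf"" -> 1 */
               prefixes_compatible(0, 0, 1, 1, 0));  /* uf"" -> 0 */
        return 0;
    }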
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.75M
{
503
1.75M
    int c;
504
1.75M
    int blankline, nonascii;
505
506
1.75M
    const char *p_start = NULL;
507
1.75M
    const char *p_end = NULL;
508
1.84M
  nextline:
509
1.84M
    tok->start = NULL;
510
1.84M
    tok->starting_col_offset = -1;
511
1.84M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.84M
    if (tok->atbol) {
516
228k
        int col = 0;
517
228k
        int altcol = 0;
518
228k
        tok->atbol = 0;
519
228k
        int cont_line_col = 0;
520
915k
        for (;;) {
521
915k
            c = tok_nextc(tok);
522
915k
            if (c == ' ') {
523
683k
                col++, altcol++;
524
683k
            }
525
231k
            else if (c == '\t') {
526
813
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
813
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
813
            }
529
230k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.95k
                col = altcol = 0; /* For Emacs users */
531
1.95k
            }
532
228k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
659
                cont_line_col = cont_line_col ? cont_line_col : col;
538
659
                if ((c = tok_continuation_line(tok)) == -1) {
539
42
                    return MAKE_TOKEN(ERRORTOKEN);
540
42
                }
541
659
            }
542
228k
            else {
543
228k
                break;
544
228k
            }
545
915k
        }
546
228k
        tok_backup(tok, c);
547
228k
        if (c == '#' || c == '\n' || c == '\r') {
548
            /* Lines with only whitespace and/or comments
549
               shouldn't affect the indentation and are
550
               not passed to the parser as NEWLINE tokens,
551
               except *totally* empty lines in interactive
552
               mode, which signal the end of a command group. */
553
45.0k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
554
0
                blankline = 0; /* Let it through */
555
0
            }
556
45.0k
            else if (tok->prompt != NULL && tok->lineno == 1) {
557
                /* In interactive mode, if the first line contains
558
                   only spaces and/or a comment, let it through. */
559
0
                blankline = 0;
560
0
                col = altcol = 0;
561
0
            }
562
45.0k
            else {
563
45.0k
                blankline = 1; /* Ignore completely */
564
45.0k
            }
565
            /* We can't jump back right here since we still
566
               may need to skip to the end of a comment */
567
45.0k
        }
568
228k
        if (!blankline && tok->level == 0) {
569
142k
            col = cont_line_col ? cont_line_col : col;
570
142k
            altcol = cont_line_col ? cont_line_col : altcol;
571
142k
            if (col == tok->indstack[tok->indent]) {
572
                /* No change */
573
103k
                if (altcol != tok->altindstack[tok->indent]) {
574
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
575
1
                }
576
103k
            }
577
38.4k
            else if (col > tok->indstack[tok->indent]) {
578
                /* Indent -- always one */
579
21.5k
                if (tok->indent+1 >= MAXINDENT) {
580
0
                    tok->done = E_TOODEEP;
581
0
                    tok->cur = tok->inp;
582
0
                    return MAKE_TOKEN(ERRORTOKEN);
583
0
                }
584
21.5k
                if (altcol <= tok->altindstack[tok->indent]) {
585
3
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
586
3
                }
587
21.5k
                tok->pendin++;
588
21.5k
                tok->indstack[++tok->indent] = col;
589
21.5k
                tok->altindstack[tok->indent] = altcol;
590
21.5k
            }
591
16.9k
            else /* col < tok->indstack[tok->indent] */ {
592
                /* Dedent -- any number, must be consistent */
593
37.6k
                while (tok->indent > 0 &&
594
37.6k
                    col < tok->indstack[tok->indent]) {
595
20.6k
                    tok->pendin--;
596
20.6k
                    tok->indent--;
597
20.6k
                }
598
16.9k
                if (col != tok->indstack[tok->indent]) {
599
9
                    tok->done = E_DEDENT;
600
9
                    tok->cur = tok->inp;
601
9
                    return MAKE_TOKEN(ERRORTOKEN);
602
9
                }
603
16.9k
                if (altcol != tok->altindstack[tok->indent]) {
604
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
605
1
                }
606
16.9k
            }
607
142k
        }
608
228k
    }
609
610
1.84M
    tok->start = tok->cur;
611
1.84M
    tok->starting_col_offset = tok->col_offset;
612
613
    /* Return pending indents/dedents */
614
1.84M
    if (tok->pendin != 0) {
615
42.1k
        if (tok->pendin < 0) {
616
20.6k
            if (tok->tok_extra_tokens) {
617
0
                p_start = tok->cur;
618
0
                p_end = tok->cur;
619
0
            }
620
20.6k
            tok->pendin++;
621
20.6k
            return MAKE_TOKEN(DEDENT);
622
20.6k
        }
623
21.5k
        else {
624
21.5k
            if (tok->tok_extra_tokens) {
625
0
                p_start = tok->buf;
626
0
                p_end = tok->cur;
627
0
            }
628
21.5k
            tok->pendin--;
629
21.5k
            return MAKE_TOKEN(INDENT);
630
21.5k
        }
631
42.1k
    }
632
633
    /* Peek ahead at the next character */
634
1.80M
    c = tok_nextc(tok);
635
1.80M
    tok_backup(tok, c);
636
637
1.80M
 again:
638
1.80M
    tok->start = NULL;
639
    /* Skip spaces */
640
2.14M
    do {
641
2.14M
        c = tok_nextc(tok);
642
2.14M
    } while (c == ' ' || c == '\t' || c == '\014');
643
644
    /* Set start of current token */
645
1.80M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
646
1.80M
    tok->starting_col_offset = tok->col_offset - 1;
647
648
    /* Skip comment, unless it's a type comment */
649
1.80M
    if (c == '#') {
650
651
42.4k
        const char* p = NULL;
652
42.4k
        const char *prefix, *type_start;
653
42.4k
        int current_starting_col_offset;
654
655
1.31M
        while (c != EOF && c != '\n' && c != '\r') {
656
1.27M
            c = tok_nextc(tok);
657
1.27M
        }
658
659
42.4k
        if (tok->tok_extra_tokens) {
660
0
            p = tok->start;
661
0
        }
662
663
42.4k
        if (tok->type_comments) {
664
0
            p = tok->start;
665
0
            current_starting_col_offset = tok->starting_col_offset;
666
0
            prefix = type_comment_prefix;
667
0
            while (*prefix && p < tok->cur) {
668
0
                if (*prefix == ' ') {
669
0
                    while (*p == ' ' || *p == '\t') {
670
0
                        p++;
671
0
                        current_starting_col_offset++;
672
0
                    }
673
0
                } else if (*prefix == *p) {
674
0
                    p++;
675
0
                    current_starting_col_offset++;
676
0
                } else {
677
0
                    break;
678
0
                }
679
680
0
                prefix++;
681
0
            }
682
683
            /* This is a type comment if we matched all of type_comment_prefix. */
684
0
            if (!*prefix) {
685
0
                int is_type_ignore = 1;
686
                // +6 in order to skip the word 'ignore'
687
0
                const char *ignore_end = p + 6;
688
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
689
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
690
691
0
                type_start = p;
692
693
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
694
                 * or anything ASCII and non-alphanumeric. */
695
0
                is_type_ignore = (
696
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
697
0
                    && !(tok->cur > ignore_end
698
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
699
700
0
                if (is_type_ignore) {
701
0
                    p_start = ignore_end;
702
0
                    p_end = tok->cur;
703
704
                    /* If this type ignore is the only thing on the line, consume the newline also. */
705
0
                    if (blankline) {
706
0
                        tok_nextc(tok);
707
0
                        tok->atbol = 1;
708
0
                    }
709
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
710
0
                } else {
711
0
                    p_start = type_start;
712
0
                    p_end = tok->cur;
713
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
714
0
                }
715
0
            }
716
0
        }
717
42.4k
        if (tok->tok_extra_tokens) {
718
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
719
0
            p_start = p;
720
0
            p_end = tok->cur;
721
0
            tok->comment_newline = blankline;
722
0
            return MAKE_TOKEN(COMMENT);
723
0
        }
724
42.4k
    }
725
726
1.80M
    if (tok->done == E_INTERACT_STOP) {
727
0
        return MAKE_TOKEN(ENDMARKER);
728
0
    }
729
730
    /* Check for EOF and errors now */
731
1.80M
    if (c == EOF) {
732
17.2k
        if (tok->level) {
733
4.10k
            return MAKE_TOKEN(ERRORTOKEN);
734
4.10k
        }
735
13.1k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
736
17.2k
    }
737
738
    /* Identifier (most frequent token!) */
739
1.78M
    nonascii = 0;
740
1.78M
    if (is_potential_identifier_start(c)) {
741
        /* Process the various legal combinations of b"", r"", u"", and f"". */
742
528k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
743
647k
        while (1) {
744
647k
            if (!saw_b && (c == 'b' || c == 'B')) {
745
21.0k
                saw_b = 1;
746
21.0k
            }
747
            /* Since this is a backwards compatibility support literal we don't
748
               want to support it in arbitrary order like byte literals. */
749
626k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
750
6.88k
                saw_u = 1;
751
6.88k
            }
752
            /* ur"" and ru"" are not supported */
753
619k
            else if (!saw_r && (c == 'r' || c == 'R')) {
754
37.9k
                saw_r = 1;
755
37.9k
            }
756
581k
            else if (!saw_f && (c == 'f' || c == 'F')) {
757
44.4k
                saw_f = 1;
758
44.4k
            }
759
537k
            else if (!saw_t && (c == 't' || c == 'T')) {
760
31.1k
                saw_t = 1;
761
31.1k
            }
762
505k
            else {
763
505k
                break;
764
505k
            }
765
141k
            c = tok_nextc(tok);
766
141k
            if (c == '"' || c == '\'') {
767
                // Raise error on incompatible string prefixes:
768
22.7k
                int status = maybe_raise_syntax_error_for_string_prefixes(
769
22.7k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
770
22.7k
                if (status < 0) {
771
7
                    return MAKE_TOKEN(ERRORTOKEN);
772
7
                }
773
774
                // Handle valid f or t string creation:
775
22.7k
                if (saw_f || saw_t) {
776
17.2k
                    goto f_string_quote;
777
17.2k
                }
778
5.46k
                goto letter_quote;
779
22.7k
            }
780
141k
        }
781
2.30M
        while (is_potential_identifier_char(c)) {
782
1.79M
            if (c >= 128) {
783
144k
                nonascii = 1;
784
144k
            }
785
1.79M
            c = tok_nextc(tok);
786
1.79M
        }
787
505k
        tok_backup(tok, c);
788
505k
        if (nonascii && !verify_identifier(tok)) {
789
1.78k
            return MAKE_TOKEN(ERRORTOKEN);
790
1.78k
        }
791
792
504k
        p_start = tok->start;
793
504k
        p_end = tok->cur;
794
795
504k
        return MAKE_TOKEN(NAME);
796
505k
    }
797
798
1.25M
    if (c == '\r') {
799
442
        c = tok_nextc(tok);
800
442
    }
801
802
    /* Newline */
803
1.25M
    if (c == '\n') {
804
206k
        tok->atbol = 1;
805
206k
        if (blankline || tok->level > 0) {
806
85.9k
            if (tok->tok_extra_tokens) {
807
0
                if (tok->comment_newline) {
808
0
                    tok->comment_newline = 0;
809
0
                }
810
0
                p_start = tok->start;
811
0
                p_end = tok->cur;
812
0
                return MAKE_TOKEN(NL);
813
0
            }
814
85.9k
            goto nextline;
815
85.9k
        }
816
120k
        if (tok->comment_newline && tok->tok_extra_tokens) {
817
0
            tok->comment_newline = 0;
818
0
            p_start = tok->start;
819
0
            p_end = tok->cur;
820
0
            return MAKE_TOKEN(NL);
821
0
        }
822
120k
        p_start = tok->start;
823
120k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
824
120k
        tok->cont_line = 0;
825
120k
        return MAKE_TOKEN(NEWLINE);
826
120k
    }
827
828
    /* Period or number starting with period? */
829
1.04M
    if (c == '.') {
830
34.4k
        c = tok_nextc(tok);
831
34.4k
        if (Py_ISDIGIT(c)) {
832
2.87k
            goto fraction;
833
31.5k
        } else if (c == '.') {
834
3.28k
            c = tok_nextc(tok);
835
3.28k
            if (c == '.') {
836
2.50k
                p_start = tok->start;
837
2.50k
                p_end = tok->cur;
838
2.50k
                return MAKE_TOKEN(ELLIPSIS);
839
2.50k
            }
840
779
            else {
841
779
                tok_backup(tok, c);
842
779
            }
843
779
            tok_backup(tok, '.');
844
779
        }
845
28.3k
        else {
846
28.3k
            tok_backup(tok, c);
847
28.3k
        }
848
29.0k
        p_start = tok->start;
849
29.0k
        p_end = tok->cur;
850
29.0k
        return MAKE_TOKEN(DOT);
851
34.4k
    }
852
853
    /* Number */
854
1.01M
    if (Py_ISDIGIT(c)) {
855
101k
        if (c == '0') {
856
            /* Hex, octal or binary -- maybe. */
857
35.2k
            c = tok_nextc(tok);
858
35.2k
            if (c == 'x' || c == 'X') {
859
                /* Hex */
860
15.8k
                c = tok_nextc(tok);
861
16.0k
                do {
862
16.0k
                    if (c == '_') {
863
215
                        c = tok_nextc(tok);
864
215
                    }
865
16.0k
                    if (!Py_ISXDIGIT(c)) {
866
21
                        tok_backup(tok, c);
867
21
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
868
21
                    }
869
78.8k
                    do {
870
78.8k
                        c = tok_nextc(tok);
871
78.8k
                    } while (Py_ISXDIGIT(c));
872
16.0k
                } while (c == '_');
873
15.8k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
874
2
                    return MAKE_TOKEN(ERRORTOKEN);
875
2
                }
876
15.8k
            }
877
19.3k
            else if (c == 'o' || c == 'O') {
878
                /* Octal */
879
667
                c = tok_nextc(tok);
880
1.19k
                do {
881
1.19k
                    if (c == '_') {
882
529
                        c = tok_nextc(tok);
883
529
                    }
884
1.19k
                    if (c < '0' || c >= '8') {
885
22
                        if (Py_ISDIGIT(c)) {
886
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
887
1
                                    "invalid digit '%c' in octal literal", c));
888
1
                        }
889
21
                        else {
890
21
                            tok_backup(tok, c);
891
21
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
892
21
                        }
893
22
                    }
894
3.26k
                    do {
895
3.26k
                        c = tok_nextc(tok);
896
3.26k
                    } while ('0' <= c && c < '8');
897
1.17k
                } while (c == '_');
898
645
                if (Py_ISDIGIT(c)) {
899
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
900
1
                            "invalid digit '%c' in octal literal", c));
901
1
                }
902
644
                if (!verify_end_of_number(tok, c, "octal")) {
903
2
                    return MAKE_TOKEN(ERRORTOKEN);
904
2
                }
905
644
            }
906
18.7k
            else if (c == 'b' || c == 'B') {
907
                /* Binary */
908
562
                c = tok_nextc(tok);
909
896
                do {
910
896
                    if (c == '_') {
911
342
                        c = tok_nextc(tok);
912
342
                    }
913
896
                    if (c != '0' && c != '1') {
914
19
                        if (Py_ISDIGIT(c)) {
915
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
916
1
                        }
917
18
                        else {
918
18
                            tok_backup(tok, c);
919
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
920
18
                        }
921
19
                    }
922
4.15k
                    do {
923
4.15k
                        c = tok_nextc(tok);
924
4.15k
                    } while (c == '0' || c == '1');
925
877
                } while (c == '_');
926
543
                if (Py_ISDIGIT(c)) {
927
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
928
2
                }
929
541
                if (!verify_end_of_number(tok, c, "binary")) {
930
2
                    return MAKE_TOKEN(ERRORTOKEN);
931
2
                }
932
541
            }
933
18.1k
            else {
934
18.1k
                int nonzero = 0;
935
                /* maybe old-style octal; c is first char of it */
936
                /* in any case, allow '0' as a literal */
937
19.4k
                while (1) {
938
19.4k
                    if (c == '_') {
939
93
                        c = tok_nextc(tok);
940
93
                        if (!Py_ISDIGIT(c)) {
941
3
                            tok_backup(tok, c);
942
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
943
3
                        }
944
93
                    }
945
19.4k
                    if (c != '0') {
946
18.1k
                        break;
947
18.1k
                    }
948
1.31k
                    c = tok_nextc(tok);
949
1.31k
                }
950
18.1k
                char* zeros_end = tok->cur;
951
18.1k
                if (Py_ISDIGIT(c)) {
952
613
                    nonzero = 1;
953
613
                    c = tok_decimal_tail(tok);
954
613
                    if (c == 0) {
955
2
                        return MAKE_TOKEN(ERRORTOKEN);
956
2
                    }
957
613
                }
958
18.1k
                if (c == '.') {
959
875
                    c = tok_nextc(tok);
960
875
                    goto fraction;
961
875
                }
962
17.2k
                else if (c == 'e' || c == 'E') {
963
842
                    goto exponent;
964
842
                }
965
16.4k
                else if (c == 'j' || c == 'J') {
966
881
                    goto imaginary;
967
881
                }
968
15.5k
                else if (nonzero && !tok->tok_extra_tokens) {
969
                    /* Old-style octal: now disallowed. */
970
21
                    tok_backup(tok, c);
971
21
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
972
21
                            tok, (int)(tok->start + 1 - tok->line_start),
973
21
                            (int)(zeros_end - tok->line_start),
974
21
                            "leading zeros in decimal integer "
975
21
                            "literals are not permitted; "
976
21
                            "use an 0o prefix for octal integers"));
977
21
                }
978
15.5k
                if (!verify_end_of_number(tok, c, "decimal")) {
979
28
                    return MAKE_TOKEN(ERRORTOKEN);
980
28
                }
981
15.5k
            }
982
35.2k
        }
983
66.6k
        else {
984
            /* Decimal */
985
66.6k
            c = tok_decimal_tail(tok);
986
66.6k
            if (c == 0) {
987
12
                return MAKE_TOKEN(ERRORTOKEN);
988
12
            }
989
66.6k
            {
990
                /* Accept floating-point numbers. */
991
66.6k
                if (c == '.') {
992
3.96k
                    c = tok_nextc(tok);
993
7.71k
        fraction:
994
                    /* Fraction */
995
7.71k
                    if (Py_ISDIGIT(c)) {
996
5.77k
                        c = tok_decimal_tail(tok);
997
5.77k
                        if (c == 0) {
998
2
                            return MAKE_TOKEN(ERRORTOKEN);
999
2
                        }
1000
5.77k
                    }
1001
7.71k
                }
1002
70.4k
                if (c == 'e' || c == 'E') {
1003
10.4k
                    int e;
1004
11.2k
                  exponent:
1005
11.2k
                    e = c;
1006
                    /* Exponent part */
1007
11.2k
                    c = tok_nextc(tok);
1008
11.2k
                    if (c == '+' || c == '-') {
1009
4.03k
                        c = tok_nextc(tok);
1010
4.03k
                        if (!Py_ISDIGIT(c)) {
1011
13
                            tok_backup(tok, c);
1012
13
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1013
13
                        }
1014
7.24k
                    } else if (!Py_ISDIGIT(c)) {
1015
431
                        tok_backup(tok, c);
1016
431
                        if (!verify_end_of_number(tok, e, "decimal")) {
1017
42
                            return MAKE_TOKEN(ERRORTOKEN);
1018
42
                        }
1019
389
                        tok_backup(tok, e);
1020
389
                        p_start = tok->start;
1021
389
                        p_end = tok->cur;
1022
389
                        return MAKE_TOKEN(NUMBER);
1023
431
                    }
1024
10.8k
                    c = tok_decimal_tail(tok);
1025
10.8k
                    if (c == 0) {
1026
1
                        return MAKE_TOKEN(ERRORTOKEN);
1027
1
                    }
1028
10.8k
                }
1029
70.8k
                if (c == 'j' || c == 'J') {
1030
                    /* Imaginary part */
1031
4.54k
        imaginary:
1032
4.54k
                    c = tok_nextc(tok);
1033
4.54k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1034
8
                        return MAKE_TOKEN(ERRORTOKEN);
1035
8
                    }
1036
4.54k
                }
1037
67.1k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1038
117
                    return MAKE_TOKEN(ERRORTOKEN);
1039
117
                }
1040
70.8k
            }
1041
70.8k
        }
1042
104k
        tok_backup(tok, c);
1043
104k
        p_start = tok->start;
1044
104k
        p_end = tok->cur;
1045
104k
        return MAKE_TOKEN(NUMBER);
1046
101k
    }
1047
1048
928k
  f_string_quote:
1049
928k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1050
928k
        && (c == '\'' || c == '"'))) {
1051
1052
17.2k
        int quote = c;
1053
17.2k
        int quote_size = 1;             /* 1 or 3 */
1054
1055
        /* Nodes of type STRING, especially multi line strings
1056
           must be handled differently in order to get both
1057
           the starting line number and the column offset right.
1058
           (cf. issue 16806) */
1059
17.2k
        tok->first_lineno = tok->lineno;
1060
17.2k
        tok->multi_line_start = tok->line_start;
1061
1062
        /* Find the quote size and start of string */
1063
17.2k
        int after_quote = tok_nextc(tok);
1064
17.2k
        if (after_quote == quote) {
1065
2.40k
            int after_after_quote = tok_nextc(tok);
1066
2.40k
            if (after_after_quote == quote) {
1067
809
                quote_size = 3;
1068
809
            }
1069
1.59k
            else {
1070
                // TODO: Check this
1071
1.59k
                tok_backup(tok, after_after_quote);
1072
1.59k
                tok_backup(tok, after_quote);
1073
1.59k
            }
1074
2.40k
        }
1075
17.2k
        if (after_quote != quote) {
1076
14.8k
            tok_backup(tok, after_quote);
1077
14.8k
        }
1078
1079
1080
17.2k
        p_start = tok->start;
1081
17.2k
        p_end = tok->cur;
1082
17.2k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1083
2
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1084
2
        }
1085
17.2k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1086
17.2k
        the_current_tok->kind = TOK_FSTRING_MODE;
1087
17.2k
        the_current_tok->quote = quote;
1088
17.2k
        the_current_tok->quote_size = quote_size;
1089
17.2k
        the_current_tok->start = tok->start;
1090
17.2k
        the_current_tok->multi_line_start = tok->line_start;
1091
17.2k
        the_current_tok->first_line = tok->lineno;
1092
17.2k
        the_current_tok->start_offset = -1;
1093
17.2k
        the_current_tok->multi_line_start_offset = -1;
1094
17.2k
        the_current_tok->last_expr_buffer = NULL;
1095
17.2k
        the_current_tok->last_expr_size = 0;
1096
17.2k
        the_current_tok->last_expr_end = -1;
1097
17.2k
        the_current_tok->in_format_spec = 0;
1098
17.2k
        the_current_tok->in_debug = 0;
1099
1100
17.2k
        enum string_kind_t string_kind = FSTRING;
1101
17.2k
        switch (*tok->start) {
1102
804
            case 'T':
1103
4.61k
            case 't':
1104
4.61k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1105
4.61k
                string_kind = TSTRING;
1106
4.61k
                break;
1107
1.57k
            case 'F':
1108
12.1k
            case 'f':
1109
12.1k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1110
12.1k
                break;
1111
109
            case 'R':
1112
512
            case 'r':
1113
512
                the_current_tok->raw = 1;
1114
512
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1115
204
                    string_kind = TSTRING;
1116
204
                }
1117
512
                break;
1118
0
            default:
1119
0
                Py_UNREACHABLE();
1120
17.2k
        }
1121
1122
17.2k
        the_current_tok->string_kind = string_kind;
1123
17.2k
        the_current_tok->curly_bracket_depth = 0;
1124
17.2k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1125
17.2k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1126
17.2k
    }
1127
1128
916k
  letter_quote:
1129
    /* String */
1130
916k
    if (c == '\'' || c == '"') {
1131
59.9k
        int quote = c;
1132
59.9k
        int quote_size = 1;             /* 1 or 3 */
1133
59.9k
        int end_quote_size = 0;
1134
59.9k
        int has_escaped_quote = 0;
1135
1136
        /* Nodes of type STRING, especially multi line strings
1137
           must be handled differently in order to get both
1138
           the starting line number and the column offset right.
1139
           (cf. issue 16806) */
1140
59.9k
        tok->first_lineno = tok->lineno;
1141
59.9k
        tok->multi_line_start = tok->line_start;
1142
1143
        /* Find the quote size and start of string */
1144
59.9k
        c = tok_nextc(tok);
1145
59.9k
        if (c == quote) {
1146
10.8k
            c = tok_nextc(tok);
1147
10.8k
            if (c == quote) {
1148
2.54k
                quote_size = 3;
1149
2.54k
            }
1150
8.34k
            else {
1151
8.34k
                end_quote_size = 1;     /* empty string found */
1152
8.34k
            }
1153
10.8k
        }
1154
59.9k
        if (c != quote) {
1155
57.3k
            tok_backup(tok, c);
1156
57.3k
        }
1157
1158
        /* Get rest of string */
1159
1.18M
        while (end_quote_size != quote_size) {
1160
1.12M
            c = tok_nextc(tok);
1161
1.12M
            if (tok->done == E_ERROR) {
1162
0
                return MAKE_TOKEN(ERRORTOKEN);
1163
0
            }
1164
1.12M
            if (tok->done == E_DECODE) {
1165
0
                break;
1166
0
            }
1167
1.12M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1168
423
                assert(tok->multi_line_start != NULL);
1169
                // shift the tok_state's location into
1170
                // the start of string, and report the error
1171
                // from the initial quote character
1172
423
                tok->cur = (char *)tok->start;
1173
423
                tok->cur++;
1174
423
                tok->line_start = tok->multi_line_start;
1175
423
                int start = tok->lineno;
1176
423
                tok->lineno = tok->first_lineno;
1177
1178
423
                if (INSIDE_FSTRING(tok)) {
1179
                    /* When we are in an f-string, before raising the
1180
                     * unterminated string literal error, check whether
1181
                     * does the initial quote matches with f-strings quotes
1182
                     * and if it is, then this must be a missing '}' token
1183
                     * so raise the proper error */
1184
27
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1185
27
                    if (the_current_tok->quote == quote &&
1186
27
                        the_current_tok->quote_size == quote_size) {
1187
19
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1188
19
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1189
19
                    }
1190
27
                }
1191
1192
404
                if (quote_size == 3) {
1193
17
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1194
17
                                     " (detected at line %d)", start);
1195
17
                    if (c != '\n') {
1196
17
                        tok->done = E_EOFS;
1197
17
                    }
1198
17
                    return MAKE_TOKEN(ERRORTOKEN);
1199
17
                }
1200
387
                else {
1201
387
                    if (has_escaped_quote) {
1202
10
                        _PyTokenizer_syntaxerror(
1203
10
                            tok,
1204
10
                            "unterminated string literal (detected at line %d); "
1205
10
                            "perhaps you escaped the end quote?",
1206
10
                            start
1207
10
                        );
1208
377
                    } else {
1209
377
                        _PyTokenizer_syntaxerror(
1210
377
                            tok, "unterminated string literal (detected at line %d)", start
1211
377
                        );
1212
377
                    }
1213
387
                    if (c != '\n') {
1214
15
                        tok->done = E_EOLS;
1215
15
                    }
1216
387
                    return MAKE_TOKEN(ERRORTOKEN);
1217
387
                }
1218
404
            }
1219
1.12M
            if (c == quote) {
1220
58.0k
                end_quote_size += 1;
1221
58.0k
            }
1222
1.06M
            else {
1223
1.06M
                end_quote_size = 0;
1224
1.06M
                if (c == '\\') {
1225
29.7k
                    c = tok_nextc(tok);  /* skip escaped char */
1226
29.7k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1227
1.36k
                        has_escaped_quote = 1;
1228
1.36k
                    }
1229
29.7k
                    if (c == '\r') {
1230
221
                        c = tok_nextc(tok);
1231
221
                    }
1232
29.7k
                }
1233
1.06M
            }
1234
1.12M
        }
1235
1236
59.4k
        p_start = tok->start;
1237
59.4k
        p_end = tok->cur;
1238
59.4k
        return MAKE_TOKEN(STRING);
1239
59.9k
    }
1240
1241
    /* Line continuation */
1242
856k
    if (c == '\\') {
1243
435
        if ((c = tok_continuation_line(tok)) == -1) {
1244
70
            return MAKE_TOKEN(ERRORTOKEN);
1245
70
        }
1246
365
        tok->cont_line = 1;
1247
365
        goto again; /* Read next line */
1248
435
    }
1249
1250
    /* Punctuation character */
1251
856k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1252
856k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1253
        /* This code block gets executed before the curly_bracket_depth is incremented
1254
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1255
         * to adjust it manually */
1256
54.4k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1257
54.4k
        int in_format_spec = current_tok->in_format_spec;
1258
54.4k
         int cursor_in_format_with_debug =
1259
54.4k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1260
54.4k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1261
54.4k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1262
0
            return MAKE_TOKEN(ENDMARKER);
1263
0
        }
1264
54.4k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1265
10
            return MAKE_TOKEN(ERRORTOKEN);
1266
10
        }
1267
1268
54.3k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269
3.99k
            current_tok->kind = TOK_FSTRING_MODE;
1270
3.99k
            current_tok->in_format_spec = 1;
1271
3.99k
            p_start = tok->start;
1272
3.99k
            p_end = tok->cur;
1273
3.99k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1274
3.99k
        }
1275
54.3k
    }
1276
1277
    /* Check for two-character token */
1278
852k
    {
1279
852k
        int c2 = tok_nextc(tok);
1280
852k
        int current_token = _PyToken_TwoChars(c, c2);
1281
852k
        if (current_token != OP) {
1282
22.9k
            int c3 = tok_nextc(tok);
1283
22.9k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1284
22.9k
            if (current_token3 != OP) {
1285
992
                current_token = current_token3;
1286
992
            }
1287
21.9k
            else {
1288
21.9k
                tok_backup(tok, c3);
1289
21.9k
            }
1290
22.9k
            p_start = tok->start;
1291
22.9k
            p_end = tok->cur;
1292
22.9k
            return MAKE_TOKEN(current_token);
1293
22.9k
        }
1294
829k
        tok_backup(tok, c2);
1295
829k
    }
1296
1297
    /* Keep track of parentheses nesting level */
1298
0
    switch (c) {
1299
90.2k
    case '(':
1300
126k
    case '[':
1301
174k
    case '{':
1302
174k
        if (tok->level >= MAXLEVEL) {
1303
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1304
3
        }
1305
174k
        tok->parenstack[tok->level] = c;
1306
174k
        tok->parenlinenostack[tok->level] = tok->lineno;
1307
174k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1308
174k
        tok->level++;
1309
174k
        if (INSIDE_FSTRING(tok)) {
1310
30.2k
            current_tok->curly_bracket_depth++;
1311
30.2k
        }
1312
174k
        break;
1313
61.6k
    case ')':
1314
73.1k
    case ']':
1315
99.3k
    case '}':
1316
99.3k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1317
56
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1318
56
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1319
56
        }
1320
99.3k
        if (!tok->tok_extra_tokens && !tok->level) {
1321
225
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1322
225
        }
1323
99.0k
        if (tok->level > 0) {
1324
99.0k
            tok->level--;
1325
99.0k
            int opening = tok->parenstack[tok->level];
1326
99.0k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1327
99.0k
                                            (opening == '[' && c == ']') ||
1328
99.0k
                                            (opening == '{' && c == '}'))) {
1329
                /* If the opening bracket belongs to an f-string's expression
1330
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1331
                nested expression, then instead of matching a different
1332
                syntactical construct with it; we'll throw an unmatched
1333
                parentheses error. */
1334
54
                if (INSIDE_FSTRING(tok) && opening == '{') {
1335
11
                    assert(current_tok->curly_bracket_depth >= 0);
1336
11
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1337
11
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1338
6
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1339
6
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1340
6
                    }
1341
11
                }
1342
48
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1343
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1344
2
                            "closing parenthesis '%c' does not match "
1345
2
                            "opening parenthesis '%c' on line %d",
1346
2
                            c, opening, tok->parenlinenostack[tok->level]));
1347
2
                }
1348
46
                else {
1349
46
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1350
46
                            "closing parenthesis '%c' does not match "
1351
46
                            "opening parenthesis '%c'",
1352
46
                            c, opening));
1353
46
                }
1354
48
            }
1355
99.0k
        }
1356
1357
99.0k
        if (INSIDE_FSTRING(tok)) {
1358
22.2k
            current_tok->curly_bracket_depth--;
1359
22.2k
            if (current_tok->curly_bracket_depth < 0) {
1360
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1361
1
                    TOK_GET_STRING_PREFIX(tok), c));
1362
1
            }
1363
22.2k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1364
20.4k
                current_tok->curly_bracket_expr_start_depth--;
1365
20.4k
                current_tok->kind = TOK_FSTRING_MODE;
1366
20.4k
                current_tok->in_format_spec = 0;
1367
20.4k
                current_tok->in_debug = 0;
1368
20.4k
            }
1369
22.2k
        }
1370
99.0k
        break;
1371
555k
    default:
1372
555k
        break;
1373
829k
    }
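
The switch above pushes every opener onto parenstack together with its line and column, and pops on each closer, which is how a mismatched pair can name the opener it actually saw. A small demo of the error text produced by the branches above (hedged to CPython 3.12+, where this lexer lives):

    try:
        compile("x = (1,\n  2]", "<demo>", "exec")
    except SyntaxError as e:
        # closing parenthesis ']' does not match opening parenthesis '(' on line 1
        print(e.msg)
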
1374
1375
829k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1376
495
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1377
495
    }
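
This printability check guards the single-character fallthrough; an ASCII control character in the source is a simple way to reach it:

    try:
        compile("x = 1 \x01", "<demo>", "exec")
    except SyntaxError as e:
        print(e.msg)   # invalid non-printable character U+0001
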
1378
1379
828k
    if (c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
1380
42.6k
        current_tok->in_debug = 1;
1381
42.6k
    }
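
Seeing '=' inside a replacement field flips in_debug, which is what powers the self-documenting f-string form. A short demonstration (Python 3.8+):

    value = 42
    print(f"{value=}")      # value=42
    print(f"{value = }")    # value = 42  (surrounding whitespace is preserved)
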
1382
1383
    /* Punctuation character */
1384
828k
    p_start = tok->start;
1385
828k
    p_end = tok->cur;
1386
828k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1387
829k
}
1388
1389
static int
1390
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
1391
53.0k
{
1392
53.0k
    const char *p_start = NULL;
1393
53.0k
    const char *p_end = NULL;
1394
53.0k
    int end_quote_size = 0;
1395
53.0k
    int unicode_escape = 0;
1396
1397
53.0k
    tok->start = tok->cur;
1398
53.0k
    tok->first_lineno = tok->lineno;
1399
53.0k
    tok->starting_col_offset = tok->col_offset;
1400
1401
    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
1402
    // before it.
1403
53.0k
    int start_char = tok_nextc(tok);
1404
53.0k
    if (start_char == '{') {
1405
14.2k
        int peek1 = tok_nextc(tok);
1406
14.2k
        tok_backup(tok, peek1);
1407
14.2k
        tok_backup(tok, start_char);
1408
14.2k
        if (peek1 != '{') {
1409
11.3k
            current_tok->curly_bracket_expr_start_depth++;
1410
11.3k
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1411
7
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1412
7
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1413
7
            }
1414
11.3k
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1415
11.3k
            return tok_get_normal_mode(tok, current_tok, token);
1416
11.3k
        }
1417
14.2k
    }
1418
38.7k
    else {
1419
38.7k
        tok_backup(tok, start_char);
1420
38.7k
    }
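
When the next character opens a replacement field, control is handed back to the normal-mode tokenizer, so the expression inside arrives as ordinary tokens between the string parts. A rough sketch of the resulting token stream, assuming CPython 3.12+ where tokenize exposes the f-string token kinds:

    import io, tokenize

    src = 'f"a{x}b"\n'
    for tok in tokenize.generate_tokens(io.StringIO(src).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # FSTRING_START 'f"', FSTRING_MIDDLE 'a', OP '{', NAME 'x', OP '}',
    # FSTRING_MIDDLE 'b', FSTRING_END '"', then NEWLINE and ENDMARKER
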
1421
1422
    // Check if we are at the end of the string
1423
60.0k
    for (int i = 0; i < current_tok->quote_size; i++) {
1424
47.6k
        int quote = tok_nextc(tok);
1425
47.6k
        if (quote != current_tok->quote) {
1426
29.2k
            tok_backup(tok, quote);
1427
29.2k
            goto f_string_middle;
1428
29.2k
        }
1429
47.6k
    }
1430
1431
12.3k
    if (current_tok->last_expr_buffer != NULL) {
1432
7.36k
        PyMem_Free(current_tok->last_expr_buffer);
1433
7.36k
        current_tok->last_expr_buffer = NULL;
1434
7.36k
        current_tok->last_expr_size = 0;
1435
7.36k
        current_tok->last_expr_end = -1;
1436
7.36k
    }
1437
1438
12.3k
    p_start = tok->start;
1439
12.3k
    p_end = tok->cur;
1440
12.3k
    tok->tok_mode_stack_index--;
1441
12.3k
    return MAKE_TOKEN(FTSTRING_END(current_tok));
1442
1443
29.2k
f_string_middle:
1444
1445
    // TODO: Setting multi_line_start here is a bit of a hack, but it works
1446
    // for now; we need a better way to handle this.
1447
29.2k
    tok->multi_line_start = tok->line_start;
1448
158k
    while (end_quote_size != current_tok->quote_size) {
1449
152k
        int c = tok_nextc(tok);
1450
152k
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
1451
0
            return MAKE_TOKEN(ERRORTOKEN);
1452
0
        }
1453
152k
        int in_format_spec = (
1454
152k
                current_tok->in_format_spec
1455
152k
                &&
1456
152k
                INSIDE_FSTRING_EXPR(current_tok)
1457
152k
        );
1458
1459
152k
        if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
1460
475
            if (tok->decoding_erred) {
1461
0
                return MAKE_TOKEN(ERRORTOKEN);
1462
0
            }
1463
1464
            // If we are in a format spec and we find a newline,
1465
            // the format spec ends here and we should return
1466
            // to regular mode.
1467
475
            if (in_format_spec && c == '\n') {
1468
85
                if (current_tok->quote_size == 1) {
1469
85
                    return MAKE_TOKEN(
1470
85
                        _PyTokenizer_syntaxerror(
1471
85
                            tok,
1472
85
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
1473
85
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
1474
85
                        )
1475
85
                    );
1476
85
                }
1477
0
                tok_backup(tok, c);
1478
0
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1479
0
                current_tok->in_format_spec = 0;
1480
0
                p_start = tok->start;
1481
0
                p_end = tok->cur;
1482
0
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1483
85
            }
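
A one-line way to hit the branch above, assuming a CPython new enough to carry this message:

    try:
        compile('f"{1:\n}"', "<demo>", "eval")
    except SyntaxError as e:
        # f-string: newlines are not allowed in format specifiers
        # for single quoted f-strings
        print(e.msg)
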
1484
1485
390
            assert(tok->multi_line_start != NULL);
1486
            // shift the tok_state's location into
1487
            // the start of the string, and report the error
1488
            // from the initial quote character
1489
390
            tok->cur = (char *)current_tok->start;
1490
390
            tok->cur++;
1491
390
            tok->line_start = current_tok->multi_line_start;
1492
390
            int start = tok->lineno;
1493
1494
390
            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1495
390
            tok->lineno = the_current_tok->first_line;
1496
1497
390
            if (current_tok->quote_size == 3) {
1498
35
                _PyTokenizer_syntaxerror(tok,
1499
35
                                    "unterminated triple-quoted %c-string literal"
1500
35
                                    " (detected at line %d)",
1501
35
                                    TOK_GET_STRING_PREFIX(tok), start);
1502
35
                if (c != '\n') {
1503
35
                    tok->done = E_EOFS;
1504
35
                }
1505
35
                return MAKE_TOKEN(ERRORTOKEN);
1506
35
            }
1507
355
            else {
1508
355
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1509
355
                                    "unterminated %c-string literal (detected at"
1510
355
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
1511
355
            }
1512
390
        }
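
Reaching EOF (or, for single-quoted strings, a bare newline) inside the literal part lands here, and the location is rewound so the error points at the opening quote. For example:

    try:
        compile('x = f"abc\n', "<demo>", "exec")
    except SyntaxError as e:
        print(e.msg)   # unterminated f-string literal (detected at line 1)
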
1513
1514
152k
        if (c == current_tok->quote) {
1515
8.55k
            end_quote_size += 1;
1516
8.55k
            continue;
1517
143k
        } else {
1518
143k
            end_quote_size = 0;
1519
143k
        }
1520
1521
143k
        if (c == '{') {
1522
18.1k
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
1523
0
                return MAKE_TOKEN(ENDMARKER);
1524
0
            }
1525
18.1k
            int peek = tok_nextc(tok);
1526
18.1k
            if (peek != '{' || in_format_spec) {
1527
14.7k
                tok_backup(tok, peek);
1528
14.7k
                tok_backup(tok, c);
1529
14.7k
                current_tok->curly_bracket_expr_start_depth++;
1530
14.7k
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
1531
5
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1532
5
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
1533
5
                }
1534
14.7k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1535
14.7k
                current_tok->in_format_spec = 0;
1536
14.7k
                p_start = tok->start;
1537
14.7k
                p_end = tok->cur;
1538
14.7k
            } else {
1539
3.40k
                p_start = tok->start;
1540
3.40k
                p_end = tok->cur - 1;
1541
3.40k
            }
1542
18.1k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1543
125k
        } else if (c == '}') {
1544
4.91k
            if (unicode_escape) {
1545
486
                p_start = tok->start;
1546
486
                p_end = tok->cur;
1547
486
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1548
486
            }
1549
4.42k
            int peek = tok_nextc(tok);
1550
1551
            // The tokenizer can only be in the format spec if we have already completed the
1552
            // expression scanning (indicated by last_expr_end being set) and we are not at the top
1553
            // level of the bracket stack (-1 is the top level). Since format specifiers cannot
1554
            // legally contain doubled brackets, we can skip the double-brace handling here.
1555
4.42k
            int cursor = current_tok->curly_bracket_depth;
1556
4.42k
            if (peek == '}' && !in_format_spec && cursor == 0) {
1557
1.74k
                p_start = tok->start;
1558
1.74k
                p_end = tok->cur - 1;
1559
2.67k
            } else {
1560
2.67k
                tok_backup(tok, peek);
1561
2.67k
                tok_backup(tok, c);
1562
2.67k
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
1563
2.67k
                current_tok->in_format_spec = 0;
1564
2.67k
                p_start = tok->start;
1565
2.67k
                p_end = tok->cur;
1566
2.67k
            }
1567
4.42k
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1568
120k
        } else if (c == '\\') {
1569
6.30k
            int peek = tok_nextc(tok);
1570
6.30k
            if (peek == '\r') {
1571
69
                peek = tok_nextc(tok);
1572
69
            }
1573
            // Special case when the backslash is right before a curly
1574
            // brace. We have to restore it and return control
1575
            // to the loop for the next iteration.
1576
6.30k
            if (peek == '{' || peek == '}') {
1577
1.32k
                if (!current_tok->raw) {
1578
1.13k
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
1579
0
                        return MAKE_TOKEN(ERRORTOKEN);
1580
0
                    }
1581
1.13k
                }
1582
1.32k
                tok_backup(tok, peek);
1583
1.32k
                continue;
1584
1.32k
            }
1585
1586
4.97k
            if (!current_tok->raw) {
1587
4.71k
                if (peek == 'N') {
1588
                    /* Handle named unicode escapes (\N{BULLET}) */
1589
750
                    peek = tok_nextc(tok);
1590
750
                    if (peek == '{') {
1591
506
                        unicode_escape = 1;
1592
506
                    } else {
1593
244
                        tok_backup(tok, peek);
1594
244
                    }
1595
750
                }
1596
4.71k
            } /* else {
1597
                skip the escaped character
1598
            }*/
1599
4.97k
        }
1600
143k
    }
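
The loop above treats doubled braces as literal text (the emitted FSTRING_MIDDLE stops one character short, so the pair collapses to a single brace) and suppresses brace handling inside \N{...} escapes. Both behaviours in two lines:

    print(f"{{x}}")        # {x}  -- doubled braces yield literal braces
    print(f"\N{BULLET}")   # •    -- braces of a named escape are not a field
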
1601
1602
    // Back up the f-string quotes to emit a final FSTRING_MIDDLE and
1603
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
1604
12.3k
    for (int i = 0; i < current_tok->quote_size; i++) {
1605
6.60k
        tok_backup(tok, current_tok->quote);
1606
6.60k
    }
1607
5.72k
    p_start = tok->start;
1608
5.72k
    p_end = tok->cur;
1609
5.72k
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
1610
29.2k
}
1611
1612
static int
1613
tok_get(struct tok_state *tok, struct token *token)
1614
1.79M
{
1615
1.79M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1616
1.79M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1617
1.74M
        return tok_get_normal_mode(tok, current_tok, token);
1618
1.74M
    } else {
1619
53.0k
        return tok_get_fstring_mode(tok, current_tok, token);
1620
53.0k
    }
1621
1.79M
}
1622
1623
int
1624
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1625
1.79M
{
1626
1.79M
    int result = tok_get(tok, token);
1627
1.79M
    if (tok->decoding_erred) {
1628
0
        result = ERRORTOKEN;
1629
0
        tok->done = E_DECODE;
1630
0
    }
1631
1.79M
    return result;
1632
1.79M
}
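
If the underlying reader flagged a decoding failure, the result is forced to ERRORTOKEN and tok->done is set to E_DECODE regardless of what tok_get returned. From Python this surfaces as a SyntaxError for undecodable source bytes; a sketch (the exact message wording varies by version):

    try:
        compile(b"x = '\xff'\n", "<demo>", "exec")
    except SyntaxError as e:
        print(e)   # complains about non-UTF-8 source with no encoding declared
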