Coverage Report

Created: 2025-10-10 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Parser/lexer/lexer.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.63k
#define ALTTABSIZE 1
11
12
1.81M
#define is_potential_identifier_start(c) (\
13
1.81M
              (c >= 'a' && c <= 'z')\
14
1.81M
               || (c >= 'A' && c <= 'Z')\
15
1.81M
               || c == '_'\
16
1.81M
               || (c >= 128))
17
18
2.53M
#define is_potential_identifier_char(c) (\
19
2.53M
              (c >= 'a' && c <= 'z')\
20
2.53M
               || (c >= 'A' && c <= 'Z')\
21
2.53M
               || (c >= '0' && c <= '9')\
22
2.53M
               || c == '_'\
23
2.53M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.93M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
16.8k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
35
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.81M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
243k
{
55
243k
    return memchr(str, 0, size) != NULL;
56
243k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
11.2M
{
62
11.2M
    int rc;
63
11.4M
    for (;;) {
64
11.4M
        if (tok->cur != tok->inp) {
65
11.1M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
11.1M
            tok->col_offset++;
70
11.1M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
11.1M
        }
72
292k
        if (tok->done != E_OK) {
73
32.9k
            return EOF;
74
32.9k
        }
75
259k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
259k
        if (!rc) {
84
16.6k
            tok->cur = tok->inp;
85
16.6k
            return EOF;
86
16.6k
        }
87
243k
        tok->line_start = tok->cur;
88
89
243k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
243k
    }
95
11.2M
    Py_UNREACHABLE();
96
11.2M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.83M
{
102
3.83M
    if (c != EOF) {
103
3.80M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.80M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.80M
        tok->col_offset--;
110
3.80M
    }
111
3.83M
}
112
113
static int
114
24.1k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
24.1k
    assert(token != NULL);
116
24.1k
    assert(c == '}' || c == ':' || c == '!');
117
24.1k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
24.1k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
14.1k
        return 0;
121
14.1k
    }
122
10.0k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
10.0k
    int hash_detected = 0;
126
10.0k
    int in_string = 0;
127
10.0k
    char quote_char = 0;
128
129
1.78M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.77M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.77M
        if (ch == '\\') {
134
37.7k
            i++;
135
37.7k
            continue;
136
37.7k
        }
137
138
        // Handle quotes
139
1.73M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
237k
            if (!in_string) {
148
85.0k
                in_string = 1;
149
85.0k
                quote_char = ch;
150
85.0k
            }
151
152k
            else if (ch == quote_char) {
152
83.9k
                in_string = 0;
153
83.9k
            }
154
237k
            continue;
155
237k
        }
156
157
        // Check for # outside strings
158
1.50M
        if (ch == '#' && !in_string) {
159
882
            hash_detected = 1;
160
882
            break;
161
882
        }
162
1.50M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
10.0k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
882
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
882
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
882
        Py_ssize_t i = 0;  // Input position
172
882
        Py_ssize_t j = 0;  // Output position
173
882
        in_string = 0;     // Whether we're in a string
174
882
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
100k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
99.5k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
99.5k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
13.1k
                if (!in_string) {
184
5.01k
                    in_string = 1;
185
5.01k
                    quote_char = ch;
186
8.14k
                } else if (ch == quote_char) {
187
5.01k
                    in_string = 0;
188
5.01k
                }
189
13.1k
                result[j++] = ch;
190
13.1k
            }
191
            // Skip comments
192
86.3k
            else if (ch == '#' && !in_string) {
193
57.1k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
56.4k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
56.1k
                    i++;
196
56.1k
                }
197
1.00k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
261
                    result[j++] = '\n';
199
261
                }
200
1.00k
            }
201
            // Copy other chars
202
85.3k
            else {
203
85.3k
                result[j++] = ch;
204
85.3k
            }
205
99.5k
            i++;
206
99.5k
        }
207
208
882
        result[j] = '\0';  // Null-terminate the result string
209
882
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
882
        PyMem_Free(result);
211
9.14k
    } else {
212
9.14k
        res = PyUnicode_DecodeUTF8(
213
9.14k
            tok_mode->last_expr_buffer,
214
9.14k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
9.14k
            NULL
216
9.14k
        );
217
9.14k
    }
218
219
10.0k
    if (!res) {
220
12
        return -1;
221
12
    }
222
10.0k
    token->metadata = res;
223
10.0k
    return 0;
224
10.0k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
67.7k
{
229
67.7k
    assert(tok->cur != NULL);
230
231
67.7k
    Py_ssize_t size = strlen(tok->cur);
232
67.7k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
67.7k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
43.6k
        case '{':
252
43.6k
            if (tok_mode->last_expr_buffer != NULL) {
253
31.9k
                PyMem_Free(tok_mode->last_expr_buffer);
254
31.9k
            }
255
43.6k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
43.6k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
43.6k
            tok_mode->last_expr_size = size;
260
43.6k
            tok_mode->last_expr_end = -1;
261
43.6k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
43.6k
            break;
263
19.7k
        case '}':
264
21.3k
        case '!':
265
21.3k
            tok_mode->last_expr_end = strlen(tok->start);
266
21.3k
            break;
267
2.84k
        case ':':
268
2.84k
            if (tok_mode->last_expr_end == -1) {
269
2.76k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.76k
            }
271
2.84k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
67.7k
    }
275
67.7k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
67.7k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
9.16k
{
284
9.16k
    const char *s = test;
285
9.16k
    int res = 0;
286
24.6k
    while (1) {
287
24.6k
        int c = tok_nextc(tok);
288
24.6k
        if (*s == 0) {
289
9.07k
            res = !is_potential_identifier_char(c);
290
9.07k
        }
291
15.5k
        else if (c == *s) {
292
15.4k
            s++;
293
15.4k
            continue;
294
15.4k
        }
295
296
9.16k
        tok_backup(tok, c);
297
24.6k
        while (s != test) {
298
15.4k
            tok_backup(tok, *--s);
299
15.4k
        }
300
9.16k
        return res;
301
24.6k
    }
302
9.16k
}
303
304
static int
305
103k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
103k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
103k
    int r = 0;
322
103k
    if (c == 'a') {
323
1.26k
        r = lookahead(tok, "nd");
324
1.26k
    }
325
102k
    else if (c == 'e') {
326
629
        r = lookahead(tok, "lse");
327
629
    }
328
101k
    else if (c == 'f') {
329
3.65k
        r = lookahead(tok, "or");
330
3.65k
    }
331
97.7k
    else if (c == 'i') {
332
1.94k
        int c2 = tok_nextc(tok);
333
1.94k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
1.92k
            r = 1;
335
1.92k
        }
336
1.94k
        tok_backup(tok, c2);
337
1.94k
    }
338
95.8k
    else if (c == 'o') {
339
3.31k
        r = lookahead(tok, "r");
340
3.31k
    }
341
92.4k
    else if (c == 'n') {
342
308
        r = lookahead(tok, "ot");
343
308
    }
344
103k
    if (r) {
345
10.9k
        tok_backup(tok, c);
346
10.9k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.9k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.9k
        tok_nextc(tok);
352
10.9k
    }
353
92.3k
    else /* In future releases, only error will remain. */
354
92.3k
    if (c < 128 && is_potential_identifier_char(c)) {
355
204
        tok_backup(tok, c);
356
204
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
204
        return 0;
358
204
    }
359
103k
    return 1;
360
103k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
13.9k
{
366
13.9k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
13.9k
    PyObject *s;
370
13.9k
    if (tok->decoding_erred)
371
0
        return 0;
372
13.9k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
13.9k
    if (s == NULL) {
374
955
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
955
            tok->done = E_DECODE;
376
955
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
955
        return 0;
381
955
    }
382
13.0k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
13.0k
    assert(invalid >= 0);
384
13.0k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
13.0k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
683
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
683
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
480
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
480
            if (s != NULL) {
391
480
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
480
            }
393
480
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
480
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
480
        }
399
683
        Py_DECREF(s);
400
683
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
385
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
385
        }
403
298
        else {
404
298
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
298
        }
406
683
        return 0;
407
683
    }
408
12.3k
    Py_DECREF(s);
409
12.3k
    return 1;
410
13.0k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
82.8k
{
415
82.8k
    int c;
416
417
83.2k
    while (1) {
418
231k
        do {
419
231k
            c = tok_nextc(tok);
420
231k
        } while (Py_ISDIGIT(c));
421
83.2k
        if (c != '_') {
422
82.8k
            break;
423
82.8k
        }
424
487
        c = tok_nextc(tok);
425
487
        if (!Py_ISDIGIT(c)) {
426
15
            tok_backup(tok, c);
427
15
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
15
            return 0;
429
15
        }
430
487
    }
431
82.8k
    return c;
432
82.8k
}
433
434
static inline int
435
1.09k
tok_continuation_line(struct tok_state *tok) {
436
1.09k
    int c = tok_nextc(tok);
437
1.09k
    if (c == '\r') {
438
69
        c = tok_nextc(tok);
439
69
    }
440
1.09k
    if (c != '\n') {
441
61
        tok->done = E_LINECONT;
442
61
        return -1;
443
61
    }
444
1.03k
    c = tok_nextc(tok);
445
1.03k
    if (c == EOF) {
446
49
        tok->done = E_EOF;
447
49
        tok->cur = tok->inp;
448
49
        return -1;
449
981
    } else {
450
981
        tok_backup(tok, c);
451
981
    }
452
981
    return c;
453
1.03k
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
21.4k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
21.4k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
21.4k
    do {                                                                  \
464
7
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
7
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
7
            (int)(tok->cur - tok->line_start),                            \
467
7
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
7
        return -1;                                                        \
469
7
    } while (0)
470
471
21.4k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
21.4k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
21.4k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
21.4k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
21.4k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
21.4k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
21.3k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
21.3k
#undef RETURN_SYNTAX_ERROR
496
497
21.3k
    return 0;
498
21.3k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.77M
{
503
1.77M
    int c;
504
1.77M
    int blankline, nonascii;
505
506
1.77M
    const char *p_start = NULL;
507
1.77M
    const char *p_end = NULL;
508
1.87M
  nextline:
509
1.87M
    tok->start = NULL;
510
1.87M
    tok->starting_col_offset = -1;
511
1.87M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.87M
    if (tok->atbol) {
516
241k
        int col = 0;
517
241k
        int altcol = 0;
518
241k
        tok->atbol = 0;
519
241k
        int cont_line_col = 0;
520
978k
        for (;;) {
521
978k
            c = tok_nextc(tok);
522
978k
            if (c == ' ') {
523
734k
                col++, altcol++;
524
734k
            }
525
244k
            else if (c == '\t') {
526
816
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
816
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
816
            }
529
243k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.44k
                col = altcol = 0; /* For Emacs users */
531
1.44k
            }
532
241k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
657
                cont_line_col = cont_line_col ? cont_line_col : col;
538
657
                if ((c = tok_continuation_line(tok)) == -1) {
539
41
                    return MAKE_TOKEN(ERRORTOKEN);
540
41
                }
541
657
            }
542
241k
            else {
543
241k
                break;
544
241k
            }
545
978k
        }
546
241k
        tok_backup(tok, c);
547
241k
        if (c == '#' || c == '\n' || c == '\r') {
548
            /* Lines with only whitespace and/or comments
549
               shouldn't affect the indentation and are
550
               not passed to the parser as NEWLINE tokens,
551
               except *totally* empty lines in interactive
552
               mode, which signal the end of a command group. */
553
56.1k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
554
0
                blankline = 0; /* Let it through */
555
0
            }
556
56.1k
            else if (tok->prompt != NULL && tok->lineno == 1) {
557
                /* In interactive mode, if the first line contains
558
                   only spaces and/or a comment, let it through. */
559
0
                blankline = 0;
560
0
                col = altcol = 0;
561
0
            }
562
56.1k
            else {
563
56.1k
                blankline = 1; /* Ignore completely */
564
56.1k
            }
565
            /* We can't jump back right here since we still
566
               may need to skip to the end of a comment */
567
56.1k
        }
568
241k
        if (!blankline && tok->level == 0) {
569
143k
            col = cont_line_col ? cont_line_col : col;
570
143k
            altcol = cont_line_col ? cont_line_col : altcol;
571
143k
            if (col == tok->indstack[tok->indent]) {
572
                /* No change */
573
102k
                if (altcol != tok->altindstack[tok->indent]) {
574
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
575
1
                }
576
102k
            }
577
40.6k
            else if (col > tok->indstack[tok->indent]) {
578
                /* Indent -- always one */
579
22.7k
                if (tok->indent+1 >= MAXINDENT) {
580
0
                    tok->done = E_TOODEEP;
581
0
                    tok->cur = tok->inp;
582
0
                    return MAKE_TOKEN(ERRORTOKEN);
583
0
                }
584
22.7k
                if (altcol <= tok->altindstack[tok->indent]) {
585
3
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
586
3
                }
587
22.7k
                tok->pendin++;
588
22.7k
                tok->indstack[++tok->indent] = col;
589
22.7k
                tok->altindstack[tok->indent] = altcol;
590
22.7k
            }
591
17.9k
            else /* col < tok->indstack[tok->indent] */ {
592
                /* Dedent -- any number, must be consistent */
593
39.8k
                while (tok->indent > 0 &&
594
34.9k
                    col < tok->indstack[tok->indent]) {
595
21.9k
                    tok->pendin--;
596
21.9k
                    tok->indent--;
597
21.9k
                }
598
17.9k
                if (col != tok->indstack[tok->indent]) {
599
10
                    tok->done = E_DEDENT;
600
10
                    tok->cur = tok->inp;
601
10
                    return MAKE_TOKEN(ERRORTOKEN);
602
10
                }
603
17.8k
                if (altcol != tok->altindstack[tok->indent]) {
604
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
605
1
                }
606
17.8k
            }
607
143k
        }
608
241k
    }
609
610
1.87M
    tok->start = tok->cur;
611
1.87M
    tok->starting_col_offset = tok->col_offset;
612
613
    /* Return pending indents/dedents */
614
1.87M
    if (tok->pendin != 0) {
615
44.7k
        if (tok->pendin < 0) {
616
21.9k
            if (tok->tok_extra_tokens) {
617
0
                p_start = tok->cur;
618
0
                p_end = tok->cur;
619
0
            }
620
21.9k
            tok->pendin++;
621
21.9k
            return MAKE_TOKEN(DEDENT);
622
21.9k
        }
623
22.7k
        else {
624
22.7k
            if (tok->tok_extra_tokens) {
625
0
                p_start = tok->buf;
626
0
                p_end = tok->cur;
627
0
            }
628
22.7k
            tok->pendin--;
629
22.7k
            return MAKE_TOKEN(INDENT);
630
22.7k
        }
631
44.7k
    }
632
633
    /* Peek ahead at the next character */
634
1.82M
    c = tok_nextc(tok);
635
1.82M
    tok_backup(tok, c);
636
637
1.82M
 again:
638
1.82M
    tok->start = NULL;
639
    /* Skip spaces */
640
2.19M
    do {
641
2.19M
        c = tok_nextc(tok);
642
2.19M
    } while (c == ' ' || c == '\t' || c == '\014');
643
644
    /* Set start of current token */
645
1.82M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
646
1.82M
    tok->starting_col_offset = tok->col_offset - 1;
647
648
    /* Skip comment, unless it's a type comment */
649
1.82M
    if (c == '#') {
650
651
43.2k
        const char* p = NULL;
652
43.2k
        const char *prefix, *type_start;
653
43.2k
        int current_starting_col_offset;
654
655
1.33M
        while (c != EOF && c != '\n' && c != '\r') {
656
1.28M
            c = tok_nextc(tok);
657
1.28M
        }
658
659
43.2k
        if (tok->tok_extra_tokens) {
660
0
            p = tok->start;
661
0
        }
662
663
43.2k
        if (tok->type_comments) {
664
0
            p = tok->start;
665
0
            current_starting_col_offset = tok->starting_col_offset;
666
0
            prefix = type_comment_prefix;
667
0
            while (*prefix && p < tok->cur) {
668
0
                if (*prefix == ' ') {
669
0
                    while (*p == ' ' || *p == '\t') {
670
0
                        p++;
671
0
                        current_starting_col_offset++;
672
0
                    }
673
0
                } else if (*prefix == *p) {
674
0
                    p++;
675
0
                    current_starting_col_offset++;
676
0
                } else {
677
0
                    break;
678
0
                }
679
680
0
                prefix++;
681
0
            }
682
683
            /* This is a type comment if we matched all of type_comment_prefix. */
684
0
            if (!*prefix) {
685
0
                int is_type_ignore = 1;
686
                // +6 in order to skip the word 'ignore'
687
0
                const char *ignore_end = p + 6;
688
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
689
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
690
691
0
                type_start = p;
692
693
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
694
                 * or anything ASCII and non-alphanumeric. */
695
0
                is_type_ignore = (
696
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
697
0
                    && !(tok->cur > ignore_end
698
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
699
700
0
                if (is_type_ignore) {
701
0
                    p_start = ignore_end;
702
0
                    p_end = tok->cur;
703
704
                    /* If this type ignore is the only thing on the line, consume the newline also. */
705
0
                    if (blankline) {
706
0
                        tok_nextc(tok);
707
0
                        tok->atbol = 1;
708
0
                    }
709
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
710
0
                } else {
711
0
                    p_start = type_start;
712
0
                    p_end = tok->cur;
713
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
714
0
                }
715
0
            }
716
0
        }
717
43.2k
        if (tok->tok_extra_tokens) {
718
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
719
0
            p_start = p;
720
0
            p_end = tok->cur;
721
0
            tok->comment_newline = blankline;
722
0
            return MAKE_TOKEN(COMMENT);
723
0
        }
724
43.2k
    }
725
726
1.82M
    if (tok->done == E_INTERACT_STOP) {
727
0
        return MAKE_TOKEN(ENDMARKER);
728
0
    }
729
730
    /* Check for EOF and errors now */
731
1.82M
    if (c == EOF) {
732
16.4k
        if (tok->level) {
733
4.05k
            return MAKE_TOKEN(ERRORTOKEN);
734
4.05k
        }
735
12.4k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
736
16.4k
    }
737
738
    /* Identifier (most frequent token!) */
739
1.81M
    nonascii = 0;
740
1.81M
    if (is_potential_identifier_start(c)) {
741
        /* Process the various legal combinations of b"", r"", u"", and f"". */
742
553k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
743
682k
        while (1) {
744
682k
            if (!saw_b && (c == 'b' || c == 'B')) {
745
21.8k
                saw_b = 1;
746
21.8k
            }
747
            /* Since this is a backwards compatibility support literal we don't
748
               want to support it in arbitrary order like byte literals. */
749
660k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
750
7.01k
                saw_u = 1;
751
7.01k
            }
752
            /* ur"" and ru"" are not supported */
753
653k
            else if (!saw_r && (c == 'r' || c == 'R')) {
754
38.2k
                saw_r = 1;
755
38.2k
            }
756
615k
            else if (!saw_f && (c == 'f' || c == 'F')) {
757
46.5k
                saw_f = 1;
758
46.5k
            }
759
568k
            else if (!saw_t && (c == 't' || c == 'T')) {
760
36.3k
                saw_t = 1;
761
36.3k
            }
762
532k
            else {
763
532k
                break;
764
532k
            }
765
149k
            c = tok_nextc(tok);
766
149k
            if (c == '"' || c == '\'') {
767
                // Raise error on incompatible string prefixes:
768
21.4k
                int status = maybe_raise_syntax_error_for_string_prefixes(
769
21.4k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
770
21.4k
                if (status < 0) {
771
7
                    return MAKE_TOKEN(ERRORTOKEN);
772
7
                }
773
774
                // Handle valid f or t string creation:
775
21.3k
                if (saw_f || saw_t) {
776
16.8k
                    goto f_string_quote;
777
16.8k
                }
778
4.58k
                goto letter_quote;
779
21.3k
            }
780
149k
        }
781
2.43M
        while (is_potential_identifier_char(c)) {
782
1.89M
            if (c >= 128) {
783
161k
                nonascii = 1;
784
161k
            }
785
1.89M
            c = tok_nextc(tok);
786
1.89M
        }
787
532k
        tok_backup(tok, c);
788
532k
        if (nonascii && !verify_identifier(tok)) {
789
1.63k
            return MAKE_TOKEN(ERRORTOKEN);
790
1.63k
        }
791
792
530k
        p_start = tok->start;
793
530k
        p_end = tok->cur;
794
795
530k
        return MAKE_TOKEN(NAME);
796
532k
    }
797
798
1.25M
    if (c == '\r') {
799
414
        c = tok_nextc(tok);
800
414
    }
801
802
    /* Newline */
803
1.25M
    if (c == '\n') {
804
220k
        tok->atbol = 1;
805
220k
        if (blankline || tok->level > 0) {
806
98.0k
            if (tok->tok_extra_tokens) {
807
0
                if (tok->comment_newline) {
808
0
                    tok->comment_newline = 0;
809
0
                }
810
0
                p_start = tok->start;
811
0
                p_end = tok->cur;
812
0
                return MAKE_TOKEN(NL);
813
0
            }
814
98.0k
            goto nextline;
815
98.0k
        }
816
122k
        if (tok->comment_newline && tok->tok_extra_tokens) {
817
0
            tok->comment_newline = 0;
818
0
            p_start = tok->start;
819
0
            p_end = tok->cur;
820
0
            return MAKE_TOKEN(NL);
821
0
        }
822
122k
        p_start = tok->start;
823
122k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
824
122k
        tok->cont_line = 0;
825
122k
        return MAKE_TOKEN(NEWLINE);
826
122k
    }
827
828
    /* Period or number starting with period? */
829
1.03M
    if (c == '.') {
830
34.5k
        c = tok_nextc(tok);
831
34.5k
        if (Py_ISDIGIT(c)) {
832
3.39k
            goto fraction;
833
31.1k
        } else if (c == '.') {
834
1.61k
            c = tok_nextc(tok);
835
1.61k
            if (c == '.') {
836
984
                p_start = tok->start;
837
984
                p_end = tok->cur;
838
984
                return MAKE_TOKEN(ELLIPSIS);
839
984
            }
840
627
            else {
841
627
                tok_backup(tok, c);
842
627
            }
843
627
            tok_backup(tok, '.');
844
627
        }
845
29.5k
        else {
846
29.5k
            tok_backup(tok, c);
847
29.5k
        }
848
30.1k
        p_start = tok->start;
849
30.1k
        p_end = tok->cur;
850
30.1k
        return MAKE_TOKEN(DOT);
851
34.5k
    }
852
853
    /* Number */
854
1.00M
    if (Py_ISDIGIT(c)) {
855
100k
        if (c == '0') {
856
            /* Hex, octal or binary -- maybe. */
857
34.1k
            c = tok_nextc(tok);
858
34.1k
            if (c == 'x' || c == 'X') {
859
                /* Hex */
860
15.8k
                c = tok_nextc(tok);
861
16.1k
                do {
862
16.1k
                    if (c == '_') {
863
212
                        c = tok_nextc(tok);
864
212
                    }
865
16.1k
                    if (!Py_ISXDIGIT(c)) {
866
19
                        tok_backup(tok, c);
867
19
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
868
19
                    }
869
79.3k
                    do {
870
79.3k
                        c = tok_nextc(tok);
871
79.3k
                    } while (Py_ISXDIGIT(c));
872
16.0k
                } while (c == '_');
873
15.8k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
874
2
                    return MAKE_TOKEN(ERRORTOKEN);
875
2
                }
876
15.8k
            }
877
18.2k
            else if (c == 'o' || c == 'O') {
878
                /* Octal */
879
616
                c = tok_nextc(tok);
880
1.02k
                do {
881
1.02k
                    if (c == '_') {
882
411
                        c = tok_nextc(tok);
883
411
                    }
884
1.02k
                    if (c < '0' || c >= '8') {
885
21
                        if (Py_ISDIGIT(c)) {
886
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
887
1
                                    "invalid digit '%c' in octal literal", c));
888
1
                        }
889
20
                        else {
890
20
                            tok_backup(tok, c);
891
20
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
892
20
                        }
893
21
                    }
894
2.64k
                    do {
895
2.64k
                        c = tok_nextc(tok);
896
2.64k
                    } while ('0' <= c && c < '8');
897
1.00k
                } while (c == '_');
898
595
                if (Py_ISDIGIT(c)) {
899
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
900
1
                            "invalid digit '%c' in octal literal", c));
901
1
                }
902
594
                if (!verify_end_of_number(tok, c, "octal")) {
903
3
                    return MAKE_TOKEN(ERRORTOKEN);
904
3
                }
905
594
            }
906
17.6k
            else if (c == 'b' || c == 'B') {
907
                /* Binary */
908
558
                c = tok_nextc(tok);
909
908
                do {
910
908
                    if (c == '_') {
911
358
                        c = tok_nextc(tok);
912
358
                    }
913
908
                    if (c != '0' && c != '1') {
914
21
                        if (Py_ISDIGIT(c)) {
915
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
916
1
                        }
917
20
                        else {
918
20
                            tok_backup(tok, c);
919
20
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
920
20
                        }
921
21
                    }
922
4.05k
                    do {
923
4.05k
                        c = tok_nextc(tok);
924
4.05k
                    } while (c == '0' || c == '1');
925
887
                } while (c == '_');
926
537
                if (Py_ISDIGIT(c)) {
927
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
928
2
                }
929
535
                if (!verify_end_of_number(tok, c, "binary")) {
930
1
                    return MAKE_TOKEN(ERRORTOKEN);
931
1
                }
932
535
            }
933
17.0k
            else {
934
17.0k
                int nonzero = 0;
935
                /* maybe old-style octal; c is first char of it */
936
                /* in any case, allow '0' as a literal */
937
18.5k
                while (1) {
938
18.5k
                    if (c == '_') {
939
91
                        c = tok_nextc(tok);
940
91
                        if (!Py_ISDIGIT(c)) {
941
3
                            tok_backup(tok, c);
942
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
943
3
                        }
944
91
                    }
945
18.5k
                    if (c != '0') {
946
17.0k
                        break;
947
17.0k
                    }
948
1.48k
                    c = tok_nextc(tok);
949
1.48k
                }
950
17.0k
                char* zeros_end = tok->cur;
951
17.0k
                if (Py_ISDIGIT(c)) {
952
467
                    nonzero = 1;
953
467
                    c = tok_decimal_tail(tok);
954
467
                    if (c == 0) {
955
1
                        return MAKE_TOKEN(ERRORTOKEN);
956
1
                    }
957
467
                }
958
17.0k
                if (c == '.') {
959
829
                    c = tok_nextc(tok);
960
829
                    goto fraction;
961
829
                }
962
16.2k
                else if (c == 'e' || c == 'E') {
963
856
                    goto exponent;
964
856
                }
965
15.3k
                else if (c == 'j' || c == 'J') {
966
907
                    goto imaginary;
967
907
                }
968
14.4k
                else if (nonzero && !tok->tok_extra_tokens) {
969
                    /* Old-style octal: now disallowed. */
970
20
                    tok_backup(tok, c);
971
20
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
972
20
                            tok, (int)(tok->start + 1 - tok->line_start),
973
20
                            (int)(zeros_end - tok->line_start),
974
20
                            "leading zeros in decimal integer "
975
20
                            "literals are not permitted; "
976
20
                            "use an 0o prefix for octal integers"));
977
20
                }
978
14.4k
                if (!verify_end_of_number(tok, c, "decimal")) {
979
31
                    return MAKE_TOKEN(ERRORTOKEN);
980
31
                }
981
14.4k
            }
982
34.1k
        }
983
65.8k
        else {
984
            /* Decimal */
985
65.8k
            c = tok_decimal_tail(tok);
986
65.8k
            if (c == 0) {
987
10
                return MAKE_TOKEN(ERRORTOKEN);
988
10
            }
989
65.8k
            {
990
                /* Accept floating-point numbers. */
991
65.8k
                if (c == '.') {
992
3.82k
                    c = tok_nextc(tok);
993
8.05k
        fraction:
994
                    /* Fraction */
995
8.05k
                    if (Py_ISDIGIT(c)) {
996
6.17k
                        c = tok_decimal_tail(tok);
997
6.17k
                        if (c == 0) {
998
2
                            return MAKE_TOKEN(ERRORTOKEN);
999
2
                        }
1000
6.17k
                    }
1001
8.05k
                }
1002
70.0k
                if (c == 'e' || c == 'E') {
1003
10.1k
                    int e;
1004
10.9k
                  exponent:
1005
10.9k
                    e = c;
1006
                    /* Exponent part */
1007
10.9k
                    c = tok_nextc(tok);
1008
10.9k
                    if (c == '+' || c == '-') {
1009
4.14k
                        c = tok_nextc(tok);
1010
4.14k
                        if (!Py_ISDIGIT(c)) {
1011
11
                            tok_backup(tok, c);
1012
11
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1013
11
                        }
1014
6.82k
                    } else if (!Py_ISDIGIT(c)) {
1015
627
                        tok_backup(tok, c);
1016
627
                        if (!verify_end_of_number(tok, e, "decimal")) {
1017
38
                            return MAKE_TOKEN(ERRORTOKEN);
1018
38
                        }
1019
589
                        tok_backup(tok, e);
1020
589
                        p_start = tok->start;
1021
589
                        p_end = tok->cur;
1022
589
                        return MAKE_TOKEN(NUMBER);
1023
627
                    }
1024
10.3k
                    c = tok_decimal_tail(tok);
1025
10.3k
                    if (c == 0) {
1026
2
                        return MAKE_TOKEN(ERRORTOKEN);
1027
2
                    }
1028
10.3k
                }
1029
70.2k
                if (c == 'j' || c == 'J') {
1030
                    /* Imaginary part */
1031
4.36k
        imaginary:
1032
4.36k
                    c = tok_nextc(tok);
1033
4.36k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1034
11
                        return MAKE_TOKEN(ERRORTOKEN);
1035
11
                    }
1036
4.36k
                }
1037
66.8k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1038
118
                    return MAKE_TOKEN(ERRORTOKEN);
1039
118
                }
1040
70.2k
            }
1041
70.2k
        }
1042
102k
        tok_backup(tok, c);
1043
102k
        p_start = tok->start;
1044
102k
        p_end = tok->cur;
1045
102k
        return MAKE_TOKEN(NUMBER);
1046
100k
    }
1047
1048
919k
  f_string_quote:
1049
919k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1050
16.8k
        && (c == '\'' || c == '"'))) {
1051
1052
16.8k
        int quote = c;
1053
16.8k
        int quote_size = 1;             /* 1 or 3 */
1054
1055
        /* Nodes of type STRING, especially multi line strings
1056
           must be handled differently in order to get both
1057
           the starting line number and the column offset right.
1058
           (cf. issue 16806) */
1059
16.8k
        tok->first_lineno = tok->lineno;
1060
16.8k
        tok->multi_line_start = tok->line_start;
1061
1062
        /* Find the quote size and start of string */
1063
16.8k
        int after_quote = tok_nextc(tok);
1064
16.8k
        if (after_quote == quote) {
1065
2.48k
            int after_after_quote = tok_nextc(tok);
1066
2.48k
            if (after_after_quote == quote) {
1067
720
                quote_size = 3;
1068
720
            }
1069
1.76k
            else {
1070
                // TODO: Check this
1071
1.76k
                tok_backup(tok, after_after_quote);
1072
1.76k
                tok_backup(tok, after_quote);
1073
1.76k
            }
1074
2.48k
        }
1075
16.8k
        if (after_quote != quote) {
1076
14.3k
            tok_backup(tok, after_quote);
1077
14.3k
        }
1078
1079
1080
16.8k
        p_start = tok->start;
1081
16.8k
        p_end = tok->cur;
1082
16.8k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1083
2
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1084
2
        }
1085
16.8k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1086
16.8k
        the_current_tok->kind = TOK_FSTRING_MODE;
1087
16.8k
        the_current_tok->quote = quote;
1088
16.8k
        the_current_tok->quote_size = quote_size;
1089
16.8k
        the_current_tok->start = tok->start;
1090
16.8k
        the_current_tok->multi_line_start = tok->line_start;
1091
16.8k
        the_current_tok->first_line = tok->lineno;
1092
16.8k
        the_current_tok->start_offset = -1;
1093
16.8k
        the_current_tok->multi_line_start_offset = -1;
1094
16.8k
        the_current_tok->last_expr_buffer = NULL;
1095
16.8k
        the_current_tok->last_expr_size = 0;
1096
16.8k
        the_current_tok->last_expr_end = -1;
1097
16.8k
        the_current_tok->in_format_spec = 0;
1098
16.8k
        the_current_tok->in_debug = 0;
1099
1100
16.8k
        enum string_kind_t string_kind = FSTRING;
1101
16.8k
        switch (*tok->start) {
1102
670
            case 'T':
1103
4.33k
            case 't':
1104
4.33k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1105
4.33k
                string_kind = TSTRING;
1106
4.33k
                break;
1107
1.61k
            case 'F':
1108
11.9k
            case 'f':
1109
11.9k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1110
11.9k
                break;
1111
217
            case 'R':
1112
491
            case 'r':
1113
491
                the_current_tok->raw = 1;
1114
491
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1115
72
                    string_kind = TSTRING;
1116
72
                }
1117
491
                break;
1118
0
            default:
1119
0
                Py_UNREACHABLE();
1120
16.8k
        }
1121
1122
16.8k
        the_current_tok->string_kind = string_kind;
1123
16.8k
        the_current_tok->curly_bracket_depth = 0;
1124
16.8k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1125
16.8k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1126
16.8k
    }
1127
1128
906k
  letter_quote:
1129
    /* String */
1130
906k
    if (c == '\'' || c == '"') {
1131
59.5k
        int quote = c;
1132
59.5k
        int quote_size = 1;             /* 1 or 3 */
1133
59.5k
        int end_quote_size = 0;
1134
59.5k
        int has_escaped_quote = 0;
1135
1136
        /* Nodes of type STRING, especially multi line strings
1137
           must be handled differently in order to get both
1138
           the starting line number and the column offset right.
1139
           (cf. issue 16806) */
1140
59.5k
        tok->first_lineno = tok->lineno;
1141
59.5k
        tok->multi_line_start = tok->line_start;
1142
1143
        /* Find the quote size and start of string */
1144
59.5k
        c = tok_nextc(tok);
1145
59.5k
        if (c == quote) {
1146
10.1k
            c = tok_nextc(tok);
1147
10.1k
            if (c == quote) {
1148
2.58k
                quote_size = 3;
1149
2.58k
            }
1150
7.55k
            else {
1151
7.55k
                end_quote_size = 1;     /* empty string found */
1152
7.55k
            }
1153
10.1k
        }
1154
59.5k
        if (c != quote) {
1155
56.9k
            tok_backup(tok, c);
1156
56.9k
        }
1157
1158
        /* Get rest of string */
1159
1.17M
        while (end_quote_size != quote_size) {
1160
1.11M
            c = tok_nextc(tok);
1161
1.11M
            if (tok->done == E_ERROR) {
1162
0
                return MAKE_TOKEN(ERRORTOKEN);
1163
0
            }
1164
1.11M
            if (tok->done == E_DECODE) {
1165
0
                break;
1166
0
            }
1167
1.11M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1168
421
                assert(tok->multi_line_start != NULL);
1169
                // shift the tok_state's location into
1170
                // the start of string, and report the error
1171
                // from the initial quote character
1172
421
                tok->cur = (char *)tok->start;
1173
421
                tok->cur++;
1174
421
                tok->line_start = tok->multi_line_start;
1175
421
                int start = tok->lineno;
1176
421
                tok->lineno = tok->first_lineno;
1177
1178
421
                if (INSIDE_FSTRING(tok)) {
1179
                    /* When we are in an f-string, before raising the
1180
                     * unterminated string literal error, check whether
1181
                     * does the initial quote matches with f-strings quotes
1182
                     * and if it is, then this must be a missing '}' token
1183
                     * so raise the proper error */
1184
32
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1185
32
                    if (the_current_tok->quote == quote &&
1186
27
                        the_current_tok->quote_size == quote_size) {
1187
21
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1188
21
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1189
21
                    }
1190
32
                }
1191
1192
400
                if (quote_size == 3) {
1193
16
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1194
16
                                     " (detected at line %d)", start);
1195
16
                    if (c != '\n') {
1196
16
                        tok->done = E_EOFS;
1197
16
                    }
1198
16
                    return MAKE_TOKEN(ERRORTOKEN);
1199
16
                }
1200
384
                else {
1201
384
                    if (has_escaped_quote) {
1202
10
                        _PyTokenizer_syntaxerror(
1203
10
                            tok,
1204
10
                            "unterminated string literal (detected at line %d); "
1205
10
                            "perhaps you escaped the end quote?",
1206
10
                            start
1207
10
                        );
1208
374
                    } else {
1209
374
                        _PyTokenizer_syntaxerror(
1210
374
                            tok, "unterminated string literal (detected at line %d)", start
1211
374
                        );
1212
374
                    }
1213
384
                    if (c != '\n') {
1214
14
                        tok->done = E_EOLS;
1215
14
                    }
1216
384
                    return MAKE_TOKEN(ERRORTOKEN);
1217
384
                }
1218
400
            }
1219
1.11M
            if (c == quote) {
1220
58.3k
                end_quote_size += 1;
1221
58.3k
            }
1222
1.05M
            else {
1223
1.05M
                end_quote_size = 0;
1224
1.05M
                if (c == '\\') {
1225
27.6k
                    c = tok_nextc(tok);  /* skip escaped char */
1226
27.6k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1227
986
                        has_escaped_quote = 1;
1228
986
                    }
1229
27.6k
                    if (c == '\r') {
1230
195
                        c = tok_nextc(tok);
1231
195
                    }
1232
27.6k
                }
1233
1.05M
            }
1234
1.11M
        }
1235
1236
59.1k
        p_start = tok->start;
1237
59.1k
        p_end = tok->cur;
1238
59.1k
        return MAKE_TOKEN(STRING);
1239
59.5k
    }
1240
1241
    /* Line continuation */
1242
847k
    if (c == '\\') {
1243
434
        if ((c = tok_continuation_line(tok)) == -1) {
1244
69
            return MAKE_TOKEN(ERRORTOKEN);
1245
69
        }
1246
365
        tok->cont_line = 1;
1247
365
        goto again; /* Read next line */
1248
434
    }
1249
1250
    /* Punctuation character */
1251
846k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1252
846k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1253
        /* This code block gets executed before the curly_bracket_depth is incremented
1254
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1255
         * to adjust it manually */
1256
56.8k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1257
56.8k
        int in_format_spec = current_tok->in_format_spec;
1258
56.8k
         int cursor_in_format_with_debug =
1259
56.8k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1260
56.8k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1261
56.8k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1262
0
            return MAKE_TOKEN(ENDMARKER);
1263
0
        }
1264
56.8k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1265
12
            return MAKE_TOKEN(ERRORTOKEN);
1266
12
        }
1267
1268
56.8k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269
4.10k
            current_tok->kind = TOK_FSTRING_MODE;
1270
4.10k
            current_tok->in_format_spec = 1;
1271
4.10k
            p_start = tok->start;
1272
4.10k
            p_end = tok->cur;
1273
4.10k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1274
4.10k
        }
1275
56.8k
    }
1276
1277
    /* Check for two-character token */
1278
842k
    {
1279
842k
        int c2 = tok_nextc(tok);
1280
842k
        int current_token = _PyToken_TwoChars(c, c2);
1281
842k
        if (current_token != OP) {
1282
23.6k
            int c3 = tok_nextc(tok);
1283
23.6k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1284
23.6k
            if (current_token3 != OP) {
1285
1.11k
                current_token = current_token3;
1286
1.11k
            }
1287
22.5k
            else {
1288
22.5k
                tok_backup(tok, c3);
1289
22.5k
            }
1290
23.6k
            p_start = tok->start;
1291
23.6k
            p_end = tok->cur;
1292
23.6k
            return MAKE_TOKEN(current_token);
1293
23.6k
        }
1294
819k
        tok_backup(tok, c2);
1295
819k
    }
1296
1297
    /* Keep track of parentheses nesting level */
1298
0
    switch (c) {
1299
92.6k
    case '(':
1300
128k
    case '[':
1301
179k
    case '{':
1302
179k
        if (tok->level >= MAXLEVEL) {
1303
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1304
3
        }
1305
179k
        tok->parenstack[tok->level] = c;
1306
179k
        tok->parenlinenostack[tok->level] = tok->lineno;
1307
179k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1308
179k
        tok->level++;
1309
179k
        if (INSIDE_FSTRING(tok)) {
1310
31.1k
            current_tok->curly_bracket_depth++;
1311
31.1k
        }
1312
179k
        break;
1313
63.6k
    case ')':
1314
75.3k
    case ']':
1315
103k
    case '}':
1316
103k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1317
46
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1318
46
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1319
46
        }
1320
102k
        if (!tok->tok_extra_tokens && !tok->level) {
1321
204
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1322
204
        }
1323
102k
        if (tok->level > 0) {
1324
102k
            tok->level--;
1325
102k
            int opening = tok->parenstack[tok->level];
1326
102k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1327
39.2k
                                            (opening == '[' && c == ']') ||
1328
27.6k
                                            (opening == '{' && c == '}'))) {
1329
                /* If the opening bracket belongs to an f-string's expression
1330
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1331
                nested expression, then instead of matching a different
1332
                syntactical construct with it; we'll throw an unmatched
1333
                parentheses error. */
1334
47
                if (INSIDE_FSTRING(tok) && opening == '{') {
1335
9
                    assert(current_tok->curly_bracket_depth >= 0);
1336
9
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1337
9
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1338
6
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1339
6
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1340
6
                    }
1341
9
                }
1342
41
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1343
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1344
2
                            "closing parenthesis '%c' does not match "
1345
2
                            "opening parenthesis '%c' on line %d",
1346
2
                            c, opening, tok->parenlinenostack[tok->level]));
1347
2
                }
1348
39
                else {
1349
39
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1350
39
                            "closing parenthesis '%c' does not match "
1351
39
                            "opening parenthesis '%c'",
1352
39
                            c, opening));
1353
39
                }
1354
41
            }
1355
102k
        }
1356
1357
102k
        if (INSIDE_FSTRING(tok)) {
1358
23.3k
            current_tok->curly_bracket_depth--;
1359
23.3k
            if (current_tok->curly_bracket_depth < 0) {
1360
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1361
1
                    TOK_GET_STRING_PREFIX(tok), c));
1362
1
            }
1363
23.3k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1364
21.7k
                current_tok->curly_bracket_expr_start_depth--;
1365
21.7k
                current_tok->kind = TOK_FSTRING_MODE;
1366
21.7k
                current_tok->in_format_spec = 0;
1367
21.7k
                current_tok->in_debug = 0;
1368
21.7k
            }
1369
23.3k
        }
1370
102k
        break;
1371
536k
    default:
1372
536k
        break;
1373
819k
    }
1374
1375
818k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1376
494
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1377
494
    }
1378
1379
818k
    if( c == '=' && INSIDE_FSTRING_EXPR_AT_TOP(current_tok)) {
1380
4.98k
        current_tok->in_debug = 1;
1381
4.98k
    }
1382
1383
    /* Punctuation character */
1384
818k
    p_start = tok->start;
1385
818k
    p_end = tok->cur;
1386
818k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1387
818k
}
1388
1389
/* Tokenize the literal portion of an f-string or t-string (TOK_FSTRING_MODE).
 *
 * Emits exactly one token per call:
 *   - FSTRING_END / TSTRING_END when the closing quote(s) are found,
 *   - FSTRING_MIDDLE / TSTRING_MIDDLE for a run of literal text (possibly
 *     ending just before a '{', '}', newline-in-format-spec, or the quotes),
 *   - or hands control back to tok_get_normal_mode() when an expression
 *     opens with '{'.
 *
 * `current_tok` is the top of tok->tok_mode_stack; on FSTRING_END the mode
 * stack is popped.  Returns a token type via MAKE_TOKEN (ERRORTOKEN on
 * unterminated strings, decode errors, or over-deep expression nesting).
 */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;   // consecutive quote chars seen; string ends at quote_size
    int unicode_escape = 0;   // inside a \N{...} named escape, so '}' is literal

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        int peek1 = tok_nextc(tok);
        // Restore both characters: normal mode (or the loop below) re-reads them.
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            // A real replacement field starts here: bump nesting and switch
            // the current mode to regular tokenization for the expression.
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
        // '{{' is an escaped brace: fall through and lex it as literal text.
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // Closing quotes consumed: the f-string is finished.  Free the cached
    // expression buffer used for '=' debug text before popping the mode.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;   // pop this f-string mode off the stack
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        // In a format spec only while still inside the enclosing expression's
        // bracket frame; outside of it the flag is stale.
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        if (c == current_tok->quote) {
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;   // quote run broken; not the terminator
        }

        if (c == '{') {
            // Record the literal text seen so far into the expression buffer
            // (used to reconstruct the source text for '=' debug specs).
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            // A lone '{' (or any '{' inside a format spec) opens an expression;
            // '{{' outside a spec is an escaped literal brace.
            if (peek != '{' || in_format_spec) {
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                p_start = tok->start;
                p_end = tok->cur - 1;   // drop the second '{' of the escape
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // '}' closes a \N{...} escape, not a replacement field.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // '}}' escape: emit up to (and including) a single '}'.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // Stray '}' — push both chars back and let normal mode
                // report the mismatch.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                // CRLF line continuation: skip the '\r' and look at the next char.
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1611
1612
static int
1613
tok_get(struct tok_state *tok, struct token *token)
1614
1.81M
{
1615
1.81M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1616
1.81M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1617
1.76M
        return tok_get_normal_mode(tok, current_tok, token);
1618
1.76M
    } else {
1619
53.6k
        return tok_get_fstring_mode(tok, current_tok, token);
1620
53.6k
    }
1621
1.81M
}
1622
1623
int
1624
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1625
1.81M
{
1626
1.81M
    int result = tok_get(tok, token);
1627
1.81M
    if (tok->decoding_erred) {
1628
0
        result = ERRORTOKEN;
1629
0
        tok->done = E_DECODE;
1630
0
    }
1631
1.81M
    return result;
1632
1.81M
}