Coverage Report

Created: 2025-09-04 06:25

/src/cpython/Parser/lexer/lexer.c
Line
Count
Source (jump to first uncovered line)
1
#include "Python.h"
2
#include "pycore_token.h"
3
#include "pycore_unicodeobject.h"
4
#include "errcode.h"
5
6
#include "state.h"
7
#include "../tokenizer/helpers.h"
8
9
/* Alternate tab spacing */
10
1.63k
#define ALTTABSIZE 1
11
12
1.76M
#define is_potential_identifier_start(c) (\
13
1.76M
              (c >= 'a' && c <= 'z')\
14
1.76M
               || (c >= 'A' && c <= 'Z')\
15
1.76M
               || c == '_'\
16
1.76M
               || (c >= 128))
17
18
2.47M
#define is_potential_identifier_char(c) (\
19
2.47M
              (c >= 'a' && c <= 'z')\
20
2.47M
               || (c >= 'A' && c <= 'Z')\
21
2.47M
               || (c >= '0' && c <= '9')\
22
2.47M
               || c == '_'\
23
2.47M
               || (c >= 128))
24
25
#ifdef Py_DEBUG
26
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27
    assert(tok->tok_mode_stack_index >= 0);
28
    assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29
    return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30
}
31
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32
    assert(tok->tok_mode_stack_index >= 0);
33
    assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34
    return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35
}
36
#else
37
1.90M
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38
16.7k
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39
#endif
40
41
#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
42
#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
43
34
#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
44
1.78M
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
45
0
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
46
0
                _PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
47
48
/* Spaces in this constant are treated as "zero or more spaces or tabs" when
49
   tokenizing. */
50
static const char* type_comment_prefix = "# type: ";
51
52
static inline int
53
contains_null_bytes(const char* str, size_t size)
54
227k
{
55
227k
    return memchr(str, 0, size) != NULL;
56
227k
}
57
58
/* Get next char, updating state; error code goes into tok->done */
59
static int
60
tok_nextc(struct tok_state *tok)
61
10.9M
{
62
10.9M
    int rc;
63
11.1M
    for (;;) {
64
11.1M
        if (tok->cur != tok->inp) {
65
10.9M
            if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
66
0
                tok->done = E_COLUMNOVERFLOW;
67
0
                return EOF;
68
0
            }
69
10.9M
            tok->col_offset++;
70
10.9M
            return Py_CHARMASK(*tok->cur++); /* Fast path */
71
10.9M
        }
72
278k
        if (tok->done != E_OK) {
73
33.9k
            return EOF;
74
33.9k
        }
75
244k
        rc = tok->underflow(tok);
76
#if defined(Py_DEBUG)
77
        if (tok->debug) {
78
            fprintf(stderr, "line[%d] = ", tok->lineno);
79
            _PyTokenizer_print_escape(stderr, tok->cur, tok->inp - tok->cur);
80
            fprintf(stderr, "  tok->done = %d\n", tok->done);
81
        }
82
#endif
83
244k
        if (!rc) {
84
17.1k
            tok->cur = tok->inp;
85
17.1k
            return EOF;
86
17.1k
        }
87
227k
        tok->line_start = tok->cur;
88
89
227k
        if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) {
90
0
            _PyTokenizer_syntaxerror(tok, "source code cannot contain null bytes");
91
0
            tok->cur = tok->inp;
92
0
            return EOF;
93
0
        }
94
227k
    }
95
10.9M
    Py_UNREACHABLE();
96
10.9M
}
97
98
/* Back-up one character */
99
static void
100
tok_backup(struct tok_state *tok, int c)
101
3.74M
{
102
3.74M
    if (c != EOF) {
103
3.70M
        if (--tok->cur < tok->buf) {
104
0
            Py_FatalError("tokenizer beginning of buffer");
105
0
        }
106
3.70M
        if ((int)(unsigned char)*tok->cur != Py_CHARMASK(c)) {
107
0
            Py_FatalError("tok_backup: wrong character");
108
0
        }
109
3.70M
        tok->col_offset--;
110
3.70M
    }
111
3.74M
}
112
113
static int
114
23.8k
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
115
23.8k
    assert(token != NULL);
116
23.8k
    assert(c == '}' || c == ':' || c == '!');
117
23.8k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
118
119
23.8k
    if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
120
14.3k
        return 0;
121
14.3k
    }
122
9.52k
    PyObject *res = NULL;
123
124
    // Look for a # character outside of string literals
125
9.52k
    int hash_detected = 0;
126
9.52k
    int in_string = 0;
127
9.52k
    char quote_char = 0;
128
129
1.46M
    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
130
1.45M
        char ch = tok_mode->last_expr_buffer[i];
131
132
        // Skip escaped characters
133
1.45M
        if (ch == '\\') {
134
33.0k
            i++;
135
33.0k
            continue;
136
33.0k
        }
137
138
        // Handle quotes
139
1.41M
        if (ch == '"' || ch == '\'') {
140
            // The following if/else block works because there is an odd number
141
            // of quotes in STRING tokens and the lexer only ever reaches this
142
            // function with valid STRING tokens.
143
            // For example: """hello"""
144
            // First quote: in_string = 1
145
            // Second quote: in_string = 0
146
            // Third quote: in_string = 1
147
188k
            if (!in_string) {
148
66.8k
                in_string = 1;
149
66.8k
                quote_char = ch;
150
66.8k
            }
151
121k
            else if (ch == quote_char) {
152
65.9k
                in_string = 0;
153
65.9k
            }
154
188k
            continue;
155
188k
        }
156
157
        // Check for # outside strings
158
1.23M
        if (ch == '#' && !in_string) {
159
874
            hash_detected = 1;
160
874
            break;
161
874
        }
162
1.23M
    }
163
    // If we found a # character in the expression, we need to handle comments
164
9.52k
    if (hash_detected) {
165
        // Allocate buffer for processed result
166
874
        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
167
874
        if (!result) {
168
0
            return -1;
169
0
        }
170
171
874
        Py_ssize_t i = 0;  // Input position
172
874
        Py_ssize_t j = 0;  // Output position
173
874
        in_string = 0;     // Whether we're in a string
174
874
        quote_char = 0;    // Current string quote char
175
176
        // Process each character
177
55.1k
        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178
54.2k
            char ch = tok_mode->last_expr_buffer[i];
179
180
            // Handle string quotes
181
54.2k
            if (ch == '"' || ch == '\'') {
182
                // See comment above to understand this part
183
8.19k
                if (!in_string) {
184
3.35k
                    in_string = 1;
185
3.35k
                    quote_char = ch;
186
4.83k
                } else if (ch == quote_char) {
187
3.33k
                    in_string = 0;
188
3.33k
                }
189
8.19k
                result[j++] = ch;
190
8.19k
            }
191
            // Skip comments
192
46.0k
            else if (ch == '#' && !in_string) {
193
49.6k
                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194
49.6k
                       tok_mode->last_expr_buffer[i] != '\n') {
195
48.5k
                    i++;
196
48.5k
                }
197
1.07k
                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198
303
                    result[j++] = '\n';
199
303
                }
200
1.07k
            }
201
            // Copy other chars
202
45.0k
            else {
203
45.0k
                result[j++] = ch;
204
45.0k
            }
205
54.2k
            i++;
206
54.2k
        }
207
208
874
        result[j] = '\0';  // Null-terminate the result string
209
874
        res = PyUnicode_DecodeUTF8(result, j, NULL);
210
874
        PyMem_Free(result);
211
8.65k
    } else {
212
8.65k
        res = PyUnicode_DecodeUTF8(
213
8.65k
            tok_mode->last_expr_buffer,
214
8.65k
            tok_mode->last_expr_size - tok_mode->last_expr_end,
215
8.65k
            NULL
216
8.65k
        );
217
8.65k
    }
218
219
9.52k
    if (!res) {
220
9
        return -1;
221
9
    }
222
9.51k
    token->metadata = res;
223
9.51k
    return 0;
224
9.52k
}
225
226
int
227
_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
228
67.0k
{
229
67.0k
    assert(tok->cur != NULL);
230
231
67.0k
    Py_ssize_t size = strlen(tok->cur);
232
67.0k
    tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
233
234
67.0k
    switch (cur) {
235
0
       case 0:
236
0
            if (!tok_mode->last_expr_buffer || tok_mode->last_expr_end >= 0) {
237
0
                return 1;
238
0
            }
239
0
            char *new_buffer = PyMem_Realloc(
240
0
                tok_mode->last_expr_buffer,
241
0
                tok_mode->last_expr_size + size
242
0
            );
243
0
            if (new_buffer == NULL) {
244
0
                PyMem_Free(tok_mode->last_expr_buffer);
245
0
                goto error;
246
0
            }
247
0
            tok_mode->last_expr_buffer = new_buffer;
248
0
            strncpy(tok_mode->last_expr_buffer + tok_mode->last_expr_size, tok->cur, size);
249
0
            tok_mode->last_expr_size += size;
250
0
            break;
251
43.2k
        case '{':
252
43.2k
            if (tok_mode->last_expr_buffer != NULL) {
253
31.8k
                PyMem_Free(tok_mode->last_expr_buffer);
254
31.8k
            }
255
43.2k
            tok_mode->last_expr_buffer = PyMem_Malloc(size);
256
43.2k
            if (tok_mode->last_expr_buffer == NULL) {
257
0
                goto error;
258
0
            }
259
43.2k
            tok_mode->last_expr_size = size;
260
43.2k
            tok_mode->last_expr_end = -1;
261
43.2k
            strncpy(tok_mode->last_expr_buffer, tok->cur, size);
262
43.2k
            break;
263
19.4k
        case '}':
264
21.1k
        case '!':
265
21.1k
            tok_mode->last_expr_end = strlen(tok->start);
266
21.1k
            break;
267
2.71k
        case ':':
268
2.71k
            if (tok_mode->last_expr_end == -1) {
269
2.44k
               tok_mode->last_expr_end = strlen(tok->start);
270
2.44k
            }
271
2.71k
            break;
272
0
        default:
273
0
            Py_UNREACHABLE();
274
67.0k
    }
275
67.0k
    return 1;
276
0
error:
277
0
    tok->done = E_NOMEM;
278
0
    return 0;
279
67.0k
}
280
281
static int
282
lookahead(struct tok_state *tok, const char *test)
283
8.28k
{
284
8.28k
    const char *s = test;
285
8.28k
    int res = 0;
286
21.7k
    while (1) {
287
21.7k
        int c = tok_nextc(tok);
288
21.7k
        if (*s == 0) {
289
8.19k
            res = !is_potential_identifier_char(c);
290
8.19k
        }
291
13.5k
        else if (c == *s) {
292
13.4k
            s++;
293
13.4k
            continue;
294
13.4k
        }
295
296
8.28k
        tok_backup(tok, c);
297
21.7k
        while (s != test) {
298
13.4k
            tok_backup(tok, *--s);
299
13.4k
        }
300
8.28k
        return res;
301
21.7k
    }
302
8.28k
}
303
304
static int
305
103k
verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
306
103k
    if (tok->tok_extra_tokens) {
307
        // When we are parsing extra tokens, we don't want to emit warnings
308
        // about invalid literals, because we want to be a bit more liberal.
309
0
        return 1;
310
0
    }
311
    /* Emit a deprecation warning only if the numeric literal is immediately
312
     * followed by one of keywords which can occur after a numeric literal
313
     * in valid code: "and", "else", "for", "if", "in", "is" and "or".
314
     * It allows to gradually deprecate existing valid code without adding
315
     * warning before error in most cases of invalid numeric literal (which
316
     * would be confusing and break existing tests).
317
     * Raise a syntax error with slightly better message than plain
318
     * "invalid syntax" if the numeric literal is immediately followed by
319
     * other keyword or identifier.
320
     */
321
103k
    int r = 0;
322
103k
    if (c == 'a') {
323
743
        r = lookahead(tok, "nd");
324
743
    }
325
102k
    else if (c == 'e') {
326
435
        r = lookahead(tok, "lse");
327
435
    }
328
101k
    else if (c == 'f') {
329
3.47k
        r = lookahead(tok, "or");
330
3.47k
    }
331
98.4k
    else if (c == 'i') {
332
2.50k
        int c2 = tok_nextc(tok);
333
2.50k
        if (c2 == 'f' || c2 == 'n' || c2 == 's') {
334
2.49k
            r = 1;
335
2.49k
        }
336
2.50k
        tok_backup(tok, c2);
337
2.50k
    }
338
95.9k
    else if (c == 'o') {
339
3.34k
        r = lookahead(tok, "r");
340
3.34k
    }
341
92.5k
    else if (c == 'n') {
342
289
        r = lookahead(tok, "ot");
343
289
    }
344
103k
    if (r) {
345
10.6k
        tok_backup(tok, c);
346
10.6k
        if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning,
347
10.6k
                "invalid %s literal", kind))
348
0
        {
349
0
            return 0;
350
0
        }
351
10.6k
        tok_nextc(tok);
352
10.6k
    }
353
92.3k
    else /* In future releases, only error will remain. */
354
92.3k
    if (c < 128 && is_potential_identifier_char(c)) {
355
192
        tok_backup(tok, c);
356
192
        _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
357
192
        return 0;
358
192
    }
359
102k
    return 1;
360
103k
}
361
362
/* Verify that the identifier follows PEP 3131. */
363
static int
364
verify_identifier(struct tok_state *tok)
365
14.7k
{
366
14.7k
    if (tok->tok_extra_tokens) {
367
0
        return 1;
368
0
    }
369
14.7k
    PyObject *s;
370
14.7k
    if (tok->decoding_erred)
371
0
        return 0;
372
14.7k
    s = PyUnicode_DecodeUTF8(tok->start, tok->cur - tok->start, NULL);
373
14.7k
    if (s == NULL) {
374
982
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
375
982
            tok->done = E_DECODE;
376
982
        }
377
0
        else {
378
0
            tok->done = E_ERROR;
379
0
        }
380
982
        return 0;
381
982
    }
382
13.7k
    Py_ssize_t invalid = _PyUnicode_ScanIdentifier(s);
383
13.7k
    assert(invalid >= 0);
384
13.7k
    assert(PyUnicode_GET_LENGTH(s) > 0);
385
13.7k
    if (invalid < PyUnicode_GET_LENGTH(s)) {
386
681
        Py_UCS4 ch = PyUnicode_READ_CHAR(s, invalid);
387
681
        if (invalid + 1 < PyUnicode_GET_LENGTH(s)) {
388
            /* Determine the offset in UTF-8 encoded input */
389
474
            Py_SETREF(s, PyUnicode_Substring(s, 0, invalid + 1));
390
474
            if (s != NULL) {
391
474
                Py_SETREF(s, PyUnicode_AsUTF8String(s));
392
474
            }
393
474
            if (s == NULL) {
394
0
                tok->done = E_ERROR;
395
0
                return 0;
396
0
            }
397
474
            tok->cur = (char *)tok->start + PyBytes_GET_SIZE(s);
398
474
        }
399
681
        Py_DECREF(s);
400
681
        if (Py_UNICODE_ISPRINTABLE(ch)) {
401
393
            _PyTokenizer_syntaxerror(tok, "invalid character '%c' (U+%04X)", ch, ch);
402
393
        }
403
288
        else {
404
288
            _PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", ch);
405
288
        }
406
681
        return 0;
407
681
    }
408
13.0k
    Py_DECREF(s);
409
13.0k
    return 1;
410
13.7k
}
411
412
static int
413
tok_decimal_tail(struct tok_state *tok)
414
82.6k
{
415
82.6k
    int c;
416
417
83.1k
    while (1) {
418
234k
        do {
419
234k
            c = tok_nextc(tok);
420
234k
        } while (Py_ISDIGIT(c));
421
83.1k
        if (c != '_') {
422
82.6k
            break;
423
82.6k
        }
424
489
        c = tok_nextc(tok);
425
489
        if (!Py_ISDIGIT(c)) {
426
16
            tok_backup(tok, c);
427
16
            _PyTokenizer_syntaxerror(tok, "invalid decimal literal");
428
16
            return 0;
429
16
        }
430
489
    }
431
82.6k
    return c;
432
82.6k
}
433
434
static inline int
435
1.09k
tok_continuation_line(struct tok_state *tok) {
436
1.09k
    int c = tok_nextc(tok);
437
1.09k
    if (c == '\r') {
438
71
        c = tok_nextc(tok);
439
71
    }
440
1.09k
    if (c != '\n') {
441
62
        tok->done = E_LINECONT;
442
62
        return -1;
443
62
    }
444
1.02k
    c = tok_nextc(tok);
445
1.02k
    if (c == EOF) {
446
48
        tok->done = E_EOF;
447
48
        tok->cur = tok->inp;
448
48
        return -1;
449
980
    } else {
450
980
        tok_backup(tok, c);
451
980
    }
452
980
    return c;
453
1.02k
}
454
455
static int
456
maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
457
                                             int saw_b, int saw_r, int saw_u,
458
22.0k
                                             int saw_f, int saw_t) {
459
    // Supported: rb, rf, rt (in any order)
460
    // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
461
462
22.0k
#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2)                             \
463
22.0k
    do {                                                                  \
464
7
        (void)_PyTokenizer_syntaxerror_known_range(                       \
465
7
            tok, (int)(tok->start + 1 - tok->line_start),                 \
466
7
            (int)(tok->cur - tok->line_start),                            \
467
7
            "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
468
7
        return -1;                                                        \
469
7
    } while (0)
470
471
22.0k
    if (saw_u && saw_b) {
472
1
        RETURN_SYNTAX_ERROR("u", "b");
473
1
    }
474
22.0k
    if (saw_u && saw_r) {
475
1
        RETURN_SYNTAX_ERROR("u", "r");
476
1
    }
477
22.0k
    if (saw_u && saw_f) {
478
1
        RETURN_SYNTAX_ERROR("u", "f");
479
1
    }
480
22.0k
    if (saw_u && saw_t) {
481
1
        RETURN_SYNTAX_ERROR("u", "t");
482
1
    }
483
484
22.0k
    if (saw_b && saw_f) {
485
1
        RETURN_SYNTAX_ERROR("b", "f");
486
1
    }
487
22.0k
    if (saw_b && saw_t) {
488
1
        RETURN_SYNTAX_ERROR("b", "t");
489
1
    }
490
491
22.0k
    if (saw_f && saw_t) {
492
1
        RETURN_SYNTAX_ERROR("f", "t");
493
1
    }
494
495
22.0k
#undef RETURN_SYNTAX_ERROR
496
497
22.0k
    return 0;
498
22.0k
}
499
500
static int
501
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
502
1.74M
{
503
1.74M
    int c;
504
1.74M
    int blankline, nonascii;
505
506
1.74M
    const char *p_start = NULL;
507
1.74M
    const char *p_end = NULL;
508
1.82M
  nextline:
509
1.82M
    tok->start = NULL;
510
1.82M
    tok->starting_col_offset = -1;
511
1.82M
    blankline = 0;
512
513
514
    /* Get indentation level */
515
1.82M
    if (tok->atbol) {
516
227k
        int col = 0;
517
227k
        int altcol = 0;
518
227k
        tok->atbol = 0;
519
227k
        int cont_line_col = 0;
520
911k
        for (;;) {
521
911k
            c = tok_nextc(tok);
522
911k
            if (c == ' ') {
523
680k
                col++, altcol++;
524
680k
            }
525
230k
            else if (c == '\t') {
526
817
                col = (col / tok->tabsize + 1) * tok->tabsize;
527
817
                altcol = (altcol / ALTTABSIZE + 1) * ALTTABSIZE;
528
817
            }
529
229k
            else if (c == '\014')  {/* Control-L (formfeed) */
530
1.81k
                col = altcol = 0; /* For Emacs users */
531
1.81k
            }
532
227k
            else if (c == '\\') {
533
                // Indentation cannot be split over multiple physical lines
534
                // using backslashes. This means that if we found a backslash
535
                // preceded by whitespace, **the first one we find** determines
536
                // the level of indentation of whatever comes next.
537
653
                cont_line_col = cont_line_col ? cont_line_col : col;
538
653
                if ((c = tok_continuation_line(tok)) == -1) {
539
40
                    return MAKE_TOKEN(ERRORTOKEN);
540
40
                }
541
653
            }
542
227k
            else {
543
227k
                break;
544
227k
            }
545
911k
        }
546
227k
        tok_backup(tok, c);
547
227k
        if (c == '#' || c == '\n' || c == '\r') {
548
            /* Lines with only whitespace and/or comments
549
               shouldn't affect the indentation and are
550
               not passed to the parser as NEWLINE tokens,
551
               except *totally* empty lines in interactive
552
               mode, which signal the end of a command group. */
553
45.7k
            if (col == 0 && c == '\n' && tok->prompt != NULL) {
554
0
                blankline = 0; /* Let it through */
555
0
            }
556
45.7k
            else if (tok->prompt != NULL && tok->lineno == 1) {
557
                /* In interactive mode, if the first line contains
558
                   only spaces and/or a comment, let it through. */
559
0
                blankline = 0;
560
0
                col = altcol = 0;
561
0
            }
562
45.7k
            else {
563
45.7k
                blankline = 1; /* Ignore completely */
564
45.7k
            }
565
            /* We can't jump back right here since we still
566
               may need to skip to the end of a comment */
567
45.7k
        }
568
227k
        if (!blankline && tok->level == 0) {
569
140k
            col = cont_line_col ? cont_line_col : col;
570
140k
            altcol = cont_line_col ? cont_line_col : altcol;
571
140k
            if (col == tok->indstack[tok->indent]) {
572
                /* No change */
573
102k
                if (altcol != tok->altindstack[tok->indent]) {
574
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
575
1
                }
576
102k
            }
577
38.1k
            else if (col > tok->indstack[tok->indent]) {
578
                /* Indent -- always one */
579
21.3k
                if (tok->indent+1 >= MAXINDENT) {
580
0
                    tok->done = E_TOODEEP;
581
0
                    tok->cur = tok->inp;
582
0
                    return MAKE_TOKEN(ERRORTOKEN);
583
0
                }
584
21.3k
                if (altcol <= tok->altindstack[tok->indent]) {
585
3
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
586
3
                }
587
21.3k
                tok->pendin++;
588
21.3k
                tok->indstack[++tok->indent] = col;
589
21.3k
                tok->altindstack[tok->indent] = altcol;
590
21.3k
            }
591
16.7k
            else /* col < tok->indstack[tok->indent] */ {
592
                /* Dedent -- any number, must be consistent */
593
37.3k
                while (tok->indent > 0 &&
594
37.3k
                    col < tok->indstack[tok->indent]) {
595
20.5k
                    tok->pendin--;
596
20.5k
                    tok->indent--;
597
20.5k
                }
598
16.7k
                if (col != tok->indstack[tok->indent]) {
599
8
                    tok->done = E_DEDENT;
600
8
                    tok->cur = tok->inp;
601
8
                    return MAKE_TOKEN(ERRORTOKEN);
602
8
                }
603
16.7k
                if (altcol != tok->altindstack[tok->indent]) {
604
1
                    return MAKE_TOKEN(_PyTokenizer_indenterror(tok));
605
1
                }
606
16.7k
            }
607
140k
        }
608
227k
    }
609
610
1.82M
    tok->start = tok->cur;
611
1.82M
    tok->starting_col_offset = tok->col_offset;
612
613
    /* Return pending indents/dedents */
614
1.82M
    if (tok->pendin != 0) {
615
41.9k
        if (tok->pendin < 0) {
616
20.5k
            if (tok->tok_extra_tokens) {
617
0
                p_start = tok->cur;
618
0
                p_end = tok->cur;
619
0
            }
620
20.5k
            tok->pendin++;
621
20.5k
            return MAKE_TOKEN(DEDENT);
622
20.5k
        }
623
21.3k
        else {
624
21.3k
            if (tok->tok_extra_tokens) {
625
0
                p_start = tok->buf;
626
0
                p_end = tok->cur;
627
0
            }
628
21.3k
            tok->pendin--;
629
21.3k
            return MAKE_TOKEN(INDENT);
630
21.3k
        }
631
41.9k
    }
632
633
    /* Peek ahead at the next character */
634
1.78M
    c = tok_nextc(tok);
635
1.78M
    tok_backup(tok, c);
636
637
1.78M
 again:
638
1.78M
    tok->start = NULL;
639
    /* Skip spaces */
640
2.13M
    do {
641
2.13M
        c = tok_nextc(tok);
642
2.13M
    } while (c == ' ' || c == '\t' || c == '\014');
643
644
    /* Set start of current token */
645
1.78M
    tok->start = tok->cur == NULL ? NULL : tok->cur - 1;
646
1.78M
    tok->starting_col_offset = tok->col_offset - 1;
647
648
    /* Skip comment, unless it's a type comment */
649
1.78M
    if (c == '#') {
650
651
42.0k
        const char* p = NULL;
652
42.0k
        const char *prefix, *type_start;
653
42.0k
        int current_starting_col_offset;
654
655
1.30M
        while (c != EOF && c != '\n' && c != '\r') {
656
1.25M
            c = tok_nextc(tok);
657
1.25M
        }
658
659
42.0k
        if (tok->tok_extra_tokens) {
660
0
            p = tok->start;
661
0
        }
662
663
42.0k
        if (tok->type_comments) {
664
0
            p = tok->start;
665
0
            current_starting_col_offset = tok->starting_col_offset;
666
0
            prefix = type_comment_prefix;
667
0
            while (*prefix && p < tok->cur) {
668
0
                if (*prefix == ' ') {
669
0
                    while (*p == ' ' || *p == '\t') {
670
0
                        p++;
671
0
                        current_starting_col_offset++;
672
0
                    }
673
0
                } else if (*prefix == *p) {
674
0
                    p++;
675
0
                    current_starting_col_offset++;
676
0
                } else {
677
0
                    break;
678
0
                }
679
680
0
                prefix++;
681
0
            }
682
683
            /* This is a type comment if we matched all of type_comment_prefix. */
684
0
            if (!*prefix) {
685
0
                int is_type_ignore = 1;
686
                // +6 in order to skip the word 'ignore'
687
0
                const char *ignore_end = p + 6;
688
0
                const int ignore_end_col_offset = current_starting_col_offset + 6;
689
0
                tok_backup(tok, c);  /* don't eat the newline or EOF */
690
691
0
                type_start = p;
692
693
                /* A TYPE_IGNORE is "type: ignore" followed by the end of the token
694
                 * or anything ASCII and non-alphanumeric. */
695
0
                is_type_ignore = (
696
0
                    tok->cur >= ignore_end && memcmp(p, "ignore", 6) == 0
697
0
                    && !(tok->cur > ignore_end
698
0
                         && ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
699
700
0
                if (is_type_ignore) {
701
0
                    p_start = ignore_end;
702
0
                    p_end = tok->cur;
703
704
                    /* If this type ignore is the only thing on the line, consume the newline also. */
705
0
                    if (blankline) {
706
0
                        tok_nextc(tok);
707
0
                        tok->atbol = 1;
708
0
                    }
709
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_IGNORE, ignore_end_col_offset, tok->col_offset);
710
0
                } else {
711
0
                    p_start = type_start;
712
0
                    p_end = tok->cur;
713
0
                    return MAKE_TYPE_COMMENT_TOKEN(TYPE_COMMENT, current_starting_col_offset, tok->col_offset);
714
0
                }
715
0
            }
716
0
        }
717
42.0k
        if (tok->tok_extra_tokens) {
718
0
            tok_backup(tok, c);  /* don't eat the newline or EOF */
719
0
            p_start = p;
720
0
            p_end = tok->cur;
721
0
            tok->comment_newline = blankline;
722
0
            return MAKE_TOKEN(COMMENT);
723
0
        }
724
42.0k
    }
725
726
1.78M
    if (tok->done == E_INTERACT_STOP) {
727
0
        return MAKE_TOKEN(ENDMARKER);
728
0
    }
729
730
    /* Check for EOF and errors now */
731
1.78M
    if (c == EOF) {
732
16.9k
        if (tok->level) {
733
4.10k
            return MAKE_TOKEN(ERRORTOKEN);
734
4.10k
        }
735
12.8k
        return MAKE_TOKEN(tok->done == E_EOF ? ENDMARKER : ERRORTOKEN);
736
16.9k
    }
737
738
    /* Identifier (most frequent token!) */
739
1.76M
    nonascii = 0;
740
1.76M
    if (is_potential_identifier_start(c)) {
741
        /* Process the various legal combinations of b"", r"", u"", and f"". */
742
531k
        int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
743
653k
        while (1) {
744
653k
            if (!saw_b && (c == 'b' || c == 'B')) {
745
20.9k
                saw_b = 1;
746
20.9k
            }
747
            /* Since this is a backwards compatibility support literal we don't
748
               want to support it in arbitrary order like byte literals. */
749
632k
            else if (!saw_u && (c == 'u'|| c == 'U')) {
750
6.90k
                saw_u = 1;
751
6.90k
            }
752
            /* ur"" and ru"" are not supported */
753
626k
            else if (!saw_r && (c == 'r' || c == 'R')) {
754
38.0k
                saw_r = 1;
755
38.0k
            }
756
587k
            else if (!saw_f && (c == 'f' || c == 'F')) {
757
45.6k
                saw_f = 1;
758
45.6k
            }
759
542k
            else if (!saw_t && (c == 't' || c == 'T')) {
760
32.7k
                saw_t = 1;
761
32.7k
            }
762
509k
            else {
763
509k
                break;
764
509k
            }
765
144k
            c = tok_nextc(tok);
766
144k
            if (c == '"' || c == '\'') {
767
                // Raise error on incompatible string prefixes:
768
22.0k
                int status = maybe_raise_syntax_error_for_string_prefixes(
769
22.0k
                    tok, saw_b, saw_r, saw_u, saw_f, saw_t);
770
22.0k
                if (status < 0) {
771
7
                    return MAKE_TOKEN(ERRORTOKEN);
772
7
                }
773
774
                // Handle valid f or t string creation:
775
22.0k
                if (saw_f || saw_t) {
776
16.7k
                    goto f_string_quote;
777
16.7k
                }
778
5.29k
                goto letter_quote;
779
22.0k
            }
780
144k
        }
781
2.37M
        while (is_potential_identifier_char(c)) {
782
1.86M
            if (c >= 128) {
783
214k
                nonascii = 1;
784
214k
            }
785
1.86M
            c = tok_nextc(tok);
786
1.86M
        }
787
509k
        tok_backup(tok, c);
788
509k
        if (nonascii && !verify_identifier(tok)) {
789
1.66k
            return MAKE_TOKEN(ERRORTOKEN);
790
1.66k
        }
791
792
507k
        p_start = tok->start;
793
507k
        p_end = tok->cur;
794
795
507k
        return MAKE_TOKEN(NAME);
796
509k
    }
797
798
1.23M
    if (c == '\r') {
799
440
        c = tok_nextc(tok);
800
440
    }
801
802
    /* Newline */
803
1.23M
    if (c == '\n') {
804
206k
        tok->atbol = 1;
805
206k
        if (blankline || tok->level > 0) {
806
86.4k
            if (tok->tok_extra_tokens) {
807
0
                if (tok->comment_newline) {
808
0
                    tok->comment_newline = 0;
809
0
                }
810
0
                p_start = tok->start;
811
0
                p_end = tok->cur;
812
0
                return MAKE_TOKEN(NL);
813
0
            }
814
86.4k
            goto nextline;
815
86.4k
        }
816
119k
        if (tok->comment_newline && tok->tok_extra_tokens) {
817
0
            tok->comment_newline = 0;
818
0
            p_start = tok->start;
819
0
            p_end = tok->cur;
820
0
            return MAKE_TOKEN(NL);
821
0
        }
822
119k
        p_start = tok->start;
823
119k
        p_end = tok->cur - 1; /* Leave '\n' out of the string */
824
119k
        tok->cont_line = 0;
825
119k
        return MAKE_TOKEN(NEWLINE);
826
119k
    }
827
828
    /* Period or number starting with period? */
829
1.03M
    if (c == '.') {
830
34.5k
        c = tok_nextc(tok);
831
34.5k
        if (Py_ISDIGIT(c)) {
832
2.93k
            goto fraction;
833
31.6k
        } else if (c == '.') {
834
3.14k
            c = tok_nextc(tok);
835
3.14k
            if (c == '.') {
836
2.49k
                p_start = tok->start;
837
2.49k
                p_end = tok->cur;
838
2.49k
                return MAKE_TOKEN(ELLIPSIS);
839
2.49k
            }
840
653
            else {
841
653
                tok_backup(tok, c);
842
653
            }
843
653
            tok_backup(tok, '.');
844
653
        }
845
28.4k
        else {
846
28.4k
            tok_backup(tok, c);
847
28.4k
        }
848
29.1k
        p_start = tok->start;
849
29.1k
        p_end = tok->cur;
850
29.1k
        return MAKE_TOKEN(DOT);
851
34.5k
    }
852
853
    /* Number */
854
995k
    if (Py_ISDIGIT(c)) {
855
100k
        if (c == '0') {
856
            /* Hex, octal or binary -- maybe. */
857
35.0k
            c = tok_nextc(tok);
858
35.0k
            if (c == 'x' || c == 'X') {
859
                /* Hex */
860
15.9k
                c = tok_nextc(tok);
861
16.1k
                do {
862
16.1k
                    if (c == '_') {
863
205
                        c = tok_nextc(tok);
864
205
                    }
865
16.1k
                    if (!Py_ISXDIGIT(c)) {
866
18
                        tok_backup(tok, c);
867
18
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal"));
868
18
                    }
869
79.1k
                    do {
870
79.1k
                        c = tok_nextc(tok);
871
79.1k
                    } while (Py_ISXDIGIT(c));
872
16.1k
                } while (c == '_');
873
15.9k
                if (!verify_end_of_number(tok, c, "hexadecimal")) {
874
2
                    return MAKE_TOKEN(ERRORTOKEN);
875
2
                }
876
15.9k
            }
877
19.0k
            else if (c == 'o' || c == 'O') {
878
                /* Octal */
879
619
                c = tok_nextc(tok);
880
1.06k
                do {
881
1.06k
                    if (c == '_') {
882
444
                        c = tok_nextc(tok);
883
444
                    }
884
1.06k
                    if (c < '0' || c >= '8') {
885
19
                        if (Py_ISDIGIT(c)) {
886
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
887
1
                                    "invalid digit '%c' in octal literal", c));
888
1
                        }
889
18
                        else {
890
18
                            tok_backup(tok, c);
891
18
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid octal literal"));
892
18
                        }
893
19
                    }
894
2.78k
                    do {
895
2.78k
                        c = tok_nextc(tok);
896
2.78k
                    } while ('0' <= c && c < '8');
897
1.04k
                } while (c == '_');
898
600
                if (Py_ISDIGIT(c)) {
899
1
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
900
1
                            "invalid digit '%c' in octal literal", c));
901
1
                }
902
599
                if (!verify_end_of_number(tok, c, "octal")) {
903
2
                    return MAKE_TOKEN(ERRORTOKEN);
904
2
                }
905
599
            }
906
18.4k
            else if (c == 'b' || c == 'B') {
907
                /* Binary */
908
556
                c = tok_nextc(tok);
909
921
                do {
910
921
                    if (c == '_') {
911
374
                        c = tok_nextc(tok);
912
374
                    }
913
921
                    if (c != '0' && c != '1') {
914
18
                        if (Py_ISDIGIT(c)) {
915
1
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
916
1
                        }
917
17
                        else {
918
17
                            tok_backup(tok, c);
919
17
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid binary literal"));
920
17
                        }
921
18
                    }
922
4.01k
                    do {
923
4.01k
                        c = tok_nextc(tok);
924
4.01k
                    } while (c == '0' || c == '1');
925
903
                } while (c == '_');
926
538
                if (Py_ISDIGIT(c)) {
927
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid digit '%c' in binary literal", c));
928
2
                }
929
536
                if (!verify_end_of_number(tok, c, "binary")) {
930
2
                    return MAKE_TOKEN(ERRORTOKEN);
931
2
                }
932
536
            }
933
17.9k
            else {
934
17.9k
                int nonzero = 0;
935
                /* maybe old-style octal; c is first char of it */
936
                /* in any case, allow '0' as a literal */
937
19.3k
                while (1) {
938
19.3k
                    if (c == '_') {
939
93
                        c = tok_nextc(tok);
940
93
                        if (!Py_ISDIGIT(c)) {
941
3
                            tok_backup(tok, c);
942
3
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
943
3
                        }
944
93
                    }
945
19.3k
                    if (c != '0') {
946
17.9k
                        break;
947
17.9k
                    }
948
1.47k
                    c = tok_nextc(tok);
949
1.47k
                }
950
17.9k
                char* zeros_end = tok->cur;
951
17.9k
                if (Py_ISDIGIT(c)) {
952
579
                    nonzero = 1;
953
579
                    c = tok_decimal_tail(tok);
954
579
                    if (c == 0) {
955
1
                        return MAKE_TOKEN(ERRORTOKEN);
956
1
                    }
957
579
                }
958
17.9k
                if (c == '.') {
959
897
                    c = tok_nextc(tok);
960
897
                    goto fraction;
961
897
                }
962
17.0k
                else if (c == 'e' || c == 'E') {
963
937
                    goto exponent;
964
937
                }
965
16.0k
                else if (c == 'j' || c == 'J') {
966
904
                    goto imaginary;
967
904
                }
968
15.1k
                else if (nonzero && !tok->tok_extra_tokens) {
969
                    /* Old-style octal: now disallowed. */
970
16
                    tok_backup(tok, c);
971
16
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
972
16
                            tok, (int)(tok->start + 1 - tok->line_start),
973
16
                            (int)(zeros_end - tok->line_start),
974
16
                            "leading zeros in decimal integer "
975
16
                            "literals are not permitted; "
976
16
                            "use an 0o prefix for octal integers"));
977
16
                }
978
15.1k
                if (!verify_end_of_number(tok, c, "decimal")) {
979
28
                    return MAKE_TOKEN(ERRORTOKEN);
980
28
                }
981
15.1k
            }
982
35.0k
        }
983
65.1k
        else {
984
            /* Decimal */
985
65.1k
            c = tok_decimal_tail(tok);
986
65.1k
            if (c == 0) {
987
13
                return MAKE_TOKEN(ERRORTOKEN);
988
13
            }
989
65.1k
            {
990
                /* Accept floating-point numbers. */
991
65.1k
                if (c == '.') {
992
4.15k
                    c = tok_nextc(tok);
993
7.98k
        fraction:
994
                    /* Fraction */
995
7.98k
                    if (Py_ISDIGIT(c)) {
996
6.01k
                        c = tok_decimal_tail(tok);
997
6.01k
                        if (c == 0) {
998
1
                            return MAKE_TOKEN(ERRORTOKEN);
999
1
                        }
1000
6.01k
                    }
1001
7.98k
                }
1002
68.9k
                if (c == 'e' || c == 'E') {
1003
10.3k
                    int e;
1004
11.3k
                  exponent:
1005
11.3k
                    e = c;
1006
                    /* Exponent part */
1007
11.3k
                    c = tok_nextc(tok);
1008
11.3k
                    if (c == '+' || c == '-') {
1009
4.17k
                        c = tok_nextc(tok);
1010
4.17k
                        if (!Py_ISDIGIT(c)) {
1011
13
                            tok_backup(tok, c);
1012
13
                            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal"));
1013
13
                        }
1014
7.15k
                    } else if (!Py_ISDIGIT(c)) {
1015
439
                        tok_backup(tok, c);
1016
439
                        if (!verify_end_of_number(tok, e, "decimal")) {
1017
36
                            return MAKE_TOKEN(ERRORTOKEN);
1018
36
                        }
1019
403
                        tok_backup(tok, e);
1020
403
                        p_start = tok->start;
1021
403
                        p_end = tok->cur;
1022
403
                        return MAKE_TOKEN(NUMBER);
1023
439
                    }
1024
10.8k
                    c = tok_decimal_tail(tok);
1025
10.8k
                    if (c == 0) {
1026
1
                        return MAKE_TOKEN(ERRORTOKEN);
1027
1
                    }
1028
10.8k
                }
1029
69.4k
                if (c == 'j' || c == 'J') {
1030
                    /* Imaginary part */
1031
4.63k
        imaginary:
1032
4.63k
                    c = tok_nextc(tok);
1033
4.63k
                    if (!verify_end_of_number(tok, c, "imaginary")) {
1034
9
                        return MAKE_TOKEN(ERRORTOKEN);
1035
9
                    }
1036
4.63k
                }
1037
65.7k
                else if (!verify_end_of_number(tok, c, "decimal")) {
1038
113
                    return MAKE_TOKEN(ERRORTOKEN);
1039
113
                }
1040
69.4k
            }
1041
69.4k
        }
1042
102k
        tok_backup(tok, c);
1043
102k
        p_start = tok->start;
1044
102k
        p_end = tok->cur;
1045
102k
        return MAKE_TOKEN(NUMBER);
1046
100k
    }
1047
1048
912k
  f_string_quote:
1049
912k
    if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
1050
912k
        && (c == '\'' || c == '"'))) {
1051
1052
16.7k
        int quote = c;
1053
16.7k
        int quote_size = 1;             /* 1 or 3 */
1054
1055
        /* Nodes of type STRING, especially multi line strings
1056
           must be handled differently in order to get both
1057
           the starting line number and the column offset right.
1058
           (cf. issue 16806) */
1059
16.7k
        tok->first_lineno = tok->lineno;
1060
16.7k
        tok->multi_line_start = tok->line_start;
1061
1062
        /* Find the quote size and start of string */
1063
16.7k
        int after_quote = tok_nextc(tok);
1064
16.7k
        if (after_quote == quote) {
1065
2.38k
            int after_after_quote = tok_nextc(tok);
1066
2.38k
            if (after_after_quote == quote) {
1067
769
                quote_size = 3;
1068
769
            }
1069
1.61k
            else {
1070
                // TODO: Check this
1071
1.61k
                tok_backup(tok, after_after_quote);
1072
1.61k
                tok_backup(tok, after_quote);
1073
1.61k
            }
1074
2.38k
        }
1075
16.7k
        if (after_quote != quote) {
1076
14.3k
            tok_backup(tok, after_quote);
1077
14.3k
        }
1078
1079
1080
16.7k
        p_start = tok->start;
1081
16.7k
        p_end = tok->cur;
1082
16.7k
        if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
1083
2
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
1084
2
        }
1085
16.7k
        tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
1086
16.7k
        the_current_tok->kind = TOK_FSTRING_MODE;
1087
16.7k
        the_current_tok->quote = quote;
1088
16.7k
        the_current_tok->quote_size = quote_size;
1089
16.7k
        the_current_tok->start = tok->start;
1090
16.7k
        the_current_tok->multi_line_start = tok->line_start;
1091
16.7k
        the_current_tok->first_line = tok->lineno;
1092
16.7k
        the_current_tok->start_offset = -1;
1093
16.7k
        the_current_tok->multi_line_start_offset = -1;
1094
16.7k
        the_current_tok->last_expr_buffer = NULL;
1095
16.7k
        the_current_tok->last_expr_size = 0;
1096
16.7k
        the_current_tok->last_expr_end = -1;
1097
16.7k
        the_current_tok->in_format_spec = 0;
1098
16.7k
        the_current_tok->in_debug = 0;
1099
1100
16.7k
        enum string_kind_t string_kind = FSTRING;
1101
16.7k
        switch (*tok->start) {
1102
920
            case 'T':
1103
4.57k
            case 't':
1104
4.57k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1105
4.57k
                string_kind = TSTRING;
1106
4.57k
                break;
1107
1.75k
            case 'F':
1108
11.6k
            case 'f':
1109
11.6k
                the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
1110
11.6k
                break;
1111
95
            case 'R':
1112
492
            case 'r':
1113
492
                the_current_tok->raw = 1;
1114
492
                if (Py_TOLOWER(*(tok->start + 1)) == 't') {
1115
200
                    string_kind = TSTRING;
1116
200
                }
1117
492
                break;
1118
0
            default:
1119
0
                Py_UNREACHABLE();
1120
16.7k
        }
1121
1122
16.7k
        the_current_tok->string_kind = string_kind;
1123
16.7k
        the_current_tok->curly_bracket_depth = 0;
1124
16.7k
        the_current_tok->curly_bracket_expr_start_depth = -1;
1125
16.7k
        return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
1126
16.7k
    }
1127
1128
900k
  letter_quote:
1129
    /* String */
1130
900k
    if (c == '\'' || c == '"') {
1131
59.0k
        int quote = c;
1132
59.0k
        int quote_size = 1;             /* 1 or 3 */
1133
59.0k
        int end_quote_size = 0;
1134
59.0k
        int has_escaped_quote = 0;
1135
1136
        /* Nodes of type STRING, especially multi line strings
1137
           must be handled differently in order to get both
1138
           the starting line number and the column offset right.
1139
           (cf. issue 16806) */
1140
59.0k
        tok->first_lineno = tok->lineno;
1141
59.0k
        tok->multi_line_start = tok->line_start;
1142
1143
        /* Find the quote size and start of string */
1144
59.0k
        c = tok_nextc(tok);
1145
59.0k
        if (c == quote) {
1146
10.6k
            c = tok_nextc(tok);
1147
10.6k
            if (c == quote) {
1148
2.57k
                quote_size = 3;
1149
2.57k
            }
1150
8.03k
            else {
1151
8.03k
                end_quote_size = 1;     /* empty string found */
1152
8.03k
            }
1153
10.6k
        }
1154
59.0k
        if (c != quote) {
1155
56.4k
            tok_backup(tok, c);
1156
56.4k
        }
1157
1158
        /* Get rest of string */
1159
1.17M
        while (end_quote_size != quote_size) {
1160
1.11M
            c = tok_nextc(tok);
1161
1.11M
            if (tok->done == E_ERROR) {
1162
0
                return MAKE_TOKEN(ERRORTOKEN);
1163
0
            }
1164
1.11M
            if (tok->done == E_DECODE) {
1165
0
                break;
1166
0
            }
1167
1.11M
            if (c == EOF || (quote_size == 1 && c == '\n')) {
1168
411
                assert(tok->multi_line_start != NULL);
1169
                // shift the tok_state's location into
1170
                // the start of string, and report the error
1171
                // from the initial quote character
1172
411
                tok->cur = (char *)tok->start;
1173
411
                tok->cur++;
1174
411
                tok->line_start = tok->multi_line_start;
1175
411
                int start = tok->lineno;
1176
411
                tok->lineno = tok->first_lineno;
1177
1178
411
                if (INSIDE_FSTRING(tok)) {
1179
                    /* When we are in an f-string, before raising the
1180
                     * unterminated string literal error, check whether
1181
                     * does the initial quote matches with f-strings quotes
1182
                     * and if it is, then this must be a missing '}' token
1183
                     * so raise the proper error */
1184
31
                    tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
1185
31
                    if (the_current_tok->quote == quote &&
1186
31
                        the_current_tok->quote_size == quote_size) {
1187
19
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1188
19
                            "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
1189
19
                    }
1190
31
                }
1191
1192
392
                if (quote_size == 3) {
1193
16
                    _PyTokenizer_syntaxerror(tok, "unterminated triple-quoted string literal"
1194
16
                                     " (detected at line %d)", start);
1195
16
                    if (c != '\n') {
1196
16
                        tok->done = E_EOFS;
1197
16
                    }
1198
16
                    return MAKE_TOKEN(ERRORTOKEN);
1199
16
                }
1200
376
                else {
1201
376
                    if (has_escaped_quote) {
1202
11
                        _PyTokenizer_syntaxerror(
1203
11
                            tok,
1204
11
                            "unterminated string literal (detected at line %d); "
1205
11
                            "perhaps you escaped the end quote?",
1206
11
                            start
1207
11
                        );
1208
365
                    } else {
1209
365
                        _PyTokenizer_syntaxerror(
1210
365
                            tok, "unterminated string literal (detected at line %d)", start
1211
365
                        );
1212
365
                    }
1213
376
                    if (c != '\n') {
1214
14
                        tok->done = E_EOLS;
1215
14
                    }
1216
376
                    return MAKE_TOKEN(ERRORTOKEN);
1217
376
                }
1218
392
            }
1219
1.11M
            if (c == quote) {
1220
57.5k
                end_quote_size += 1;
1221
57.5k
            }
1222
1.05M
            else {
1223
1.05M
                end_quote_size = 0;
1224
1.05M
                if (c == '\\') {
1225
28.4k
                    c = tok_nextc(tok);  /* skip escaped char */
1226
28.4k
                    if (c == quote) {  /* but record whether the escaped char was a quote */
1227
1.25k
                        has_escaped_quote = 1;
1228
1.25k
                    }
1229
28.4k
                    if (c == '\r') {
1230
204
                        c = tok_nextc(tok);
1231
204
                    }
1232
28.4k
                }
1233
1.05M
            }
1234
1.11M
        }
1235
1236
58.6k
        p_start = tok->start;
1237
58.6k
        p_end = tok->cur;
1238
58.6k
        return MAKE_TOKEN(STRING);
1239
59.0k
    }
1240
1241
    /* Line continuation */
1242
841k
    if (c == '\\') {
1243
437
        if ((c = tok_continuation_line(tok)) == -1) {
1244
70
            return MAKE_TOKEN(ERRORTOKEN);
1245
70
        }
1246
367
        tok->cont_line = 1;
1247
367
        goto again; /* Read next line */
1248
437
    }
1249
1250
    /* Punctuation character */
1251
841k
    int is_punctuation = (c == ':' || c == '}' || c == '!' || c == '{');
1252
841k
    if (is_punctuation && INSIDE_FSTRING(tok) && INSIDE_FSTRING_EXPR(current_tok)) {
1253
        /* This code block gets executed before the curly_bracket_depth is incremented
1254
         * by the `{` case, so for ensuring that we are on the 0th level, we need
1255
         * to adjust it manually */
1256
55.2k
        int cursor = current_tok->curly_bracket_depth - (c != '{');
1257
55.2k
        int in_format_spec = current_tok->in_format_spec;
1258
55.2k
         int cursor_in_format_with_debug =
1259
55.2k
             cursor == 1 && (current_tok->in_debug || in_format_spec);
1260
55.2k
         int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
1261
55.2k
        if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
1262
0
            return MAKE_TOKEN(ENDMARKER);
1263
0
        }
1264
55.2k
        if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
1265
9
            return MAKE_TOKEN(ERRORTOKEN);
1266
9
        }
1267
1268
55.2k
        if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) {
1269
3.68k
            current_tok->kind = TOK_FSTRING_MODE;
1270
3.68k
            current_tok->in_format_spec = 1;
1271
3.68k
            p_start = tok->start;
1272
3.68k
            p_end = tok->cur;
1273
3.68k
            return MAKE_TOKEN(_PyToken_OneChar(c));
1274
3.68k
        }
1275
55.2k
    }
1276
1277
    /* Check for two-character token */
1278
837k
    {
1279
837k
        int c2 = tok_nextc(tok);
1280
837k
        int current_token = _PyToken_TwoChars(c, c2);
1281
837k
        if (current_token != OP) {
1282
22.7k
            int c3 = tok_nextc(tok);
1283
22.7k
            int current_token3 = _PyToken_ThreeChars(c, c2, c3);
1284
22.7k
            if (current_token3 != OP) {
1285
993
                current_token = current_token3;
1286
993
            }
1287
21.7k
            else {
1288
21.7k
                tok_backup(tok, c3);
1289
21.7k
            }
1290
22.7k
            p_start = tok->start;
1291
22.7k
            p_end = tok->cur;
1292
22.7k
            return MAKE_TOKEN(current_token);
1293
22.7k
        }
1294
814k
        tok_backup(tok, c2);
1295
814k
    }
1296
1297
    /* Keep track of parentheses nesting level */
1298
0
    switch (c) {
1299
89.0k
    case '(':
1300
123k
    case '[':
1301
171k
    case '{':
1302
171k
        if (tok->level >= MAXLEVEL) {
1303
3
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested parentheses"));
1304
3
        }
1305
171k
        tok->parenstack[tok->level] = c;
1306
171k
        tok->parenlinenostack[tok->level] = tok->lineno;
1307
171k
        tok->parencolstack[tok->level] = (int)(tok->start - tok->line_start);
1308
171k
        tok->level++;
1309
171k
        if (INSIDE_FSTRING(tok)) {
1310
30.4k
            current_tok->curly_bracket_depth++;
1311
30.4k
        }
1312
171k
        break;
1313
60.4k
    case ')':
1314
71.9k
    case ']':
1315
98.9k
    case '}':
1316
98.9k
        if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
1317
46
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1318
46
                "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
1319
46
        }
1320
98.8k
        if (!tok->tok_extra_tokens && !tok->level) {
1321
216
            return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
1322
216
        }
1323
98.6k
        if (tok->level > 0) {
1324
98.6k
            tok->level--;
1325
98.6k
            int opening = tok->parenstack[tok->level];
1326
98.6k
            if (!tok->tok_extra_tokens && !((opening == '(' && c == ')') ||
1327
98.6k
                                            (opening == '[' && c == ']') ||
1328
98.6k
                                            (opening == '{' && c == '}'))) {
1329
                /* If the opening bracket belongs to an f-string's expression
1330
                part (e.g. f"{)}") and the closing bracket is an arbitrary
1331
                nested expression, then instead of matching a different
1332
                syntactical construct with it; we'll throw an unmatched
1333
                parentheses error. */
1334
42
                if (INSIDE_FSTRING(tok) && opening == '{') {
1335
5
                    assert(current_tok->curly_bracket_depth >= 0);
1336
5
                    int previous_bracket = current_tok->curly_bracket_depth - 1;
1337
5
                    if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
1338
3
                        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1339
3
                            "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
1340
3
                    }
1341
5
                }
1342
39
                if (tok->parenlinenostack[tok->level] != tok->lineno) {
1343
2
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1344
2
                            "closing parenthesis '%c' does not match "
1345
2
                            "opening parenthesis '%c' on line %d",
1346
2
                            c, opening, tok->parenlinenostack[tok->level]));
1347
2
                }
1348
37
                else {
1349
37
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
1350
37
                            "closing parenthesis '%c' does not match "
1351
37
                            "opening parenthesis '%c'",
1352
37
                            c, opening));
1353
37
                }
1354
39
            }
1355
98.6k
        }
1356
1357
98.6k
        if (INSIDE_FSTRING(tok)) {
1358
22.9k
            current_tok->curly_bracket_depth--;
1359
22.9k
            if (current_tok->curly_bracket_depth < 0) {
1360
1
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
1361
1
                    TOK_GET_STRING_PREFIX(tok), c));
1362
1
            }
1363
22.9k
            if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
1364
21.2k
                current_tok->curly_bracket_expr_start_depth--;
1365
21.2k
                current_tok->kind = TOK_FSTRING_MODE;
1366
21.2k
                current_tok->in_format_spec = 0;
1367
21.2k
                current_tok->in_debug = 0;
1368
21.2k
            }
1369
22.9k
        }
1370
98.6k
        break;
1371
544k
    default:
1372
544k
        break;
1373
814k
    }
1374
1375
814k
    if (!Py_UNICODE_ISPRINTABLE(c)) {
1376
488
        return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid non-printable character U+%04X", c));
1377
488
    }
1378
1379
814k
    if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
1380
41.8k
        current_tok->in_debug = 1;
1381
41.8k
    }
1382
1383
    /* Punctuation character */
1384
814k
    p_start = tok->start;
1385
814k
    p_end = tok->cur;
1386
814k
    return MAKE_TOKEN(_PyToken_OneChar(c));
1387
814k
}
1388
1389
/* Lexer entry point while inside an f-string or t-string literal
   (PEP 701 style tokenization).  Emits FSTRING_MIDDLE/TSTRING_MIDDLE
   chunks of literal text, FSTRING_END/TSTRING_END when the closing
   quote(s) are reached, or hands control back to the regular-mode
   lexer when a '{' opens a replacement expression.
   Returns the token type (also written into *token via MAKE_TOKEN). */
static int
tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
    const char *p_start = NULL;
    const char *p_end = NULL;
    int end_quote_size = 0;      /* consecutive closing-quote chars seen so far */
    int unicode_escape = 0;      /* inside a \N{...} named escape */

    tok->start = tok->cur;
    tok->first_lineno = tok->lineno;
    tok->starting_col_offset = tok->col_offset;

    // If we start with a bracket, we defer to the normal mode as there is nothing for us to tokenize
    // before it.
    int start_char = tok_nextc(tok);
    if (start_char == '{') {
        // Peek one char to distinguish '{{' (escaped literal brace,
        // handled below as MIDDLE text) from '{' (expression start).
        int peek1 = tok_nextc(tok);
        tok_backup(tok, peek1);
        tok_backup(tok, start_char);
        if (peek1 != '{') {
            current_tok->curly_bracket_expr_start_depth++;
            if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                    "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
            }
            // Switch the current mode to regular tokenization for the
            // replacement expression and delegate immediately.
            TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
            return tok_get_normal_mode(tok, current_tok, token);
        }
    }
    else {
        tok_backup(tok, start_char);
    }

    // Check if we are at the end of the string
    for (int i = 0; i < current_tok->quote_size; i++) {
        int quote = tok_nextc(tok);
        if (quote != current_tok->quote) {
            tok_backup(tok, quote);
            goto f_string_middle;
        }
    }

    // All closing quotes matched: the string is done.  Free the cached
    // last-expression buffer (used for '=' debug specs) and pop the mode.
    if (current_tok->last_expr_buffer != NULL) {
        PyMem_Free(current_tok->last_expr_buffer);
        current_tok->last_expr_buffer = NULL;
        current_tok->last_expr_size = 0;
        current_tok->last_expr_end = -1;
    }

    p_start = tok->start;
    p_end = tok->cur;
    tok->tok_mode_stack_index--;
    return MAKE_TOKEN(FTSTRING_END(current_tok));

f_string_middle:

    // TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
    // this.
    tok->multi_line_start = tok->line_start;
    // Consume literal text until we see quote_size consecutive closing
    // quotes, an expression brace, EOF, or (single-quoted) a newline.
    while (end_quote_size != current_tok->quote_size) {
        int c = tok_nextc(tok);
        if (tok->done == E_ERROR || tok->done == E_DECODE) {
            return MAKE_TOKEN(ERRORTOKEN);
        }
        int in_format_spec = (
                current_tok->in_format_spec
                &&
                INSIDE_FSTRING_EXPR(current_tok)
        );

       if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
            if (tok->decoding_erred) {
                return MAKE_TOKEN(ERRORTOKEN);
            }

            // If we are in a format spec and we found a newline,
            // it means that the format spec ends here and we should
            // return to the regular mode.
            if (in_format_spec && c == '\n') {
                if (current_tok->quote_size == 1) {
                    return MAKE_TOKEN(
                        _PyTokenizer_syntaxerror(
                            tok,
                            "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
                            TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
                        )
                    );
                }
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }

            assert(tok->multi_line_start != NULL);
            // shift the tok_state's location into
            // the start of string, and report the error
            // from the initial quote character
            tok->cur = (char *)current_tok->start;
            tok->cur++;
            tok->line_start = current_tok->multi_line_start;
            int start = tok->lineno;

            tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
            tok->lineno = the_current_tok->first_line;

            if (current_tok->quote_size == 3) {
                _PyTokenizer_syntaxerror(tok,
                                    "unterminated triple-quoted %c-string literal"
                                    " (detected at line %d)",
                                    TOK_GET_STRING_PREFIX(tok), start);
                if (c != '\n') {
                    tok->done = E_EOFS;
                }
                return MAKE_TOKEN(ERRORTOKEN);
            }
            else {
                return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                                    "unterminated %c-string literal (detected at"
                                    " line %d)", TOK_GET_STRING_PREFIX(tok), start));
            }
        }

        if (c == current_tok->quote) {
            // Count consecutive quote chars; quote_size of them ends the
            // literal text (handled by the loop condition above).
            end_quote_size += 1;
            continue;
        } else {
            end_quote_size = 0;
        }

        if (c == '{') {
            if (!_PyLexer_update_ftstring_expr(tok, c)) {
                return MAKE_TOKEN(ENDMARKER);
            }
            int peek = tok_nextc(tok);
            if (peek != '{' || in_format_spec) {
                // A real expression start (or a '{' inside a format
                // spec, where '{{' escaping does not apply): rewind
                // both chars and switch to regular mode.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                current_tok->curly_bracket_expr_start_depth++;
                if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
                    return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
                        "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
                }
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            } else {
                // '{{' escape: emit the text including one of the two
                // braces (p_end excludes the second).
                p_start = tok->start;
                p_end = tok->cur - 1;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '}') {
            if (unicode_escape) {
                // Closing brace of a \N{...} escape: plain literal text.
                p_start = tok->start;
                p_end = tok->cur;
                return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
            }
            int peek = tok_nextc(tok);

            // The tokenizer can only be in the format spec if we have already completed the expression
            // scanning (indicated by the end of the expression being set) and we are not at the top level
            // of the bracket stack (-1 is the top level). Since format specifiers can't legally use double
            // brackets, we can bypass it here.
            int cursor = current_tok->curly_bracket_depth;
            if (peek == '}' && !in_format_spec && cursor == 0) {
                // '}}' escape: emit text including only one brace.
                p_start = tok->start;
                p_end = tok->cur - 1;
            } else {
                // Stray/closing '}': hand it back to regular mode,
                // which reports the appropriate error or closes the
                // replacement field.
                tok_backup(tok, peek);
                tok_backup(tok, c);
                TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
                current_tok->in_format_spec = 0;
                p_start = tok->start;
                p_end = tok->cur;
            }
            return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
        } else if (c == '\\') {
            int peek = tok_nextc(tok);
            if (peek == '\r') {
                // Skip the CR of a CRLF so the escape applies to LF.
                peek = tok_nextc(tok);
            }
            // Special case when the backslash is right before a curly
            // brace. We have to restore and return the control back
            // to the loop for the next iteration.
            if (peek == '{' || peek == '}') {
                if (!current_tok->raw) {
                    if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
                        return MAKE_TOKEN(ERRORTOKEN);
                    }
                }
                tok_backup(tok, peek);
                continue;
            }

            if (!current_tok->raw) {
                if (peek == 'N') {
                    /* Handle named unicode escapes (\N{BULLET}) */
                    peek = tok_nextc(tok);
                    if (peek == '{') {
                        unicode_escape = 1;
                    } else {
                        tok_backup(tok, peek);
                    }
                }
            } /* else {
                skip the escaped character
            }*/
        }
    }

    // Backup the f-string quotes to emit a final FSTRING_MIDDLE and
    // add the quotes to the FSTRING_END in the next tokenizer iteration.
    for (int i = 0; i < current_tok->quote_size; i++) {
        tok_backup(tok, current_tok->quote);
    }
    p_start = tok->start;
    p_end = tok->cur;
    return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
1611
1612
static int
1613
tok_get(struct tok_state *tok, struct token *token)
1614
1.78M
{
1615
1.78M
    tokenizer_mode *current_tok = TOK_GET_MODE(tok);
1616
1.78M
    if (current_tok->kind == TOK_REGULAR_MODE) {
1617
1.72M
        return tok_get_normal_mode(tok, current_tok, token);
1618
1.72M
    } else {
1619
52.9k
        return tok_get_fstring_mode(tok, current_tok, token);
1620
52.9k
    }
1621
1.78M
}
1622
1623
int
1624
_PyTokenizer_Get(struct tok_state *tok, struct token *token)
1625
1.78M
{
1626
1.78M
    int result = tok_get(tok, token);
1627
1.78M
    if (tok->decoding_erred) {
1628
0
        result = ERRORTOKEN;
1629
0
        tok->done = E_DECODE;
1630
0
    }
1631
1.78M
    return result;
1632
1.78M
}