/src/cpython/Parser/tokenizer/utf8_tokenizer.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "Python.h" |
2 | | #include "errcode.h" |
3 | | |
4 | | #include "helpers.h" |
5 | | #include "../lexer/state.h" |
6 | | |
7 | | static int |
8 | 86 | tok_underflow_string(struct tok_state *tok) { |
9 | 86 | char *end = strchr(tok->inp, '\n'); |
10 | 86 | if (end != NULL) { |
11 | 0 | end++; |
12 | 0 | } |
13 | 86 | else { |
14 | 86 | end = strchr(tok->inp, '\0'); |
15 | 86 | if (end == tok->inp) { |
16 | 43 | tok->done = E_EOF; |
17 | 43 | return 0; |
18 | 43 | } |
19 | 86 | } |
20 | 43 | if (tok->start == NULL) { |
21 | 43 | tok->buf = tok->cur; |
22 | 43 | } |
23 | 43 | tok->line_start = tok->cur; |
24 | 43 | ADVANCE_LINENO(); |
25 | 43 | tok->inp = end; |
26 | 43 | return 1; |
27 | 86 | } |
28 | | |
29 | | /* Set up tokenizer for UTF-8 string */ |
30 | | struct tok_state * |
31 | | _PyTokenizer_FromUTF8(const char *str, int exec_input, int preserve_crlf) |
32 | 43 | { |
33 | 43 | struct tok_state *tok = _PyTokenizer_tok_new(); |
34 | 43 | char *translated; |
35 | 43 | if (tok == NULL) |
36 | 0 | return NULL; |
37 | 43 | tok->input = translated = _PyTokenizer_translate_newlines(str, exec_input, preserve_crlf, tok); |
38 | 43 | if (translated == NULL) { |
39 | 0 | _PyTokenizer_Free(tok); |
40 | 0 | return NULL; |
41 | 0 | } |
42 | 43 | tok->decoding_state = STATE_NORMAL; |
43 | 43 | tok->enc = NULL; |
44 | 43 | tok->str = translated; |
45 | 43 | tok->encoding = _PyTokenizer_new_string("utf-8", 5, tok); |
46 | 43 | if (!tok->encoding) { |
47 | 0 | _PyTokenizer_Free(tok); |
48 | 0 | return NULL; |
49 | 0 | } |
50 | | |
51 | 43 | tok->buf = tok->cur = tok->inp = translated; |
52 | 43 | tok->end = translated; |
53 | 43 | tok->underflow = &tok_underflow_string; |
54 | 43 | return tok; |
55 | 43 | } |