/src/cpython/Parser/tokenizer/readline_tokenizer.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "Python.h" |
2 | | #include "errcode.h" |
3 | | |
4 | | #include "helpers.h" |
5 | | #include "../lexer/lexer.h" |
6 | | #include "../lexer/state.h" |
7 | | #include "../lexer/buffer.h" |
8 | | |
/* Fetch one line by calling tok->readline (a Python callable) and append
 * it, as UTF-8, to the tokenizer's input buffer at tok->inp.
 *
 * Returns 1 on success or on clean EOF (readline raised StopIteration,
 * which is cleared and treated as "no more lines"); returns 0 on error
 * after reporting it through _PyTokenizer_error_ret(). */
static int
tok_readline_string(struct tok_state* tok) {
    PyObject* line = NULL;
    PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
    if (raw_line == NULL) {
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
            /* StopIteration is the normal end-of-input signal, not an error. */
            PyErr_Clear();
            return 1;
        }
        _PyTokenizer_error_ret(tok);
        goto error;
    }
    if(tok->encoding != NULL) {
        /* An explicit source encoding means readline() must yield bytes,
         * which are decoded to str with that encoding ("replace" on
         * malformed sequences). */
        if (!PyBytes_Check(raw_line)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
            _PyTokenizer_error_ret(tok);
            goto error;
        }
        line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
                                tok->encoding, "replace");
        Py_CLEAR(raw_line);
        if (line == NULL) {
            _PyTokenizer_error_ret(tok);
            goto error;
        }
    } else {
        /* No encoding set: readline() must already return a str. */
        if(!PyUnicode_Check(raw_line)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
            _PyTokenizer_error_ret(tok);
            goto error;
        }
        /* Transfer the strong reference from raw_line to line so the
         * shared cleanup below handles exactly one owner. */
        line = raw_line;
        raw_line = NULL;
    }
    Py_ssize_t buflen;
    /* buf borrows storage owned by `line`; `line` must stay alive until
     * after the memcpy below. */
    const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
    if (buf == NULL) {
        _PyTokenizer_error_ret(tok);
        goto error;
    }

    // Make room for the null terminator *and* potentially
    // an extra newline character that we may need to artificially
    // add.
    size_t buffer_size = buflen + 2;
    if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
        goto error;
    }
    memcpy(tok->inp, buf, buflen);
    tok->inp += buflen;
    *tok->inp = '\0';

    tok->line_start = tok->cur;
    Py_DECREF(line);
    return 1;
error:
    /* Exactly one of raw_line/line can be non-NULL here (ownership was
     * transferred or cleared above), so XDECREF of both is safe. */
    Py_XDECREF(raw_line);
    Py_XDECREF(line);
    return 0;
}
69 | | |
/* Underflow hook for readline-driven tokenization: refill the buffer
 * with the next source line when the lexer exhausts its input.
 *
 * Returns 1 when a new line is available, 0 on EOF (tok->done = E_EOF)
 * or error. Guarantees the buffered line ends in '\n', appending one
 * (and setting tok->implicit_newline) if the source line lacked it. */
static int
tok_underflow_readline(struct tok_state* tok) {
    assert(tok->decoding_state == STATE_NORMAL);
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
    if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
        /* No token in progress, so the new line may overwrite the buffer
         * from the start instead of being appended. */
        tok->cur = tok->inp = tok->buf;
    }
    if (!tok_readline_string(tok)) {
        return 0;
    }
    if (tok->inp == tok->cur) {
        /* readline() added nothing: end of input. */
        tok->done = E_EOF;
        return 0;
    }
    tok->implicit_newline = 0;
    if (tok->inp[-1] != '\n') {
        /* tok_readline_string reserved buflen + 2 bytes, so there is room
         * for the extra '\n' plus the terminator. */
        assert(tok->inp + 1 < tok->end);
        /* Last line does not end in \n, fake one */
        *tok->inp++ = '\n';
        *tok->inp = '\0';
        tok->implicit_newline = 1;
    }

    /* Inside an f/t-string: keep its expression buffer in sync with the
     * newly read line. */
    if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
        return 0;
    }

    ADVANCE_LINENO();
    /* The default encoding is UTF-8, so make sure we don't have any
       non-UTF-8 sequences in it. */
    if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) {
        _PyTokenizer_error_ret(tok);
        return 0;
    }
    assert(tok->done == E_OK);
    return tok->done == E_OK;
}
107 | | |
108 | | struct tok_state * |
109 | | _PyTokenizer_FromReadline(PyObject* readline, const char* enc, |
110 | | int exec_input, int preserve_crlf) |
111 | 0 | { |
112 | 0 | struct tok_state *tok = _PyTokenizer_tok_new(); |
113 | 0 | if (tok == NULL) |
114 | 0 | return NULL; |
115 | 0 | if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { |
116 | 0 | _PyTokenizer_Free(tok); |
117 | 0 | return NULL; |
118 | 0 | } |
119 | 0 | tok->cur = tok->inp = tok->buf; |
120 | 0 | tok->end = tok->buf + BUFSIZ; |
121 | 0 | tok->fp = NULL; |
122 | 0 | if (enc != NULL) { |
123 | 0 | tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok); |
124 | 0 | if (!tok->encoding) { |
125 | 0 | _PyTokenizer_Free(tok); |
126 | 0 | return NULL; |
127 | 0 | } |
128 | 0 | } |
129 | 0 | tok->decoding_state = STATE_NORMAL; |
130 | 0 | tok->underflow = &tok_underflow_readline; |
131 | 0 | Py_INCREF(readline); |
132 | 0 | tok->readline = readline; |
133 | 0 | return tok; |
134 | 0 | } |