Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Parser/tokenizer/readline_tokenizer.c
Line
Count
Source (jump to first uncovered line)
1
#include "Python.h"
2
#include "errcode.h"
3
4
#include "helpers.h"
5
#include "../lexer/lexer.h"
6
#include "../lexer/state.h"
7
#include "../lexer/buffer.h"
8
9
static int
10
0
tok_readline_string(struct tok_state* tok) {
11
0
    PyObject* line = NULL;
12
0
    PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
13
0
    if (raw_line == NULL) {
14
0
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
15
0
            PyErr_Clear();
16
0
            return 1;
17
0
        }
18
0
        _PyTokenizer_error_ret(tok);
19
0
        goto error;
20
0
    }
21
0
    if(tok->encoding != NULL) {
22
0
        if (!PyBytes_Check(raw_line)) {
23
0
            PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
24
0
            _PyTokenizer_error_ret(tok);
25
0
            goto error;
26
0
        }
27
0
        line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
28
0
                                tok->encoding, "replace");
29
0
        Py_CLEAR(raw_line);
30
0
        if (line == NULL) {
31
0
            _PyTokenizer_error_ret(tok);
32
0
            goto error;
33
0
        }
34
0
    } else {
35
0
        if(!PyUnicode_Check(raw_line)) {
36
0
            PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
37
0
            _PyTokenizer_error_ret(tok);
38
0
            goto error;
39
0
        }
40
0
        line = raw_line;
41
0
        raw_line = NULL;
42
0
    }
43
0
    Py_ssize_t buflen;
44
0
    const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
45
0
    if (buf == NULL) {
46
0
        _PyTokenizer_error_ret(tok);
47
0
        goto error;
48
0
    }
49
50
    // Make room for the null terminator *and* potentially
51
    // an extra newline character that we may need to artificially
52
    // add.
53
0
    size_t buffer_size = buflen + 2;
54
0
    if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
55
0
        goto error;
56
0
    }
57
0
    memcpy(tok->inp, buf, buflen);
58
0
    tok->inp += buflen;
59
0
    *tok->inp = '\0';
60
61
0
    tok->line_start = tok->cur;
62
0
    Py_DECREF(line);
63
0
    return 1;
64
0
error:
65
0
    Py_XDECREF(raw_line);
66
0
    Py_XDECREF(line);
67
0
    return 0;
68
0
}
69
70
static int
71
0
tok_underflow_readline(struct tok_state* tok) {
72
0
    assert(tok->decoding_state == STATE_NORMAL);
73
0
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
74
0
    if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
75
0
        tok->cur = tok->inp = tok->buf;
76
0
    }
77
0
    if (!tok_readline_string(tok)) {
78
0
        return 0;
79
0
    }
80
0
    if (tok->inp == tok->cur) {
81
0
        tok->done = E_EOF;
82
0
        return 0;
83
0
    }
84
0
    tok->implicit_newline = 0;
85
0
    if (tok->inp[-1] != '\n') {
86
0
        assert(tok->inp + 1 < tok->end);
87
        /* Last line does not end in \n, fake one */
88
0
        *tok->inp++ = '\n';
89
0
        *tok->inp = '\0';
90
0
        tok->implicit_newline = 1;
91
0
    }
92
93
0
    if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
94
0
        return 0;
95
0
    }
96
97
0
    ADVANCE_LINENO();
98
    /* The default encoding is UTF-8, so make sure we don't have any
99
       non-UTF-8 sequences in it. */
100
0
    if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) {
101
0
        _PyTokenizer_error_ret(tok);
102
0
        return 0;
103
0
    }
104
0
    assert(tok->done == E_OK);
105
0
    return tok->done == E_OK;
106
0
}
107
108
struct tok_state *
109
_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
110
                          int exec_input, int preserve_crlf)
111
0
{
112
0
    struct tok_state *tok = _PyTokenizer_tok_new();
113
0
    if (tok == NULL)
114
0
        return NULL;
115
0
    if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
116
0
        _PyTokenizer_Free(tok);
117
0
        return NULL;
118
0
    }
119
0
    tok->cur = tok->inp = tok->buf;
120
0
    tok->end = tok->buf + BUFSIZ;
121
0
    tok->fp = NULL;
122
0
    if (enc != NULL) {
123
0
        tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
124
0
        if (!tok->encoding) {
125
0
            _PyTokenizer_Free(tok);
126
0
            return NULL;
127
0
        }
128
0
    }
129
0
    tok->decoding_state = STATE_NORMAL;
130
0
    tok->underflow = &tok_underflow_readline;
131
0
    Py_INCREF(readline);
132
0
    tok->readline = readline;
133
0
    return tok;
134
0
}