/src/cpython/Parser/tokenizer/readline_tokenizer.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "Python.h" |
2 | | #include "errcode.h" |
3 | | |
4 | | #include "helpers.h" |
5 | | #include "../lexer/lexer.h" |
6 | | #include "../lexer/state.h" |
7 | | #include "../lexer/buffer.h" |
8 | | |
/* Fetch one line by calling tok->readline (a Python callable) and append
 * it, as UTF-8, to the tokenizer's input buffer at tok->inp.
 *
 * Returns 1 on success or on clean EOF (readline raised StopIteration,
 * which is cleared and treated as "no more lines"); returns 0 on error
 * after reporting it through _PyTokenizer_error_ret(). */
static int
tok_readline_string(struct tok_state* tok) {
    PyObject* line = NULL;
    PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
    if (raw_line == NULL) {
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
            /* StopIteration is the normal end-of-input signal, not an error. */
            PyErr_Clear();
            return 1;
        }
        _PyTokenizer_error_ret(tok);
        goto error;
    }
    if(tok->encoding != NULL) {
        /* An explicit source encoding means readline() must yield bytes,
         * which are decoded to str with that encoding ("replace" on
         * malformed sequences). */
        if (!PyBytes_Check(raw_line)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
            _PyTokenizer_error_ret(tok);
            goto error;
        }
        line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
                                tok->encoding, "replace");
        Py_CLEAR(raw_line);
        if (line == NULL) {
            _PyTokenizer_error_ret(tok);
            goto error;
        }
    } else {
        /* No encoding set: readline() must already return a str. */
        if(!PyUnicode_Check(raw_line)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
            _PyTokenizer_error_ret(tok);
            goto error;
        }
        /* Transfer the strong reference from raw_line to line so the
         * shared cleanup below handles exactly one owner. */
        line = raw_line;
        raw_line = NULL;
    }
    Py_ssize_t buflen;
    /* buf borrows storage owned by `line`; `line` must stay alive until
     * after the memcpy below. */
    const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
    if (buf == NULL) {
        _PyTokenizer_error_ret(tok);
        goto error;
    }

    // Make room for the null terminator *and* potentially
    // an extra newline character that we may need to artificially
    // add.
    size_t buffer_size = buflen + 2;
    if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
        goto error;
    }
    memcpy(tok->inp, buf, buflen);
    tok->inp += buflen;
    *tok->inp = '\0';

    tok->line_start = tok->cur;
    Py_DECREF(line);
    return 1;
error:
    /* Exactly one of raw_line/line can be non-NULL here (ownership was
     * transferred or cleared above), so XDECREF of both is safe. */
    Py_XDECREF(raw_line);
    Py_XDECREF(line);
    return 0;
}
69 | | |
/* Underflow hook for readline-driven tokenization: refill the buffer
 * with the next source line when the lexer exhausts its input.
 *
 * Returns 1 when a new line is available, 0 on EOF (tok->done = E_EOF)
 * or error. Guarantees the buffered line ends in '\n', appending one
 * (and setting tok->implicit_newline) if the source line lacked it. */
static int
tok_underflow_readline(struct tok_state* tok) {
    assert(tok->decoding_state == STATE_NORMAL);
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
    if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
        /* No token in progress, so the new line may overwrite the buffer
         * from the start instead of being appended. */
        tok->cur = tok->inp = tok->buf;
    }
    if (!tok_readline_string(tok)) {
        return 0;
    }
    if (tok->inp == tok->cur) {
        /* readline() added nothing: end of input. */
        tok->done = E_EOF;
        return 0;
    }
    tok->implicit_newline = 0;
    if (tok->inp[-1] != '\n') {
        /* tok_readline_string reserved buflen + 2 bytes, so there is room
         * for the extra '\n' plus the terminator. */
        assert(tok->inp + 1 < tok->end);
        /* Last line does not end in \n, fake one */
        *tok->inp++ = '\n';
        *tok->inp = '\0';
        tok->implicit_newline = 1;
    }

    /* Inside an f/t-string: keep its expression buffer in sync with the
     * newly read line. */
    if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
        return 0;
    }

    ADVANCE_LINENO();
    /* The default encoding is UTF-8, so make sure we don't have any
       non-UTF-8 sequences in it. */
    if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok)) {
        _PyTokenizer_error_ret(tok);
        return 0;
    }
    assert(tok->done == E_OK);
    return tok->done == E_OK;
}
107 | | |
108 | | struct tok_state * |
109 | | _PyTokenizer_FromReadline(PyObject* readline, const char* enc, |
110 | | int exec_input, int preserve_crlf) |
111 | 0 | { |
112 | 0 | struct tok_state *tok = _PyTokenizer_tok_new(); |
113 | 0 | if (tok == NULL) |
114 | 0 | return NULL; |
115 | 0 | if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) { |
116 | 0 | _PyTokenizer_Free(tok); |
117 | 0 | return NULL; |
118 | 0 | } |
119 | 0 | tok->cur = tok->inp = tok->buf; |
120 | 0 | tok->end = tok->buf + BUFSIZ; |
121 | 0 | tok->fp = NULL; |
122 | 0 | if (enc != NULL) { |
123 | 0 | tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok); |
124 | 0 | if (!tok->encoding) { |
125 | 0 | _PyTokenizer_Free(tok); |
126 | 0 | return NULL; |
127 | 0 | } |
128 | 0 | } |
129 | 0 | tok->decoding_state = STATE_NORMAL; |
130 | 0 | tok->underflow = &tok_underflow_readline; |
131 | 0 | Py_INCREF(readline); |
132 | 0 | tok->readline = readline; |
133 | 0 | return tok; |
134 | 0 | } |