/src/cpython/Parser/tokenizer/readline_tokenizer.c

Source
#include "Python.h"
#include "errcode.h"

#include "helpers.h"
#include "../lexer/lexer.h"
#include "../lexer/state.h"
#include "../lexer/buffer.h"

/* Fetch one line from tok->readline and append its UTF-8 bytes at tok->inp.
 *
 * When tok->encoding is set, readline() has to return a bytes object, which
 * is decoded with that encoding using the "replace" error handler.  When it
 * is not set, readline() has to return a str object, which is used as is.
 * The tokenizer buffer is grown through _PyLexer_tok_reserve_buf(), the
 * copied data is NUL-terminated, and tok->line_start is set to tok->cur.
 *
 * Returns 1 on success.  A StopIteration raised by readline() is cleared and
 * is also reported as 1, with nothing appended to the buffer.  Returns 0 on
 * any other failure.
 */
static int
tok_readline_string(struct tok_state* tok) {
    int status = 0;
    PyObject* text = NULL;
    PyObject* result = PyObject_CallNoArgs(tok->readline);

    if (result == NULL) {
        if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
            /* Exhausted iterator: not an error, simply no new data. */
            PyErr_Clear();
            return 1;
        }
        _PyTokenizer_error_ret(tok);
        goto done;
    }

    if (tok->encoding == NULL) {
        /* No explicit encoding: the callable must hand us text directly. */
        if (!PyUnicode_Check(result)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
            _PyTokenizer_error_ret(tok);
            goto done;
        }
        /* Move ownership of the reference from `result` to `text`. */
        text = result;
        result = NULL;
    }
    else {
        /* Explicit encoding: the callable must hand us raw bytes to decode. */
        if (!PyBytes_Check(result)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
            _PyTokenizer_error_ret(tok);
            goto done;
        }
        text = PyUnicode_Decode(PyBytes_AS_STRING(result),
                                PyBytes_GET_SIZE(result),
                                tok->encoding, "replace");
        Py_CLEAR(result);
        if (text == NULL) {
            _PyTokenizer_error_ret(tok);
            goto done;
        }
    }

    Py_ssize_t nbytes;
    const char* utf8 = PyUnicode_AsUTF8AndSize(text, &nbytes);
    if (utf8 == NULL) {
        _PyTokenizer_error_ret(tok);
        goto done;
    }

    // Reserve two bytes beyond the payload: one for the terminating
    // NUL and one for a newline that the caller may have to append
    // artificially.
    size_t needed = nbytes + 2;
    if (!_PyLexer_tok_reserve_buf(tok, needed)) {
        goto done;
    }

    /* `utf8` is only used while `text` is still referenced, so the copy
       has to happen before the reference is released below. */
    memcpy(tok->inp, utf8, nbytes);
    tok->inp += nbytes;
    *tok->inp = '\0';
    tok->line_start = tok->cur;
    status = 1;

done:
    /* On success `result` is already NULL and `text` is non-NULL, so this
       releases exactly the one reference still held. */
    Py_XDECREF(result);
    Py_XDECREF(text);
    return status;
}

/* Underflow handler for tokenizers fed by a readline callable.
 *
 * Reads one more line via tok_readline_string(), guarantees that the new
 * data ends with '\n' (recording in tok->implicit_newline whether one had to
 * be added), advances the line number and, when no encoding was given,
 * checks that the new data is valid UTF-8.
 *
 * Returns nonzero when new input is available and tok->done is E_OK.
 * Returns 0 on error and at end of input; in the latter case tok->done is
 * set to E_EOF.
 */
static int
tok_underflow_readline(struct tok_state* tok) {
    assert(tok->decoding_state == STATE_NORMAL);
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);

    /* With no token in progress and no f-string open, start again at the
       beginning of the buffer. */
    if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
        tok->inp = tok->buf;
        tok->cur = tok->buf;
    }

    if (!tok_readline_string(tok)) {
        return 0;
    }

    /* Nothing was appended: the input is exhausted. */
    if (tok->cur == tok->inp) {
        tok->done = E_EOF;
        return 0;
    }

    tok->implicit_newline = 0;
    if (tok->inp[-1] != '\n') {
        /* Last line does not end in \n, fake one.  tok_readline_string()
           reserved the extra byte needed for this. */
        assert(tok->inp + 1 < tok->end);
        *tok->inp = '\n';
        tok->inp++;
        *tok->inp = '\0';
        tok->implicit_newline = 1;
    }

    if (tok->tok_mode_stack_index != 0) {
        if (!_PyLexer_update_ftstring_expr(tok, 0)) {
            return 0;
        }
    }

    ADVANCE_LINENO();

    /* The default encoding is UTF-8, so when no explicit encoding was
       supplied make sure the data holds no non-UTF-8 sequences. */
    if (tok->encoding == NULL) {
        if (!_PyTokenizer_ensure_utf8(tok->cur, tok, tok->lineno)) {
            _PyTokenizer_error_ret(tok);
            return 0;
        }
    }

    assert(tok->done == E_OK);
    return tok->done == E_OK;
}

/* Create a tokenizer state that obtains its input from a readline callable.
 *
 * readline: callable invoked with no arguments each time more input is
 *           needed; a new reference to it is stored in the returned state.
 * enc:      name of the encoding used to decode bytes returned by readline,
 *           or NULL, in which case readline is expected to return str.
 * exec_input, preserve_crlf: accepted but not referenced in this function.
 *
 * Returns the new state, or NULL if _PyTokenizer_tok_new(), the buffer
 * allocation or _PyTokenizer_new_string() fails.
 */
struct tok_state *
_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
                          int exec_input, int preserve_crlf)
{
    struct tok_state *tok = _PyTokenizer_tok_new();
    if (tok == NULL) {
        return NULL;
    }

    /* Start with a BUFSIZ-byte buffer. */
    tok->buf = (char *)PyMem_Malloc(BUFSIZ);
    if (tok->buf == NULL) {
        goto fail;
    }
    tok->inp = tok->buf;
    tok->cur = tok->buf;
    tok->end = tok->buf + BUFSIZ;
    tok->fp = NULL;

    if (enc != NULL) {
        tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
        if (tok->encoding == NULL) {
            goto fail;
        }
    }

    tok->decoding_state = STATE_NORMAL;
    tok->underflow = &tok_underflow_readline;

    /* The state keeps its own reference to the callable. */
    tok->readline = readline;
    Py_INCREF(readline);
    return tok;

fail:
    _PyTokenizer_Free(tok);
    return NULL;
}

Line	Count	Source
1		#include "Python.h"
2		#include "errcode.h"
3
4		#include "helpers.h"
5		#include "../lexer/lexer.h"
6		#include "../lexer/state.h"
7		#include "../lexer/buffer.h"
8
9		static int
10	176	tok_readline_string(struct tok_state* tok) {
11	176	PyObject* line = NULL;
12	176	PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
13	176	if (raw_line == NULL) {
14	4	if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
15	4	PyErr_Clear();
16	4	return 1;
17	4	}
18	0	_PyTokenizer_error_ret(tok);
19	0	goto error;
20	4	}
21	172	if(tok->encoding != NULL) {
22	2	if (!PyBytes_Check(raw_line)) {
23	0	PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
24	0	_PyTokenizer_error_ret(tok);
25	0	goto error;
26	0	}
27	2	line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
28	2	tok->encoding, "replace");
29	2	Py_CLEAR(raw_line);
30	2	if (line == NULL) {
31	0	_PyTokenizer_error_ret(tok);
32	0	goto error;
33	0	}
34	170	} else {
35	170	if(!PyUnicode_Check(raw_line)) {
36	0	PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
37	0	_PyTokenizer_error_ret(tok);
38	0	goto error;
39	0	}
40	170	line = raw_line;
41	170	raw_line = NULL;
42	170	}
43	172	Py_ssize_t buflen;
44	172	const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
45	172	if (buf == NULL) {
46	0	_PyTokenizer_error_ret(tok);
47	0	goto error;
48	0	}
49
50		// Make room for the null terminator and potentially
51		// an extra newline character that we may need to artificially
52		// add.
53	172	size_t buffer_size = buflen + 2;
54	172	if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
55	0	goto error;
56	0	}
57	172	memcpy(tok->inp, buf, buflen);
58	172	tok->inp += buflen;
59	172	*tok->inp = '\0';
60
61	172	tok->line_start = tok->cur;
62	172	Py_DECREF(line);
63	172	return 1;
64	0	error:
65	0	Py_XDECREF(raw_line);
66	0	Py_XDECREF(line);
67	0	return 0;
68	172	}
69
70		static int
71	176	tok_underflow_readline(struct tok_state* tok) {
72	176	assert(tok->decoding_state == STATE_NORMAL);
73	176	assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
74	176	if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
75	162	tok->cur = tok->inp = tok->buf;
76	162	}
77	176	if (!tok_readline_string(tok)) {
78	0	return 0;
79	0	}
80	176	if (tok->inp == tok->cur) {
81	6	tok->done = E_EOF;
82	6	return 0;
83	6	}
84	170	tok->implicit_newline = 0;
85	170	if (tok->inp[-1] != '\n') {
86	2	assert(tok->inp + 1 < tok->end);
87		/* Last line does not end in \n, fake one */
88	2	*tok->inp++ = '\n';
89	2	*tok->inp = '\0';
90	2	tok->implicit_newline = 1;
91	2	}
92
93	170	if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
94	0	return 0;
95	0	}
96
97	170	ADVANCE_LINENO();
98		/* The default encoding is UTF-8, so make sure we don't have any
99		non-UTF-8 sequences in it. */
100	170	if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok, tok->lineno)) {
101	0	_PyTokenizer_error_ret(tok);
102	0	return 0;
103	0	}
104	170	assert(tok->done == E_OK);
105	170	return tok->done == E_OK;
106	170	}
107
108		struct tok_state *
109		_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
110		int exec_input, int preserve_crlf)
111	10	{
112	10	struct tok_state *tok = _PyTokenizer_tok_new();
113	10	if (tok == NULL)
114	0	return NULL;
115	10	if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
116	0	_PyTokenizer_Free(tok);
117	0	return NULL;
118	0	}
119	10	tok->cur = tok->inp = tok->buf;
120	10	tok->end = tok->buf + BUFSIZ;
121	10	tok->fp = NULL;
122	10	if (enc != NULL) {
123	2	tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
124	2	if (!tok->encoding) {
125	0	_PyTokenizer_Free(tok);
126	0	return NULL;
127	0	}
128	2	}
129	10	tok->decoding_state = STATE_NORMAL;
130	10	tok->underflow = &tok_underflow_readline;
131	10	Py_INCREF(readline);
132	10	tok->readline = readline;
133	10	return tok;
134	10	}

Coverage Report

Created: 2026-02-26 06:53