/src/cpython/Parser/tokenizer/readline_tokenizer.c

Source
#include "Python.h"
#include "errcode.h"

#include "helpers.h"
#include "../lexer/lexer.h"
#include "../lexer/state.h"
#include "../lexer/buffer.h"

/*
 * Call tok->readline once and append the UTF-8 bytes of the returned line
 * at tok->inp, followed by a NUL terminator.
 *
 * When tok->encoding is set, the callable must return a bytes object, which
 * is decoded with that encoding and the "replace" error handler. When it is
 * not set, the callable must return a str object. Any other return type
 * raises TypeError.
 *
 * Returns 1 on success. Also returns 1 when readline raised StopIteration:
 * the exception is cleared and nothing is written to the buffer.
 * Returns 0 on any other failure.
 */
static int
tok_readline_string(struct tok_state* tok) {
    PyObject* text = NULL;
    const char* utf8 = NULL;
    Py_ssize_t nbytes = 0;

    PyObject* result = PyObject_CallNoArgs(tok->readline);
    if (result == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
            _PyTokenizer_error_ret(tok);
            goto fail;
        }
        /* Exhausted iterator: not an error, just no new data. */
        PyErr_Clear();
        return 1;
    }

    if (tok->encoding == NULL) {
        if (!PyUnicode_Check(result)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
            _PyTokenizer_error_ret(tok);
            goto fail;
        }
        /* Move the reference from `result` to `text`. */
        text = result;
        result = NULL;
    }
    else {
        if (!PyBytes_Check(result)) {
            PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
            _PyTokenizer_error_ret(tok);
            goto fail;
        }
        text = PyUnicode_Decode(PyBytes_AS_STRING(result),
                                PyBytes_GET_SIZE(result),
                                tok->encoding, "replace");
        Py_CLEAR(result);
        if (text == NULL) {
            _PyTokenizer_error_ret(tok);
            goto fail;
        }
    }

    utf8 = PyUnicode_AsUTF8AndSize(text, &nbytes);
    if (utf8 == NULL) {
        _PyTokenizer_error_ret(tok);
        goto fail;
    }

    /* Two spare bytes: one for the NUL terminator written below and one
       for a newline that may have to be added artificially afterwards. */
    size_t needed = nbytes + 2;
    if (!_PyLexer_tok_reserve_buf(tok, needed)) {
        goto fail;
    }

    memcpy(tok->inp, utf8, nbytes);
    tok->inp += nbytes;
    *tok->inp = '\0';
    tok->line_start = tok->cur;

    /* `utf8` points into `text`; release it only after the copy. */
    Py_DECREF(text);
    return 1;

fail:
    Py_XDECREF(result);
    Py_XDECREF(text);
    return 0;
}

/*
 * Underflow callback for tokenizers created by _PyTokenizer_FromReadline.
 *
 * Pulls the next line through tok_readline_string(), makes sure it ends
 * with '\n' (recording in tok->implicit_newline whether one had to be
 * added), and advances the line number.
 *
 * Returns 0 when reading failed, when no new bytes were appended (in which
 * case tok->done is set to E_EOF), when the f-string expression update
 * fails, or when the UTF-8 check fails. Otherwise returns the result of
 * tok->done == E_OK.
 */
static int
tok_underflow_readline(struct tok_state* tok) {
    assert(tok->decoding_state == STATE_NORMAL);
    assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);

    /* Rewind to the start of the buffer only when no token is in progress
       and we are not inside an f-string. */
    if (tok->start == NULL) {
        if (!INSIDE_FSTRING(tok)) {
            tok->inp = tok->buf;
            tok->cur = tok->buf;
        }
    }

    if (!tok_readline_string(tok)) {
        return 0;
    }

    /* Nothing was appended: end of input. */
    if (tok->inp == tok->cur) {
        tok->done = E_EOF;
        return 0;
    }

    if (tok->inp[-1] == '\n') {
        tok->implicit_newline = 0;
    }
    else {
        /* Last line does not end in \n, fake one */
        assert(tok->inp + 1 < tok->end);
        tok->inp[0] = '\n';
        tok->inp[1] = '\0';
        tok->inp++;
        tok->implicit_newline = 1;
    }

    if (tok->tok_mode_stack_index) {
        if (!_PyLexer_update_ftstring_expr(tok, 0)) {
            return 0;
        }
    }

    ADVANCE_LINENO();

    /* The default encoding is UTF-8, so make sure we don't have any
       non-UTF-8 sequences in it. */
    if (tok->encoding == NULL) {
        if (!_PyTokenizer_ensure_utf8(tok->cur, tok, tok->lineno)) {
            _PyTokenizer_error_ret(tok);
            return 0;
        }
    }

    assert(tok->done == E_OK);
    return tok->done == E_OK;
}

/*
 * Create a tokenizer state that obtains its input by calling `readline`.
 *
 * readline:  callable invoked with no arguments for each line; a new
 *            reference to it is stored in tok->readline.
 * enc:       optional encoding name. When non-NULL, a copy made with
 *            _PyTokenizer_new_string() is stored in tok->encoding.
 * exec_input, preserve_crlf: accepted but not read by this function.
 *
 * The state starts with a BUFSIZ-byte buffer, STATE_NORMAL decoding state
 * and tok_underflow_readline as its underflow callback.
 *
 * Returns the new state, or NULL if creating the state, allocating the
 * buffer or copying the encoding name fails; a partially built state is
 * released with _PyTokenizer_Free().
 */
struct tok_state *
_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
                          int exec_input, int preserve_crlf)
{
    struct tok_state *tok = _PyTokenizer_tok_new();
    if (tok == NULL) {
        return NULL;
    }

    tok->buf = (char *)PyMem_Malloc(BUFSIZ);
    if (tok->buf == NULL) {
        goto fail;
    }
    tok->inp = tok->buf;
    tok->cur = tok->buf;
    tok->end = tok->buf + BUFSIZ;
    tok->fp = NULL;

    if (enc != NULL) {
        tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
        if (tok->encoding == NULL) {
            goto fail;
        }
    }

    tok->decoding_state = STATE_NORMAL;
    tok->underflow = &tok_underflow_readline;

    /* The tokenizer keeps its own reference to the callable. */
    Py_INCREF(readline);
    tok->readline = readline;
    return tok;

fail:
    _PyTokenizer_Free(tok);
    return NULL;
}

Line	Count	Source
1		#include "Python.h"
2		#include "errcode.h"
3
4		#include "helpers.h"
5		#include "../lexer/lexer.h"
6		#include "../lexer/state.h"
7		#include "../lexer/buffer.h"
8
9		static int
10	352	tok_readline_string(struct tok_state* tok) {
11	352	PyObject* line = NULL;
12	352	PyObject* raw_line = PyObject_CallNoArgs(tok->readline);
13	352	if (raw_line == NULL) {
14	8	if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
15	8	PyErr_Clear();
16	8	return 1;
17	8	}
18	0	_PyTokenizer_error_ret(tok);
19	0	goto error;
20	8	}
21	344	if(tok->encoding != NULL) {
22	4	if (!PyBytes_Check(raw_line)) {
23	0	PyErr_Format(PyExc_TypeError, "readline() returned a non-bytes object");
24	0	_PyTokenizer_error_ret(tok);
25	0	goto error;
26	0	}
27	4	line = PyUnicode_Decode(PyBytes_AS_STRING(raw_line), PyBytes_GET_SIZE(raw_line),
28	4	tok->encoding, "replace");
29	4	Py_CLEAR(raw_line);
30	4	if (line == NULL) {
31	0	_PyTokenizer_error_ret(tok);
32	0	goto error;
33	0	}
34	340	} else {
35	340	if(!PyUnicode_Check(raw_line)) {
36	0	PyErr_Format(PyExc_TypeError, "readline() returned a non-string object");
37	0	_PyTokenizer_error_ret(tok);
38	0	goto error;
39	0	}
40	340	line = raw_line;
41	340	raw_line = NULL;
42	340	}
43	344	Py_ssize_t buflen;
44	344	const char* buf = PyUnicode_AsUTF8AndSize(line, &buflen);
45	344	if (buf == NULL) {
46	0	_PyTokenizer_error_ret(tok);
47	0	goto error;
48	0	}
49
50		// Make room for the null terminator and potentially
51		// an extra newline character that we may need to artificially
52		// add.
53	344	size_t buffer_size = buflen + 2;
54	344	if (!_PyLexer_tok_reserve_buf(tok, buffer_size)) {
55	0	goto error;
56	0	}
57	344	memcpy(tok->inp, buf, buflen);
58	344	tok->inp += buflen;
59	344	*tok->inp = '\0';
60
61	344	tok->line_start = tok->cur;
62	344	Py_DECREF(line);
63	344	return 1;
64	0	error:
65	0	Py_XDECREF(raw_line);
66	0	Py_XDECREF(line);
67	0	return 0;
68	344	}
69
70		static int
71	352	tok_underflow_readline(struct tok_state* tok) {
72	352	assert(tok->decoding_state == STATE_NORMAL);
73	352	assert(tok->fp == NULL && tok->input == NULL && tok->decoding_readline == NULL);
74	352	if (tok->start == NULL && !INSIDE_FSTRING(tok)) {
75	324	tok->cur = tok->inp = tok->buf;
76	324	}
77	352	if (!tok_readline_string(tok)) {
78	0	return 0;
79	0	}
80	352	if (tok->inp == tok->cur) {
81	12	tok->done = E_EOF;
82	12	return 0;
83	12	}
84	340	tok->implicit_newline = 0;
85	340	if (tok->inp[-1] != '\n') {
86	4	assert(tok->inp + 1 < tok->end);
87		/* Last line does not end in \n, fake one */
88	4	*tok->inp++ = '\n';
89	4	*tok->inp = '\0';
90	4	tok->implicit_newline = 1;
91	4	}
92
93	340	if (tok->tok_mode_stack_index && !_PyLexer_update_ftstring_expr(tok, 0)) {
94	0	return 0;
95	0	}
96
97	340	ADVANCE_LINENO();
98		/* The default encoding is UTF-8, so make sure we don't have any
99		non-UTF-8 sequences in it. */
100	340	if (!tok->encoding && !_PyTokenizer_ensure_utf8(tok->cur, tok, tok->lineno)) {
101	0	_PyTokenizer_error_ret(tok);
102	0	return 0;
103	0	}
104	340	assert(tok->done == E_OK);
105	340	return tok->done == E_OK;
106	340	}
107
108		struct tok_state *
109		_PyTokenizer_FromReadline(PyObject* readline, const char* enc,
110		int exec_input, int preserve_crlf)
111	20	{
112	20	struct tok_state *tok = _PyTokenizer_tok_new();
113	20	if (tok == NULL)
114	0	return NULL;
115	20	if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
116	0	_PyTokenizer_Free(tok);
117	0	return NULL;
118	0	}
119	20	tok->cur = tok->inp = tok->buf;
120	20	tok->end = tok->buf + BUFSIZ;
121	20	tok->fp = NULL;
122	20	if (enc != NULL) {
123	4	tok->encoding = _PyTokenizer_new_string(enc, strlen(enc), tok);
124	4	if (!tok->encoding) {
125	0	_PyTokenizer_Free(tok);
126	0	return NULL;
127	0	}
128	4	}
129	20	tok->decoding_state = STATE_NORMAL;
130	20	tok->underflow = &tok_underflow_readline;
131	20	Py_INCREF(readline);
132	20	tok->readline = readline;
133	20	return tok;
134	20	}

Coverage Report

Created: 2026-03-08 06:40