1"""Token-related utilities"""
2
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
5from __future__ import annotations
6
7import itertools
8import tokenize
9from io import StringIO
10from keyword import iskeyword
11from tokenize import TokenInfo
12from typing import NamedTuple
13from collections.abc import Generator
14
15
class Token(NamedTuple):
    """A token plus its surrounding line, mirroring tokenize.TokenInfo.

    ``start`` and ``end`` are (row, column) pairs, with rows 1-indexed.
    """

    token: int
    text: str
    start: tuple[int, int]
    end: tuple[int, int]
    line: str


def generate_tokens(readline) -> Generator[TokenInfo, None, None]:
    """Wrap tokenize.generate_tokens, silencing the TokenError raised at EOF."""
    try:
        yield from tokenize.generate_tokens(readline)
    except tokenize.TokenError:
        # Incomplete input (e.g. an unclosed bracket or string) makes the
        # tokenizer raise at EOF; stop yielding instead of propagating it.
        return


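# A minimal usage sketch (illustrative, not part of the module's API): for
# incomplete input such as ``x = (1 +``, the wrapper yields the tokens
# produced before the tokenizer hit EOF and then stops, where
# tokenize.generate_tokens would raise TokenError.
#
#     >>> toks = list(generate_tokens(StringIO("x = (1 +").readline))
#     >>> [t.string for t in toks if t.string.strip()]
#     ['x', '=', '(', '1', '+']

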
def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: list[str] | None = None
) -> Generator[TokenInfo, None, None]:
    """Wrap tokenize.generate_tokens, converting selected errors to ERRORTOKENs.

    TokenErrors whose message contains one of the default (or caller-supplied)
    substrings are swallowed and a single ERRORTOKEN is yielded in their
    place; any other TokenError is re-raised.
    """
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    tokens: list[TokenInfo] = []
    try:
        for token in tokenize.generate_tokens(readline):
            tokens.append(token)
            yield token
    except tokenize.TokenError as exc:
        if any(error in exc.args[0] for error in errors_to_catch):
            if tokens:
                # place the error token at the end of the last good token
                start = end = tokens[-1].end
                line = tokens[-1].line
            else:
                start = end = (1, 0)
                line = ""
            yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
        else:
            # an error we don't recognize (e.g. plain EOF): re-raise
            raise


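# A minimal usage sketch (illustrative; assumes Python >= 3.12, where the
# tokenizer raises TokenError with an "unterminated string literal" message):
# the error is swallowed and surfaces as a trailing ERRORTOKEN instead.
#
#     >>> toks = list(generate_tokens_catch_errors(StringIO('s = "abc').readline))
#     >>> toks[-1].type == tokenize.ERRORTOKEN
#     True

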
def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't yet support multi-line input.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : int
        the cursor position

    Returns
    -------
    (line, offset): (str, int)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith("\n"):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        # the cursor is past the end of the text (or the cell is empty)
        line = ""
    return line, offset


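# A minimal usage sketch (illustrative): with the cursor at offset 5 (the
# "e" of "def"), the second line and its starting character offset come back.
#
#     >>> line_at_cursor("abc\ndef\n", cursor_pos=5)
#     ('def\n', 4)

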
def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
    """Get the token at a given cursor position

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found
    """
    names: list[str] = []
    call_names: list[str] = []
    closing_call_name: str | None = None
    most_recent_outer_name: str | None = None

    offsets = {1: 0}  # lines start at 1
    intersects_with_cursor = False
    cur_token_is_name = False
    tokens: list[Token | None] = [
        Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
    ]
    if not tokens:
        return ""
    # walk the tokens with one token of lookbehind and one of lookahead
    for prev_tok, (tok, next_tok) in zip(
        [None] + tokens, itertools.pairwise(tokens + [None])
    ):
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)

        closing_call_name = None

        offset = offsets[start_line]
        if offset + start_col > cursor_pos:
            # current token starts after the cursor,
            # don't consume it
            break

        if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
            if (
                names
                and prev_tok
                and prev_tok.token == tokenize.OP
                and prev_tok.text == "."
            ):
                # extend a dotted name, e.g. "os" -> "os.path"
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
            if (
                next_tok is not None
                and next_tok.token == tokenize.OP
                and next_tok.text == "="
            ):
                # don't inspect the lhs of an assignment
                names.pop(-1)
                cur_token_is_name = False
            if not call_names:
                most_recent_outer_name = names[-1] if names else None
        elif tok.token == tokenize.OP:
            if tok.text == "(" and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ")" and call_names:
                # keep track of the most recently popped call_name from the stack
                closing_call_name = call_names.pop(-1)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            # if the current token intersects directly, use it instead of the call token
            intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
            break

    if cur_token_is_name and intersects_with_cursor:
        # the cursor is directly over a name token: return that name
        return names[-1]
    # if the cursor isn't directly over a name token, use the most recent
    # call name if we can find one
    elif closing_call_name:
        # if we're on a ")", use the most recently popped call name
        return closing_call_name
    elif call_names:
        # otherwise, look for the most recent call name in the stack
        return call_names[-1]
    elif most_recent_outer_name:
        # if we've popped all the call names, use the most recently-seen
        # outer name
        return most_recent_outer_name
    elif names:
        # failing that, use the most recently seen name
        return names[-1]
    else:
        # give up
        return ""
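

# A minimal usage sketch (illustrative): anywhere inside a call that isn't
# directly on another name resolves to the callable, while a cursor placed
# on an argument name returns that argument.
#
#     >>> code = "func(alpha, beta)"
#     >>> token_at_cursor(code, cursor_pos=len(code) - 1)  # on the ")"
#     'func'
#     >>> token_at_cursor(code, cursor_pos=code.index("alpha"))
#     'alpha'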