1"""Token-related utilities"""
2
3# Copyright (c) IPython Development Team.
4# Distributed under the terms of the Modified BSD License.
5from __future__ import annotations
6
7import itertools
8import tokenize
9from io import StringIO
10from keyword import iskeyword
11from tokenize import TokenInfo
12from typing import Generator, NamedTuple
13
14
15class Token(NamedTuple):
16 token: int
17 text: str
18 start: int
19 end: int
20 line: str
21
22
23def generate_tokens(readline) -> Generator[TokenInfo, None, None]:
24 """wrap generate_tkens to catch EOF errors"""
25 try:
26 yield from tokenize.generate_tokens(readline)
27 except tokenize.TokenError:
28 # catch EOF error
29 return
30
31
32def generate_tokens_catch_errors(
33 readline, extra_errors_to_catch: list[str] | None = None
34):
35 default_errors_to_catch = [
36 "unterminated string literal",
37 "invalid non-printable character",
38 "after line continuation character",
39 ]
40 assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
41 errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])
42
43 tokens: list[TokenInfo] = []
44 try:
45 for token in tokenize.generate_tokens(readline):
46 tokens.append(token)
47 yield token
48 except tokenize.TokenError as exc:
49 if any(error in exc.args[0] for error in errors_to_catch):
50 if tokens:
51 start = tokens[-1].start[0], tokens[-1].end[0]
52 end = start
53 line = tokens[-1].line
54 else:
55 start = end = (1, 0)
56 line = ""
57 yield TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
58 else:
59 # Catch EOF
60 raise
61
62
63def line_at_cursor(cell: str, cursor_pos: int = 0) -> tuple[str, int]:
64 """Return the line in a cell at a given cursor position
65
66 Used for calling line-based APIs that don't support multi-line input, yet.
67
68 Parameters
69 ----------
70 cell : str
71 multiline block of text
72 cursor_pos : integer
73 the cursor position
74
75 Returns
76 -------
77 (line, offset): (string, integer)
78 The line with the current cursor, and the character offset of the start of the line.
79 """
80 offset = 0
81 lines = cell.splitlines(True)
82 for line in lines:
83 next_offset = offset + len(line)
84 if not line.endswith("\n"):
85 # If the last line doesn't have a trailing newline, treat it as if
86 # it does so that the cursor at the end of the line still counts
87 # as being on that line.
88 next_offset += 1
89 if next_offset > cursor_pos:
90 break
91 offset = next_offset
92 else:
93 line = ""
94 return line, offset
95
96
97def token_at_cursor(cell: str, cursor_pos: int = 0) -> str:
98 """Get the token at a given cursor
99
100 Used for introspection.
101
102 Function calls are prioritized, so the token for the callable will be returned
103 if the cursor is anywhere inside the call.
104
105 Parameters
106 ----------
107 cell : str
108 A block of Python code
109 cursor_pos : int
110 The location of the cursor in the block where the token should be found
111 """
112 names: list[str] = []
113 call_names: list[str] = []
114 closing_call_name: str | None = None
115 most_recent_outer_name: str | None = None
116
117 offsets = {1: 0} # lines start at 1
118 intersects_with_cursor = False
119 cur_token_is_name = False
120 tokens: list[Token | None] = [
121 Token(*tup) for tup in generate_tokens(StringIO(cell).readline)
122 ]
123 if not tokens:
124 return ""
125 for prev_tok, (tok, next_tok) in zip(
126 [None] + tokens, itertools.pairwise(tokens + [None])
127 ):
128 # token, text, start, end, line = tup
129 start_line, start_col = tok.start
130 end_line, end_col = tok.end
131 if end_line + 1 not in offsets:
132 # keep track of offsets for each line
133 lines = tok.line.splitlines(True)
134 for lineno, line in enumerate(lines, start_line + 1):
135 if lineno not in offsets:
136 offsets[lineno] = offsets[lineno - 1] + len(line)
137
138 closing_call_name = None
139
140 offset = offsets[start_line]
141 if offset + start_col > cursor_pos:
142 # current token starts after the cursor,
143 # don't consume it
144 break
145
146 if cur_token_is_name := tok.token == tokenize.NAME and not iskeyword(tok.text):
147 if (
148 names
149 and prev_tok
150 and prev_tok.token == tokenize.OP
151 and prev_tok.text == "."
152 ):
153 names[-1] = "%s.%s" % (names[-1], tok.text)
154 else:
155 names.append(tok.text)
156 if (
157 next_tok is not None
158 and next_tok.token == tokenize.OP
159 and next_tok.text == "="
160 ):
161 # don't inspect the lhs of an assignment
162 names.pop(-1)
163 cur_token_is_name = False
164 if not call_names:
165 most_recent_outer_name = names[-1] if names else None
166 elif tok.token == tokenize.OP:
167 if tok.text == "(" and names:
168 # if we are inside a function call, inspect the function
169 call_names.append(names[-1])
170 elif tok.text == ")" and call_names:
171 # keep track of the most recently popped call_name from the stack
172 closing_call_name = call_names.pop(-1)
173
174 if offsets[end_line] + end_col > cursor_pos:
175 # we found the cursor, stop reading
176 # if the current token intersects directly, use it instead of the call token
177 intersects_with_cursor = offsets[start_line] + start_col <= cursor_pos
178 break
179
180 if cur_token_is_name and intersects_with_cursor:
181 return names[-1]
182 # if the cursor isn't directly over a name token, use the most recent
183 # call name if we can find one
184 elif closing_call_name:
185 # if we're on a ")", use the most recently popped call name
186 return closing_call_name
187 elif call_names:
188 # otherwise, look for the most recent call name in the stack
189 return call_names[-1]
190 elif most_recent_outer_name:
191 # if we've popped all the call names, use the most recently-seen
192 # outer name
193 return most_recent_outer_name
194 elif names:
195 # failing that, use the most recently seen name
196 return names[-1]
197 else:
198 # give up
199 return ""