Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/black/comments.py: 17%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2from collections.abc import Collection, Iterator
3from dataclasses import dataclass
4from functools import lru_cache
5from typing import Final, Optional, Union
7from black.mode import Mode, Preview
8from black.nodes import (
9 CLOSING_BRACKETS,
10 STANDALONE_COMMENT,
11 WHITESPACE,
12 container_of,
13 first_leaf_of,
14 make_simple_prefix,
15 preceding_leaf,
16 syms,
17)
18from blib2to3.pgen2 import token
19from blib2to3.pytree import Leaf, Node
# types
# Either kind of syntax-tree element handled in this module: a leaf or a node.
LN = Union[Leaf, Node]

# Comment texts recognized as formatting markers. They are compared verbatim
# against `ProtoComment.value`, i.e. against the output of `make_comment`.
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}

# Characters that may directly follow the leading "#" without `make_comment`
# inserting a space after the hash sign.
COMMENT_EXCEPTIONS = " !:#'"
# Separators used by `_contains_fmt_skip_comment` to break one comment line
# into the individual sub-comments it may contain.
_COMMENT_PREFIX = "# "
_COMMENT_LIST_SEPARATOR = ";"
@dataclass
class ProtoComment:
    """Lightweight record of one comment found inside a leaf prefix.

    A dedicated record is used instead of a :class:`blib2to3.pytree.Leaf`
    for two reasons:

    * records can be cached and shared, whereas a `Leaf` carries its own
      lineno, column, prefix, and parent and therefore must not be reused;
    * the bookkeeping fields (`newlines`, `consumed`) live next to, not
      inside, the comment text in `value`, which keeps the detection of
      marker comments such as ``# fmt: off/on`` a plain string comparison.
    """

    # Token kind: token.COMMENT or STANDALONE_COMMENT.
    type: int
    # The text of the comment.
    value: str
    # Count of newlines found ahead of the comment.
    newlines: int
    # Number of characters of the owning leaf's prefix used up through this
    # comment.
    consumed: int
    # Whether a form feed was seen ahead of the comment.
    form_feed: bool
    # Whitespace (possibly empty) that preceded the comment on its line.
    leading_whitespace: str
def generate_comments(leaf: LN) -> Iterator[Leaf]:
    """Yield the comments stored in `leaf`'s prefix, then tidy that prefix.

    lib2to3 keeps comments inside the whitespace prefix of the token that
    follows them (see `pgen2/driver.py:Driver.parse_tokens()`). That spares
    the grammar from describing every place a comment may appear, but it also
    means a comment has no natural parent in the tree. The leaves produced
    here are therefore parentless.

    Only one distinction is made: a comment sharing its line with code is
    emitted as a regular ``token.COMMENT`` leaf, while a comment on a line of
    its own is emitted with the fake ``STANDALONE_COMMENT`` token type.

    Once every comment has been yielded (i.e. when the generator is run to
    exhaustion), whatever is left of the prefix after the last comment is
    normalized in place via `normalize_trailing_prefix`.
    """
    proto_comments = list_comments(
        leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER
    )
    for proto in proto_comments:
        yield Leaf(
            proto.type,
            proto.value,
            prefix=make_simple_prefix(proto.newlines, proto.form_feed),
        )
    # Only the last comment's offset matters; with no comments nothing was
    # consumed.
    consumed_chars = proto_comments[-1].consumed if proto_comments else 0
    normalize_trailing_prefix(leaf, consumed_chars)
@lru_cache(maxsize=4096)
def list_comments(prefix: str, *, is_endmarker: bool) -> list[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    The prefix is processed line by line (``\\n``, ``\\r\\n`` and ``\\r`` are all
    accepted as line terminators). Each line whose first non-whitespace
    character is ``#`` produces one `ProtoComment`.

    Args:
        prefix: the whitespace/comment prefix of a leaf.
        is_endmarker: when true, every comment is reported as
            ``STANDALONE_COMMENT``, even one on the first line of the prefix.

    Returns:
        The comments in order of appearance; empty if `prefix` is empty or
        contains no ``#``. Results are memoized by `lru_cache`, so the
        returned list is shared between callers and must not be mutated.
    """
    result: list[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    # Splitting on a *capturing* group keeps the terminators in the output:
    # even positions hold line contents, odd positions the terminator that
    # followed each line. This lets `consumed` account for two-character
    # "\r\n" terminators instead of assuming every terminator is one char.
    pieces = re.split("(\r?\n|\r)", prefix)
    lines = pieces[0::2]
    separators = pieces[1::2]

    consumed = 0
    nlines = 0
    ignored_lines = 0
    form_feed = False
    for index, full_line in enumerate(lines):
        # The final line has no terminator; it is still counted as one
        # character, as before, so offsets for "\n"-only prefixes are
        # unchanged (slicing past the end of the prefix is harmless).
        separator_len = len(separators[index]) if index < len(separators) else 1
        consumed += len(full_line) + separator_len
        match = re.match(r"^(\s*)(\S.*|)$", full_line)
        assert match
        whitespace, line = match.groups()
        if not line:
            nlines += 1
        if "\f" in full_line:
            form_feed = True
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        # A comment on the first "real" line of the prefix shares its line
        # with the preceding code, unless this is the end marker's prefix.
        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line)
        result.append(
            ProtoComment(
                type=comment_type,
                value=comment,
                newlines=nlines,
                consumed=consumed,
                form_feed=form_feed,
                leading_whitespace=whitespace,
            )
        )
        # Blank-line and form-feed counts apply to the next comment only.
        form_feed = False
        nlines = 0
    return result
def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None:
    """Rewrite `leaf.prefix` from the part that follows the generated comments.

    `total_consumed` is the number of leading prefix characters already used
    up by comments. If what remains contains a backslash, the prefix is simply
    emptied. Otherwise it is replaced by a simple prefix built from the number
    of newlines left, with a form feed requested only when the remainder
    contains one and ends with a newline.
    """
    leftover = leaf.prefix[total_consumed:]
    if "\\" in leftover:
        # Backslash continuations are not preserved: drop everything.
        leaf.prefix = ""
        return

    newline_total = leftover.count("\n")
    wants_form_feed = "\f" in leftover and leftover.endswith("\n")
    leaf.prefix = make_simple_prefix(newline_total, wants_form_feed)
def make_comment(content: str) -> str:
    """Return a consistently formatted comment from the given `content` string.

    All comments should have a single space between the hash sign and the
    content, except when the character right after the hash is one of
    `COMMENT_EXCEPTIONS` (as in "##", "#!", "#:", "#'"), in which case the
    text is left as is.

    A non-breaking space directly after the hash is replaced by a regular
    space, unless the comment is a ``type:`` comment.

    If `content` didn't start with a hash sign, one is provided. Trailing
    whitespace is removed; empty content yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Written as an escape on purpose: a raw U+00A0 character cannot be told
    # apart from an ordinary space on screen and is easily turned into one by
    # editors or copy/paste, which would silently disable the check below.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS:
        content = " " + content
    return "#" + content
def normalize_fmt_off(
    node: Node, mode: Mode, lines: Collection[tuple[int, int]]
) -> None:
    """Turn every `# fmt: off`/`# fmt: on` region under `node` into standalone comments.

    Conversion is done one region at a time: `convert_one_fmt_off_pair` is
    called repeatedly until it reports that nothing was converted.
    """
    while convert_one_fmt_off_pair(node, mode, lines):
        pass
def convert_one_fmt_off_pair(
    node: Node, mode: Mode, lines: Collection[tuple[int, int]]
) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Scans the leaves of `node` in order, looking in each leaf's prefix for a
    `# fmt: off` comment or a comment containing `# fmt: skip`. For the first
    usable one, the nodes it covers (as produced by `generate_ignored_nodes`)
    are removed from the tree and replaced by one `STANDALONE_COMMENT` leaf
    whose value is their verbatim source text.

    Args:
        node: root of the tree to scan; the tree is modified in place.
        mode: passed through to the fmt-skip detection helpers.
        lines: (start, end) line-number pairs. Only consulted for `# fmt: off`:
            when non-empty and the comment's line lies in none of the ranges,
            the comment's original indentation is preserved.
            NOTE(review): presumably the line ranges selected for formatting;
            confirm against the caller.

    Returns True if a pair was converted, False if no leaf yielded anything to
    convert.
    """
    for leaf in node.leaves():
        # End offset (within the prefix) of the last comment that was skipped
        # over; everything before it stays in front of the new comment leaf.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False):
            is_fmt_off = comment.value in FMT_OFF
            is_fmt_skip = _contains_fmt_skip_comment(comment.value, mode)
            if (not is_fmt_off and not is_fmt_skip) or (
                # Invalid use when `# fmt: off` is applied before a closing bracket.
                is_fmt_off
                and leaf.type in CLOSING_BRACKETS
            ):
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    # A trailing `# fmt: off` after real code is ignored.
                    if is_fmt_off and prev.type not in WHITESPACE:
                        continue
                    # A `# fmt: skip` that follows only whitespace has no
                    # code on its line to protect.
                    if is_fmt_skip and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment, mode))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            # Snapshot of the prefix before it is rewritten below.
            prefix = first.prefix
            if comment.value in FMT_OFF:
                # Strip everything up to and including the marker comment.
                first.prefix = prefix[comment.consumed :]
            if is_fmt_skip:
                first.prefix = ""
                standalone_comment_prefix = prefix
            else:
                # Keep earlier comments, plus the blank lines that preceded
                # the marker.
                standalone_comment_prefix = (
                    prefix[:previous_consumed] + "\n" * comment.newlines
                )
            # Verbatim source of everything being hidden from the formatter.
            hidden_value = "".join(str(n) for n in ignored_nodes)
            comment_lineno = leaf.lineno - comment.newlines
            if comment.value in FMT_OFF:
                fmt_off_prefix = ""
                if len(lines) > 0 and not any(
                    line[0] <= comment_lineno <= line[1] for line in lines
                ):
                    # keeping indentation of comment by preserving original whitespaces.
                    fmt_off_prefix = prefix.split(comment.value)[0]
                    if "\n" in fmt_off_prefix:
                        fmt_off_prefix = fmt_off_prefix.split("\n")[-1]
                standalone_comment_prefix += fmt_off_prefix
                # The marker itself becomes the first line of the hidden text.
                hidden_value = comment.value + "\n" + hidden_value
            if is_fmt_skip:
                # The skip comment trails the hidden code on the same line.
                hidden_value += comment.leading_whitespace + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            # Detach every ignored node; the value `remove()` returned for the
            # first one is reused as the insertion index for the replacement.
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=standalone_comment_prefix,
                    fmt_pass_converted_first_leaf=first_leaf_of(first),
                ),
            )
            # The tree changed under the iteration; the caller is expected to
            # call again to find the next pair.
            return True

    return False
def generate_ignored_nodes(
    leaf: Leaf, comment: ProtoComment, mode: Mode
) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    If `comment` contains a `# fmt: skip`, the work is delegated entirely to
    `_generate_ignored_nodes_from_fmt_skip`.
    Otherwise successive siblings are yielded, starting at the container of
    `leaf`, until formatting is switched back on, the end marker is reached,
    or the siblings run out (end of the block).
    """
    if _contains_fmt_skip_comment(comment.value, mode):
        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, mode)
        return
    container: Optional[LN] = container_of(leaf)
    while container is not None and container.type != token.ENDMARKER:
        # `# fmt: on` sits directly in this container's prefix: stop before it.
        if is_fmt_on(container):
            return

        # fix for fmt: on in children
        if children_contains_fmt_on(container):
            # `# fmt: on` is somewhere inside this container: yield only the
            # children that come before it. Every path through this loop ends
            # in a `return`, at the latest on the child that holds the marker.
            for index, child in enumerate(container.children):
                if isinstance(child, Leaf) and is_fmt_on(child):
                    if child.type in CLOSING_BRACKETS:
                        # This means `# fmt: on` is placed at a different bracket level
                        # than `# fmt: off`. This is an invalid use, but as a courtesy,
                        # we include this closing bracket in the ignored nodes.
                        # The alternative is to fail the formatting.
                        yield child
                    return
                if (
                    child.type == token.INDENT
                    and index < len(container.children) - 1
                    and children_contains_fmt_on(container.children[index + 1])
                ):
                    # This means `# fmt: on` is placed right after an indentation
                    # level, and we shouldn't swallow the previous INDENT token.
                    return
                if children_contains_fmt_on(child):
                    return
                yield child
        else:
            if container.type == token.DEDENT and container.next_sibling is None:
                # This can happen when there is no matching `# fmt: on` comment at the
                # same level as `# fmt: off`. We need to keep this DEDENT.
                return
            yield container
            container = container.next_sibling
def _generate_ignored_nodes_from_fmt_skip(
    leaf: Leaf, comment: ProtoComment, mode: Mode
) -> Iterator[LN]:
    """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`.

    `leaf` is the leaf whose prefix holds the `# fmt: skip` comment. Nothing
    is generated unless `comment` is the first comment of that prefix.

    Two situations are handled:

    * `leaf` has a previous sibling: the nodes on the same line, to the left
      of the comment, are generated. How far left the search goes depends on
      whether `Preview.fix_fmt_skip_in_one_liners` is enabled in `mode`.
    * `leaf` is a NEWLINE directly inside a suite: the comment sits on the
      header line of a compound statement, and the siblings preceding the
      suite are generated.

    Side effects: the prefix of `leaf` is trimmed or cleared, and in the
    preview traversal the prefix of a NEWLINE/INDENT leaf that ends the walk
    is cleared as well.
    """
    prev_sibling = leaf.prev_sibling
    parent = leaf.parent
    ignored_nodes: list[LN] = []
    # Need to properly format the leaf prefix to compare it to comment.value,
    # which is also formatted
    comments = list_comments(leaf.prefix, is_endmarker=False)
    if not comments or comment.value != comments[0].value:
        return
    if prev_sibling is not None:
        # Drop the skip comment (and anything before it) from the prefix.
        leaf.prefix = leaf.prefix[comment.consumed :]

        if Preview.fix_fmt_skip_in_one_liners not in mode:
            # Stable behavior: walk left through direct siblings only, until
            # one whose prefix contains a newline (i.e. the start of the line)
            # or until the siblings run out.
            siblings = [prev_sibling]
            while (
                "\n" not in prev_sibling.prefix
                and prev_sibling.prev_sibling is not None
            ):
                prev_sibling = prev_sibling.prev_sibling
                siblings.insert(0, prev_sibling)
            yield from siblings
            return

        # Generates the nodes to be ignored by `fmt: skip`.

        # Nodes to ignore are the ones on the same line as the
        # `# fmt: skip` comment, excluding the `# fmt: skip`
        # node itself.

        # Traversal process (starting at the `# fmt: skip` node):
        # 1. Move to the `prev_sibling` of the current node.
        # 2. If `prev_sibling` has children, go to its rightmost leaf.
        # 3. If there's no `prev_sibling`, move up to the parent
        #    node and repeat.
        # 4. Continue until:
        #    a. You encounter an `INDENT` or `NEWLINE` node (indicates
        #       start of the line).
        #    b. You reach the root node.

        # Include all visited LEAVES in the ignored list, except INDENT
        # or NEWLINE leaves.

        current_node = prev_sibling
        ignored_nodes = [current_node]
        if current_node.prev_sibling is None and current_node.parent is not None:
            current_node = current_node.parent
        while "\n" not in current_node.prefix and current_node.prev_sibling is not None:
            # Step to the rightmost leaf of the previous sibling; if that
            # sibling has no leaves, stay on the current node.
            leaf_nodes = list(current_node.prev_sibling.leaves())
            current_node = leaf_nodes[-1] if leaf_nodes else current_node

            if current_node.type in (token.NEWLINE, token.INDENT):
                # Start of the line reached; this leaf is not ignored.
                current_node.prefix = ""
                break

            ignored_nodes.insert(0, current_node)

            if current_node.prev_sibling is None and current_node.parent is not None:
                current_node = current_node.parent
        yield from ignored_nodes
    elif (
        parent is not None and parent.type == syms.suite and leaf.type == token.NEWLINE
    ):
        # The `# fmt: skip` is on the colon line of the if/while/def/class/...
        # statements. The ignored nodes should be previous siblings of the
        # parent suite node.
        leaf.prefix = ""
        parent_sibling = parent.prev_sibling
        # Collect siblings right-to-left, stopping at an earlier suite so that
        # only the header belonging to this suite is taken.
        while parent_sibling is not None and parent_sibling.type != syms.suite:
            ignored_nodes.insert(0, parent_sibling)
            parent_sibling = parent_sibling.prev_sibling
        # Special case for `async_stmt` where the ASYNC token is on the
        # grandparent node.
        grandparent = parent.parent
        if (
            grandparent is not None
            and grandparent.prev_sibling is not None
            and grandparent.prev_sibling.type == token.ASYNC
        ):
            ignored_nodes.insert(0, grandparent.prev_sibling)
        yield from iter(ignored_nodes)
def is_fmt_on(container: LN) -> bool:
    """Tell whether the prefix of `container` leaves formatting switched on.

    The answer is decided by the last on/off marker comment in the prefix:
    True if it is one of `FMT_ON`, False if it is one of `FMT_OFF`, and False
    as well when the prefix holds no such marker.
    """
    markers = list_comments(container.prefix, is_endmarker=False)
    # Scan from the end: the first marker met is the last one in the prefix.
    for marker in reversed(markers):
        if marker.value in FMT_ON:
            return True
        if marker.value in FMT_OFF:
            return False
    return False
def children_contains_fmt_on(container: LN) -> bool:
    """Tell whether any direct child of `container` begins with `# fmt: on`.

    A child counts when it has a first leaf and `is_fmt_on` holds for that
    leaf. Children are examined in order and the search stops at the first
    hit.
    """
    first_leaves = (first_leaf_of(child) for child in container.children)
    return any(
        candidate is not None and is_fmt_on(candidate) for candidate in first_leaves
    )
def contains_pragma_comment(comment_list: list[Leaf]) -> bool:
    """Report whether `comment_list` holds a static-analysis pragma comment.

    Returns:
        True iff the value of at least one comment starts with a marker used
        by the more common static analysis tools for python (e.g. mypy,
        flake8, pylint): "# type:", "# noqa" or "# pylint:".
    """
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(leaf.value.startswith(pragma_prefixes) for leaf in comment_list)
def _contains_fmt_skip_comment(comment_line: str, mode: Mode) -> bool:
    """
    Tell whether `comment_line` is, or carries, one of the FMT_SKIP comments.

    Recognized shapes:
        # fmt:skip                           <-- the whole line is the marker
        # noqa:XXX # fmt:skip # a nice line  <-- several "# "-introduced comments
        # pylint:XXX; fmt:skip               <-- ";"-separated list of comments

    Each candidate piece is stripped of surrounding whitespace and given a
    "# " prefix before being compared with FMT_SKIP. `mode` is accepted but
    not consulted by this implementation.
    """
    # 1. The line exactly as given.
    candidates = [comment_line]

    # 2. Every piece that follows a "# " introducer.
    for piece in comment_line.split(_COMMENT_PREFIX)[1:]:
        candidates.append(_COMMENT_PREFIX + piece.strip())

    # 3. Every item of the ";"-separated list, after trimming "#" and " "
    #    characters from both ends of the line.
    trimmed_line = comment_line.strip(_COMMENT_PREFIX)
    for piece in trimmed_line.split(_COMMENT_LIST_SEPARATOR):
        candidates.append(_COMMENT_PREFIX + piece.strip())

    return not FMT_SKIP.isdisjoint(candidates)