Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_nodes/whitespace.py: 75%
95 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
1# Copyright (c) Meta Platforms, Inc. and affiliates.
2#
3# This source code is licensed under the MIT license found in the
4# LICENSE file in the root directory of this source tree.
7import re
8from abc import ABC, abstractmethod
9from dataclasses import dataclass
10from typing import Optional, Pattern, Sequence
12from libcst._add_slots import add_slots
13from libcst._nodes.base import BaseLeaf, BaseValueToken, CSTNode, CSTValidationError
14from libcst._nodes.internal import (
15 CodegenState,
16 visit_optional,
17 visit_required,
18 visit_sequence,
19)
20from libcst._visitors import CSTVisitorT
# Simple whitespace may include a line-continuation backslash, which must be
# followed immediately by a newline. Any other newline is excluded from simple
# whitespace, because it may have semantic significance.
SIMPLE_WHITESPACE_RE: Pattern[str] = re.compile(
    r"([ \f\t]|\\(\r\n?|\n))*",
    re.UNICODE,
)

# Exactly one line terminator: ``\r\n``, a lone ``\r``, or ``\n``.
NEWLINE_RE: Pattern[str] = re.compile(
    r"\r\n?|\n",
    re.UNICODE,
)

# A pound sign followed by any run of characters other than ``\r`` and ``\n``.
COMMENT_RE: Pattern[str] = re.compile(
    r"#[^\r\n]*",
    re.UNICODE,
)
class BaseParenthesizableWhitespace(CSTNode, ABC):
    """
    Abstract base for the whitespace that sits between two tokens inside the body
    of a statement or expression. This is the most common type of whitespace.

    Which characters are allowed depends on whether the whitespace appears inside
    a parenthesized expression. Nodes that may be found either inside or outside a
    ``()``, ``[]`` or ``{}`` section annotate their whitespace with this class so
    that they can accept either whitespace form.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    A backslash (``\\``) may appear in parenthesizable whitespace when it is used
    as a line-continuation character. The continuation character isn't technically
    "whitespace", but it serves the same purpose.

    This whitespace is often non-semantic (optional). Where it resolves a grammar
    ambiguity between tokens (e.g. ``if test``, versus ``iftest``), it has some
    semantic value.
    """

    __slots__ = ()

    # TODO: Should we somehow differentiate places where we require non-zero whitespace
    # with a separate type?

    @property
    @abstractmethod
    def empty(self) -> bool:
        """
        ``True`` when this node holds zero whitespace characters. Every concrete
        subclass must provide this property.
        """
        ...
@add_slots
@dataclass(frozen=True)
class SimpleWhitespace(BaseParenthesizableWhitespace, BaseValueToken):
    """
    This is the kind of whitespace you might see inside the body of a statement or
    expression between two tokens. This is the most common type of whitespace.

    A simple whitespace cannot contain a newline character unless it is directly
    preceded by a line continuation character (``\\``). It can contain zero or
    more spaces or tabs. If you need a newline character without a line continuation
    character, use :class:`ParenthesizedWhitespace` instead.

    Simple whitespace is often non-semantic (optional), but in cases where whitespace
    solves a grammar ambiguity between tokens (e.g. ``if test``, versus ``iftest``),
    it has some semantic value.

    An example :class:`SimpleWhitespace` containing a space, a line continuation,
    a newline and another space is as follows::

        SimpleWhitespace(" \\\\\\n ")
    """

    #: Actual string value of the simple whitespace. A legal value contains only
    #: space, ``\f`` and ``\t`` characters, and optionally a continuation
    #: (``\``) followed immediately by a newline (``\n``, ``\r\n`` or ``\r``).
    value: str

    def _validate(self) -> None:
        """
        Check ``value`` against ``SIMPLE_WHITESPACE_RE``.

        Raises:
            CSTValidationError: if the whole value does not match the pattern.
        """
        # fullmatch: the entire string must be whitespace/continuations, not just
        # a prefix of it.
        if SIMPLE_WHITESPACE_RE.fullmatch(self.value) is None:
            raise CSTValidationError(
                f"Got non-whitespace value for whitespace node: {self.value!r}"
            )

    @property
    def empty(self) -> bool:
        """
        Indicates that this node is empty (zero whitespace characters).
        """
        return not self.value
@add_slots
@dataclass(frozen=True)
class Newline(BaseLeaf):
    """
    The newline that terminates an :class:`EmptyLine` or a statement (as part of
    :class:`TrailingWhitespace`).

    A document may contain other newlines after continuation characters (the
    backslash, ``\\``); those belong to the surrounding :class:`SimpleWhitespace`
    rather than to a :class:`Newline` node.

    A value may optionally be given to override the module's default newline. In
    general it should be left as the default, ``None``. Overriding is permitted
    because python modules are allowed to mix multiple unambiguous newline markers.
    """

    #: ``None`` means the module's default newline sequence is used. A value of
    #: ``\n`` or ``\r\n`` means exactly that value is emitted for this newline.
    value: Optional[str] = None

    def _validate(self) -> None:
        """
        Check an explicitly supplied newline against ``NEWLINE_RE``.

        Raises:
            CSTValidationError: if a non-empty value is not a single newline.
        """
        value = self.value
        # Falsy values (``None`` and the empty string) bypass the pattern check.
        if not value:
            return
        if NEWLINE_RE.fullmatch(value) is None:
            raise CSTValidationError(
                f"Got an invalid value for newline node: {repr(value)}"
            )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit this node's explicit value, or the state's default newline when the
        value is ``None``.
        """
        value = self.value
        if value is None:
            state.add_token(state.default_newline)
        else:
            state.add_token(value)
@add_slots
@dataclass(frozen=True)
class Comment(BaseValueToken):
    """
    A comment, including its leading pound (``#``) character.

    The pound character is kept in the 'value' property (rather than being
    stripped) to reinforce the idea that whitespace immediately after the pound
    character may be significant. E.g::

        # comment with whitespace at the start (usually preferred)
        #comment without whitespace at the start (usually not desirable)

    Usually wrapped in a :class:`TrailingWhitespace` or :class:`EmptyLine` node.
    """

    #: The comment text. A valid value begins with the pound (``#``) character and
    #: continues with zero or more non-newline characters. Newlines are not allowed.
    value: str

    def _validate(self) -> None:
        """
        Check ``value`` against ``COMMENT_RE``.

        Raises:
            CSTValidationError: if the whole value does not match the pattern.
        """
        if COMMENT_RE.fullmatch(self.value) is not None:
            return
        raise CSTValidationError(
            f"Got non-comment value for comment node: {repr(self.value)}"
        )
@add_slots
@dataclass(frozen=True)
class TrailingWhitespace(CSTNode):
    """
    Whitespace found at the end of a line, after a statement. For a line that
    holds nothing but whitespace, use :class:`EmptyLine` instead.
    """

    #: Simple whitespace that precedes the comment (if any) and the newline.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: Optional comment that follows the simple whitespace.
    comment: Optional[Comment] = None

    #: Newline that terminates this trailing whitespace.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "TrailingWhitespace":
        """
        Visit ``whitespace``, ``comment`` and ``newline`` in that order and build
        a new node from the results.
        """
        new_whitespace = visit_required(self, "whitespace", self.whitespace, visitor)
        new_comment = visit_optional(self, "comment", self.comment, visitor)
        new_newline = visit_required(self, "newline", self.newline, visitor)
        return TrailingWhitespace(
            whitespace=new_whitespace,
            comment=new_comment,
            newline=new_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit the whitespace, then the comment when one is present, then the newline.
        """
        self.whitespace._codegen(state)
        if self.comment is not None:
            self.comment._codegen(state)
        self.newline._codegen(state)
@add_slots
@dataclass(frozen=True)
class EmptyLine(CSTNode):
    """
    A line that contains only whitespace and/or a comment. Usually a statement owns
    the :class:`EmptyLine` nodes directly above it, and a :class:`Module` owns the
    :class:`EmptyLine` nodes in the document's header/footer.
    """

    #: Whether indentation tokens are emitted before this line's whitespace. An
    #: empty line need not follow the current indentation level; for example, when
    #: all trailing whitespace has been stripped and an empty line separates two
    #: statements.
    indent: bool = True

    #: Additional whitespace after the indent and before the comment.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: Optional comment that follows the indent and additional whitespace.
    comment: Optional[Comment] = None

    #: Newline that terminates this empty line.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "EmptyLine":
        """
        Visit ``whitespace``, ``comment`` and ``newline`` in that order and build
        a new node from the results; ``indent`` is carried over unchanged.
        """
        new_whitespace = visit_required(self, "whitespace", self.whitespace, visitor)
        new_comment = visit_optional(self, "comment", self.comment, visitor)
        new_newline = visit_required(self, "newline", self.newline, visitor)
        return EmptyLine(
            indent=self.indent,
            whitespace=new_whitespace,
            comment=new_comment,
            newline=new_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit indentation tokens when ``indent`` is set, then the whitespace, the
        comment when one is present, and finally the newline.
        """
        if self.indent:
            state.add_indent_tokens()
        self.whitespace._codegen(state)
        if self.comment is not None:
            self.comment._codegen(state)
        self.newline._codegen(state)
@add_slots
@dataclass(frozen=True)
class ParenthesizedWhitespace(BaseParenthesizableWhitespace):
    """
    The kind of whitespace found between two tokens inside a parenthesized
    expression or statement when a newline appears without a line continuation
    (``\\``) character.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    Parenthesized whitespace can never be empty, because it always requires at
    least one :class:`TrailingWhitespace`. For whitespace without comments or
    newlines, use :class:`SimpleWhitespace` instead.
    """

    #: Whitespace following the previous node, up to and including the end-of-line
    #: comment and newline.
    first_line: TrailingWhitespace = TrailingWhitespace.field()

    #: Lines after the first that hold only indentation and/or comments.
    empty_lines: Sequence[EmptyLine] = ()

    #: Whether or not the final simple whitespace is indented regularly.
    indent: bool = False

    #: Additional whitespace after the indent and before the next node.
    last_line: SimpleWhitespace = SimpleWhitespace.field("")

    def _visit_and_replace_children(
        self, visitor: CSTVisitorT
    ) -> "ParenthesizedWhitespace":
        """
        Visit ``first_line``, ``empty_lines`` and ``last_line`` in that order and
        build a new node from the results; ``indent`` is carried over unchanged.
        """
        new_first_line = visit_required(self, "first_line", self.first_line, visitor)
        new_empty_lines = visit_sequence(self, "empty_lines", self.empty_lines, visitor)
        new_last_line = visit_required(self, "last_line", self.last_line, visitor)
        return ParenthesizedWhitespace(
            first_line=new_first_line,
            empty_lines=new_empty_lines,
            indent=self.indent,
            last_line=new_last_line,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Emit the first line, each empty line in sequence, indentation tokens when
        ``indent`` is set, and finally the last line's whitespace.
        """
        self.first_line._codegen(state)
        for empty_line in self.empty_lines:
            empty_line._codegen(state)
        if self.indent:
            state.add_indent_tokens()
        self.last_line._codegen(state)

    @property
    def empty(self) -> bool:
        """
        Indicates that this node is empty (zero whitespace characters). For
        :class:`ParenthesizedWhitespace` this will always be ``False``.
        """
        # It's not possible to have a ParenthesizedWhitespace with zero characters.
        # If we did, the TrailingWhitespace would not have parsed.
        return False