Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/_nodes/whitespace.py: 75%

95 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:43 +0000

1# Copyright (c) Meta Platforms, Inc. and affiliates. 

2# 

3# This source code is licensed under the MIT license found in the 

4# LICENSE file in the root directory of this source tree. 

5 

6 

7import re 

8from abc import ABC, abstractmethod 

9from dataclasses import dataclass 

10from typing import Optional, Pattern, Sequence 

11 

12from libcst._add_slots import add_slots 

13from libcst._nodes.base import BaseLeaf, BaseValueToken, CSTNode, CSTValidationError 

14from libcst._nodes.internal import ( 

15 CodegenState, 

16 visit_optional, 

17 visit_required, 

18 visit_sequence, 

19) 

20from libcst._visitors import CSTVisitorT 

21 

# Simple whitespace: any run of spaces, form feeds and tabs, interleaved with line
# continuations (a backslash that is immediately followed by a newline sequence).
# Newlines that are not escaped by a backslash are deliberately excluded, because
# those may have semantic significance.
SIMPLE_WHITESPACE_RE: Pattern[str] = re.compile(
    r"([ \f\t]|\\(\r\n?|\n))*", flags=re.UNICODE
)
# Exactly one newline sequence: "\r\n", a lone "\r", or "\n".
NEWLINE_RE: Pattern[str] = re.compile(r"\r\n?|\n", flags=re.UNICODE)
# A comment: "#" followed by zero or more characters that are not "\r" or "\n".
COMMENT_RE: Pattern[str] = re.compile(r"#[^\r\n]*", flags=re.UNICODE)

28 

29 

class BaseParenthesizableWhitespace(CSTNode, ABC):
    """
    Common base for the whitespace found between two tokens inside the body of a
    statement or expression. This is the most common type of whitespace.

    Which characters such whitespace may contain depends on whether or not it sits
    inside a parenthesized expression. Nodes that can appear both inside and
    outside of a ``()``, ``[]`` or ``{}`` section use this class so that they can
    accept either whitespace form.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    A backslash character (``\\``) used as a line-continuation character may be
    part of parenthesizable whitespace. The continuation character isn't
    technically "whitespace", but it serves the same purpose.

    This whitespace is often non-semantic (optional). It does have some semantic
    value wherever it resolves a grammar ambiguity between tokens (e.g.
    ``if test``, versus ``iftest``).
    """

    __slots__ = ()

    # TODO: Should places that require non-zero whitespace be differentiated
    # somehow, e.g. with a separate type?

    @property
    @abstractmethod
    def empty(self) -> bool:
        """
        Whether this node is empty, i.e. holds zero whitespace characters.
        """
        ...

63 

64 

@add_slots
@dataclass(frozen=True)
class SimpleWhitespace(BaseParenthesizableWhitespace, BaseValueToken):
    """
    The whitespace found between two tokens inside the body of a statement or
    expression. This is the most common type of whitespace.

    Simple whitespace holds zero or more spaces or tabs. It cannot hold a newline
    character unless that newline is directly preceded by a line continuation
    character (``\\``). For a newline without a line continuation character, use
    :class:`ParenthesizedWhitespace` instead.

    Simple whitespace is often non-semantic (optional). It does have some semantic
    value wherever it resolves a grammar ambiguity between tokens (e.g.
    ``if test``, versus ``iftest``).

    An example :class:`SimpleWhitespace` holding a space, a line continuation,
    a newline and another space::

        SimpleWhitespace(" \\\\\\n ")
    """

    #: The exact text of this whitespace. A legal value is made up solely of
    #: space, ``\f`` and ``\t`` characters, plus any number of continuations
    #: (``\``), each immediately followed by a newline (``\n``, ``\r\n`` or ``\r``).
    value: str

    def _validate(self) -> None:
        """
        Raise :class:`CSTValidationError` unless ``value`` consists entirely of
        text accepted by ``SIMPLE_WHITESPACE_RE``.
        """
        if SIMPLE_WHITESPACE_RE.fullmatch(self.value) is not None:
            return
        raise CSTValidationError(
            f"Got non-whitespace value for whitespace node: {repr(self.value)}"
        )

    @property
    def empty(self) -> bool:
        """
        Whether this node is empty, i.e. holds zero whitespace characters.
        """
        return not self.value

105 

106 

@add_slots
@dataclass(frozen=True)
class Newline(BaseLeaf):
    """
    The newline that terminates an :class:`EmptyLine` or a statement (as part of
    :class:`TrailingWhitespace`).

    A document may contain further newlines after continuation characters (the
    backslash, ``\\``), but those are treated as part of the
    :class:`SimpleWhitespace`.

    A value may optionally be given to overwrite the module's default newline. In
    general it should be left as the default, ``None``. Overriding is allowed
    because python modules are permitted to mix multiple unambiguous newline
    markers.
    """

    #: ``None`` means that the module's default newline sequence should be used.
    #: An explicit newline sequence such as ``\n`` or ``\r\n`` means that exactly
    #: that value will be used for this newline.
    value: Optional[str] = None

    def _validate(self) -> None:
        """
        Raise :class:`CSTValidationError` when a non-empty ``value`` is not exactly
        one newline sequence accepted by ``NEWLINE_RE``.
        """
        value = self.value
        # Falsy values (``None`` and the empty string) are not checked against the
        # pattern at all.
        if not value:
            return
        if NEWLINE_RE.fullmatch(value) is not None:
            return
        raise CSTValidationError(
            f"Got an invalid value for newline node: {repr(value)}"
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Add this newline to ``state`` as a token, substituting
        ``state.default_newline`` when no explicit value was given.
        """
        token = self.value
        if token is None:
            token = state.default_newline
        state.add_token(token)

139 

140 

@add_slots
@dataclass(frozen=True)
class Comment(BaseValueToken):
    """
    A comment, leading pound (``#``) character included.

    The pound character is kept in the 'value' property (instead of being
    stripped) to help reinforce the idea that whitespace immediately after the
    pound character may be significant. E.g::

        # comment with whitespace at the start (usually preferred)
        #comment without whitespace at the start (usually not desirable)

    Usually wrapped in a :class:`TrailingWhitespace` or :class:`EmptyLine` node.
    """

    #: The text of the comment. A valid value starts with the pound (``#``)
    #: character, followed by zero or more non-newline characters. A comment can
    #: never include a newline.
    value: str

    def _validate(self) -> None:
        """
        Raise :class:`CSTValidationError` unless ``value`` is fully matched by
        ``COMMENT_RE``.
        """
        if COMMENT_RE.fullmatch(self.value) is not None:
            return
        raise CSTValidationError(
            f"Got non-comment value for comment node: {repr(self.value)}"
        )

166 

167 

@add_slots
@dataclass(frozen=True)
class TrailingWhitespace(CSTNode):
    """
    The whitespace that follows a statement at the end of its line. For a line
    holding nothing but whitespace, :class:`EmptyLine` should be used instead.
    """

    #: Any simple whitespace before any comment or newline.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: An optional comment appearing after any simple whitespace.
    comment: Optional[Comment] = None

    #: The newline character that terminates this trailing whitespace.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "TrailingWhitespace":
        """
        Build a new :class:`TrailingWhitespace` from the results of visiting
        ``whitespace``, ``comment`` and ``newline``, in that order.
        """
        visited_whitespace = visit_required(
            self, "whitespace", self.whitespace, visitor
        )
        visited_comment = visit_optional(self, "comment", self.comment, visitor)
        visited_newline = visit_required(self, "newline", self.newline, visitor)
        return TrailingWhitespace(
            whitespace=visited_whitespace,
            comment=visited_comment,
            newline=visited_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Generate code for the whitespace, then the comment (when there is one),
        then the newline.
        """
        self.whitespace._codegen(state)
        if self.comment is not None:
            self.comment._codegen(state)
        self.newline._codegen(state)

198 

199 

@add_slots
@dataclass(frozen=True)
class EmptyLine(CSTNode):
    """
    A line that holds only whitespace/comments. Usually a statement owns any
    :class:`EmptyLine` nodes above itself, and a :class:`Module` owns the
    document's header/footer :class:`EmptyLine` nodes.
    """

    #: Whether the line is indented regularly. An empty line doesn't have to
    #: correspond to the current indentation level; for example, this happens
    #: when all trailing whitespace is stripped and there is an empty line
    #: between two statements.
    indent: bool = True

    #: Extra whitespace after the indent, but before the comment.
    whitespace: SimpleWhitespace = SimpleWhitespace.field("")

    #: An optional comment appearing after the indent and extra whitespace.
    comment: Optional[Comment] = None

    #: The newline character that terminates this empty line.
    newline: Newline = Newline.field()

    def _visit_and_replace_children(self, visitor: CSTVisitorT) -> "EmptyLine":
        """
        Build a new :class:`EmptyLine` that keeps ``indent`` as-is and takes the
        results of visiting ``whitespace``, ``comment`` and ``newline``, in that
        order.
        """
        visited_whitespace = visit_required(
            self, "whitespace", self.whitespace, visitor
        )
        visited_comment = visit_optional(self, "comment", self.comment, visitor)
        visited_newline = visit_required(self, "newline", self.newline, visitor)
        return EmptyLine(
            indent=self.indent,
            whitespace=visited_whitespace,
            comment=visited_comment,
            newline=visited_newline,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Add indent tokens when ``indent`` is set, then generate code for the
        whitespace, the comment (when there is one) and the newline.
        """
        if self.indent:
            state.add_indent_tokens()
        self.whitespace._codegen(state)
        if self.comment is not None:
            self.comment._codegen(state)
        self.newline._codegen(state)

239 

240 

@add_slots
@dataclass(frozen=True)
class ParenthesizedWhitespace(BaseParenthesizableWhitespace):
    """
    The whitespace found between two tokens inside a parenthesized expression or
    statement when there is a newline without a line continuation (``\\``)
    character.

    https://docs.python.org/3/reference/lexical_analysis.html#implicit-line-joining

    A parenthesized whitespace cannot be empty, since it requires at least one
    :class:`TrailingWhitespace`. For whitespace that does not contain comments or
    newlines, use :class:`SimpleWhitespace` instead.
    """

    #: The whitespace that comes after the previous node, up to and including
    #: the end-of-line comment and newline.
    first_line: TrailingWhitespace = TrailingWhitespace.field()

    #: Any lines after the first that contain only indentation and/or comments.
    empty_lines: Sequence[EmptyLine] = ()

    #: Whether or not the final simple whitespace is indented regularly.
    indent: bool = False

    #: Extra whitespace after the indent, but before the next node.
    last_line: SimpleWhitespace = SimpleWhitespace.field("")

    def _visit_and_replace_children(
        self, visitor: CSTVisitorT
    ) -> "ParenthesizedWhitespace":
        """
        Build a new :class:`ParenthesizedWhitespace` that keeps ``indent`` as-is
        and takes the results of visiting ``first_line``, ``empty_lines`` and
        ``last_line``, in that order.
        """
        visited_first = visit_required(self, "first_line", self.first_line, visitor)
        visited_empty = visit_sequence(self, "empty_lines", self.empty_lines, visitor)
        visited_last = visit_required(self, "last_line", self.last_line, visitor)
        return ParenthesizedWhitespace(
            first_line=visited_first,
            empty_lines=visited_empty,
            indent=self.indent,
            last_line=visited_last,
        )

    def _codegen_impl(self, state: CodegenState) -> None:
        """
        Generate code for the first line and every empty line, add indent tokens
        when ``indent`` is set, then generate code for the last line.
        """
        self.first_line._codegen(state)
        for empty_line in self.empty_lines:
            empty_line._codegen(state)
        if self.indent:
            state.add_indent_tokens()
        self.last_line._codegen(state)

    @property
    def empty(self) -> bool:
        """
        Whether this node is empty (zero whitespace characters). For
        :class:`ParenthesizedWhitespace` this is always ``False``.
        """
        # It's not possible to have a ParenthesizedWhitespace with zero
        # characters. If we did, the TrailingWhitespace would not have parsed.
        return False
296 return False