Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/metadata/reentrant_codegen.py: 43%

82 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:43 +0000

1# Copyright (c) Meta Platforms, Inc. and affiliates. 

2# 

3# This source code is licensed under the MIT license found in the 

4# LICENSE file in the root directory of this source tree. 

5 

6 

7from dataclasses import dataclass, field 

8from typing import List, Optional, Sequence 

9 

10from libcst import BaseStatement, CSTNode, Module 

11from libcst._add_slots import add_slots 

12from libcst._nodes.internal import CodegenState 

13from libcst.metadata import BaseMetadataProvider 

14 

15 

class CodegenPartial:
    """
    Metadata value produced by :class:`ExperimentalReentrantCodegenProvider`.

    Records where one statement landed in the generated source (its character
    offsets and the indentation in effect) during the recorded codegen pass. That
    is enough to produce either just the replacement text for the statement
    (:meth:`get_modified_statement_code`) or the whole module with the statement
    swapped out (:meth:`get_modified_module_code`).
    """

    __slots__ = [
        "start_offset",
        "end_offset",
        "has_trailing_newline",
        "_indent_tokens",
        "_prev_codegen_state",
    ]

    def __init__(self, state: "_ReentrantCodegenState") -> None:
        # Anything that is not snapshotted below is read through this reference.
        self._prev_codegen_state: _ReentrantCodegenState = state
        # The remaining values keep changing while codegen runs, so freeze a copy
        # of what they are right now.
        self._indent_tokens: Sequence[str] = tuple(state.indent_tokens)
        self.start_offset: int = state.start_offset_stack[-1]
        self.end_offset: int = state.char_offset
        # Starts out True; ``_ReentrantCodegenState.pop_trailing_newline`` flips
        # it to False (and shrinks ``end_offset``) for partials at the end of the
        # file when the trailing newline gets removed.
        self.has_trailing_newline: bool = True

    def get_original_module_code(self) -> str:
        """
        Returns the source text of the whole module that contains this statement,
        taken from the (cached) result of the recorded code generation pass rather
        than by generating the module again.
        """
        return self._prev_codegen_state.get_code()

    def get_original_module_bytes(self) -> bytes:
        """
        Same as :meth:`get_original_module_code`, but encoded with the encoding
        stored on the recorded codegen state.
        """
        module_code = self.get_original_module_code()
        return module_code.encode(self._prev_codegen_state.encoding)

    def get_original_statement_code(self) -> str:
        """
        Returns the original text of just this statement, i.e. the slice of the
        recorded module code between ``start_offset`` and ``end_offset``.
        """
        module_code = self._prev_codegen_state.get_code()
        return module_code[self.start_offset : self.end_offset]

    def get_modified_statement_code(self, node: BaseStatement) -> str:
        """
        Generates the code for ``node`` as though it sat where the old statement
        was: the fresh codegen state reuses the recorded default indent, default
        newline, and the indentation tokens captured for the old statement.

        If the old statement lost its trailing newline, the trailing newline is
        popped from the newly generated tokens as well.
        """
        recorded = self._prev_codegen_state
        scratch_state = CodegenState(
            default_indent=recorded.default_indent,
            default_newline=recorded.default_newline,
            indent_tokens=list(self._indent_tokens),
        )
        node._codegen(scratch_state)
        if not self.has_trailing_newline:
            scratch_state.pop_trailing_newline()
        return "".join(scratch_state.tokens)

    def get_modified_module_code(self, node: BaseStatement) -> str:
        """
        Returns the code of the whole module with the text of the old statement
        replaced by the code generated for ``node``.
        """
        original = self.get_original_module_code()
        patch = self.get_modified_statement_code(node)
        # Splice the patch between the untouched text before and after the
        # old statement.
        before = original[: self.start_offset]
        after = original[self.end_offset :]
        return before + patch + after

    def get_modified_module_bytes(self, node: BaseStatement) -> bytes:
        """
        Same as :meth:`get_modified_module_code`, but encoded with the encoding
        stored on the recorded codegen state.
        """
        patched_code = self.get_modified_module_code(node)
        return patched_code.encode(self._prev_codegen_state.encoding)

99 

100 

@add_slots
@dataclass(frozen=False)
class _ReentrantCodegenState(CodegenState):
    """
    A :class:`CodegenState` that additionally keeps track of character offsets
    while tokens are emitted, and registers a :class:`CodegenPartial` with the
    provider for every :class:`BaseStatement` that finishes code generation.

    NOTE(review): the explicit two-argument ``super(...)`` calls are kept exactly
    as in the original; presumably ``add_slots`` rebuilds the class, which would
    break zero-argument ``super()`` -- confirm before simplifying.
    """

    # Provider that receives one CodegenPartial per statement.
    provider: BaseMetadataProvider[CodegenPartial]
    # Encoding used when the generated code is turned into bytes.
    encoding: str = "utf-8"
    # Combined length of the indentation strings currently pushed.
    indent_size: int = 0
    # Running count of the characters accounted for so far.
    char_offset: int = 0
    # Start offsets of the statements whose codegen is still in progress.
    start_offset_stack: List[int] = field(default_factory=list)
    # Joined token text, filled in lazily by ``get_code``.
    cached_code: Optional[str] = None
    # Partials created since the most recent ``add_token`` call.
    trailing_partials: List[CodegenPartial] = field(default_factory=list)

    def increase_indent(self, value: str) -> None:
        """Pushes an indentation level and grows ``indent_size`` to match."""
        super(_ReentrantCodegenState, self).increase_indent(value)
        self.indent_size += len(value)

    def decrease_indent(self) -> None:
        """Shrinks ``indent_size`` by the innermost indent, then pops that level."""
        # Read the innermost indent before the parent class gets to remove it.
        innermost = self.indent_tokens[-1]
        self.indent_size -= len(innermost)
        super(_ReentrantCodegenState, self).decrease_indent()

    def add_indent_tokens(self) -> None:
        """Emits the indentation and advances ``char_offset`` by ``indent_size``."""
        super(_ReentrantCodegenState, self).add_indent_tokens()
        self.char_offset += self.indent_size

    def add_token(self, value: str) -> None:
        """Emits ``value``, advances ``char_offset``, and resets ``trailing_partials``."""
        super(_ReentrantCodegenState, self).add_token(value)
        self.char_offset += len(value)
        # A new token was emitted after them, so the partials collected so far
        # are no longer trailing.
        self.trailing_partials.clear()

    def before_codegen(self, node: CSTNode) -> None:
        """Remembers the current offset as the start of ``node`` if it is a statement."""
        if isinstance(node, BaseStatement):
            self.start_offset_stack.append(self.char_offset)

    def after_codegen(self, node: CSTNode) -> None:
        """
        For statements, snapshots the current state into a :class:`CodegenPartial`,
        hands it to the provider, and tracks it as a trailing partial.
        """
        if isinstance(node, BaseStatement):
            # The partial reads the top of ``start_offset_stack``, so it has to
            # be built before the stack is popped.
            finished = CodegenPartial(self)
            self.provider.set_metadata(node, finished)
            self.start_offset_stack.pop()
            self.trailing_partials.append(finished)

    def pop_trailing_newline(self) -> None:
        """
        :class:`libcst.Module` contains a hack where it removes the last token (a
        newline) if the original file didn't have a newline.

        When that happens, every partial recorded at the very end of the file has
        its ``end_offset`` moved back by the length of that token and is marked as
        having no trailing newline.
        """
        if self.trailing_partials:
            # Only look at the last token when there is something to fix up, so
            # that an empty token list is never indexed needlessly.
            removed_length = len(self.tokens[-1])
            for trailing in self.trailing_partials:
                trailing.end_offset -= removed_length
                trailing.has_trailing_newline = False
        super(_ReentrantCodegenState, self).pop_trailing_newline()

    def get_code(self) -> str:
        """Returns the joined tokens, computing and caching them on first use."""
        # Ideally this would use functools.cached_property, but that's only in
        # Python 3.8+.
        #
        # Going through a local variable keeps pyre's attribute refinement
        # checks happy.
        code = self.cached_code
        if code is None:
            code = "".join(self.tokens)
            self.cached_code = code
        return code

168 

169 

class ExperimentalReentrantCodegenProvider(BaseMetadataProvider[CodegenPartial]):
    """
    An experimental API for quickly generating modified code: it records one
    initial code-generation pass and then applies updates incrementally. It is a
    performance optimization for a few niche use-cases and is not user-friendly.

    **This API may change at any time without warning (including in minor releases).**

    This is rarely useful. Normally you should make all of your modifications to a
    single syntax tree and generate the code once. The use-cases that motivated this
    API are:

    - A linter may produce several independent patches for a file that a user can
      accept or reject. Depending on your architecture, it may be advantageous to
      avoid regenerating the file when computing each patch.

    - You may want to hand a small change to an external utility (e.g. a
      typechecker, such as pyre or mypy) for validation, and may need to generate
      and test lots of these patches.

    Restrictions:

    - For safety and sanity reasons, the smallest/only level of granularity is a
      statement. To patch part of a statement, regenerate the entire statement. To
      regenerate an entire module, just call :meth:`libcst.Module.code`.

    - This does not (currently) operate recursively. An unpatched piece of code can
      be patched multiple times, but additional patches cannot be layered on top of
      an already patched piece of code.
    """

    def _gen_impl(self, module: Module) -> None:
        # Run code generation for the module through the recording state; the
        # state reports a CodegenPartial back to this provider for each statement.
        recording_state = _ReentrantCodegenState(
            default_indent=module.default_indent,
            default_newline=module.default_newline,
            provider=self,
            encoding=module.encoding,
        )
        module._codegen(recording_state)