Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/metadata/reentrant_codegen.py: 43%
82 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:43 +0000
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
7from dataclasses import dataclass, field
8from typing import List, Optional, Sequence
10from libcst import BaseStatement, CSTNode, Module
11from libcst._add_slots import add_slots
12from libcst._nodes.internal import CodegenState
13from libcst.metadata import BaseMetadataProvider
class CodegenPartial:
    """
    Provided by :class:`ExperimentalReentrantCodegenProvider`.

    Stores enough information to generate either a small patch
    (:meth:`get_modified_statement_code`) or a new file
    (:meth:`get_modified_module_code`) by replacing the old node at this position.

    Attributes:
        start_offset: Character offset in the generated module code at which the
            statement begins.
        end_offset: Character offset (exclusive) at which the statement ends.
        has_trailing_newline: ``True`` unless the codegen state later strips the
            trailing newline that belongs to this statement.
    """

    __slots__ = [
        "start_offset",
        "end_offset",
        "has_trailing_newline",
        "_indent_tokens",
        "_prev_codegen_state",
    ]

    def __init__(self, state: "_ReentrantCodegenState") -> None:
        """
        Snapshot the position of the statement that ``state`` just finished emitting.

        The innermost entry of ``state.start_offset_stack`` is read as the start of
        the statement, and ``state.char_offset`` as its end.
        """
        # store a frozen copy of these values, since they change over time
        self.start_offset: int = state.start_offset_stack[-1]
        self.end_offset: int = state.char_offset
        self.has_trailing_newline: bool = True  # this may get updated to False later
        self._indent_tokens: Sequence[str] = tuple(state.indent_tokens)
        # everything else can be accessed from the codegen state object
        self._prev_codegen_state: "_ReentrantCodegenState" = state

    def get_original_module_code(self) -> str:
        """
        Equivalent to :meth:`libcst.Module.code` on the top-level module that contains
        this statement, except that it uses the cached result from our previous code
        generation pass, so it's faster.
        """
        return self._prev_codegen_state.get_code()

    def get_original_module_bytes(self) -> bytes:
        """
        Equivalent to :meth:`libcst.Module.bytes` on the top-level module that contains
        this statement, except that it uses the cached result from our previous code
        generation pass, so it's faster.

        The text is encoded with the encoding recorded on the codegen state.
        """
        return self.get_original_module_code().encode(self._prev_codegen_state.encoding)

    def get_original_statement_code(self) -> str:
        """
        Equivalent to :meth:`libcst.Module.code_for_node` on the current statement,
        except that it uses the cached result from our previous code generation pass,
        so it's faster.
        """
        return self._prev_codegen_state.get_code()[self.start_offset : self.end_offset]

    def get_modified_statement_code(self, node: BaseStatement) -> str:
        """
        Gets the new code for ``node`` as if it were in same location as the old
        statement being replaced. This means that it inherits details like the old
        statement's indentation.
        """
        # Use a fresh, plain CodegenState seeded with the indentation that was active
        # when the original statement was generated.
        new_codegen_state = CodegenState(
            default_indent=self._prev_codegen_state.default_indent,
            default_newline=self._prev_codegen_state.default_newline,
            indent_tokens=list(self._indent_tokens),
        )
        node._codegen(new_codegen_state)
        if not self.has_trailing_newline:
            # The original statement lost its trailing newline, so the replacement
            # must lose it too for the patch to cover the same span.
            new_codegen_state.pop_trailing_newline()
        return "".join(new_codegen_state.tokens)

    def get_modified_module_code(self, node: BaseStatement) -> str:
        """
        Gets the new code for the module at the root of this statement's tree, but with
        the supplied replacement ``node`` in its place.
        """
        original = self.get_original_module_code()
        patch = self.get_modified_statement_code(node)
        # Splice the patch over the [start_offset, end_offset) span of the old code.
        return f"{original[:self.start_offset]}{patch}{original[self.end_offset:]}"

    def get_modified_module_bytes(self, node: BaseStatement) -> bytes:
        """
        Gets the new bytes for the module at the root of this statement's tree, but with
        the supplied replacement ``node`` in its place.
        """
        return self.get_modified_module_code(node).encode(
            self._prev_codegen_state.encoding
        )
@add_slots
@dataclass(frozen=False)
class _ReentrantCodegenState(CodegenState):
    """
    A :class:`CodegenState` that tracks character offsets while generating code and
    records a :class:`CodegenPartial` for every :class:`BaseStatement` it emits.
    """

    # NOTE(review): the explicit two-argument ``super(...)`` calls below are kept
    # as-is; presumably zero-argument ``super()`` is unsafe because ``@add_slots``
    # rebuilds the class -- confirm before changing.

    # Provider on which the generated partials are stored as metadata.
    provider: BaseMetadataProvider[CodegenPartial]
    # Encoding used when a partial converts generated code to bytes.
    encoding: str = "utf-8"
    # Combined length of all currently active indent tokens.
    indent_size: int = 0
    # Number of characters emitted so far.
    char_offset: int = 0
    # Start offsets of the statements currently being generated (innermost last).
    start_offset_stack: List[int] = field(default_factory=list)
    # Result of joining ``tokens``; filled in lazily by ``get_code``.
    cached_code: Optional[str] = None
    # Partials of statements that finished since the last token was added.
    trailing_partials: List[CodegenPartial] = field(default_factory=list)

    def increase_indent(self, value: str) -> None:
        """Push an indent token and grow ``indent_size`` by its length."""
        super(_ReentrantCodegenState, self).increase_indent(value)
        self.indent_size += len(value)

    def decrease_indent(self) -> None:
        """Shrink ``indent_size`` by the innermost indent token, then pop it."""
        # Read the token's length before the parent class removes it.
        self.indent_size -= len(self.indent_tokens[-1])
        super(_ReentrantCodegenState, self).decrease_indent()

    def add_indent_tokens(self) -> None:
        """Emit the current indentation and advance ``char_offset`` past it."""
        super(_ReentrantCodegenState, self).add_indent_tokens()
        self.char_offset += self.indent_size

    def add_token(self, value: str) -> None:
        """Emit ``value``, advance ``char_offset`` and reset ``trailing_partials``."""
        super(_ReentrantCodegenState, self).add_token(value)
        self.char_offset += len(value)
        # Once another token follows them, earlier statements are no longer trailing.
        self.trailing_partials.clear()

    def before_codegen(self, node: CSTNode) -> None:
        """Record the start offset of ``node`` if it is a statement."""
        if not isinstance(node, BaseStatement):
            return

        self.start_offset_stack.append(self.char_offset)

    def after_codegen(self, node: CSTNode) -> None:
        """Create and publish a :class:`CodegenPartial` for ``node`` if it is a statement."""
        if not isinstance(node, BaseStatement):
            return

        # The partial reads start_offset_stack[-1], so build it before popping.
        partial = CodegenPartial(self)
        self.provider.set_metadata(node, partial)
        self.start_offset_stack.pop()
        self.trailing_partials.append(partial)

    def pop_trailing_newline(self) -> None:
        """
        :class:`libcst.Module` contains a hack where it removes the last token (a
        newline) if the original file didn't have a newline.

        If this happens, we need to go back through every node at the end of the file,
        and fix their `end_offset`.
        """
        if self.trailing_partials:
            # Loop-invariant: every trailing partial shrinks by the same final token.
            removed_length = len(self.tokens[-1])
            for tp in self.trailing_partials:
                tp.end_offset -= removed_length
                tp.has_trailing_newline = False
        super(_ReentrantCodegenState, self).pop_trailing_newline()

    def get_code(self) -> str:
        """Return all emitted tokens joined into one string, caching the result."""
        # Ideally this would use functools.cached_property, but that's only in
        # Python 3.8+.
        #
        # This is a little ugly to make pyre's attribute refinement checks happy.
        cached_code = self.cached_code
        if cached_code is not None:
            return cached_code
        cached_code = "".join(self.tokens)
        self.cached_code = cached_code
        return cached_code
class ExperimentalReentrantCodegenProvider(BaseMetadataProvider[CodegenPartial]):
    """
    An experimental API that allows fast generation of modified code by recording an
    initial code-generation pass, and incrementally applying updates. It is a
    performance optimization for a few niche use-cases and is not user-friendly.

    **This API may change at any time without warning (including in minor releases).**

    This is rarely useful. Instead you should make multiple modifications to a single
    syntax tree, and generate the code once. However, we can think of a few use-cases
    for this API (hence, why it exists):

    - When linting a file, you might generate multiple independent patches that a user
      can accept or reject. Depending on your architecture, it may be advantageous to
      avoid regenerating the file when computing each patch.

    - You might want to call out to an external utility (e.g. a typechecker, such as
      pyre or mypy) to validate a small change. You may need to generate and test lots
      of these patches.

    Restrictions:

    - For safety and sanity reasons, the smallest/only level of granularity is a
      statement. If you need to patch part of a statement, you regenerate the entire
      statement. If you need to regenerate an entire module, just call
      :meth:`libcst.Module.code`.

    - This does not (currently) operate recursively. You can patch an unpatched piece
      of code multiple times, but you can't layer additional patches on an already
      patched piece of code.
    """

    def _gen_impl(self, module: Module) -> None:
        """
        Run one code-generation pass over ``module`` with a
        :class:`_ReentrantCodegenState` bound to this provider.

        The state is configured from the module's default indent, default newline
        and encoding.
        """
        state = _ReentrantCodegenState(
            default_indent=module.default_indent,
            default_newline=module.default_newline,
            provider=self,
            encoding=module.encoding,
        )
        module._codegen(state)