Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/codemod/_cli.py: 25%
225 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 07:09 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 07:09 +0000
1# Copyright (c) Meta Platforms, Inc. and affiliates.
2#
3# This source code is licensed under the MIT license found in the
4# LICENSE file in the root directory of this source tree.
5#
6"""
7Provides helpers for CLI interaction.
8"""
10import difflib
11import os.path
12import re
13import subprocess
14import sys
15import time
16import traceback
17from dataclasses import dataclass, replace
18from multiprocessing import cpu_count, Pool
19from pathlib import Path
20from typing import Any, AnyStr, cast, Dict, List, Optional, Sequence, Union
22from libcst import parse_module, PartialParserConfig
23from libcst.codemod._codemod import Codemod
24from libcst.codemod._dummy_pool import DummyPool
25from libcst.codemod._runner import (
26 SkipFile,
27 SkipReason,
28 transform_module,
29 TransformExit,
30 TransformFailure,
31 TransformResult,
32 TransformSkip,
33 TransformSuccess,
34)
35from libcst.helpers import calculate_module_and_package
36from libcst.metadata import FullRepoManager
# Default marker whose presence makes a file count as generated code. The value
# is assembled from two pieces so the complete marker text never appears
# contiguously in this module's own source.
# NOTE(review): presumably this keeps tooling from classifying this very file
# as generated -- confirm with the maintainers.
_DEFAULT_GENERATED_CODE_MARKER: str = "@gen" + "erated"
def invoke_formatter(formatter_args: Sequence[str], code: AnyStr) -> AnyStr:
    """
    Pipe ``code`` through an external formatter and return whatever the
    formatter writes to stdout.

    ``formatter_args`` is the command line (argv) of the formatter. The code is
    fed to the process on stdin. The return value has the same type as
    ``code``: bytes in gives bytes out, str in gives str out (UTF-8).

    Raises a plain ``Exception`` if ``formatter_args`` is empty, and lets
    ``subprocess.CalledProcessError`` propagate if the formatter exits with a
    non-zero status.
    """

    # Refuse to run when no formatter command was configured.
    if not formatter_args:
        raise Exception("No formatter configured but code formatting requested.")

    # Bytes are passed through untouched; text is encoded/decoded as UTF-8 with
    # universal newline handling.
    if isinstance(code, bytes):
        text_mode, codec = False, None
    else:
        text_mode, codec = True, "utf-8"

    formatted = subprocess.check_output(
        formatter_args,
        input=code,
        universal_newlines=text_mode,
        encoding=codec,
    )
    return cast(AnyStr, formatted)
def print_execution_result(result: TransformResult) -> None:
    """
    Print the diagnostics attached to ``result`` to stderr.

    Every warning message is printed with a ``WARNING:`` prefix. If the result
    is a ``TransformFailure``, the traceback string is printed as well; when
    the underlying error is a ``subprocess.CalledProcessError`` (for example a
    formatter that exited non-zero), the captured process output is printed
    before the traceback.
    """
    for warning in result.warning_messages:
        print(f"WARNING: {warning}", file=sys.stderr)

    if isinstance(result, TransformFailure):
        error = result.error
        if isinstance(error, subprocess.CalledProcessError):
            # The captured output is bytes when the process ran in binary mode,
            # str when it ran in text mode, and None when nothing was captured.
            # Only bytes need decoding; calling .decode() unconditionally would
            # raise AttributeError for the other two cases.
            output = error.output
            if isinstance(output, bytes):
                # This is diagnostic output, so never fail on undecodable bytes.
                output = output.decode("utf-8", errors="replace")
            if output is not None:
                print(output, file=sys.stderr)
        print(result.traceback_str, file=sys.stderr)
def gather_files(
    files_or_dirs: Sequence[str], *, include_stubs: bool = False
) -> List[str]:
    """
    Given a list of files or directories (can be intermingled), return a sorted
    list of all python files that exist at those locations.

    Paths that name an existing file are returned as given, whatever their
    extension. Directories are searched recursively for files whose name ends
    in ``.py``; if ``include_stubs`` is ``True``, ``.pyi`` stub files are
    included too. Paths that are neither an existing file nor a directory are
    silently ignored.
    """
    # Match on the real file-name suffix. Checking for a bare "py" ending would
    # also accept names such as "notes.py.copy".
    wanted_suffixes = (".py", ".pyi") if include_stubs else (".py",)

    ret: List[str] = []
    for fd in files_or_dirs:
        if os.path.isfile(fd):
            ret.append(fd)
        elif os.path.isdir(fd):
            ret.extend(
                str(p)
                for p in Path(fd).rglob("*.py*")
                # rglob also yields directories (e.g. one named "pkg.py"),
                # which must not be reported as source files.
                if p.name.endswith(wanted_suffixes) and p.is_file()
            )
    return sorted(ret)
def diff_code(
    oldcode: str, newcode: str, context: int, *, filename: Optional[str] = None
) -> str:
    """
    Build a unified diff between the pre-codemod and post-codemod text of a
    module, keeping ``context`` unchanged lines around each hunk.

    When ``filename`` is supplied it labels both sides of the diff; otherwise
    the labels are left blank. Identical inputs produce an empty string rather
    than an empty diff, mirroring revision control tools that only show files
    that actually changed.
    """

    if oldcode == newcode:
        return ""

    # A missing (or empty) filename means blank "---"/"+++" labels, which is
    # also what difflib uses when no labels are passed at all.
    label = filename if filename else ""
    before = oldcode.split("\n")
    after = newcode.split("\n")
    return "\n".join(
        difflib.unified_diff(
            before,
            after,
            fromfile=label,
            tofile=label,
            lineterm="",
            n=context,
        )
    )
def exec_transform_with_prettyprint(
    transform: Codemod,
    code: str,
    *,
    include_generated: bool = False,
    generated_code_marker: str = _DEFAULT_GENERATED_CODE_MARKER,
    format_code: bool = False,
    formatter_args: Sequence[str] = (),
    python_version: Optional[str] = None,
) -> Optional[str]:
    """
    Run an instantiated codemod over the text of a single module, optionally
    pass the result through the configured formatter, and print any warnings
    or failure details to stderr.

    If ``code`` contains ``generated_code_marker`` anywhere and
    ``include_generated`` is not ``True``, a warning is printed and ``code`` is
    returned untouched. The formatter only runs when ``format_code`` is
    ``True`` and the transform produced code. If ``python_version`` is given
    the module is parsed using that version; otherwise the version of the
    currently executing python binary is used.

    Returns the resulting module text, or ``None`` when the transform failed,
    exited, was skipped, or when the formatter raised.
    """

    if not include_generated:
        if generated_code_marker in code:
            print(
                "WARNING: Code is generated and we are set to ignore generated code, "
                "skipping!",
                file=sys.stderr,
            )
            return code

    result = transform_module(transform, code, python_version=python_version)

    # Only a successful transform carries code; every other outcome yields None.
    if isinstance(result, (TransformFailure, TransformExit, TransformSkip)):
        maybe_code: Optional[str] = None
    else:
        maybe_code = result.code

    if format_code and maybe_code is not None:
        try:
            maybe_code = invoke_formatter(formatter_args, maybe_code)
        except Exception as ex:
            # A formatter problem turns the whole run into a failure. Keep the
            # warnings gathered so far and record the traceback for debugging.
            maybe_code = None
            result = TransformFailure(
                error=ex,
                traceback_str=traceback.format_exc(),
                warning_messages=result.warning_messages,
            )

    # Report warnings/errors no matter how we got here.
    print_execution_result(result)
    return maybe_code
# Immutable record of what happened to one file; built by _execute_transform
# and consumed when results are printed and tallied.
@dataclass(frozen=True)
class ExecutionResult:
    # Path of the file this record describes
    filename: str
    # True when the transformed (and possibly formatted) code differs from
    # what was read from disk
    changed: bool
    # Outcome of the run: a success, failure, skip or exit result
    transform_result: TransformResult
# Immutable bundle of the per-run settings that _execute_transform reads for
# every file it processes.
@dataclass(frozen=True)
class ExecutionConfig:
    # Regex patterns; a file whose path fully matches any of them is skipped
    blacklist_patterns: Sequence[str] = ()
    # Whether to run the external formatter on code the codemod changed
    format_code: bool = False
    # Command line (argv) of the external formatter
    formatter_args: Sequence[str] = ()
    # Files containing this marker are skipped unless include_generated is set
    generated_code_marker: str = _DEFAULT_GENERATED_CODE_MARKER
    # Process files even if they contain the generated code marker
    include_generated: bool = False
    # Python version handed to the parser; None uses the parser's default
    python_version: Optional[str] = None
    # Root used to compute module/package names; "." is used when None
    repo_root: Optional[str] = None
    # When set, the number of context lines for unified diff output
    unified_diff: Optional[int] = None
def _execute_transform(  # noqa: C901
    transformer: Codemod,
    filename: str,
    config: ExecutionConfig,
) -> ExecutionResult:
    """
    Run ``transformer`` against the single file ``filename`` and report the
    outcome as an ``ExecutionResult``. This function never raises for problems
    with an individual file; they are turned into result objects instead.

    Steps, in order:

    1. Skip the file if its path fully matches one of
       ``config.blacklist_patterns``.
    2. Read the file as bytes and skip it if it contains the generated code
       marker (unless ``config.include_generated`` is set).
    3. Point the transformer's context at this file (filename, fresh scratch,
       module and package name when they can be determined).
    4. Parse and transform the module, then run the formatter if requested and
       the code actually changed.
    5. If ``config.unified_diff`` is not ``None``, return a unified diff with
       that many context lines as the result code and leave the file alone.
       Otherwise write changed code back to the file and return an empty code
       string.

    A ``KeyboardInterrupt`` yields a ``TransformExit`` result, ``SkipFile``
    yields a ``TransformSkip`` result, and any other exception yields a
    ``TransformFailure`` result carrying the traceback.
    """

    def exit_result() -> ExecutionResult:
        # Interrupted by the user: report an exit, not a failure.
        return ExecutionResult(
            filename=filename, changed=False, transform_result=TransformExit()
        )

    def failure_result(ex: Exception) -> ExecutionResult:
        # Must be called from inside an ``except`` block so that
        # traceback.format_exc() sees the exception being handled.
        return ExecutionResult(
            filename=filename,
            changed=False,
            transform_result=TransformFailure(
                error=ex,
                traceback_str=traceback.format_exc(),
                warning_messages=transformer.context.warnings,
            ),
        )

    for pattern in config.blacklist_patterns:
        if re.fullmatch(pattern, filename):
            return ExecutionResult(
                filename=filename,
                changed=False,
                transform_result=TransformSkip(
                    skip_reason=SkipReason.BLACKLISTED,
                    skip_description=f"Blacklisted by pattern {pattern}.",
                ),
            )

    try:
        with open(filename, "rb") as fp:
            oldcode = fp.read()

        # Skip generated files
        if (
            not config.include_generated
            and config.generated_code_marker.encode("utf-8") in oldcode
        ):
            return ExecutionResult(
                filename=filename,
                changed=False,
                transform_result=TransformSkip(
                    skip_reason=SkipReason.GENERATED,
                    skip_description="Generated file.",
                ),
            )

        # Somewhat gross hack to provide the filename in the transform's context.
        # We do this after the fork so that a context that was initialized with
        # some defaults before calling parallel_exec_transform_with_prettyprint
        # will be updated per-file.
        transformer.context = replace(
            transformer.context,
            filename=filename,
            scratch={},
        )

        # Determine the module and package name for this file. Failing to do so
        # is not fatal: we report it and carry on without those names.
        try:
            module_name_and_package = calculate_module_and_package(
                config.repo_root or ".", filename
            )
            transformer.context = replace(
                transformer.context,
                full_module_name=module_name_and_package.name,
                full_package_name=module_name_and_package.package,
            )
        except ValueError as ex:
            print(
                f"Failed to determine module name for {filename}: {ex}", file=sys.stderr
            )

        # Run the transform, bail if we failed or if we aren't formatting code
        try:
            input_tree = parse_module(
                oldcode,
                config=(
                    PartialParserConfig(python_version=str(config.python_version))
                    if config.python_version is not None
                    else PartialParserConfig()
                ),
            )
            output_tree = transformer.transform_module(input_tree)
            newcode = output_tree.bytes
            encoding = output_tree.encoding
        except KeyboardInterrupt:
            return exit_result()
        except SkipFile as ex:
            return ExecutionResult(
                filename=filename,
                changed=False,
                transform_result=TransformSkip(
                    skip_reason=SkipReason.OTHER,
                    skip_description=str(ex),
                    warning_messages=transformer.context.warnings,
                ),
            )
        except Exception as ex:
            return failure_result(ex)

        # Call formatter if needed, but only if we actually changed something in
        # this file
        if config.format_code and newcode != oldcode:
            try:
                newcode = invoke_formatter(config.formatter_args, newcode)
            except KeyboardInterrupt:
                return exit_result()
            except Exception as ex:
                return failure_result(ex)

        # Format as unified diff if needed, otherwise save it back
        changed = oldcode != newcode
        # Compare against None rather than testing truthiness: 0 is a valid
        # request for a diff with zero context lines and must not fall through
        # to the branch that overwrites the file.
        if config.unified_diff is not None:
            newcode = diff_code(
                oldcode.decode(encoding),
                newcode.decode(encoding),
                config.unified_diff,
                filename=filename,
            )
        else:
            # Write back if we changed
            if changed:
                with open(filename, "wb") as fp:
                    fp.write(newcode)
            # Not strictly necessary, but saves space in pickle since we won't
            # use it
            newcode = ""

        # Inform success
        return ExecutionResult(
            filename=filename,
            changed=changed,
            transform_result=TransformSuccess(
                warning_messages=transformer.context.warnings, code=newcode
            ),
        )
    except KeyboardInterrupt:
        return exit_result()
    except Exception as ex:
        return failure_result(ex)
class Progress:
    """
    Single-line progress indicator written to stderr.

    Each update erases the current terminal line and rewrites it with the
    elapsed time, the percentage of files finished, and a rough estimate of
    the time remaining. When constructed with ``enabled=False``, ``print`` and
    ``clear`` do nothing.
    """

    # Carriage return followed by the ANSI "erase entire line" sequence.
    ERASE_CURRENT_LINE: str = "\r\033[2K"

    def __init__(self, *, enabled: bool, total: int) -> None:
        self.enabled = enabled
        self.total = total
        # 1/100 = 0, len("0") = 1, precision = 0, more digits for more files
        self.pretty_precision: int = len(str(self.total // 100)) - 1
        # Pretend we start processing immediately. This is not true, but it's
        # close enough to true.
        self.started_at: float = time.time()

    def print(self, finished: int) -> None:
        """
        Rewrite the progress line to reflect ``finished`` completed files.
        """
        if not self.enabled:
            return
        left = self.total - finished
        # With nothing to process there is nothing outstanding; report 100%
        # instead of dividing by zero.
        percent = (
            100.0 * (float(finished) / float(self.total)) if self.total else 100.0
        )
        # Clamp at zero in case the wall clock moved backwards.
        elapsed_time = max(time.time() - self.started_at, 0)

        print(
            f"{self.ERASE_CURRENT_LINE}{self._human_seconds(elapsed_time)} "
            f"{percent:.{self.pretty_precision}f}% complete, "
            f"{self.estimate_completion(elapsed_time, finished, left)} "
            f"estimated for {left} files to go...",
            end="",
            file=sys.stderr,
        )

    def _human_seconds(self, seconds: Union[int, float]) -> str:
        """
        This returns a string which is a human-ish readable elapsed time such
        as 30.42s or 10m 31s
        """

        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        if hours > 0:
            return f"{hours:.0f}h {minutes:02.0f}m {seconds:02.0f}s"
        elif minutes > 0:
            return f"{minutes:02.0f}m {seconds:02.0f}s"
        else:
            return f"{seconds:02.2f}s"

    def estimate_completion(
        self, elapsed_seconds: float, files_finished: int, files_left: int
    ) -> str:
        """
        Computes a really basic estimated completion given a number of
        operations still to do. Returns ``"[calculating]"`` while no rate can
        be computed yet, i.e. when nothing has finished or no time has elapsed.
        """

        # Without finished files or elapsed time there is no rate to
        # extrapolate from. A zero elapsed time can occur on a coarse clock and
        # would otherwise cause a division by zero below.
        if files_finished <= 0 or elapsed_seconds <= 0:
            # Technically infinite but calculating sounds better.
            return "[calculating]"

        fps = files_finished / elapsed_seconds
        estimated_seconds_left = files_left / fps
        return self._human_seconds(estimated_seconds_left)

    def clear(self) -> None:
        """
        Erase the progress line so that other output can be printed cleanly.
        """
        if not self.enabled:
            return
        print(self.ERASE_CURRENT_LINE, end="", file=sys.stderr)
def _print_parallel_result(
    exec_result: ExecutionResult,
    progress: Progress,
    *,
    unified_diff: bool,
    show_successes: bool,
    hide_generated: bool,
    hide_blacklisted: bool,
) -> None:
    """
    Report the outcome for one file.

    Skips and failures are described on stderr; skips caused by the blacklist
    or by the generated-code marker can be silenced with ``hide_blacklisted``
    and ``hide_generated``. Successes are described on stderr only when
    ``show_successes`` is set or the transform produced warnings. In
    ``unified_diff`` mode the code of a successful result (a diff) is printed
    to stdout. The progress line is cleared before anything goes to stderr.
    """
    filename = exec_result.filename
    result = exec_result.transform_result

    if isinstance(result, TransformSkip):
        # Skipped file: nothing was written back, just explain why (unless the
        # caller asked to hide this kind of skip).
        reason = result.skip_reason
        silenced = (reason is SkipReason.BLACKLISTED and hide_blacklisted) or (
            reason is SkipReason.GENERATED and hide_generated
        )
        if silenced:
            return
        progress.clear()
        print(f"Codemodding {filename}", file=sys.stderr)
        print_execution_result(result)
        print(
            f"Skipped codemodding {filename}: {result.skip_description}\n",
            file=sys.stderr,
        )
        return

    if isinstance(result, TransformFailure):
        # Failed file: show the error details; the file was not written back.
        progress.clear()
        print(f"Codemodding {filename}", file=sys.stderr)
        print_execution_result(result)
        print(f"Failed to codemod {filename}\n", file=sys.stderr)
        return

    if not isinstance(result, TransformSuccess):
        # Any other outcome is not reported here.
        return

    if show_successes or result.warning_messages:
        # Surface warnings and, if requested, the success itself.
        progress.clear()
        print(f"Codemodding {filename}", file=sys.stderr)
        print_execution_result(result)
        tail = " with warnings\n" if result.warning_messages else "\n"
        print(f"Successfully codemodded {filename}" + tail, file=sys.stderr)

    # In unified diff mode, the code is a diff we must print.
    if unified_diff:
        print(result.code)
@dataclass(frozen=True)
class ParallelTransformResult:
    """
    Summary returned by
    :func:`~libcst.codemod.parallel_exec_transform_with_prettyprint` after it
    has been run against a series of files. It holds plain counts only: how
    many files were codemodded successfully, how many failed, how many
    warnings were generated across all files, and how many files were skipped.
    """

    #: Count of files that were transformed successfully.
    successes: int
    #: Count of files for which the transform failed.
    failures: int
    #: Total number of warnings generated while transforming the files.
    warnings: int
    #: Count of files skipped because they were blacklisted, generated,
    #: or because the codemod asked to skip them.
    skips: int
def _execute_transform_wrap(
    job: Dict[str, Any],
) -> ExecutionResult:
    """
    Adapter that lets a pool's ``imap_unordered`` drive ``_execute_transform``.

    ``job`` is a dict of keyword arguments which is unpacked into the call.
    """
    # Unfortunate wrapper required since there is no `istarmap_unordered`...
    outcome = _execute_transform(**job)
    return outcome
def parallel_exec_transform_with_prettyprint(  # noqa: C901
    transform: Codemod,
    files: Sequence[str],
    *,
    jobs: Optional[int] = None,
    unified_diff: Optional[int] = None,
    include_generated: bool = False,
    generated_code_marker: str = _DEFAULT_GENERATED_CODE_MARKER,
    format_code: bool = False,
    formatter_args: Sequence[str] = (),
    show_successes: bool = False,
    hide_generated: bool = False,
    hide_blacklisted: bool = False,
    hide_progress: bool = False,
    blacklist_patterns: Sequence[str] = (),
    python_version: Optional[str] = None,
    repo_root: Optional[str] = None,
) -> ParallelTransformResult:
    """
    Given a list of files and an instantiated codemod we should apply to them,
    fork and apply the codemod in parallel to all of the files, including any
    configured formatter. The ``jobs`` parameter controls the maximum number of
    in-flight transforms, and needs to be at least 1. If not included, the number
    of jobs will automatically be set to the number of CPU cores. If ``unified_diff``
    is set to a number, changes to files will be printed to stdout with
    ``unified_diff`` lines of context. If it is set to ``None`` or left out, files
    themselves will be updated with changes and formatting. If a
    ``python_version`` is provided, then we will parse each source file using
    this version. Otherwise, we will use the version of the currently executing python
    binary.

    A progress indicator as well as any generated warnings will be printed to stderr.
    To suppress the interactive progress indicator, set ``hide_progress`` to ``True``.
    Files that include the generated code marker will be skipped unless the
    ``include_generated`` parameter is set to ``True``. Similarly, files that match
    a supplied blacklist of regex patterns will be skipped. Warnings for skipping
    both blacklisted and generated files will be printed to stderr along with
    warnings generated by the codemod unless ``hide_blacklisted`` and
    ``hide_generated`` are set to ``True``. Files that were successfully codemodded
    will not be printed to stderr unless ``show_successes`` is set to ``True``.

    An empty ``files`` list returns a result with all counts at zero. An
    exception is raised if fewer than one job is requested.

    To make this API possible, we take an instantiated transform. This is due to
    the fact that lambdas are not pickleable and pickling functions is undefined.
    This means we're implicitly relying on fork behavior on UNIX-like systems, and
    this function will not work on Windows systems. To create a command-line utility
    that runs on Windows, please instead see
    :func:`~libcst.codemod.exec_transform_with_prettyprint`.
    """

    # Ensure that we have no duplicates, otherwise we might get race conditions
    # on write.
    files = sorted({os.path.abspath(f) for f in files})
    total = len(files)
    progress = Progress(enabled=not hide_progress, total=total)

    chunksize = 4
    # Grab number of cores if we need to
    requested_jobs = jobs if jobs is not None else cpu_count()

    # Validate the request itself, before it is clamped to the amount of work.
    # Clamping first would turn an empty file list into "zero jobs" and raise
    # here instead of returning an empty result below.
    if requested_jobs < 1:
        raise Exception("Must have at least one job to process!")

    if total == 0:
        return ParallelTransformResult(successes=0, failures=0, skips=0, warnings=0)

    # Never start more workers than there are chunks of work to hand out.
    jobs = min(requested_jobs, (total + chunksize - 1) // chunksize)

    if repo_root is not None:
        # Make sure if there is a root that we have the absolute path to it.
        repo_root = os.path.abspath(repo_root)
        # Spin up a full repo metadata manager so that we can provide metadata
        # like type inference to individual forked processes.
        print("Calculating full-repo metadata...", file=sys.stderr)
        metadata_manager = FullRepoManager(
            repo_root,
            files,
            transform.get_inherited_dependencies(),
        )
        metadata_manager.resolve_cache()
        transform.context = replace(
            transform.context,
            metadata_manager=metadata_manager,
        )
        print("Executing codemod...", file=sys.stderr)

    config = ExecutionConfig(
        repo_root=repo_root,
        unified_diff=unified_diff,
        include_generated=include_generated,
        generated_code_marker=generated_code_marker,
        format_code=format_code,
        formatter_args=formatter_args,
        blacklist_patterns=blacklist_patterns,
        python_version=python_version,
    )

    if total == 1 or jobs == 1:
        # Simple case, we should not pay for process overhead.
        # Let's just use a dummy synchronous pool.
        jobs = 1
        pool_impl = DummyPool
    else:
        pool_impl = Pool
        # Warm the parser, pre-fork.
        parse_module(
            "",
            config=(
                PartialParserConfig(python_version=python_version)
                if python_version is not None
                else PartialParserConfig()
            ),
        )

    successes: int = 0
    failures: int = 0
    warnings: int = 0
    skips: int = 0

    with pool_impl(processes=jobs) as p:  # type: ignore
        args = [
            {
                "transformer": transform,
                "filename": filename,
                "config": config,
            }
            for filename in files
        ]
        try:
            for result in p.imap_unordered(
                _execute_transform_wrap, args, chunksize=chunksize
            ):
                # Print an execution result, keep track of failures
                _print_parallel_result(
                    result,
                    progress,
                    # Zero context lines is still a request for diff output,
                    # so test against None rather than truthiness.
                    unified_diff=unified_diff is not None,
                    show_successes=show_successes,
                    hide_generated=hide_generated,
                    hide_blacklisted=hide_blacklisted,
                )

                if isinstance(result.transform_result, TransformFailure):
                    failures += 1
                elif isinstance(result.transform_result, TransformSuccess):
                    successes += 1
                elif isinstance(
                    result.transform_result, (TransformExit, TransformSkip)
                ):
                    skips += 1

                warnings += len(result.transform_result.warning_messages)

                # Update the indicator only after counting this result so the
                # displayed progress includes the file that just finished.
                progress.print(successes + failures + skips)
        finally:
            progress.clear()

    # Return the tallies gathered across all files.
    return ParallelTransformResult(
        successes=successes, failures=failures, skips=skips, warnings=warnings
    )