Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/libcst/codemod/_cli.py: 25%

225 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 07:09 +0000

1# Copyright (c) Meta Platforms, Inc. and affiliates. 

2# 

3# This source code is licensed under the MIT license found in the 

4# LICENSE file in the root directory of this source tree. 

5# 

6""" 

7Provides helpers for CLI interaction. 

8""" 

9 

10import difflib 

11import os.path 

12import re 

13import subprocess 

14import sys 

15import time 

16import traceback 

17from dataclasses import dataclass, replace 

18from multiprocessing import cpu_count, Pool 

19from pathlib import Path 

20from typing import Any, AnyStr, cast, Dict, List, Optional, Sequence, Union 

21 

22from libcst import parse_module, PartialParserConfig 

23from libcst.codemod._codemod import Codemod 

24from libcst.codemod._dummy_pool import DummyPool 

25from libcst.codemod._runner import ( 

26 SkipFile, 

27 SkipReason, 

28 transform_module, 

29 TransformExit, 

30 TransformFailure, 

31 TransformResult, 

32 TransformSkip, 

33 TransformSuccess, 

34) 

35from libcst.helpers import calculate_module_and_package 

36from libcst.metadata import FullRepoManager 

37 

# Default marker whose presence causes a file to be treated as generated code.
# NOTE(review): the literal is assembled from two pieces, presumably so that
# this source file never contains the contiguous marker itself -- confirm.
_DEFAULT_GENERATED_CODE_MARKER: str = "@gen" + "erated"

39 

40 

def invoke_formatter(formatter_args: Sequence[str], code: AnyStr) -> AnyStr:
    """
    Pipe ``code`` through an external formatter and return whatever the
    formatter writes to stdout.

    ``formatter_args`` is the command line to execute. The code is supplied on
    the command's stdin. ``bytes`` input is passed through in binary mode and
    yields ``bytes``; ``str`` input is exchanged as UTF-8 text and yields ``str``.

    Raises ``Exception`` if ``formatter_args`` is empty, and
    ``subprocess.CalledProcessError`` if the command exits with a non-zero status.
    """

    # Refuse to run when no formatter command was supplied.
    if len(formatter_args) == 0:
        raise Exception("No formatter configured but code formatting requested.")

    # Choose binary or text pipes to mirror the type of the input.
    if isinstance(code, bytes):
        text_mode, stream_encoding = False, None
    else:
        text_mode, stream_encoding = True, "utf-8"

    formatted = subprocess.check_output(
        formatter_args,
        input=code,
        universal_newlines=text_mode,
        encoding=stream_encoding,
    )
    return cast(AnyStr, formatted)

63 

64 

def print_execution_result(result: TransformResult) -> None:
    """
    Print everything worth reporting about ``result`` to stderr.

    Each entry of ``result.warning_messages`` is printed with a ``WARNING:``
    prefix. If the result is a ``TransformFailure``, the stored traceback is
    printed as well; when the underlying error is a
    ``subprocess.CalledProcessError`` the captured output of the failed command
    is printed before the traceback.
    """
    for warning in result.warning_messages:
        print(f"WARNING: {warning}", file=sys.stderr)

    if isinstance(result, TransformFailure):
        error = result.error
        if isinstance(error, subprocess.CalledProcessError):
            output = error.output
            # The captured output is ``bytes`` when the command was run with
            # binary pipes but ``str`` when it was run in text mode (which is
            # what ``invoke_formatter`` does for ``str`` code), and ``None`` when
            # nothing was captured. Only ``bytes`` needs decoding; undecodable
            # bytes are replaced so that reporting a failure can never itself fail.
            if isinstance(output, bytes):
                output = output.decode("utf-8", errors="replace")
            if output is not None:
                print(output, file=sys.stderr)
        print(result.traceback_str, file=sys.stderr)

74 

75 

def gather_files(
    files_or_dirs: Sequence[str], *, include_stubs: bool = False
) -> List[str]:
    """
    Given a list of files or directories (can be intermingled), return a sorted
    list of all python files that exist at those locations.

    Paths that name an existing file are returned as given, whatever their
    extension. Directories are searched recursively for files whose name ends
    in ``.py``; if ``include_stubs`` is ``True``, ``.pyi`` stub files are included
    as well. Paths that are neither an existing file nor a directory are ignored.
    """
    wanted_suffixes = (".py", ".pyi") if include_stubs else (".py",)

    ret: List[str] = []
    for fd in files_or_dirs:
        if os.path.isfile(fd):
            ret.append(fd)
        elif os.path.isdir(fd):
            ret.extend(
                str(p)
                for p in Path(fd).rglob("*.py*")
                # The glob is deliberately loose, so tighten it here: require a
                # real ``.py``/``.pyi`` suffix (not merely a name ending in "py")
                # and skip directories that happen to be named like modules.
                if p.name.endswith(wanted_suffixes) and p.is_file()
            )
    return sorted(ret)

96 

97 

def diff_code(
    oldcode: str, newcode: str, context: int, *, filename: Optional[str] = None
) -> str:
    """
    Produce a unified diff between ``oldcode`` and ``newcode`` (a module before
    and after a codemod) with ``context`` lines of context around each change.

    When ``filename`` is given it labels both sides of the diff; otherwise the
    labels are left empty. Identical inputs yield an empty string rather than an
    empty diff, the way revision control software only shows files that changed.
    """

    if newcode == oldcode:
        return ""

    # An empty label is exactly what difflib uses when no file name is supplied.
    label = filename if filename else ""
    delta = difflib.unified_diff(
        oldcode.split("\n"),
        newcode.split("\n"),
        fromfile=label,
        tofile=label,
        lineterm="",
        n=context,
    )
    return "\n".join(delta)

128 

129 

def exec_transform_with_prettyprint(
    transform: Codemod,
    code: str,
    *,
    include_generated: bool = False,
    generated_code_marker: str = _DEFAULT_GENERATED_CODE_MARKER,
    format_code: bool = False,
    formatter_args: Sequence[str] = (),
    python_version: Optional[str] = None,
) -> Optional[str]:
    """
    Run an instantiated codemod over a string containing a module, optionally
    pass the result through the formatter, and print any warnings or failure
    details to stderr.

    If ``code`` contains ``generated_code_marker`` anywhere and
    ``include_generated`` is not ``True``, a warning is printed and ``code`` is
    returned untouched. The formatter is only invoked when ``format_code`` is
    ``True`` and the transform produced code. ``python_version`` is forwarded to
    the transform runner to control how the module is parsed.

    Returns the resulting code, or ``None`` when the transform failed, exited or
    skipped the module, or when the formatter raised.
    """

    if not include_generated and generated_code_marker in code:
        print(
            "WARNING: Code is generated and we are set to ignore generated code, "
            + "skipping!",
            file=sys.stderr,
        )
        return code

    result = transform_module(transform, code, python_version=python_version)

    # Only a successful transform carries code; every other outcome yields None.
    maybe_code: Optional[str]
    if isinstance(result, (TransformFailure, TransformExit, TransformSkip)):
        maybe_code = None
    else:
        maybe_code = result.code

    if format_code and maybe_code is not None:
        try:
            maybe_code = invoke_formatter(formatter_args, maybe_code)
        except Exception as ex:
            # A formatter error turns the whole run into a failure. Keep the
            # warnings gathered so far and record the traceback for debugging.
            maybe_code = None
            result = TransformFailure(
                error=ex,
                traceback_str=traceback.format_exc(),
                warning_messages=result.warning_messages,
            )

    # Report the outcome no matter how we got here.
    print_execution_result(result)
    return maybe_code

186 

187 

@dataclass(frozen=True)
class ExecutionResult:
    """
    Outcome of running a codemod against a single file.
    """

    # Path of the file this result describes.
    filename: str
    # True when the codemod (and formatter, if any) actually altered the code.
    changed: bool
    # The underlying transform outcome (success, failure, skip or exit).
    transform_result: TransformResult

196 

197 

@dataclass(frozen=True)
class ExecutionConfig:
    """
    Immutable bundle of per-run settings consumed by ``_execute_transform``.
    """

    # Regex patterns; a file whose name fully matches any of them is skipped.
    blacklist_patterns: Sequence[str] = ()
    # Whether to run the external formatter on files the codemod changed.
    format_code: bool = False
    # Command line of the external formatter.
    formatter_args: Sequence[str] = ()
    # Text whose presence marks a file as generated.
    generated_code_marker: str = _DEFAULT_GENERATED_CODE_MARKER
    # When False, files containing the generated-code marker are skipped.
    include_generated: bool = False
    # Python version to parse with; None uses the parser's default.
    python_version: Optional[str] = None
    # Repository root used to compute module and package names ("." if unset).
    repo_root: Optional[str] = None
    # Lines of diff context; when set (truthy) a diff is produced instead of
    # writing the file back.
    unified_diff: Optional[int] = None

208 

209 

def _execute_transform(  # noqa: C901
    transformer: Codemod,
    filename: str,
    config: ExecutionConfig,
) -> ExecutionResult:
    """
    Apply ``transformer`` to the file at ``filename`` according to ``config``.

    The file is skipped when its name fully matches a blacklist pattern or when
    it contains the generated-code marker (unless generated files are included).
    Otherwise it is parsed, transformed and, if requested and changed, run
    through the formatter. In unified-diff mode the diff text is returned as the
    result's code and the file is left alone; otherwise a changed file is
    written back in place.

    Never raises for ``Exception`` or ``KeyboardInterrupt``: those are reported
    through the returned ``ExecutionResult`` as a failure or an exit respectively.
    """

    def unchanged(outcome: TransformResult) -> ExecutionResult:
        # Every non-success outcome reports the file as untouched.
        return ExecutionResult(
            filename=filename, changed=False, transform_result=outcome
        )

    def failure(error: Exception) -> ExecutionResult:
        # Must be called while ``error`` is being handled so that
        # traceback.format_exc() picks up the active exception.
        return unchanged(
            TransformFailure(
                error=error,
                traceback_str=traceback.format_exc(),
                warning_messages=transformer.context.warnings,
            )
        )

    for pattern in config.blacklist_patterns:
        if re.fullmatch(pattern, filename):
            return unchanged(
                TransformSkip(
                    skip_reason=SkipReason.BLACKLISTED,
                    skip_description=f"Blacklisted by pattern {pattern}.",
                )
            )

    try:
        with open(filename, "rb") as source_file:
            oldcode = source_file.read()

        # Skip generated files
        marker_bytes_present = (
            not config.include_generated
            and config.generated_code_marker.encode("utf-8") in oldcode
        )
        if marker_bytes_present:
            return unchanged(
                TransformSkip(
                    skip_reason=SkipReason.GENERATED,
                    skip_description="Generated file.",
                )
            )

        # Somewhat gross hack to provide the filename in the transform's context.
        # We do this after the fork so that a context that was initialized with
        # some defaults before calling parallel_exec_transform_with_prettyprint
        # will be updated per-file.
        transformer.context = replace(
            transformer.context,
            filename=filename,
            scratch={},
        )

        # Work out the module and package name for this file. Failing to do so
        # is reported on stderr but does not stop the codemod.
        try:
            located = calculate_module_and_package(config.repo_root or ".", filename)
            transformer.context = replace(
                transformer.context,
                full_module_name=located.name,
                full_package_name=located.package,
            )
        except ValueError as ex:
            print(
                f"Failed to determine module name for {filename}: {ex}", file=sys.stderr
            )

        # Run the transform, bailing out on interruption, skip or failure.
        try:
            if config.python_version is not None:
                parser_config = PartialParserConfig(
                    python_version=str(config.python_version)
                )
            else:
                parser_config = PartialParserConfig()
            input_tree = parse_module(oldcode, config=parser_config)
            output_tree = transformer.transform_module(input_tree)
            newcode = output_tree.bytes
            encoding = output_tree.encoding
        except KeyboardInterrupt:
            return unchanged(TransformExit())
        except SkipFile as ex:
            return unchanged(
                TransformSkip(
                    skip_reason=SkipReason.OTHER,
                    skip_description=str(ex),
                    warning_messages=transformer.context.warnings,
                )
            )
        except Exception as ex:
            return failure(ex)

        # Call formatter if needed, but only if we actually changed something in
        # this file
        if config.format_code and newcode != oldcode:
            try:
                newcode = invoke_formatter(config.formatter_args, newcode)
            except KeyboardInterrupt:
                return unchanged(TransformExit())
            except Exception as ex:
                return failure(ex)

        # Format as unified diff if needed, otherwise save it back
        changed = oldcode != newcode
        if config.unified_diff:
            newcode = diff_code(
                oldcode.decode(encoding),
                newcode.decode(encoding),
                config.unified_diff,
                filename=filename,
            )
        else:
            # Write back if we changed
            if changed:
                with open(filename, "wb") as target_file:
                    target_file.write(newcode)
            # Not strictly necessary, but saves space in pickle since we won't use it
            newcode = ""

        # Inform success
        return ExecutionResult(
            filename=filename,
            changed=changed,
            transform_result=TransformSuccess(
                warning_messages=transformer.context.warnings, code=newcode
            ),
        )
    except KeyboardInterrupt:
        return unchanged(TransformExit())
    except Exception as ex:
        return failure(ex)

368 

369 

class Progress:
    """
    Single-line progress indicator written to stderr.

    Each update erases the current terminal line and rewrites it with the
    elapsed time, the percentage complete and a rough estimate of the time
    remaining. When constructed with ``enabled=False`` nothing is ever printed.
    """

    # Carriage return followed by the ANSI "erase entire line" sequence.
    ERASE_CURRENT_LINE: str = "\r\033[2K"

    def __init__(self, *, enabled: bool, total: int) -> None:
        """
        ``enabled`` toggles all output; ``total`` is the number of files that
        will be processed.
        """
        self.enabled = enabled
        self.total = total
        # 1/100 = 0, len("0") = 1, precision = 0, more digits for more files
        self.pretty_precision: int = len(str(self.total // 100)) - 1
        # Pretend we start processing immediately. This is not true, but it's
        # close enough to true.
        self.started_at: float = time.time()

    def print(self, finished: int) -> None:
        """
        Redraw the progress line given that ``finished`` files are done.
        """
        if not self.enabled:
            return
        left = self.total - finished
        if self.total > 0:
            percent = 100.0 * (float(finished) / float(self.total))
        else:
            # Nothing to process means nothing is outstanding; report complete
            # instead of dividing by zero.
            percent = 100.0
        elapsed_time = max(time.time() - self.started_at, 0)

        print(
            f"{self.ERASE_CURRENT_LINE}{self._human_seconds(elapsed_time)} {percent:.{self.pretty_precision}f}% complete, {self.estimate_completion(elapsed_time, finished, left)} estimated for {left} files to go...",
            end="",
            file=sys.stderr,
        )

    def _human_seconds(self, seconds: Union[int, float]) -> str:
        """
        This returns a string which is a human-ish readable elapsed time such
        as 30.42s or 10m 31s
        """

        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        if hours > 0:
            return f"{hours:.0f}h {minutes:02.0f}m {seconds:02.0f}s"
        elif minutes > 0:
            return f"{minutes:02.0f}m {seconds:02.0f}s"
        else:
            return f"{seconds:02.2f}s"

    def estimate_completion(
        self, elapsed_seconds: float, files_finished: int, files_left: int
    ) -> str:
        """
        Computes a really basic estimated completion given a number of
        operations still to do. Returns ``"[calculating]"`` while no rate can be
        derived yet, i.e. before any file has finished or before any measurable
        time has elapsed.
        """

        if files_finished <= 0 or elapsed_seconds <= 0:
            # Technically infinite but calculating sounds better. The elapsed
            # time can legitimately be zero on clocks with coarse resolution,
            # which would otherwise divide by zero below.
            return "[calculating]"

        fps = files_finished / elapsed_seconds
        estimated_seconds_left = files_left / fps
        return self._human_seconds(estimated_seconds_left)

    def clear(self) -> None:
        """
        Erase the progress line, if output is enabled.
        """
        if not self.enabled:
            return
        print(self.ERASE_CURRENT_LINE, end="", file=sys.stderr)

430 

431 

def _print_parallel_result(
    exec_result: ExecutionResult,
    progress: Progress,
    *,
    unified_diff: bool,
    show_successes: bool,
    hide_generated: bool,
    hide_blacklisted: bool,
) -> None:
    """
    Report the outcome for one file processed by the parallel runner.

    Skips and failures are always written to stderr, except that blacklisted and
    generated skips are suppressed when ``hide_blacklisted`` / ``hide_generated``
    are set. Successes are written to stderr only when ``show_successes`` is set
    or the transform produced warnings. The progress line is cleared before any
    message is printed so that the two do not interleave.

    When ``unified_diff`` is set, a successful result's code holds a diff, which
    is printed to stdout if it is non-empty.
    """
    filename = exec_result.filename
    result = exec_result.transform_result

    if isinstance(result, TransformSkip):
        # Skipped file, print message and don't write back since not changed.
        if not (
            (result.skip_reason is SkipReason.BLACKLISTED and hide_blacklisted)
            or (result.skip_reason is SkipReason.GENERATED and hide_generated)
        ):
            progress.clear()
            print(f"Codemodding {filename}", file=sys.stderr)
            print_execution_result(result)
            print(
                f"Skipped codemodding {filename}: {result.skip_description}\n",
                file=sys.stderr,
            )
    elif isinstance(result, TransformFailure):
        # Print any exception, don't write the file back.
        progress.clear()
        print(f"Codemodding {filename}", file=sys.stderr)
        print_execution_result(result)
        print(f"Failed to codemod {filename}\n", file=sys.stderr)
    elif isinstance(result, TransformSuccess):
        if show_successes or result.warning_messages:
            # Print any warnings, save the changes if there were any.
            progress.clear()
            print(f"Codemodding {filename}", file=sys.stderr)
            print_execution_result(result)
            print(
                f"Successfully codemodded {filename}"
                + (" with warnings\n" if result.warning_messages else "\n"),
                file=sys.stderr,
            )

        # In unified diff mode, the code is a diff we must print. An unchanged
        # file yields an empty diff; skip it so that stdout contains only real
        # diffs rather than one blank line per untouched file.
        if unified_diff and result.code:
            print(result.code)

478 

479 

@dataclass(frozen=True)
class ParallelTransformResult:
    """
    Summary returned by
    :func:`~libcst.codemod.parallel_exec_transform_with_prettyprint` after it
    has been run against a series of files. It only carries counts: how many
    files were codemodded successfully, how many failed, how many warnings were
    produced across all files, and how many files were skipped.
    """

    #: Count of files that were transformed successfully.
    successes: int
    #: Count of files for which the transform failed.
    failures: int
    #: Count of warnings emitted while running the transform across files.
    warnings: int
    #: Count of files that were skipped, whether because they were blacklisted,
    #: generated, or because the codemod asked to skip them.
    skips: int

500 

501 

# ``imap_unordered`` passes each worker exactly one argument and there is no
# ``istarmap_unordered``, hence this unfortunate adapter.
def _execute_transform_wrap(job: Dict[str, Any]) -> ExecutionResult:
    """
    Expand ``job`` into keyword arguments for ``_execute_transform`` and return
    its result.
    """
    outcome = _execute_transform(**job)
    return outcome

507 

508 

def parallel_exec_transform_with_prettyprint(  # noqa: C901
    transform: Codemod,
    files: Sequence[str],
    *,
    jobs: Optional[int] = None,
    unified_diff: Optional[int] = None,
    include_generated: bool = False,
    generated_code_marker: str = _DEFAULT_GENERATED_CODE_MARKER,
    format_code: bool = False,
    formatter_args: Sequence[str] = (),
    show_successes: bool = False,
    hide_generated: bool = False,
    hide_blacklisted: bool = False,
    hide_progress: bool = False,
    blacklist_patterns: Sequence[str] = (),
    python_version: Optional[str] = None,
    repo_root: Optional[str] = None,
) -> ParallelTransformResult:
    """
    Given a list of files and an instantiated codemod we should apply to them,
    fork and apply the codemod in parallel to all of the files, including any
    configured formatter. The ``jobs`` parameter controls the maximum number of
    in-flight transforms, and needs to be at least 1. If not included, the number
    of jobs will automatically be set to the number of CPU cores. If ``unified_diff``
    is set to a number, changes to files will be printed to stdout with
    ``unified_diff`` lines of context. If it is set to ``None`` or left out, files
    themselves will be updated with changes and formatting. If a
    ``python_version`` is provided, then we will parse each source file using
    this version. Otherwise, we will use the version of the currently executing python
    binary.

    A progress indicator as well as any generated warnings will be printed to stderr.
    To suppress the interactive progress indicator, set ``hide_progress`` to ``True``.
    Files that include the generated code marker will be skipped unless the
    ``include_generated`` parameter is set to ``True``. Similarly, files that match
    a supplied blacklist of regex patterns will be skipped. Warnings for skipping
    both blacklisted and generated files will be printed to stderr along with
    warnings generated by the codemod unless ``hide_blacklisted`` and
    ``hide_generated`` are set to ``True``. Files that were successfully codemodded
    will not be printed to stderr unless ``show_successes`` is set to ``True``.

    To make this API possible, we take an instantiated transform. This is due to
    the fact that lambdas are not pickleable and pickling functions is undefined.
    This means we're implicitly relying on fork behavior on UNIX-like systems, and
    this function will not work on Windows systems. To create a command-line utility
    that runs on Windows, please instead see
    :func:`~libcst.codemod.exec_transform_with_prettyprint`.

    Returns a :class:`ParallelTransformResult` with the number of successes,
    failures, skips and warnings. An empty ``files`` list yields all-zero counts.
    Raises ``Exception`` if ``jobs`` is given and is less than 1.
    """

    # Ensure that we have no duplicates, otherwise we might get race conditions
    # on write.
    files = sorted({os.path.abspath(f) for f in files})
    total = len(files)

    # Validate the caller's request before clamping it to the amount of work;
    # otherwise an empty file list would clamp the job count to zero and be
    # misreported as an invalid ``jobs`` value.
    if jobs is not None and jobs < 1:
        raise Exception("Must have at least one job to process!")

    if total == 0:
        return ParallelTransformResult(successes=0, failures=0, skips=0, warnings=0)

    progress = Progress(enabled=not hide_progress, total=total)

    chunksize = 4
    # Grab number of cores if we need to, and never start more workers than
    # there are chunks of files to hand out.
    jobs = min(
        jobs if jobs is not None else cpu_count(),
        (total + chunksize - 1) // chunksize,
    )

    if repo_root is not None:
        # Make sure if there is a root that we have the absolute path to it.
        repo_root = os.path.abspath(repo_root)
        # Spin up a full repo metadata manager so that we can provide metadata
        # like type inference to individual forked processes.
        print("Calculating full-repo metadata...", file=sys.stderr)
        metadata_manager = FullRepoManager(
            repo_root,
            files,
            transform.get_inherited_dependencies(),
        )
        metadata_manager.resolve_cache()
        transform.context = replace(
            transform.context,
            metadata_manager=metadata_manager,
        )
        print("Executing codemod...", file=sys.stderr)

    config = ExecutionConfig(
        repo_root=repo_root,
        unified_diff=unified_diff,
        include_generated=include_generated,
        generated_code_marker=generated_code_marker,
        format_code=format_code,
        formatter_args=formatter_args,
        blacklist_patterns=blacklist_patterns,
        python_version=python_version,
    )

    if total == 1 or jobs == 1:
        # Simple case, we should not pay for process overhead.
        # Let's just use a dummy synchronous pool.
        jobs = 1
        pool_impl = DummyPool
    else:
        pool_impl = Pool
        # Warm the parser, pre-fork.
        parse_module(
            "",
            config=(
                PartialParserConfig(python_version=python_version)
                if python_version is not None
                else PartialParserConfig()
            ),
        )

    successes: int = 0
    failures: int = 0
    warnings: int = 0
    skips: int = 0

    with pool_impl(processes=jobs) as p:  # type: ignore
        args = [
            {
                "transformer": transform,
                "filename": filename,
                "config": config,
            }
            for filename in files
        ]
        try:
            for result in p.imap_unordered(
                _execute_transform_wrap, args, chunksize=chunksize
            ):
                # Print an execution result, keep track of failures
                _print_parallel_result(
                    result,
                    progress,
                    unified_diff=bool(unified_diff),
                    show_successes=show_successes,
                    hide_generated=hide_generated,
                    hide_blacklisted=hide_blacklisted,
                )

                outcome = result.transform_result
                if isinstance(outcome, TransformFailure):
                    failures += 1
                elif isinstance(outcome, TransformSuccess):
                    successes += 1
                elif isinstance(outcome, (TransformExit, TransformSkip)):
                    skips += 1

                warnings += len(outcome.warning_messages)

                # Redraw only after counting this file, so that the indicator
                # includes the result that was just received.
                progress.print(successes + failures + skips)
        finally:
            progress.clear()

    # Summarize how every file fared.
    return ParallelTransformResult(
        successes=successes, failures=failures, skips=skips, warnings=warnings
    )

668 )