Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/git/diff.py: 29%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

299 statements  

1# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors 

2# 

3# This module is part of GitPython and is released under the 

4# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ 

5 

6__all__ = ["DiffConstants", "NULL_TREE", "NULL_TREE_SHA", "INDEX", "Diffable", "DiffIndex", "Diff"] 

7 

8import enum 

9import re 

10import warnings 

11 

12from git.cmd import handle_process_output 

13from git.compat import defenc 

14from git.objects.blob import Blob 

15from git.objects.util import mode_str_to_int 

16from git.util import finalize_process, hex_to_bin 

17 

18# typing ------------------------------------------------------------------ 

19 

20from typing import ( 

21 Any, 

22 Iterator, 

23 List, 

24 Match, 

25 Optional, 

26 Sequence, 

27 Tuple, 

28 TYPE_CHECKING, 

29 TypeVar, 

30 Union, 

31 cast, 

32) 

33from git.types import PathLike, Literal 

34 

35if TYPE_CHECKING: 

36 from subprocess import Popen 

37 

38 from git.cmd import Git 

39 from git.objects.base import IndexObject 

40 from git.objects.commit import Commit 

41 from git.objects.tree import Tree 

42 from git.repo.base import Repo 

43 

44Lit_change_type = Literal["A", "D", "C", "M", "R", "T", "U"] 

45 

46# ------------------------------------------------------------------------ 

47 

48 

@enum.unique
class DiffConstants(enum.Enum):
    """Sentinel values understood by :meth:`Diffable.diff`.

    Each member may be passed as the ``other`` argument of :meth:`Diffable.diff` to
    select a comparison target that is not an ordinary tree or commit.

    :note:
        The members are re-exported under the same names by the :mod:`git.diff`
        module, by the :class:`Diffable` class (and hence its subclasses and
        instances), and by the top-level :mod:`git` module.
    """

    NULL_TREE = enum.auto()
    """Request a comparison against the empty tree.

    The same object is reachable as :const:`git.NULL_TREE`,
    :const:`git.diff.NULL_TREE`, and :const:`Diffable.NULL_TREE`.
    """

    INDEX = enum.auto()
    """Request a comparison against the index.

    The same object is reachable as :const:`git.INDEX`, :const:`git.diff.INDEX`, and
    :const:`Diffable.INDEX`. :const:`Diffable.Index` is an older spelling retained as
    an alias for backward compatibility and may still be used.
    """

76 

77 

NULL_TREE: Literal[DiffConstants.NULL_TREE] = DiffConstants.NULL_TREE
"""Module-level alias of :const:`DiffConstants.NULL_TREE`.

Pass it as the ``other`` argument of :meth:`Diffable.diff` to compare against the
empty tree. The same object is also exposed as :const:`git.NULL_TREE` and
:const:`Diffable.NULL_TREE`.
"""

NULL_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
"""Hex SHA of Git's canonical empty tree object."""

INDEX: Literal[DiffConstants.INDEX] = DiffConstants.INDEX
"""Module-level alias of :const:`DiffConstants.INDEX`.

Pass it as the ``other`` argument of :meth:`Diffable.diff` to compare against the
index. The same object is also exposed as :const:`git.INDEX`,
:const:`Diffable.INDEX`, and (for backward compatibility) :const:`Diffable.Index`.
"""

98 

# Backslash followed by three digits; kept for unquoted input and for any external
# user of this private name.
_octal_byte_re = re.compile(rb"\\([0-9]{3})")

# One escape sequence inside a double-quoted path: a backslash followed by either a
# three-digit octal byte value (at most 377) or a single C-style escape character.
_quoted_escape_re = re.compile(rb'\\([0-3][0-7]{2}|[abfnrtv"\\])')

# Byte produced by each single-character escape.
_simple_escapes = {
    b"a": b"\a",
    b"b": b"\b",
    b"f": b"\f",
    b"n": b"\n",
    b"r": b"\r",
    b"t": b"\t",
    b"v": b"\v",
    b'"': b'"',
    b"\\": b"\\",
}


def _octal_repl(matchobj: Match) -> bytes:
    """Return the single byte whose octal value was captured in group 1."""
    return bytes((int(matchobj.group(1), 8),))


def _quoted_escape_repl(matchobj: Match) -> bytes:
    """Return the byte denoted by one escape matched by ``_quoted_escape_re``."""
    token = matchobj.group(1)
    simple = _simple_escapes.get(token)
    if simple is not None:
        return simple
    return bytes((int(token, 8),))


def decode_path(path: bytes, has_ab_prefix: bool = True) -> Optional[bytes]:
    """Turn a path taken from a patch header into the raw path bytes.

    :param path:
        The path as it appears in the header. It may be wrapped in double quotes,
        in which case C-style escapes (``\\n``, ``\\t``, ``\\"``, ``\\\\``, three-digit
        octal bytes, ...) inside the quotes are decoded.

    :param has_ab_prefix:
        If ``True``, the decoded path must start with ``a/`` or ``b/`` and that
        prefix is removed.

    :return:
        ``None`` if `path` is ``/dev/null``, otherwise the decoded path.
    """
    if path == b"/dev/null":
        return None

    if path.startswith(b'"') and path.endswith(b'"'):
        # Decode every escape in a single left-to-right pass. Chained replace()
        # calls would re-interpret the output of an earlier replacement, e.g. the
        # escaped backslash in ``\\n`` would be misread as the start of ``\n``.
        path = _quoted_escape_re.sub(_quoted_escape_repl, path[1:-1])
    else:
        # Unquoted input: keep the historical behaviour of decoding octal escapes.
        path = _octal_byte_re.sub(_octal_repl, path)

    if has_ab_prefix:
        assert path.startswith(b"a/") or path.startswith(b"b/")
        path = path[2:]

    return path

123 

124 

class Diffable:
    """Mixin providing :meth:`diff` for objects that can be compared with another
    object of a compatible type.

    :note:
        A :attr:`repo` member is required and must be supplied by the subclass, as
        is the case for :class:`~git.objects.base.Object` instances. For practical
        reasons this class does not derive from :class:`~git.objects.base.Object`.
    """

    __slots__ = ()

    repo: "Repo"
    """Repository to operate on. Must be provided by subclass or sibling class."""

    NULL_TREE = NULL_TREE
    """Request a comparison against the empty tree.

    Accepted by :meth:`diff` as a value of its ``other`` parameter.

    This is the same object as :const:`DiffConstants.NULL_TREE`,
    :const:`git.NULL_TREE` and :const:`git.diff.NULL_TREE`.
    """

    INDEX = INDEX
    """Request a comparison against the index.

    Accepted by :meth:`diff` as a value of its ``other`` parameter.

    This is the same object as :const:`DiffConstants.INDEX`, :const:`git.INDEX` and
    :const:`git.diff.INDEX`. It is also available as :const:`Diffable.Index`, which
    is kept for backward compatibility and is defined as an alias of this.
    """

    Index = INDEX
    """Backward-compatible alias of :const:`~Diffable.INDEX`.

    See :const:`~Diffable.INDEX` and :meth:`diff` for details.

    :note:
        Although always meant for use as an opaque constant, this was formerly
        defined as a class. Its usage is unchanged, but static type annotations that
        attempt to permit only this object must be changed to avoid new mypy errors.
        This was previously not possible to do, though ``Type[Diffable.Index]``
        approximated it. It is now possible to do precisely, using
        ``Literal[DiffConstants.INDEX]``.
    """

    def _process_diff_args(
        self,
        args: List[Union[PathLike, "Diffable"]],
    ) -> List[Union[PathLike, "Diffable"]]:
        """Hook invoked by :meth:`diff` right before the git command is executed.

        The base implementation hands the list back untouched; subclasses can
        override it to alter the behaviour of the superclass.

        :return:
            Possibly altered version of the given args list.
        """
        return args

    def diff(
        self,
        other: Union[DiffConstants, "Tree", "Commit", str, None] = INDEX,
        paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None,
        create_patch: bool = False,
        **kwargs: Any,
    ) -> "DiffIndex[Diff]":
        """Create diffs between two items being trees, trees and index or an index and
        the working tree. Detects renames automatically.

        :param other:
            This is the item to compare us with.

            * If ``None``, we will be compared to the working tree.

            * If a :class:`~git.types.Tree_ish` or string, it will be compared against
              the respective tree.

            * If :const:`INDEX`, it will be compared against the index.

            * If :const:`NULL_TREE`, it will compare against the empty tree.

            This parameter defaults to :const:`INDEX` (rather than ``None``) so that the
            method will not by default fail on bare repositories.

        :param paths:
            A single path, or a list or tuple of paths, to limit the diff to.

        :param create_patch:
            If ``True``, the diff is requested in patch format (``-p``) and the
            returned :class:`Diff` objects carry the patch text; otherwise raw format
            is requested. Patches are somewhat costly as blobs have to be read and
            diffed.

        :param kwargs:
            Additional arguments passed to :manpage:`git-diff(1)`, such as ``R=True`` to
            swap both sides of the diff. Passing any of ``find_renames``,
            ``no_renames`` or ``M`` suppresses the default ``-M`` option.

        :return:
            A :class:`DiffIndex` representing the computed diff.

        :note:
            On a bare repository, `other` needs to be provided as :const:`INDEX`, or as
            an instance of :class:`~git.objects.tree.Tree` or
            :class:`~git.objects.commit.Commit`, or a git command error will occur.
        """
        # Leading arguments: ourselves first, then whatever selects the other side.
        # Anything other than the index or the working tree goes through diff-tree,
        # which needs "-r" to recurse.
        diff_cmd = self.repo.git.diff
        leading: List[Union[PathLike, Diffable]] = [self]
        if other is INDEX:
            leading.append("--cached")
        elif other is NULL_TREE:
            leading.extend(("--root", "-r"))
            diff_cmd = self.repo.git.diff_tree
        elif other is not None:
            leading.extend((other, "-r"))
            diff_cmd = self.repo.git.diff_tree

        # "--abbrev=40": we need full shas.
        # "--full-index": get full index paths, not only filenames.
        options: List[Union[PathLike, Diffable]] = ["--abbrev=40", "--full-index"]

        # Only add the default '-M' (check for renames) if the user is not overriding it.
        if all(name not in kwargs for name in ("find_renames", "no_renames", "M")):
            options.append("-M")

        options.extend(("-p", "--no-ext-diff") if create_patch else ("--raw", "-z"))

        # Ensure we never see colored output.
        # Fixes: https://github.com/gitpython-developers/GitPython/issues/172
        options.append("--no-color")

        args = leading + options

        # Normalise a single path to a one-element list; None stays None.
        if paths is not None and not isinstance(paths, (tuple, list)):
            paths = [paths]
        if paths:
            args.append("--")
            args.extend(paths)

        kwargs["as_process"] = True
        proc = diff_cmd(*self._process_diff_args(args), **kwargs)

        if create_patch:
            index = Diff._index_from_patch_format(self.repo, proc)
        else:
            index = Diff._index_from_raw_format(self.repo, proc)

        proc.wait()
        return index

284 

285 

T_Diff = TypeVar("T_Diff", bound="Diff")


class DiffIndex(List[T_Diff]):
    R"""A list of :class:`Diff`\s that can additionally be queried by the kind of
    change each diff represents.

    The class improves the diff handling convenience.
    """

    change_type: Sequence[Literal["A", "C", "D", "R", "M", "T"]] = ("A", "C", "D", "R", "M", "T")  # noqa: F821
    """Change type invariant identifying possible ways a blob can have changed:

    * ``A`` = Added
    * ``C`` = Copied
    * ``D`` = Deleted
    * ``R`` = Renamed
    * ``M`` = Modified
    * ``T`` = Changed in the type
    """

    def iter_change_type(self, change_type: Lit_change_type) -> Iterator[T_Diff]:
        """
        :return:
            Iterator yielding :class:`Diff` instances that match the given `change_type`

        :param change_type:
            Member of :attr:`DiffIndex.change_type`, namely:

            * 'A' for added paths
            * 'C' for copied paths
            * 'D' for deleted paths
            * 'R' for renamed paths
            * 'M' for paths with modified data
            * 'T' for changed in the type paths

        :raise ValueError:
            If `change_type` is not a member of :attr:`DiffIndex.change_type`. As this
            is a generator, the error surfaces when iteration starts.
        """
        if change_type not in self.change_type:
            raise ValueError("Invalid change type: %s" % change_type)

        # Secondary test applied when a diff's own change_type attribute does not
        # equal the requested one. 'T' has no secondary test.
        secondary_tests = {
            "A": lambda d: d.new_file,
            "D": lambda d: d.deleted_file,
            "C": lambda d: d.copied_file,
            "R": lambda d: d.renamed_file,
            "M": lambda d: d.a_blob and d.b_blob and d.a_blob != d.b_blob,
        }
        secondary = secondary_tests.get(change_type)

        for candidate in self:
            if candidate.change_type == change_type:
                yield candidate
            elif secondary is not None and secondary(candidate):
                yield candidate

337 

338 

class Diff:
    """A Diff contains diff information between two Trees.

    It contains two sides a and b of the diff. Members are prefixed with "a" and "b"
    respectively to indicate that.

    Diffs keep information about the changed blob objects, the file mode, renames,
    deletions and new files.

    There are a few cases where ``None`` has to be expected as member variable value:

    New File::

        a_mode is None
        a_blob is None
        a_path is None

    Deleted File::

        b_mode is None
        b_blob is None
        b_path is None

    Working Tree Blobs:

        When comparing to working trees, the working tree blob will have a null hexsha
        as a corresponding object does not yet exist. The mode will be null as well. The
        path will be available, though.

        If it is listed in a diff, the working tree version of the file must differ from
        the version in the index or tree, and hence has been modified.
    """

    # Precompiled regex matching one file header of a patch. It is compiled with
    # re.VERBOSE, so whitespace inside the pattern is not significant.
    re_header = re.compile(
        rb"""
            ^diff[ ]--git
                [ ](?P<a_path_fallback>"?[ab]/.+?"?)[ ](?P<b_path_fallback>"?[ab]/.+?"?)\n
            (?:^old[ ]mode[ ](?P<old_mode>\d+)\n
               ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
            (?:^similarity[ ]index[ ]\d+%\n
               ^rename[ ]from[ ](?P<rename_from>.*)\n
               ^rename[ ]to[ ](?P<rename_to>.*)(?:\n|$))?
            (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
            (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
            (?:^similarity[ ]index[ ]\d+%\n
               ^copy[ ]from[ ].*\n
               ^copy[ ]to[ ](?P<copied_file_name>.*)(?:\n|$))?
            (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+)
                \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
            (?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
            (?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
        """,
        re.VERBOSE | re.MULTILINE,
    )

    # These can be used for comparisons.
    NULL_HEX_SHA = "0" * 40
    NULL_BIN_SHA = b"\0" * 20

    __slots__ = (
        "a_blob",
        "b_blob",
        "a_mode",
        "b_mode",
        "a_rawpath",
        "b_rawpath",
        "new_file",
        "deleted_file",
        "copied_file",
        "raw_rename_from",
        "raw_rename_to",
        "diff",
        "change_type",
        "score",
    )

    def __init__(
        self,
        repo: "Repo",
        a_rawpath: Optional[bytes],
        b_rawpath: Optional[bytes],
        a_blob_id: Union[str, bytes, None],
        b_blob_id: Union[str, bytes, None],
        a_mode: Union[bytes, str, None],
        b_mode: Union[bytes, str, None],
        new_file: bool,
        deleted_file: bool,
        copied_file: bool,
        raw_rename_from: Optional[bytes],
        raw_rename_to: Optional[bytes],
        diff: Union[str, bytes, None],
        change_type: Optional[Lit_change_type],
        score: Optional[int],
    ) -> None:
        """Store the parsed pieces of one diff entry.

        :param repo:
            Repository the blobs are created in. If `a_rawpath` equals the path of one
            of its submodules and that submodule's module exists, the submodule's
            repository is used for the blobs instead.

        :param a_rawpath:
            Path on the a-side as bytes, or ``None``.

        :param b_rawpath:
            Path on the b-side as bytes, or ``None``.

        :param a_blob_id:
            Hex id of the a-side blob. ``None`` or :attr:`NULL_HEX_SHA` results in
            :attr:`a_blob` being ``None``.

        :param b_blob_id:
            Hex id of the b-side blob, treated like `a_blob_id`.

        :param a_mode:
            Mode string of the a-side, converted with ``mode_str_to_int``. A falsy
            value is stored as ``None``.

        :param b_mode:
            Mode string of the b-side, treated like `a_mode`.

        :param new_file:
            Stored as :attr:`new_file`.

        :param deleted_file:
            Stored as :attr:`deleted_file`.

        :param copied_file:
            Stored as :attr:`copied_file`.

        :param raw_rename_from:
            Rename source as bytes. Empty bytes are stored as ``None``.

        :param raw_rename_to:
            Rename target as bytes. Empty bytes are stored as ``None``.

        :param diff:
            Patch text belonging to this entry, if any.

        :param change_type:
            Status letter of the entry, or ``None`` if unknown.

        :param score:
            Numeric score that followed the status letter, or ``None``.
        """
        assert a_rawpath is None or isinstance(a_rawpath, bytes)
        assert b_rawpath is None or isinstance(b_rawpath, bytes)
        self.a_rawpath = a_rawpath
        self.b_rawpath = b_rawpath

        self.a_mode = mode_str_to_int(a_mode) if a_mode else None
        self.b_mode = mode_str_to_int(b_mode) if b_mode else None

        # Determine whether this diff references a submodule. If it does then
        # we need to overwrite "repo" to the corresponding submodule's repo instead.
        if repo and a_rawpath:
            # Decoded once up front; it does not change while scanning submodules.
            a_path_str = a_rawpath.decode(defenc, "replace")
            for submodule in repo.submodules:
                if submodule.path == a_path_str:
                    if submodule.module_exists():
                        repo = submodule.module()
                    break

        self.a_blob: Union["IndexObject", None]
        if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
            self.a_blob = None
        else:
            self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)

        self.b_blob: Union["IndexObject", None]
        if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
            self.b_blob = None
        else:
            self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)

        self.new_file: bool = new_file
        self.deleted_file: bool = deleted_file
        self.copied_file: bool = copied_file

        # Be clear and use None instead of empty strings.
        assert raw_rename_from is None or isinstance(raw_rename_from, bytes)
        assert raw_rename_to is None or isinstance(raw_rename_to, bytes)
        self.raw_rename_from = raw_rename_from or None
        self.raw_rename_to = raw_rename_to or None

        self.diff = diff
        self.change_type: Union[Lit_change_type, None] = change_type
        self.score = score

    def __eq__(self, other: object) -> bool:
        """Compare all slot attributes with those of another :class:`Diff`.

        :return:
            ``True`` if every attribute named in ``__slots__`` compares equal.
            ``NotImplemented`` if `other` is not a :class:`Diff`, so that comparing
            with an unrelated object yields ``False`` instead of raising
            :exc:`AttributeError`.
        """
        if not isinstance(other, Diff):
            return NotImplemented
        return not any(getattr(self, name) != getattr(other, name) for name in self.__slots__)

    def __ne__(self, other: object) -> bool:
        """:return: The negation of ``self == other``"""
        return not (self == other)

    def __hash__(self) -> int:
        """:return: Hash over the values of all attributes named in ``__slots__``"""
        return hash(tuple(getattr(self, n) for n in self.__slots__))

    def __str__(self) -> str:
        """:return: Multi-line, human-readable summary of this diff"""
        if self.a_blob:
            headline = "%s" % self.a_blob.path
        elif self.b_blob:
            headline = "%s" % self.b_blob.path
        else:
            # Neither side has a blob. Fall back to the stored paths instead of
            # emitting an unfilled "%s" placeholder as the headline.
            headline = self.a_path or self.b_path or ""

        msg = ""
        line_length = 0
        for blob, side in zip((self.a_blob, self.b_blob), ("lhs", "rhs")):
            if blob:
                line = "\n%s: %o | %s" % (side, blob.mode, blob.hexsha)
            else:
                line = "\n%s: None" % side
            line_length = max(len(line), line_length)
            msg += line

        # Underline the headline with the width of the longest lhs/rhs line.
        headline += "\n" + "=" * line_length

        if self.deleted_file:
            msg += "\nfile deleted in rhs"
        if self.new_file:
            msg += "\nfile added in rhs"
        if self.copied_file:
            msg += "\nfile %r copied from %r" % (self.b_path, self.a_path)
        if self.rename_from:
            msg += "\nfile renamed from %r" % self.rename_from
        if self.rename_to:
            msg += "\nfile renamed to %r" % self.rename_to
        if self.diff:
            msg += "\n---"
            try:
                msg += self.diff.decode(defenc) if isinstance(self.diff, bytes) else self.diff
            except UnicodeDecodeError:
                msg += "OMITTED BINARY DATA"
            msg += "\n---"

        return headline + msg

    @property
    def a_path(self) -> Optional[str]:
        """:return: :attr:`a_rawpath` decoded with replacement of undecodable bytes,
        or ``None`` if it is unset or empty"""
        return self.a_rawpath.decode(defenc, "replace") if self.a_rawpath else None

    @property
    def b_path(self) -> Optional[str]:
        """:return: :attr:`b_rawpath` decoded with replacement of undecodable bytes,
        or ``None`` if it is unset or empty"""
        return self.b_rawpath.decode(defenc, "replace") if self.b_rawpath else None

    @property
    def rename_from(self) -> Optional[str]:
        """:return: :attr:`raw_rename_from` decoded with replacement of undecodable
        bytes, or ``None`` if it is unset"""
        return self.raw_rename_from.decode(defenc, "replace") if self.raw_rename_from else None

    @property
    def rename_to(self) -> Optional[str]:
        """:return: :attr:`raw_rename_to` decoded with replacement of undecodable
        bytes, or ``None`` if it is unset"""
        return self.raw_rename_to.decode(defenc, "replace") if self.raw_rename_to else None

    @property
    def renamed(self) -> bool:
        """Deprecated, use :attr:`renamed_file` instead.

        :return:
            ``True`` if the blob of our diff has been renamed

        :note:
            This property is deprecated.
            Please use the :attr:`renamed_file` property instead.
        """
        warnings.warn(
            "Diff.renamed is deprecated, use Diff.renamed_file instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.renamed_file

    @property
    def renamed_file(self) -> bool:
        """:return: ``True`` if the blob of our diff has been renamed"""
        return self.rename_from != self.rename_to

    @classmethod
    def _pick_best_path(cls, path_match: bytes, rename_match: bytes, path_fallback_match: bytes) -> Optional[bytes]:
        """Decode the first non-empty candidate among the header fields for a path.

        :param path_match:
            Path from the ``---``/``+++`` line (carries an ``a/`` or ``b/`` prefix).

        :param rename_match:
            Path from the ``rename from``/``rename to`` line (no prefix).

        :param path_fallback_match:
            Path from the ``diff --git`` line (carries an ``a/`` or ``b/`` prefix).

        :return:
            The decoded path, or ``None`` if all candidates are empty or the chosen
            one decodes to ``None``.
        """
        if path_match:
            return decode_path(path_match)

        if rename_match:
            return decode_path(rename_match, has_ab_prefix=False)

        if path_fallback_match:
            return decode_path(path_fallback_match)

        return None

    @classmethod
    def _index_from_patch_format(cls, repo: "Repo", proc: Union["Popen", "Git.AutoInterrupt"]) -> DiffIndex["Diff"]:
        """Create a new :class:`DiffIndex` from the given process output which must be
        in patch format.

        :param repo:
            The repository we are operating on.

        :param proc:
            :manpage:`git-diff(1)` process to read from
            (supports :class:`Git.AutoInterrupt <git.cmd.Git.AutoInterrupt>` wrapper).

        :return:
            :class:`DiffIndex`
        """

        # FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise.
        text_list: List[bytes] = []
        stderr_list: List[bytes] = []

        def finalize_process_with_stderr(proc: Union["Popen", "Git.AutoInterrupt"]) -> None:
            finalize_process(proc, stderr=b"".join(stderr_list))

        handle_process_output(
            proc, text_list.append, stderr_list.append, finalize_process_with_stderr, decode_streams=False
        )

        # For now, we have to bake the stream.
        text = b"".join(text_list)
        index: "DiffIndex" = DiffIndex()
        previous_header: Union[Match[bytes], None] = None
        a_path, b_path = None, None  # For mypy.
        a_mode, b_mode = None, None  # For mypy.
        for _header in cls.re_header.finditer(text):
            (
                a_path_fallback,
                b_path_fallback,
                old_mode,
                new_mode,
                rename_from,
                rename_to,
                new_file_mode,
                deleted_file_mode,
                copied_file_name,
                a_blob_id,
                b_blob_id,
                b_mode,
                a_path,
                b_path,
            ) = _header.groups()

            new_file = bool(new_file_mode)
            deleted_file = bool(deleted_file_mode)
            copied_file = bool(copied_file_name)

            a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
            b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)

            # Our only means to find the actual text is to see what has not been matched
            # by our regex, and then retro-actively assign it to our index.
            if previous_header is not None:
                index[-1].diff = text[previous_header.end() : _header.start()]

            # Make sure the mode is set if the path is set. Otherwise the resulting blob
            # is invalid. We just use the one mode we should have parsed.
            a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
            b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
            index.append(
                Diff(
                    repo,
                    a_path,
                    b_path,
                    a_blob_id and a_blob_id.decode(defenc),
                    b_blob_id and b_blob_id.decode(defenc),
                    a_mode and a_mode.decode(defenc),
                    b_mode and b_mode.decode(defenc),
                    new_file,
                    deleted_file,
                    copied_file,
                    rename_from,
                    rename_to,
                    None,
                    None,
                    None,
                )
            )

            previous_header = _header

        # Everything after the last header belongs to the last diff.
        if index and previous_header:
            index[-1].diff = text[previous_header.end() :]

        return index

    @staticmethod
    def _handle_diff_line(lines_bytes: bytes, repo: "Repo", index: DiffIndex["Diff"]) -> None:
        """Parse a chunk of raw-format diff output and append the resulting
        :class:`Diff` objects to `index`.

        :param lines_bytes:
            Raw output in which fields are separated by NUL bytes and every entry
            starts with a colon. It is decoded strictly, so undecodable bytes raise
            :exc:`UnicodeDecodeError`.

        :param repo:
            The repository we are operating on.

        :param index:
            The :class:`DiffIndex` to append to.
        """
        lines = lines_bytes.decode(defenc)

        # Discard everything before the first colon, and the colon itself.
        _, _, lines = lines.partition(":")

        for line in lines.split("\x00:"):
            if not line:
                # The line data is empty, skip.
                continue
            meta, _, path = line.partition("\x00")
            path = path.rstrip("\x00")
            a_blob_id: Optional[str]
            b_blob_id: Optional[str]
            old_mode, new_mode, a_blob_id, b_blob_id, _change_type = meta.split(None, 4)
            # Change type can be R100
            # R: status letter
            # 100: score (in case of copy and rename)
            change_type: Lit_change_type = cast(Lit_change_type, _change_type[0])
            score_str = _change_type[1:]
            score = int(score_str) if score_str.isdigit() else None
            path = path.strip("\n")
            a_path = path.encode(defenc)
            b_path = path.encode(defenc)
            deleted_file = False
            new_file = False
            copied_file = False
            rename_from = None
            rename_to = None

            # NOTE: We cannot conclude from the existence of a blob to change type,
            # as diffs with the working do not have blobs yet.
            if change_type == "D":
                b_blob_id = None  # Optional[str]
                deleted_file = True
            elif change_type == "A":
                a_blob_id = None
                new_file = True
            elif change_type == "C":
                copied_file = True
                a_path_str, b_path_str = path.split("\x00", 1)
                a_path = a_path_str.encode(defenc)
                b_path = b_path_str.encode(defenc)
            elif change_type == "R":
                a_path_str, b_path_str = path.split("\x00", 1)
                a_path = a_path_str.encode(defenc)
                b_path = b_path_str.encode(defenc)
                rename_from, rename_to = a_path, b_path
            elif change_type == "T":
                # Nothing to do.
                pass

            diff = Diff(
                repo,
                a_path,
                b_path,
                a_blob_id,
                b_blob_id,
                old_mode,
                new_mode,
                new_file,
                deleted_file,
                copied_file,
                rename_from,
                rename_to,
                "",
                change_type,
                score,
            )
            index.append(diff)

    @classmethod
    def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex[Diff]":
        """Create a new :class:`DiffIndex` from the given process output which must be
        in raw format.

        :param repo:
            The repository we are operating on.

        :param proc:
            Process to read output from.

        :return:
            :class:`DiffIndex`
        """
        # handles
        # :100644 100644 687099101... 37c5e30c8... M    .gitignore

        index: "DiffIndex" = DiffIndex()
        stderr_list: List[bytes] = []

        def finalize_process_with_stderr(proc: Union["Popen", "Git.AutoInterrupt"]) -> None:
            finalize_process(proc, stderr=b"".join(stderr_list))

        handle_process_output(
            proc,
            lambda byt: cls._handle_diff_line(byt, repo, index),
            stderr_list.append,
            finalize_process_with_stderr,
            decode_streams=False,
        )

        return index