Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

1082 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from typing import ( 

36 Callable, 

37 Optional, 

38 Protocol, 

39 Union, 

40 cast, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

85INFODIR = "info" 

86PACKDIR = "pack" 

87 

88# use permissions consistent with Git; just readable by everyone 

89# TODO: should packs also be non-writable on Windows? if so, that 

90# would require some rather significant adjustments to the test suite 

91PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

92 

93# Grace period for cleaning up temporary pack files (in seconds) 

94# Matches git's default of 2 weeks 

95DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

96 

97 

98def find_shallow(store, heads, depth): 

99 """Find shallow commits according to a given depth. 

100 

101 Args: 

102 store: An ObjectStore for looking up objects. 

103 heads: Iterable of head SHAs to start walking from. 

104 depth: The depth of ancestors to include. A depth of one includes 

105 only the heads themselves. 

106 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

107 considered shallow and unshallow according to the arguments. Note that 

108 these sets may overlap if a commit is reachable along multiple paths. 

109 """ 

110 parents = {} 

111 

112 def get_parents(sha): 

113 result = parents.get(sha, None) 

114 if not result: 

115 result = store[sha].parents 

116 parents[sha] = result 

117 return result 

118 

119 todo = [] # stack of (sha, depth) 

120 for head_sha in heads: 

121 obj = store[head_sha] 

122 # Peel tags if necessary 

123 while isinstance(obj, Tag): 

124 _, sha = obj.object 

125 obj = store[sha] 

126 if isinstance(obj, Commit): 

127 todo.append((obj.id, 1)) 

128 

129 not_shallow = set() 

130 shallow = set() 

131 while todo: 

132 sha, cur_depth = todo.pop() 

133 if cur_depth < depth: 

134 not_shallow.add(sha) 

135 new_depth = cur_depth + 1 

136 todo.extend((p, new_depth) for p in get_parents(sha)) 

137 else: 

138 shallow.add(sha) 

139 

140 return shallow, not_shallow 

141 

142 
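
# Example (illustrative sketch, not part of this module): build a tiny linear
# history in a MemoryObjectStore and ask for the shallow boundary at depth 2.
# The _example_commit helper and all identifiers below exist only for this sketch.
from dulwich.object_store import MemoryObjectStore, find_shallow
from dulwich.objects import Commit, Tree

store = MemoryObjectStore()
tree = Tree()
store.add_object(tree)

def _example_commit(parents, message):
    c = Commit()
    c.tree = tree.id
    c.parents = parents
    c.author = c.committer = b"Example <example@example.com>"
    c.author_time = c.commit_time = 0
    c.author_timezone = c.commit_timezone = 0
    c.message = message
    store.add_object(c)
    return c.id

root = _example_commit([], b"root")
mid = _example_commit([root], b"mid")
head = _example_commit([mid], b"head")

shallow, not_shallow = find_shallow(store, [head], depth=2)
assert not_shallow == {head} and shallow == {mid}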

143def get_depth( 

144 store, 

145 head, 

146 get_parents=lambda commit: commit.parents, 

147 max_depth=None, 

148): 

149 """Return the current available depth for the given head. 

150 For commits with multiple parents, the largest possible depth will be 

151 returned. 

152 

153 Args: 

154 head: commit to start from 

155 get_parents: optional function for getting the parents of a commit 

156 max_depth: maximum depth to search 

157 """ 

158 if head not in store: 

159 return 0 

160 current_depth = 1 

161 queue = [(head, current_depth)] 

162 while queue and (max_depth is None or current_depth < max_depth): 

163 e, depth = queue.pop(0) 

164 current_depth = max(current_depth, depth) 

165 cmt = store[e] 

166 if isinstance(cmt, Tag): 

167 _cls, sha = cmt.object 

168 cmt = store[sha] 

169 queue.extend( 

170 (parent, depth + 1) for parent in get_parents(cmt) if parent in store 

171 ) 

172 return current_depth 

173 

174 
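
# Example (illustrative sketch): an unknown head has depth 0; a present head
# yields the length of its longest parent chain (1 for a root commit). The
# all-ones SHA is just a placeholder.
from dulwich.object_store import MemoryObjectStore, get_depth

assert get_depth(MemoryObjectStore(), b"1" * 40) == 0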

175class PackContainer(Protocol): 

176 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

177 """Add a new pack.""" 

178 

179 

180class BaseObjectStore: 

181 """Object store interface.""" 

182 

183 def determine_wants_all( 

184 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

185 ) -> list[ObjectID]: 

186 def _want_deepen(sha): 

187 if not depth: 

188 return False 

189 if depth == DEPTH_INFINITE: 

190 return True 

191 return depth > self._get_depth(sha) 

192 

193 return [ 

194 sha 

195 for (ref, sha) in refs.items() 

196 if (sha not in self or _want_deepen(sha)) 

197 and not ref.endswith(PEELED_TAG_SUFFIX) 

198 and not sha == ZERO_SHA 

199 ] 

200 
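
# Example (illustrative sketch): determine_wants_all() keeps only refs whose
# targets are missing locally, skipping peeled-tag entries and the zero SHA.
# The SHAs and ref names below are placeholders made up for this sketch.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import ZERO_SHA, Blob

store = MemoryObjectStore()
present = Blob.from_string(b"already here")
store.add_object(present)
missing = b"2" * 40

wants = store.determine_wants_all(
    {
        b"refs/heads/main": missing,    # not in the store -> wanted
        b"refs/tags/v1": present.id,    # already present -> skipped
        b"refs/tags/v1^{}": missing,    # peeled entry -> skipped
        b"refs/heads/empty": ZERO_SHA,  # zero SHA -> skipped
    }
)
assert wants == [missing]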

201 def contains_loose(self, sha) -> bool: 

202 """Check if a particular object is present by SHA1 and is loose.""" 

203 raise NotImplementedError(self.contains_loose) 

204 

205 def __contains__(self, sha1: bytes) -> bool: 

206 """Check if a particular object is present by SHA1. 

207 

208 This method makes no distinction between loose and packed objects. 

209 """ 

210 return self.contains_loose(sha1) 

211 

212 @property 

213 def packs(self): 

214 """Iterable of pack objects.""" 

215 raise NotImplementedError 

216 

217 def get_raw(self, name) -> tuple[int, bytes]: 

218 """Obtain the raw text for an object. 

219 

220 Args: 

221 name: sha for the object. 

222 Returns: tuple with numeric type and object contents. 

223 """ 

224 raise NotImplementedError(self.get_raw) 

225 

226 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

227 """Obtain an object by SHA1.""" 

228 type_num, uncomp = self.get_raw(sha1) 

229 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

230 

231 def __iter__(self): 

232 """Iterate over the SHAs that are present in this store.""" 

233 raise NotImplementedError(self.__iter__) 

234 

235 def add_object(self, obj) -> None: 

236 """Add a single object to this object store.""" 

237 raise NotImplementedError(self.add_object) 

238 

239 def add_objects(self, objects, progress=None) -> None: 

240 """Add a set of objects to this object store. 

241 

242 Args: 

243 objects: Iterable over a list of (object, path) tuples 

244 """ 

245 raise NotImplementedError(self.add_objects) 

246 

247 def tree_changes( 

248 self, 

249 source, 

250 target, 

251 want_unchanged=False, 

252 include_trees=False, 

253 change_type_same=False, 

254 rename_detector=None, 

255 ): 

256 """Find the differences between the contents of two trees. 

257 

258 Args: 

259 source: SHA1 of the source tree 

260 target: SHA1 of the target tree 

261 want_unchanged: Whether unchanged files should be reported 

262 include_trees: Whether to include trees 

263 change_type_same: Whether to report files changing 

264 type in the same entry. 

265 Returns: Iterator over tuples with 

266 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

267 """ 

268 from .diff_tree import tree_changes 

269 

270 for change in tree_changes( 

271 self, 

272 source, 

273 target, 

274 want_unchanged=want_unchanged, 

275 include_trees=include_trees, 

276 change_type_same=change_type_same, 

277 rename_detector=rename_detector, 

278 ): 

279 yield ( 

280 (change.old.path, change.new.path), 

281 (change.old.mode, change.new.mode), 

282 (change.old.sha, change.new.sha), 

283 ) 

284 
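
# Example (illustrative sketch): diffing two one-file trees; the objects and
# file name here are made up for the sketch.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob, new_blob = Blob.from_string(b"old"), Blob.from_string(b"new")
old_tree, new_tree = Tree(), Tree()
old_tree.add(b"a.txt", 0o100644, old_blob.id)
new_tree.add(b"a.txt", 0o100644, new_blob.id)
for obj in (old_blob, new_blob, old_tree, new_tree):
    store.add_object(obj)

changes = list(store.tree_changes(old_tree.id, new_tree.id))
# [((b"a.txt", b"a.txt"), (0o100644, 0o100644), (old_blob.id, new_blob.id))]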

285 def iter_tree_contents(self, tree_id, include_trees=False): 

286 """Iterate the contents of a tree and all subtrees. 

287 

288 Iteration is depth-first pre-order, as in e.g. os.walk. 

289 

290 Args: 

291 tree_id: SHA1 of the tree. 

292 include_trees: If True, include tree objects in the iteration. 

293 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

294 tree. 

295 """ 

296 warnings.warn( 

297 "Please use dulwich.object_store.iter_tree_contents", 

298 DeprecationWarning, 

299 stacklevel=2, 

300 ) 

301 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

302 

303 def iterobjects_subset( 

304 self, shas: Iterable[bytes], *, allow_missing: bool = False 

305 ) -> Iterator[ShaFile]: 

306 for sha in shas: 

307 try: 

308 yield self[sha] 

309 except KeyError: 

310 if not allow_missing: 

311 raise 

312 

313 def find_missing_objects( 

314 self, 

315 haves, 

316 wants, 

317 shallow=None, 

318 progress=None, 

319 get_tagged=None, 

320 get_parents=lambda commit: commit.parents, 

321 ): 

322 """Find the missing objects required for a set of revisions. 

323 

324 Args: 

325 haves: Iterable over SHAs already in common. 

326 wants: Iterable over SHAs of objects to fetch. 

327 shallow: Set of shallow commit SHA1s to skip 

328 progress: Simple progress function that will be called with 

329 updated progress strings. 

330 get_tagged: Function that returns a dict of pointed-to sha -> 

331 tag sha for including tags. 

332 get_parents: Optional function for getting the parents of a 

333 commit. 

334 Returns: Iterator over (sha, path) pairs. 

335 """ 

336 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

337 finder = MissingObjectFinder( 

338 self, 

339 haves=haves, 

340 wants=wants, 

341 shallow=shallow, 

342 progress=progress, 

343 get_tagged=get_tagged, 

344 get_parents=get_parents, 

345 ) 

346 return iter(finder) 

347 

348 def find_common_revisions(self, graphwalker): 

349 """Find which revisions this store has in common using graphwalker. 

350 

351 Args: 

352 graphwalker: A graphwalker object. 

353 Returns: List of SHAs that are in common 

354 """ 

355 haves = [] 

356 sha = next(graphwalker) 

357 while sha: 

358 if sha in self: 

359 haves.append(sha) 

360 graphwalker.ack(sha) 

361 sha = next(graphwalker) 

362 return haves 

363 

364 def generate_pack_data( 

365 self, have, want, shallow=None, progress=None, ofs_delta=True 

366 ) -> tuple[int, Iterator[UnpackedObject]]: 

367 """Generate pack data objects for a set of wants/haves. 

368 

369 Args: 

370 have: List of SHA1s of objects that should not be sent 

371 want: List of SHA1s of objects that should be sent 

372 shallow: Set of shallow commit SHA1s to skip 

373 ofs_delta: Whether OFS deltas can be included 

374 progress: Optional progress reporting method 

375 """ 

376 # Note that the pack-specific implementation below is more efficient, 

377 # as it reuses deltas 

378 missing_objects = MissingObjectFinder( 

379 self, haves=have, wants=want, shallow=shallow, progress=progress 

380 ) 

381 object_ids = list(missing_objects) 

382 return pack_objects_to_data( 

383 [(self[oid], path) for oid, path in object_ids], 

384 ofs_delta=ofs_delta, 

385 progress=progress, 

386 ) 

387 

388 def peel_sha(self, sha): 

389 """Peel all tags from a SHA. 

390 

391 Args: 

392 sha: The object SHA to peel. 

393 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

394 intermediate tags; if the original ref does not point to a tag, 

395 this will equal the original SHA1. 

396 """ 

397 warnings.warn( 

398 "Please use dulwich.object_store.peel_sha()", 

399 DeprecationWarning, 

400 stacklevel=2, 

401 ) 

402 return peel_sha(self, sha)[1] 

403 

404 def _get_depth( 

405 self, 

406 head, 

407 get_parents=lambda commit: commit.parents, 

408 max_depth=None, 

409 ): 

410 """Return the current available depth for the given head. 

411 For commits with multiple parents, the largest possible depth will be 

412 returned. 

413 

414 Args: 

415 head: commit to start from 

416 get_parents: optional function for getting the parents of a commit 

417 max_depth: maximum depth to search 

418 """ 

419 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

420 

421 def close(self) -> None: 

422 """Close any files opened by this object store.""" 

423 # Default implementation is a NO-OP 

424 

425 def prune(self, grace_period: Optional[int] = None) -> None: 

426 """Prune/clean up this object store. 

427 

428 This includes removing orphaned temporary files and other 

429 housekeeping tasks. Default implementation is a NO-OP. 

430 

431 Args: 

432 grace_period: Grace period in seconds for removing temporary files. 

433 If None, uses the default grace period. 

434 """ 

435 # Default implementation is a NO-OP 

436 

437 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

438 """Iterate over all SHA1s that start with a given prefix. 

439 

440 The default implementation is a naive iteration over all objects. 

441 However, subclasses may override this method with more efficient 

442 implementations. 

443 """ 

444 for sha in self: 

445 if sha.startswith(prefix): 

446 yield sha 

447 
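
# Example (illustrative sketch): the default prefix scan, shown on an
# in-memory store holding a single object.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"abbreviate me")
store.add_object(blob)
assert list(store.iter_prefix(blob.id[:7])) == [blob.id]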

448 def get_commit_graph(self): 

449 """Get the commit graph for this object store. 

450 

451 Returns: 

452 CommitGraph object if available, None otherwise 

453 """ 

454 return None 

455 

456 def write_commit_graph(self, refs=None, reachable=True) -> None: 

457 """Write a commit graph file for this object store. 

458 

459 Args: 

460 refs: List of refs to include. If None, includes all refs from object store. 

461 reachable: If True, includes all commits reachable from refs. 

462 If False, only includes the direct ref targets. 

463 

464 Note: 

465 Default implementation does nothing. Subclasses should override 

466 this method to provide commit graph writing functionality. 

467 """ 

468 raise NotImplementedError(self.write_commit_graph) 

469 

470 def get_object_mtime(self, sha): 

471 """Get the modification time of an object. 

472 

473 Args: 

474 sha: SHA1 of the object 

475 

476 Returns: 

477 Modification time as seconds since epoch 

478 

479 Raises: 

480 KeyError: if the object is not found 

481 """ 

482 # Default implementation raises KeyError 

483 # Subclasses should override to provide actual mtime 

484 raise KeyError(sha) 

485 

486 

487class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

488 def __init__(self, pack_compression_level=-1, pack_index_version=None) -> None: 

489 self._pack_cache: dict[str, Pack] = {} 

490 self.pack_compression_level = pack_compression_level 

491 self.pack_index_version = pack_index_version 

492 

493 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

494 """Add a new pack to this object store.""" 

495 raise NotImplementedError(self.add_pack) 

496 

497 def add_pack_data( 

498 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

499 ) -> None: 

500 """Add pack data to this object store. 

501 

502 Args: 

503 count: Number of items to add 

504 """ 

505 if count == 0: 

506 # Don't bother writing an empty pack file 

507 return 

508 f, commit, abort = self.add_pack() 

509 try: 

510 write_pack_data( 

511 f.write, 

512 unpacked_objects, 

513 num_records=count, 

514 progress=progress, 

515 compression_level=self.pack_compression_level, 

516 ) 

517 except BaseException: 

518 abort() 

519 raise 

520 else: 

521 return commit() 

522 

523 @property 

524 def alternates(self): 

525 return [] 

526 

527 def contains_packed(self, sha) -> bool: 

528 """Check if a particular object is present by SHA1 and is packed. 

529 

530 This does not check alternates. 

531 """ 

532 for pack in self.packs: 

533 try: 

534 if sha in pack: 

535 return True 

536 except PackFileDisappeared: 

537 pass 

538 return False 

539 

540 def __contains__(self, sha) -> bool: 

541 """Check if a particular object is present by SHA1. 

542 

543 This method makes no distinction between loose and packed objects. 

544 """ 

545 if self.contains_packed(sha) or self.contains_loose(sha): 

546 return True 

547 for alternate in self.alternates: 

548 if sha in alternate: 

549 return True 

550 return False 

551 

552 def _add_cached_pack(self, base_name, pack) -> None: 

553 """Add a newly appeared pack to the cache by path.""" 

554 prev_pack = self._pack_cache.get(base_name) 

555 if prev_pack is not pack: 

556 self._pack_cache[base_name] = pack 

557 if prev_pack: 

558 prev_pack.close() 

559 

560 def generate_pack_data( 

561 self, have, want, shallow=None, progress=None, ofs_delta=True 

562 ) -> tuple[int, Iterator[UnpackedObject]]: 

563 """Generate pack data objects for a set of wants/haves. 

564 

565 Args: 

566 have: List of SHA1s of objects that should not be sent 

567 want: List of SHA1s of objects that should be sent 

568 shallow: Set of shallow commit SHA1s to skip 

569 ofs_delta: Whether OFS deltas can be included 

570 progress: Optional progress reporting method 

571 """ 

572 missing_objects = MissingObjectFinder( 

573 self, haves=have, wants=want, shallow=shallow, progress=progress 

574 ) 

575 remote_has = missing_objects.get_remote_has() 

576 object_ids = list(missing_objects) 

577 return len(object_ids), generate_unpacked_objects( 

578 cast(PackedObjectContainer, self), 

579 object_ids, 

580 progress=progress, 

581 ofs_delta=ofs_delta, 

582 other_haves=remote_has, 

583 ) 

584 

585 def _clear_cached_packs(self) -> None: 

586 pack_cache = self._pack_cache 

587 self._pack_cache = {} 

588 while pack_cache: 

589 (name, pack) = pack_cache.popitem() 

590 pack.close() 

591 

592 def _iter_cached_packs(self): 

593 return self._pack_cache.values() 

594 

595 def _update_pack_cache(self) -> list[Pack]: 

596 raise NotImplementedError(self._update_pack_cache) 

597 

598 def close(self) -> None: 

599 self._clear_cached_packs() 

600 

601 @property 

602 def packs(self): 

603 """List with pack objects.""" 

604 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

605 

606 def count_pack_files(self) -> int: 

607 """Count the number of pack files. 

608 

609 Returns: 

610 Number of pack files (excluding those with .keep files) 

611 """ 

612 count = 0 

613 for pack in self.packs: 

614 # Check if there's a .keep file for this pack 

615 keep_path = pack._basename + ".keep" 

616 if not os.path.exists(keep_path): 

617 count += 1 

618 return count 

619 

620 def _iter_alternate_objects(self): 

621 """Iterate over the SHAs of all the objects in alternate stores.""" 

622 for alternate in self.alternates: 

623 yield from alternate 

624 

625 def _iter_loose_objects(self): 

626 """Iterate over the SHAs of all loose objects.""" 

627 raise NotImplementedError(self._iter_loose_objects) 

628 

629 def _get_loose_object(self, sha) -> Optional[ShaFile]: 

630 raise NotImplementedError(self._get_loose_object) 

631 

632 def delete_loose_object(self, sha) -> None: 

633 """Delete a loose object. 

634 

635 This method only handles loose objects. For packed objects, 

636 use repack(exclude=...) to exclude them during repacking. 

637 """ 

638 raise NotImplementedError(self.delete_loose_object) 

639 

640 def _remove_pack(self, name) -> None: 

641 raise NotImplementedError(self._remove_pack) 

642 

643 def pack_loose_objects(self): 

644 """Pack loose objects. 

645 

646 Returns: Number of objects packed 

647 """ 

648 objects = set() 

649 for sha in self._iter_loose_objects(): 

650 objects.add((self._get_loose_object(sha), None)) 

651 self.add_objects(list(objects)) 

652 for obj, path in objects: 

653 self.delete_loose_object(obj.id) 

654 return len(objects) 

655 

656 def repack(self, exclude=None): 

657 """Repack the packs in this repository. 

658 

659 Note that this implementation is fairly naive and currently keeps all 

660 objects in memory while it repacks. 

661 

662 Args: 

663 exclude: Optional set of object SHAs to exclude from repacking 

664 """ 

665 if exclude is None: 

666 exclude = set() 

667 

668 loose_objects = set() 

669 excluded_loose_objects = set() 

670 for sha in self._iter_loose_objects(): 

671 if sha not in exclude: 

672 loose_objects.add(self._get_loose_object(sha)) 

673 else: 

674 excluded_loose_objects.add(sha) 

675 

676 objects = {(obj, None) for obj in loose_objects} 

677 old_packs = {p.name(): p for p in self.packs} 

678 for name, pack in old_packs.items(): 

679 objects.update( 

680 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

681 ) 

682 

683 # Only create a new pack if there are objects to pack 

684 if objects: 

685 # The name of the consolidated pack might match the name of a 

686 # pre-existing pack. Take care not to remove the newly created 

687 # consolidated pack. 

688 consolidated = self.add_objects(objects) 

689 old_packs.pop(consolidated.name(), None) 

690 

691 # Delete loose objects that were packed 

692 for obj in loose_objects: 

693 self.delete_loose_object(obj.id) 

694 # Delete excluded loose objects 

695 for sha in excluded_loose_objects: 

696 self.delete_loose_object(sha) 

697 for name, pack in old_packs.items(): 

698 self._remove_pack(pack) 

699 self._update_pack_cache() 

700 return len(objects) 

701 
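
# Example (illustrative sketch): repacking a freshly initialised on-disk store
# that only contains loose objects. DiskObjectStore is defined later in this
# module; the temporary directory stands in for a repository's objects directory.
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

store = DiskObjectStore.init(tempfile.mkdtemp())
for data in (b"one", b"two"):
    store.add_object(Blob.from_string(data))
assert store.count_loose_objects() == 2

packed = store.repack()  # consolidates everything into a single pack
assert packed == 2
assert store.count_loose_objects() == 0
assert store.count_pack_files() == 1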

702 def __iter__(self): 

703 """Iterate over the SHAs that are present in this store.""" 

704 self._update_pack_cache() 

705 for pack in self._iter_cached_packs(): 

706 try: 

707 yield from pack 

708 except PackFileDisappeared: 

709 pass 

710 yield from self._iter_loose_objects() 

711 yield from self._iter_alternate_objects() 

712 

713 def contains_loose(self, sha): 

714 """Check if a particular object is present by SHA1 and is loose. 

715 

716 This does not check alternates. 

717 """ 

718 return self._get_loose_object(sha) is not None 

719 

720 def get_raw(self, name): 

721 """Obtain the raw fulltext for an object. 

722 

723 Args: 

724 name: sha for the object. 

725 Returns: tuple with numeric type and object contents. 

726 """ 

727 if name == ZERO_SHA: 

728 raise KeyError(name) 

729 if len(name) == 40: 

730 sha = hex_to_sha(name) 

731 hexsha = name 

732 elif len(name) == 20: 

733 sha = name 

734 hexsha = None 

735 else: 

736 raise AssertionError(f"Invalid object name {name!r}") 

737 for pack in self._iter_cached_packs(): 

738 try: 

739 return pack.get_raw(sha) 

740 except (KeyError, PackFileDisappeared): 

741 pass 

742 if hexsha is None: 

743 hexsha = sha_to_hex(name) 

744 ret = self._get_loose_object(hexsha) 

745 if ret is not None: 

746 return ret.type_num, ret.as_raw_string() 

747 # Maybe something else has added a pack with the object 

748 # in the mean time? 

749 for pack in self._update_pack_cache(): 

750 try: 

751 return pack.get_raw(sha) 

752 except KeyError: 

753 pass 

754 for alternate in self.alternates: 

755 try: 

756 return alternate.get_raw(hexsha) 

757 except KeyError: 

758 pass 

759 raise KeyError(hexsha) 

760 

761 def iter_unpacked_subset( 

762 self, 

763 shas: set[bytes], 

764 include_comp: bool = False, 

765 allow_missing: bool = False, 

766 convert_ofs_delta: bool = True, 

767 ) -> Iterator[UnpackedObject]: 

768 todo: set[bytes] = set(shas) 

769 for p in self._iter_cached_packs(): 

770 for unpacked in p.iter_unpacked_subset( 

771 todo, 

772 include_comp=include_comp, 

773 allow_missing=True, 

774 convert_ofs_delta=convert_ofs_delta, 

775 ): 

776 yield unpacked 

777 hexsha = sha_to_hex(unpacked.sha()) 

778 todo.remove(hexsha) 

779 # Maybe something else has added a pack with the object 

780 # in the mean time? 

781 for p in self._update_pack_cache(): 

782 for unpacked in p.iter_unpacked_subset( 

783 todo, 

784 include_comp=include_comp, 

785 allow_missing=True, 

786 convert_ofs_delta=convert_ofs_delta, 

787 ): 

788 yield unpacked 

789 hexsha = sha_to_hex(unpacked.sha()) 

790 todo.remove(hexsha) 

791 for alternate in self.alternates: 

792 for unpacked in alternate.iter_unpacked_subset( 

793 todo, 

794 include_comp=include_comp, 

795 allow_missing=True, 

796 convert_ofs_delta=convert_ofs_delta, 

797 ): 

798 yield unpacked 

799 hexsha = sha_to_hex(unpacked.sha()) 

800 todo.remove(hexsha) 

801 

802 def iterobjects_subset( 

803 self, shas: Iterable[bytes], *, allow_missing: bool = False 

804 ) -> Iterator[ShaFile]: 

805 todo: set[bytes] = set(shas) 

806 for p in self._iter_cached_packs(): 

807 for o in p.iterobjects_subset(todo, allow_missing=True): 

808 yield o 

809 todo.remove(o.id) 

810 # Maybe something else has added a pack with the object 

811 # in the mean time? 

812 for p in self._update_pack_cache(): 

813 for o in p.iterobjects_subset(todo, allow_missing=True): 

814 yield o 

815 todo.remove(o.id) 

816 for alternate in self.alternates: 

817 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

818 yield o 

819 todo.remove(o.id) 

820 for oid in todo: 

821 o = self._get_loose_object(oid) 

822 if o is not None: 

823 yield o 

824 elif not allow_missing: 

825 raise KeyError(oid) 

826 

827 def get_unpacked_object( 

828 self, sha1: bytes, *, include_comp: bool = False 

829 ) -> UnpackedObject: 

830 """Obtain the unpacked object. 

831 

832 Args: 

833 sha1: sha for the object. 

834 """ 

835 if sha1 == ZERO_SHA: 

836 raise KeyError(sha1) 

837 if len(sha1) == 40: 

838 sha = hex_to_sha(sha1) 

839 hexsha = sha1 

840 elif len(sha1) == 20: 

841 sha = sha1 

842 hexsha = None 

843 else: 

844 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

845 for pack in self._iter_cached_packs(): 

846 try: 

847 return pack.get_unpacked_object(sha, include_comp=include_comp) 

848 except (KeyError, PackFileDisappeared): 

849 pass 

850 if hexsha is None: 

851 hexsha = sha_to_hex(sha1) 

852 # Maybe something else has added a pack with the object 

853 # in the mean time? 

854 for pack in self._update_pack_cache(): 

855 try: 

856 return pack.get_unpacked_object(sha, include_comp=include_comp) 

857 except KeyError: 

858 pass 

859 for alternate in self.alternates: 

860 try: 

861 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

862 except KeyError: 

863 pass 

864 raise KeyError(hexsha) 

865 

866 def add_objects( 

867 self, 

868 objects: Sequence[tuple[ShaFile, Optional[str]]], 

869 progress: Optional[Callable[[str], None]] = None, 

870 ) -> None: 

871 """Add a set of objects to this object store. 

872 

873 Args: 

874 objects: Iterable over (object, path) tuples, should support 

875 __len__. 

876 Returns: Pack object of the objects written. 

877 """ 

878 count = len(objects) 

879 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

880 return self.add_pack_data(count, record_iter, progress=progress) 

881 

882 

883class DiskObjectStore(PackBasedObjectStore): 

884 """Git-style object store that exists on disk.""" 

885 

886 path: Union[str, os.PathLike] 

887 pack_dir: Union[str, os.PathLike] 

888 

889 def __init__( 

890 self, 

891 path: Union[str, os.PathLike], 

892 loose_compression_level=-1, 

893 pack_compression_level=-1, 

894 pack_index_version=None, 

895 ) -> None: 

896 """Open an object store. 

897 

898 Args: 

899 path: Path of the object store. 

900 loose_compression_level: zlib compression level for loose objects 

901 pack_compression_level: zlib compression level for pack objects 

902 pack_index_version: pack index version to use (1, 2, or 3) 

903 """ 

904 super().__init__( 

905 pack_compression_level=pack_compression_level, 

906 pack_index_version=pack_index_version, 

907 ) 

908 self.path = path 

909 self.pack_dir = os.path.join(self.path, PACKDIR) 

910 self._alternates = None 

911 self.loose_compression_level = loose_compression_level 

912 self.pack_compression_level = pack_compression_level 

913 self.pack_index_version = pack_index_version 

914 

915 # Commit graph support - lazy loaded 

916 self._commit_graph = None 

917 

918 def __repr__(self) -> str: 

919 return f"<{self.__class__.__name__}({self.path!r})>" 

920 

921 @classmethod 

922 def from_config(cls, path: Union[str, os.PathLike], config): 

923 try: 

924 default_compression_level = int( 

925 config.get((b"core",), b"compression").decode() 

926 ) 

927 except KeyError: 

928 default_compression_level = -1 

929 try: 

930 loose_compression_level = int( 

931 config.get((b"core",), b"looseCompression").decode() 

932 ) 

933 except KeyError: 

934 loose_compression_level = default_compression_level 

935 try: 

936 pack_compression_level = int( 

937 config.get((b"core",), "packCompression").decode() 

938 ) 

939 except KeyError: 

940 pack_compression_level = default_compression_level 

941 try: 

942 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

943 except KeyError: 

944 pack_index_version = None 

945 return cls( 

946 path, loose_compression_level, pack_compression_level, pack_index_version 

947 ) 

948 

949 @property 

950 def alternates(self): 

951 if self._alternates is not None: 

952 return self._alternates 

953 self._alternates = [] 

954 for path in self._read_alternate_paths(): 

955 self._alternates.append(DiskObjectStore(path)) 

956 return self._alternates 

957 

958 def _read_alternate_paths(self): 

959 try: 

960 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

961 except FileNotFoundError: 

962 return 

963 with f: 

964 for line in f.readlines(): 

965 line = line.rstrip(b"\n") 

966 if line.startswith(b"#"): 

967 continue 

968 if os.path.isabs(line): 

969 yield os.fsdecode(line) 

970 else: 

971 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

972 

973 def add_alternate_path(self, path) -> None: 

974 """Add an alternate path to this object store.""" 

975 try: 

976 os.mkdir(os.path.join(self.path, INFODIR)) 

977 except FileExistsError: 

978 pass 

979 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

980 with GitFile(alternates_path, "wb") as f: 

981 try: 

982 orig_f = open(alternates_path, "rb") 

983 except FileNotFoundError: 

984 pass 

985 else: 

986 with orig_f: 

987 f.write(orig_f.read()) 

988 f.write(os.fsencode(path) + b"\n") 

989 

990 if not os.path.isabs(path): 

991 path = os.path.join(self.path, path) 

992 self.alternates.append(DiskObjectStore(path)) 

993 

994 def _update_pack_cache(self): 

995 """Read and iterate over new pack files and cache them.""" 

996 try: 

997 pack_dir_contents = os.listdir(self.pack_dir) 

998 except FileNotFoundError: 

999 self.close() 

1000 return [] 

1001 pack_files = set() 

1002 for name in pack_dir_contents: 

1003 if name.startswith("pack-") and name.endswith(".pack"): 

1004 # verify that idx exists first (otherwise the pack was not yet 

1005 # fully written) 

1006 idx_name = os.path.splitext(name)[0] + ".idx" 

1007 if idx_name in pack_dir_contents: 

1008 pack_name = name[: -len(".pack")] 

1009 pack_files.add(pack_name) 

1010 

1011 # Open newly appeared pack files 

1012 new_packs = [] 

1013 for f in pack_files: 

1014 if f not in self._pack_cache: 

1015 pack = Pack(os.path.join(self.pack_dir, f)) 

1016 new_packs.append(pack) 

1017 self._pack_cache[f] = pack 

1018 # Remove disappeared pack files 

1019 for f in set(self._pack_cache) - pack_files: 

1020 self._pack_cache.pop(f).close() 

1021 return new_packs 

1022 

1023 def _get_shafile_path(self, sha): 

1024 # Check from object dir 

1025 return hex_to_filename(self.path, sha) 

1026 

1027 def _iter_loose_objects(self): 

1028 for base in os.listdir(self.path): 

1029 if len(base) != 2: 

1030 continue 

1031 for rest in os.listdir(os.path.join(self.path, base)): 

1032 sha = os.fsencode(base + rest) 

1033 if not valid_hexsha(sha): 

1034 continue 

1035 yield sha 

1036 

1037 def count_loose_objects(self) -> int: 

1038 """Count the number of loose objects in the object store. 

1039 

1040 Returns: 

1041 Number of loose objects 

1042 """ 

1043 count = 0 

1044 if not os.path.exists(self.path): 

1045 return 0 

1046 

1047 for i in range(256): 

1048 subdir = os.path.join(self.path, f"{i:02x}") 

1049 try: 

1050 count += len( 

1051 [ 

1052 name 

1053 for name in os.listdir(subdir) 

1054 if len(name) == 38 # 40 - 2 for the prefix 

1055 ] 

1056 ) 

1057 except FileNotFoundError: 

1058 # Directory may have been removed or is inaccessible 

1059 continue 

1060 

1061 return count 

1062 

1063 def _get_loose_object(self, sha): 

1064 path = self._get_shafile_path(sha) 

1065 try: 

1066 return ShaFile.from_path(path) 

1067 except FileNotFoundError: 

1068 return None 

1069 

1070 def delete_loose_object(self, sha) -> None: 

1071 os.remove(self._get_shafile_path(sha)) 

1072 

1073 def get_object_mtime(self, sha): 

1074 """Get the modification time of an object. 

1075 

1076 Args: 

1077 sha: SHA1 of the object 

1078 

1079 Returns: 

1080 Modification time as seconds since epoch 

1081 

1082 Raises: 

1083 KeyError: if the object is not found 

1084 """ 

1085 # First check if it's a loose object 

1086 if self.contains_loose(sha): 

1087 path = self._get_shafile_path(sha) 

1088 try: 

1089 return os.path.getmtime(path) 

1090 except FileNotFoundError: 

1091 pass 

1092 

1093 # Check if it's in a pack file 

1094 for pack in self.packs: 

1095 try: 

1096 if sha in pack: 

1097 # Use the pack file's mtime for packed objects 

1098 pack_path = pack._data_path 

1099 try: 

1100 return os.path.getmtime(pack_path) 

1101 except (FileNotFoundError, AttributeError): 

1102 pass 

1103 except PackFileDisappeared: 

1104 pass 

1105 

1106 raise KeyError(sha) 

1107 

1108 def _remove_pack(self, pack) -> None: 

1109 try: 

1110 del self._pack_cache[os.path.basename(pack._basename)] 

1111 except KeyError: 

1112 pass 

1113 pack.close() 

1114 os.remove(pack.data.path) 

1115 os.remove(pack.index.path) 

1116 

1117 def _get_pack_basepath(self, entries): 

1118 suffix = iter_sha1(entry[0] for entry in entries) 

1119 # TODO: Handle self.pack_dir being bytes 

1120 suffix = suffix.decode("ascii") 

1121 return os.path.join(self.pack_dir, "pack-" + suffix) 

1122 

1123 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1124 """Move a specific file containing a pack into the pack directory. 

1125 

1126 Note: The file should be on the same file system as the 

1127 packs directory. 

1128 

1129 Args: 

1130 f: Open file object for the pack. 

1131 path: Path to the pack file. 

1132 indexer: A PackIndexer for indexing the pack. 

1133 """ 

1134 entries = [] 

1135 for i, entry in enumerate(indexer): 

1136 if progress is not None: 

1137 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1138 entries.append(entry) 

1139 

1140 pack_sha, extra_entries = extend_pack( 

1141 f, 

1142 indexer.ext_refs(), 

1143 get_raw=self.get_raw, 

1144 compression_level=self.pack_compression_level, 

1145 progress=progress, 

1146 ) 

1147 f.flush() 

1148 try: 

1149 fileno = f.fileno() 

1150 except AttributeError: 

1151 pass 

1152 else: 

1153 os.fsync(fileno) 

1154 f.close() 

1155 

1156 entries.extend(extra_entries) 

1157 

1158 # Move the pack in. 

1159 entries.sort() 

1160 pack_base_name = self._get_pack_basepath(entries) 

1161 

1162 for pack in self.packs: 

1163 if pack._basename == pack_base_name: 

1164 return pack 

1165 

1166 target_pack_path = pack_base_name + ".pack" 

1167 target_index_path = pack_base_name + ".idx" 

1168 if sys.platform == "win32": 

1169 # Windows might have the target pack file lingering. Attempt 

1170 # removal, silently passing if the target does not exist. 

1171 with suppress(FileNotFoundError): 

1172 os.remove(target_pack_path) 

1173 os.rename(path, target_pack_path) 

1174 

1175 # Write the index. 

1176 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1177 write_pack_index( 

1178 index_file, entries, pack_sha, version=self.pack_index_version 

1179 ) 

1180 

1181 # Add the pack to the store and return it. 

1182 final_pack = Pack(pack_base_name) 

1183 final_pack.check_length_and_checksum() 

1184 self._add_cached_pack(pack_base_name, final_pack) 

1185 return final_pack 

1186 

1187 def add_thin_pack(self, read_all, read_some, progress=None): 

1188 """Add a new thin pack to this object store. 

1189 

1190 Thin packs are packs that contain deltas with parents that exist 

1191 outside the pack. They should never be placed in the object store 

1192 directly, and always indexed and completed as they are copied. 

1193 

1194 Args: 

1195 read_all: Read function that blocks until the number of 

1196 requested bytes are read. 

1197 read_some: Read function that returns at least one byte, but may 

1198 not return the number of bytes requested. 

1199 Returns: A Pack object pointing at the now-completed thin pack in the 

1200 objects/pack directory. 

1201 """ 

1202 import tempfile 

1203 

1204 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1205 with os.fdopen(fd, "w+b") as f: 

1206 os.chmod(path, PACK_MODE) 

1207 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1208 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1209 copier.verify(progress=progress) 

1210 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1211 

1212 def add_pack(self): 

1213 """Add a new pack to this object store. 

1214 

1215 Returns: Fileobject to write to, a commit function to 

1216 call when the pack is finished and an abort 

1217 function. 

1218 """ 

1219 import tempfile 

1220 

1221 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1222 f = os.fdopen(fd, "w+b") 

1223 os.chmod(path, PACK_MODE) 

1224 

1225 def commit(): 

1226 if f.tell() > 0: 

1227 f.seek(0) 

1228 with PackData(path, f) as pd: 

1229 indexer = PackIndexer.for_pack_data( 

1230 pd, resolve_ext_ref=self.get_raw 

1231 ) 

1232 return self._complete_pack(f, path, len(pd), indexer) 

1233 else: 

1234 f.close() 

1235 os.remove(path) 

1236 return None 

1237 

1238 def abort() -> None: 

1239 f.close() 

1240 os.remove(path) 

1241 

1242 return f, commit, abort 

1243 

1244 def add_object(self, obj) -> None: 

1245 """Add a single object to this object store. 

1246 

1247 Args: 

1248 obj: Object to add 

1249 """ 

1250 path = self._get_shafile_path(obj.id) 

1251 dir = os.path.dirname(path) 

1252 try: 

1253 os.mkdir(dir) 

1254 except FileExistsError: 

1255 pass 

1256 if os.path.exists(path): 

1257 return # Already there, no need to write again 

1258 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1259 f.write( 

1260 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1261 ) 

1262 

1263 @classmethod 

1264 def init(cls, path: Union[str, os.PathLike]): 

1265 try: 

1266 os.mkdir(path) 

1267 except FileExistsError: 

1268 pass 

1269 os.mkdir(os.path.join(path, "info")) 

1270 os.mkdir(os.path.join(path, PACKDIR)) 

1271 return cls(path) 

1272 
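
# Example (illustrative sketch): init() creates the bare directory layout
# ("info" and "pack" subdirectories) that the rest of this class expects.
# The temporary directory and the all-zeros SHA are placeholders.
import os
import tempfile

from dulwich.object_store import DiskObjectStore

path = tempfile.mkdtemp()
store = DiskObjectStore.init(path)
assert sorted(os.listdir(path)) == ["info", "pack"]
assert not store.contains_loose(b"0" * 40)  # empty store, placeholder SHA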

1273 def iter_prefix(self, prefix): 

1274 if len(prefix) < 2: 

1275 yield from super().iter_prefix(prefix) 

1276 return 

1277 seen = set() 

1278 dir = prefix[:2].decode() 

1279 rest = prefix[2:].decode() 

1280 try: 

1281 for name in os.listdir(os.path.join(self.path, dir)): 

1282 if name.startswith(rest): 

1283 sha = os.fsencode(dir + name) 

1284 if sha not in seen: 

1285 seen.add(sha) 

1286 yield sha 

1287 except FileNotFoundError: 

1288 pass 

1289 

1290 for p in self.packs: 

1291 bin_prefix = ( 

1292 binascii.unhexlify(prefix) 

1293 if len(prefix) % 2 == 0 

1294 else binascii.unhexlify(prefix[:-1]) 

1295 ) 

1296 for sha in p.index.iter_prefix(bin_prefix): 

1297 sha = sha_to_hex(sha) 

1298 if sha.startswith(prefix) and sha not in seen: 

1299 seen.add(sha) 

1300 yield sha 

1301 for alternate in self.alternates: 

1302 for sha in alternate.iter_prefix(prefix): 

1303 if sha not in seen: 

1304 seen.add(sha) 

1305 yield sha 

1306 

1307 def get_commit_graph(self): 

1308 """Get the commit graph for this object store. 

1309 

1310 Returns: 

1311 CommitGraph object if available, None otherwise 

1312 """ 

1313 if self._commit_graph is None: 

1314 from .commit_graph import read_commit_graph 

1315 

1316 # Look for commit graph in our objects directory 

1317 graph_file = os.path.join(self.path, "info", "commit-graph") 

1318 if os.path.exists(graph_file): 

1319 self._commit_graph = read_commit_graph(graph_file) 

1320 return self._commit_graph 

1321 

1322 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1323 """Write a commit graph file for this object store. 

1324 

1325 Args: 

1326 refs: List of refs to include. If None, includes all refs from object store. 

1327 reachable: If True, includes all commits reachable from refs. 

1328 If False, only includes the direct ref targets. 

1329 """ 

1330 from .commit_graph import get_reachable_commits 

1331 

1332 if refs is None: 

1333 # Get all commit objects from the object store 

1334 all_refs = [] 

1335 # Iterate through all objects to find commits 

1336 for sha in self: 

1337 try: 

1338 obj = self[sha] 

1339 if obj.type_name == b"commit": 

1340 all_refs.append(sha) 

1341 except KeyError: 

1342 continue 

1343 else: 

1344 # Use provided refs 

1345 all_refs = refs 

1346 

1347 if not all_refs: 

1348 return # No commits to include 

1349 

1350 if reachable: 

1351 # Get all reachable commits 

1352 commit_ids = get_reachable_commits(self, all_refs) 

1353 else: 

1354 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1355 commit_ids = [] 

1356 for ref in all_refs: 

1357 if isinstance(ref, bytes) and len(ref) == 40: 

1358 # Already hex ObjectID 

1359 commit_ids.append(ref) 

1360 elif isinstance(ref, bytes) and len(ref) == 20: 

1361 # Binary SHA, convert to hex ObjectID 

1362 from .objects import sha_to_hex 

1363 

1364 commit_ids.append(sha_to_hex(ref)) 

1365 else: 

1366 # Assume it's already correct format 

1367 commit_ids.append(ref) 

1368 

1369 if commit_ids: 

1370 # Write commit graph directly to our object store path 

1371 # Generate the commit graph 

1372 from .commit_graph import generate_commit_graph 

1373 

1374 graph = generate_commit_graph(self, commit_ids) 

1375 

1376 if graph.entries: 

1377 # Ensure the info directory exists 

1378 info_dir = os.path.join(self.path, "info") 

1379 os.makedirs(info_dir, exist_ok=True) 

1380 

1381 # Write using GitFile for atomic operation 

1382 graph_path = os.path.join(info_dir, "commit-graph") 

1383 with GitFile(graph_path, "wb") as f: 

1384 graph.write_to_file(f) 

1385 

1386 # Clear cached commit graph so it gets reloaded 

1387 self._commit_graph = None 

1388 

1389 def prune(self, grace_period: Optional[int] = None) -> None: 

1390 """Prune/clean up this object store. 

1391 

1392 This removes temporary files that were left behind by interrupted 

1393 pack operations. These are files that start with ``tmp_pack_`` in the 

1394 repository directory or files with .pack extension but no corresponding 

1395 .idx file in the pack directory. 

1396 

1397 Args: 

1398 grace_period: Grace period in seconds for removing temporary files. 

1399 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1400 """ 

1401 import glob 

1402 

1403 if grace_period is None: 

1404 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1405 

1406 # Clean up tmp_pack_* files in the repository directory 

1407 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1408 # Check if file is old enough (more than grace period) 

1409 mtime = os.path.getmtime(tmp_file) 

1410 if time.time() - mtime > grace_period: 

1411 os.remove(tmp_file) 

1412 

1413 # Clean up orphaned .pack files without corresponding .idx files 

1414 try: 

1415 pack_dir_contents = os.listdir(self.pack_dir) 

1416 except FileNotFoundError: 

1417 return 

1418 

1419 pack_files = {} 

1420 idx_files = set() 

1421 

1422 for name in pack_dir_contents: 

1423 if name.endswith(".pack"): 

1424 base_name = name[:-5] # Remove .pack extension 

1425 pack_files[base_name] = name 

1426 elif name.endswith(".idx"): 

1427 base_name = name[:-4] # Remove .idx extension 

1428 idx_files.add(base_name) 

1429 

1430 # Remove .pack files without corresponding .idx files 

1431 for base_name, pack_name in pack_files.items(): 

1432 if base_name not in idx_files: 

1433 pack_path = os.path.join(self.pack_dir, pack_name) 

1434 # Check if file is old enough (more than grace period) 

1435 mtime = os.path.getmtime(pack_path) 

1436 if time.time() - mtime > grace_period: 

1437 os.remove(pack_path) 

1438 

1439 

1440class MemoryObjectStore(BaseObjectStore): 

1441 """Object store that keeps all objects in memory.""" 

1442 

1443 def __init__(self) -> None: 

1444 super().__init__() 

1445 self._data: dict[str, ShaFile] = {} 

1446 self.pack_compression_level = -1 

1447 

1448 def _to_hexsha(self, sha): 

1449 if len(sha) == 40: 

1450 return sha 

1451 elif len(sha) == 20: 

1452 return sha_to_hex(sha) 

1453 else: 

1454 raise ValueError(f"Invalid sha {sha!r}") 

1455 

1456 def contains_loose(self, sha): 

1457 """Check if a particular object is present by SHA1 and is loose.""" 

1458 return self._to_hexsha(sha) in self._data 

1459 

1460 def contains_packed(self, sha) -> bool: 

1461 """Check if a particular object is present by SHA1 and is packed.""" 

1462 return False 

1463 

1464 def __iter__(self): 

1465 """Iterate over the SHAs that are present in this store.""" 

1466 return iter(self._data.keys()) 

1467 

1468 @property 

1469 def packs(self): 

1470 """List with pack objects.""" 

1471 return [] 

1472 

1473 def get_raw(self, name: ObjectID): 

1474 """Obtain the raw text for an object. 

1475 

1476 Args: 

1477 name: sha for the object. 

1478 Returns: tuple with numeric type and object contents. 

1479 """ 

1480 obj = self[self._to_hexsha(name)] 

1481 return obj.type_num, obj.as_raw_string() 

1482 

1483 def __getitem__(self, name: ObjectID): 

1484 return self._data[self._to_hexsha(name)].copy() 

1485 

1486 def __delitem__(self, name: ObjectID) -> None: 

1487 """Delete an object from this store, for testing only.""" 

1488 del self._data[self._to_hexsha(name)] 

1489 

1490 def add_object(self, obj) -> None: 

1491 """Add a single object to this object store.""" 

1492 self._data[obj.id] = obj.copy() 

1493 

1494 def add_objects(self, objects, progress=None) -> None: 

1495 """Add a set of objects to this object store. 

1496 

1497 Args: 

1498 objects: Iterable over a list of (object, path) tuples 

1499 """ 

1500 for obj, path in objects: 

1501 self.add_object(obj) 

1502 
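
# Example (illustrative sketch): the in-memory store round-trips objects
# through add_object()/get_raw() without touching the filesystem.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"hello world")
store.add_object(blob)
assert blob.id in store
type_num, raw = store.get_raw(blob.id)
assert (type_num, raw) == (Blob.type_num, b"hello world")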

1503 def add_pack(self): 

1504 """Add a new pack to this object store. 

1505 

1506 Because this object store doesn't support packs, we extract and add the 

1507 individual objects. 

1508 

1509 Returns: Fileobject to write to and a commit function to 

1510 call when the pack is finished. 

1511 """ 

1512 from tempfile import SpooledTemporaryFile 

1513 

1514 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1515 

1516 def commit() -> None: 

1517 size = f.tell() 

1518 if size > 0: 

1519 f.seek(0) 

1520 p = PackData.from_file(f, size) 

1521 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1522 self.add_object(obj) 

1523 p.close() 

1524 f.close() 

1525 else: 

1526 f.close() 

1527 

1528 def abort() -> None: 

1529 f.close() 

1530 

1531 return f, commit, abort 

1532 

1533 def add_pack_data( 

1534 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1535 ) -> None: 

1536 """Add pack data to this object store. 

1537 

1538 Args: 

1539 count: Number of items to add 

1540 """ 

1541 if count == 0: 

1542 return 

1543 

1544 # Since MemoryObjectStore doesn't support pack files, we need to 

1545 # extract individual objects. To handle deltas properly, we write 

1546 # to a temporary pack and then use PackInflater to resolve them. 

1547 f, commit, abort = self.add_pack() 

1548 try: 

1549 write_pack_data( 

1550 f.write, 

1551 unpacked_objects, 

1552 num_records=count, 

1553 progress=progress, 

1554 ) 

1555 except BaseException: 

1556 abort() 

1557 raise 

1558 else: 

1559 commit() 

1560 

1561 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1562 """Add a new thin pack to this object store. 

1563 

1564 Thin packs are packs that contain deltas with parents that exist 

1565 outside the pack. Because this object store doesn't support packs, we 

1566 extract and add the individual objects. 

1567 

1568 Args: 

1569 read_all: Read function that blocks until the number of 

1570 requested bytes are read. 

1571 read_some: Read function that returns at least one byte, but may 

1572 not return the number of bytes requested. 

1573 """ 

1574 f, commit, abort = self.add_pack() 

1575 try: 

1576 copier = PackStreamCopier(read_all, read_some, f) 

1577 copier.verify() 

1578 except BaseException: 

1579 abort() 

1580 raise 

1581 else: 

1582 commit() 

1583 

1584 

1585class ObjectIterator(Protocol): 

1586 """Interface for iterating over objects.""" 

1587 

1588 def iterobjects(self) -> Iterator[ShaFile]: 

1589 raise NotImplementedError(self.iterobjects) 

1590 

1591 

1592def tree_lookup_path(lookup_obj, root_sha, path): 

1593 """Look up an object in a Git tree. 

1594 

1595 Args: 

1596 lookup_obj: Callback for retrieving object by SHA1 

1597 root_sha: SHA1 of the root tree 

1598 path: Path to lookup 

1599 Returns: A tuple of (mode, SHA) of the resulting path. 

1600 """ 

1601 tree = lookup_obj(root_sha) 

1602 if not isinstance(tree, Tree): 

1603 raise NotTreeError(root_sha) 

1604 return tree.lookup_path(lookup_obj, path) 

1605 

1606 
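
# Example (illustrative sketch): resolving a path inside a tree; the store's
# __getitem__ serves as the lookup callback. Names here are made up.
from dulwich.object_store import MemoryObjectStore, tree_lookup_path
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"contents")
tree = Tree()
tree.add(b"file.txt", 0o100644, blob.id)
for obj in (blob, tree):
    store.add_object(obj)

mode, sha = tree_lookup_path(store.__getitem__, tree.id, b"file.txt")
assert (mode, sha) == (0o100644, blob.id)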

1607def _collect_filetree_revs( 

1608 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1609) -> None: 

1610 """Collect SHA1s of files and directories for specified tree. 

1611 

1612 Args: 

1613 obj_store: Object store to get objects by SHA from 

1614 tree_sha: tree reference to walk 

1615 kset: set to fill with references to files and directories 

1616 """ 

1617 filetree = obj_store[tree_sha] 

1618 assert isinstance(filetree, Tree) 

1619 for name, mode, sha in filetree.iteritems(): 

1620 if not S_ISGITLINK(mode) and sha not in kset: 

1621 kset.add(sha) 

1622 if stat.S_ISDIR(mode): 

1623 _collect_filetree_revs(obj_store, sha, kset) 

1624 

1625 

1626def _split_commits_and_tags( 

1627 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1628) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1629 """Split object id list into three lists with commit, tag, and other SHAs. 

1630 

1631 Commits referenced by tags are included into commits 

1632 list as well. Only SHA1s known in this repository will get 

1633 through, and unless ignore_unknown argument is True, KeyError 

1634 is thrown for SHA1 missing in the repository 

1635 

1636 Args: 

1637 obj_store: Object store to get objects by SHA1 from 

1638 lst: Collection of commit and tag SHAs 

1639 ignore_unknown: True to skip SHA1 missing in the repository 

1640 silently. 

1641 Returns: A tuple of (commits, tags, others) SHA1s 

1642 """ 

1643 commits: set[bytes] = set() 

1644 tags: set[bytes] = set() 

1645 others: set[bytes] = set() 

1646 for e in lst: 

1647 try: 

1648 o = obj_store[e] 

1649 except KeyError: 

1650 if not ignore_unknown: 

1651 raise 

1652 else: 

1653 if isinstance(o, Commit): 

1654 commits.add(e) 

1655 elif isinstance(o, Tag): 

1656 tags.add(e) 

1657 tagged = o.object[1] 

1658 c, t, os = _split_commits_and_tags( 

1659 obj_store, [tagged], ignore_unknown=ignore_unknown 

1660 ) 

1661 commits |= c 

1662 tags |= t 

1663 others |= os 

1664 else: 

1665 others.add(e) 

1666 return (commits, tags, others) 

1667 

1668 

1669class MissingObjectFinder: 

1670 """Find the objects missing from another object store. 

1671 

1672 Args: 

1673 object_store: Object store containing at least all objects to be 

1674 sent 

1675 haves: SHA1s of commits not to send (already present in target) 

1676 wants: SHA1s of commits to send 

1677 progress: Optional function to report progress to. 

1678 get_tagged: Function that returns a dict of pointed-to sha -> tag 

1679 sha for including tags. 

1680 get_parents: Optional function for getting the parents of a commit. 

1681 """ 

1682 

1683 def __init__( 

1684 self, 

1685 object_store, 

1686 haves, 

1687 wants, 

1688 *, 

1689 shallow=None, 

1690 progress=None, 

1691 get_tagged=None, 

1692 get_parents=lambda commit: commit.parents, 

1693 ) -> None: 

1694 self.object_store = object_store 

1695 if shallow is None: 

1696 shallow = set() 

1697 self._get_parents = get_parents 

1698 # process Commits and Tags differently 

1699 # Note, while haves may list commits/tags not available locally, 

1700 # and such SHAs would get filtered out by _split_commits_and_tags, 

1701 # wants shall list only known SHAs, and otherwise 

1702 # _split_commits_and_tags fails with KeyError 

1703 have_commits, have_tags, have_others = _split_commits_and_tags( 

1704 object_store, haves, ignore_unknown=True 

1705 ) 

1706 want_commits, want_tags, want_others = _split_commits_and_tags( 

1707 object_store, wants, ignore_unknown=False 

1708 ) 

1709 # all_ancestors is a set of commits that shall not be sent 

1710 # (complete repository up to 'haves') 

1711 all_ancestors = _collect_ancestors( 

1712 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

1713 )[0] 

1714 # all_missing - complete set of commits between haves and wants 

1715 # common - commits from all_ancestors we hit into while 

1716 # traversing parent hierarchy of wants 

1717 missing_commits, common_commits = _collect_ancestors( 

1718 object_store, 

1719 want_commits, 

1720 all_ancestors, 

1721 shallow=shallow, 

1722 get_parents=self._get_parents, 

1723 ) 

1724 self.remote_has: set[bytes] = set() 

1725 # Now, fill sha_done with commits and revisions of 

1726 # files and directories known to be both locally 

1727 # and on target. Thus these commits and files 

1728 # won't get selected for fetch 

1729 for h in common_commits: 

1730 self.remote_has.add(h) 

1731 cmt = object_store[h] 

1732 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

1733 # record tags we have as visited, too 

1734 for t in have_tags: 

1735 self.remote_has.add(t) 

1736 self.sha_done = set(self.remote_has) 

1737 

1738 # in fact, what we 'want' is commits, tags, and others 

1739 # we've found missing 

1740 self.objects_to_send: set[ 

1741 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

1742 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

1743 missing_tags = want_tags.difference(have_tags) 

1744 self.objects_to_send.update( 

1745 {(w, None, Tag.type_num, False) for w in missing_tags} 

1746 ) 

1747 missing_others = want_others.difference(have_others) 

1748 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

1749 

1750 if progress is None: 

1751 self.progress = lambda x: None 

1752 else: 

1753 self.progress = progress 

1754 self._tagged = (get_tagged and get_tagged()) or {} 

1755 

1756 def get_remote_has(self): 

1757 return self.remote_has 

1758 

1759 def add_todo( 

1760 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

1761 ) -> None: 

1762 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

1763 

1764 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

1765 while True: 

1766 if not self.objects_to_send: 

1767 self.progress( 

1768 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

1769 ) 

1770 raise StopIteration 

1771 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

1772 if sha not in self.sha_done: 

1773 break 

1774 if not leaf: 

1775 o = self.object_store[sha] 

1776 if isinstance(o, Commit): 

1777 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

1778 elif isinstance(o, Tree): 

1779 self.add_todo( 

1780 [ 

1781 ( 

1782 s, 

1783 n, 

1784 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

1785 not stat.S_ISDIR(m), 

1786 ) 

1787 for n, m, s in o.iteritems() 

1788 if not S_ISGITLINK(m) 

1789 ] 

1790 ) 

1791 elif isinstance(o, Tag): 

1792 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

1793 if sha in self._tagged: 

1794 self.add_todo([(self._tagged[sha], None, None, True)]) 

1795 self.sha_done.add(sha) 

1796 if len(self.sha_done) % 1000 == 0: 

1797 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

1798 if type_num is None: 

1799 pack_hint = None 

1800 else: 

1801 pack_hint = (type_num, name) 

1802 return (sha, pack_hint) 

1803 

1804 def __iter__(self): 

1805 return self 

1806 

1807 
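
To illustrate how the finder above is typically driven, here is a small, hedged sketch. It assumes dulwich's MemoryObjectStore and the MissingObjectFinder class whose constructor and __next__ appear above; the repository contents are invented for the example.

from dulwich.object_store import MemoryObjectStore, MissingObjectFinder
from dulwich.objects import Blob, Commit, Tree

store = MemoryObjectStore()

blob = Blob.from_string(b"hello\n")
tree = Tree()
tree.add(b"hello.txt", 0o100644, blob.id)
commit = Commit()
commit.tree = tree.id
commit.parents = []
commit.author = commit.committer = b"Example <example@example.com>"
commit.commit_time = commit.author_time = 0
commit.commit_timezone = commit.author_timezone = 0
commit.message = b"initial commit"
for obj in (blob, tree, commit):
    store.add_object(obj)

# The remote side has nothing (haves=[]); we want to send `commit` and
# everything reachable from it. Iterating yields (sha, pack_hint) pairs.
finder = MissingObjectFinder(store, haves=[], wants=[commit.id])
for sha, pack_hint in finder:
    print(sha, pack_hint)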

1808class ObjectStoreGraphWalker: 

1809 """Graph walker that finds what commits are missing from an object store.""" 

1810 

1811 heads: set[ObjectID] 

1812 """Revisions without descendants in the local repo.""" 

1813 

1814 get_parents: Callable[[ObjectID], list[ObjectID]] 

1815 """Function to retrieve parents in the local repo.""" 

1816 

1817 shallow: set[ObjectID] 

1818 

1819 def __init__( 

1820 self, 

1821 local_heads: Iterable[ObjectID], 

1822 get_parents, 

1823 shallow: Optional[set[ObjectID]] = None, 

1824 update_shallow=None, 

1825 ) -> None: 

1826 """Create a new instance. 

1827 

1828 Args: 

1829 local_heads: Heads to start search with 

1830 get_parents: Function for finding the parents of a SHA1. 

1831 """ 

1832 self.heads = set(local_heads) 

1833 self.get_parents = get_parents 

1834 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

1835 if shallow is None: 

1836 shallow = set() 

1837 self.shallow = shallow 

1838 self.update_shallow = update_shallow 

1839 

1840 def nak(self) -> None: 

1841 """Nothing in common was found.""" 

1842 

1843 def ack(self, sha: ObjectID) -> None: 

1844 """Ack that a revision and its ancestors are present in the source.""" 

1845 if len(sha) != 40: 

1846 raise ValueError(f"unexpected sha {sha!r} received") 

1847 ancestors = {sha} 

1848 

1849 # stop if we run out of heads to remove 

1850 while self.heads: 

1851 for a in ancestors: 

1852 if a in self.heads: 

1853 self.heads.remove(a) 

1854 

1855 # collect all ancestors 

1856 new_ancestors = set() 

1857 for a in ancestors: 

1858 ps = self.parents.get(a) 

1859 if ps is not None: 

1860 new_ancestors.update(ps) 

1861 self.parents[a] = None 

1862 

1863 # no more ancestors; stop 

1864 if not new_ancestors: 

1865 break 

1866 

1867 ancestors = new_ancestors 

1868 

1869 def next(self): 

1870 """Iterate over ancestors of heads in the target.""" 

1871 if self.heads: 

1872 ret = self.heads.pop() 

1873 try: 

1874 ps = self.get_parents(ret) 

1875 except KeyError: 

1876 return None 

1877 self.parents[ret] = ps 

1878 self.heads.update([p for p in ps if p not in self.parents]) 

1879 return ret 

1880 return None 

1881 

1882 __next__ = next 

1883 

1884 
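
The next()/ack() negotiation protocol implemented by ObjectStoreGraphWalker can be exercised without a real object store, since only a parent-lookup callable is required. A minimal sketch, assuming an invented three-commit history with fake 40-hex SHAs:

from dulwich.object_store import ObjectStoreGraphWalker

# c3 -> c2 -> c1 (fake 40-hex SHAs, local history only)
c1, c2, c3 = b"1" * 40, b"2" * 40, b"3" * 40
parents = {c3: [c2], c2: [c1], c1: []}

walker = ObjectStoreGraphWalker([c3], parents.__getitem__)

assert walker.next() == c3    # announce our head to the other side
walker.ack(c2)                # other side has c2, hence also its ancestors
assert walker.next() is None  # nothing further to announce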

1885def commit_tree_changes(object_store, tree, changes): 

1886 """Commit a specified set of changes to a tree structure. 

1887 

1888 This will apply a set of changes on top of an existing tree, storing new 

1889 objects in object_store. 

1890 

1891 changes are a list of tuples with (path, mode, object_sha). 

1892 Paths can be both blobs and trees. Setting the mode and 

1893 object sha to None deletes the path. 

1894 

1895 This method works especially well if there are only a small 

1896 number of changes to a big tree. For a large number of changes 

1897 to a large tree, use e.g. commit_tree. 

1898 

1899 Args: 

1900 object_store: Object store to store new objects in 

1901 and retrieve old ones from. 

1902 tree: Original tree root 

1903 changes: changes to apply 

1904 Returns: New tree root object 

1905 """ 

1906 # TODO(jelmer): Save up the objects and add them using .add_objects 

1907 # rather than with individual calls to .add_object. 

1908 nested_changes = {} 

1909 for path, new_mode, new_sha in changes: 

1910 try: 

1911 (dirname, subpath) = path.split(b"/", 1) 

1912 except ValueError: 

1913 if new_sha is None: 

1914 del tree[path] 

1915 else: 

1916 tree[path] = (new_mode, new_sha) 

1917 else: 

1918 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

1919 for name, subchanges in nested_changes.items(): 

1920 try: 

1921 orig_subtree = object_store[tree[name][1]] 

1922 except KeyError: 

1923 orig_subtree = Tree() 

1924 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

1925 if len(subtree) == 0: 

1926 del tree[name] 

1927 else: 

1928 tree[name] = (stat.S_IFDIR, subtree.id) 

1929 object_store.add_object(tree) 

1930 return tree 

1931 

1932 
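
A hedged usage sketch for commit_tree_changes, assuming dulwich's MemoryObjectStore; the tree contents and paths are invented. It applies one nested addition and one deletion on top of an existing tree:

from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b"old\n")
new_blob = Blob.from_string(b"new\n")
store.add_objects([(old_blob, None), (new_blob, None)])

tree = Tree()
tree.add(b"README", 0o100644, old_blob.id)
tree.add(b"remove-me.txt", 0o100644, old_blob.id)
store.add_object(tree)

new_tree = commit_tree_changes(
    store,
    store[tree.id],
    [
        (b"docs/guide.txt", 0o100644, new_blob.id),  # add a nested path
        (b"remove-me.txt", None, None),              # delete an existing path
    ],
)
print(sorted(name for name, _, _ in new_tree.iteritems()))
# [b'README', b'docs']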

1933class OverlayObjectStore(BaseObjectStore): 

1934 """Object store that can overlay multiple object stores.""" 

1935 

1936 def __init__(self, bases, add_store=None) -> None: 

1937 self.bases = bases 

1938 self.add_store = add_store 

1939 

1940 def add_object(self, object): 

1941 if self.add_store is None: 

1942 raise NotImplementedError(self.add_object) 

1943 return self.add_store.add_object(object) 

1944 

1945 def add_objects(self, objects, progress=None): 

1946 if self.add_store is None: 

1947 raise NotImplementedError(self.add_objects) 

1948 return self.add_store.add_objects(objects, progress) 

1949 

1950 @property 

1951 def packs(self): 

1952 ret = [] 

1953 for b in self.bases: 

1954 ret.extend(b.packs) 

1955 return ret 

1956 

1957 def __iter__(self): 

1958 done = set() 

1959 for b in self.bases: 

1960 for o_id in b: 

1961 if o_id not in done: 

1962 yield o_id 

1963 done.add(o_id) 

1964 

1965 def iterobjects_subset( 

1966 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1967 ) -> Iterator[ShaFile]: 

1968 todo = set(shas) 

1969 found: set[bytes] = set() 

1970 

1971 for b in self.bases: 

1972 # Create a copy of todo for each base to avoid modifying 

1973 # the set while iterating through it 

1974 current_todo = todo - found 

1975 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

1976 yield o 

1977 found.add(o.id) 

1978 

1979 # Check for any remaining objects not found 

1980 missing = todo - found 

1981 if missing and not allow_missing: 

1982 raise KeyError(next(iter(missing))) 

1983 

1984 def iter_unpacked_subset( 

1985 self, 

1986 shas: Iterable[bytes], 

1987 *, 

1988 include_comp=False, 

1989 allow_missing: bool = False, 

1990 convert_ofs_delta=True, 

1991 ) -> Iterator[ShaFile]: 

1992 todo = set(shas) 

1993 for b in self.bases: 

1994 for o in b.iter_unpacked_subset( 

1995 todo, 

1996 include_comp=include_comp, 

1997 allow_missing=True, 

1998 convert_ofs_delta=convert_ofs_delta, 

1999 ): 

2000 yield o 

2001 todo.remove(o.id) 

2002 if todo and not allow_missing: 

2003 raise KeyError(next(iter(todo))) 

2004 

2005 def get_raw(self, sha_id): 

2006 for b in self.bases: 

2007 try: 

2008 return b.get_raw(sha_id) 

2009 except KeyError: 

2010 pass 

2011 raise KeyError(sha_id) 

2012 

2013 def contains_packed(self, sha) -> bool: 

2014 for b in self.bases: 

2015 if b.contains_packed(sha): 

2016 return True 

2017 return False 

2018 

2019 def contains_loose(self, sha) -> bool: 

2020 for b in self.bases: 

2021 if b.contains_loose(sha): 

2022 return True 

2023 return False 

2024 

2025 
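
A brief sketch of how an overlay store behaves: reads fall through the bases in order, while writes go to the optional add_store. The two MemoryObjectStore instances and the blob contents are invented for the example:

from dulwich.object_store import MemoryObjectStore, OverlayObjectStore
from dulwich.objects import Blob

base = MemoryObjectStore()
scratch = MemoryObjectStore()
blob = Blob.from_string(b"shared data\n")
base.add_object(blob)

overlay = OverlayObjectStore([scratch, base], add_store=scratch)
print(overlay[blob.id].data)                          # found via the second base
overlay.add_object(Blob.from_string(b"new data\n"))   # written to `scratch`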

2026def read_packs_file(f): 

2027 """Yield the packs listed in a packs file.""" 

2028 for line in f.read().splitlines(): 

2029 if not line: 

2030 continue 

2031 (kind, name) = line.split(b" ", 1) 

2032 if kind != b"P": 

2033 continue 

2034 yield os.fsdecode(name) 

2035 

2036 
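
The objects/info/packs format parsed above is one "P <pack-name>" entry per line. A minimal in-memory example (file contents invented):

from io import BytesIO

from dulwich.object_store import read_packs_file

data = BytesIO(b"P pack-1234567890abcdef.pack\nP pack-fedcba0987654321.pack\n")
print(list(read_packs_file(data)))
# ['pack-1234567890abcdef.pack', 'pack-fedcba0987654321.pack']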

2037class BucketBasedObjectStore(PackBasedObjectStore): 

2038 """Object store implementation that uses a bucket store like S3 as backend.""" 

2039 

2040 def _iter_loose_objects(self): 

2041 """Iterate over the SHAs of all loose objects.""" 

2042 return iter([]) 

2043 

2044 def _get_loose_object(self, sha) -> None: 

2045 return None 

2046 

2047 def delete_loose_object(self, sha) -> None: 

2048 # Loose objects don't exist in a bucket-based store. 

2049 pass 

2050 

2051 def _remove_pack(self, name) -> None: 

2052 raise NotImplementedError(self._remove_pack) 

2053 

2054 def _iter_pack_names(self) -> Iterator[str]: 

2055 raise NotImplementedError(self._iter_pack_names) 

2056 

2057 def _get_pack(self, name) -> Pack: 

2058 raise NotImplementedError(self._get_pack) 

2059 

2060 def _update_pack_cache(self): 

2061 pack_files = set(self._iter_pack_names()) 

2062 

2063 # Open newly appeared pack files 

2064 new_packs = [] 

2065 for f in pack_files: 

2066 if f not in self._pack_cache: 

2067 pack = self._get_pack(f) 

2068 new_packs.append(pack) 

2069 self._pack_cache[f] = pack 

2070 # Remove disappeared pack files 

2071 for f in set(self._pack_cache) - pack_files: 

2072 self._pack_cache.pop(f).close() 

2073 return new_packs 

2074 

2075 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2076 raise NotImplementedError 

2077 

2078 def add_pack(self): 

2079 """Add a new pack to this object store. 

2080 

2081 Returns: Fileobject to write to, a commit function to 

2082 call when the pack is finished and an abort 

2083 function. 

2084 """ 

2085 import tempfile 

2086 

2087 pf = tempfile.SpooledTemporaryFile( 

2088 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2089 ) 

2090 

2091 def commit(): 

2092 if pf.tell() == 0: 

2093 pf.close() 

2094 return None 

2095 

2096 pf.seek(0) 

2097 p = PackData(pf.name, pf) 

2098 entries = p.sorted_entries() 

2099 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2100 idxf = tempfile.SpooledTemporaryFile( 

2101 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2102 ) 

2103 checksum = p.get_stored_checksum() 

2104 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2105 idxf.seek(0) 

2106 idx = load_pack_index_file(basename + ".idx", idxf) 

2107 for pack in self.packs: 

2108 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2109 p.close() 

2110 idx.close() 

2111 pf.close() 

2112 idxf.close() 

2113 return pack 

2114 pf.seek(0) 

2115 idxf.seek(0) 

2116 self._upload_pack(basename, pf, idxf) 

2117 final_pack = Pack.from_objects(p, idx) 

2118 self._add_cached_pack(basename, final_pack) 

2119 pf.close() 

2120 idxf.close() 

2121 return final_pack 

2122 

2123 return pf, commit, pf.close 

2124 

2125 
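
A hedged sketch of the calling convention for add_pack(): write raw pack bytes into the returned file object, then call either the commit or the abort callable. Both store (some concrete BucketBasedObjectStore subclass) and pack_bytes are assumptions introduced for illustration:

def store_pack(store, pack_bytes: bytes):
    """Write pre-built pack bytes into a bucket-based store (sketch)."""
    f, commit, abort = store.add_pack()
    try:
        f.write(pack_bytes)
    except BaseException:
        abort()   # discard the spooled temporary file
        raise
    else:
        # commit() indexes the spooled pack, uploads it via _upload_pack()
        # and returns the resulting Pack (or None for an empty pack).
        return commit()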

2126def _collect_ancestors( 

2127 store: ObjectContainer, 

2128 heads, 

2129 common: frozenset[ObjectID] = frozenset(), 

2130 shallow: frozenset[ObjectID] = frozenset(), 

2131 get_parents=lambda commit: commit.parents, 

2132): 

2133 """Collect all ancestors of heads up to (excluding) those in common. 

2134 

2135 Args: 

2136 heads: commits to start from 

2137 common: commits to end at, or empty set to walk repository 

2138 completely 

2139 get_parents: Optional function for getting the parents of a 

2140 commit. 

2141 Returns: a tuple (A, B) where A is the set of all commits reachable 

2142 from heads but not present in common, and B is the subset of common 

2143 that is directly reachable from heads 

2144 """ 

2145 bases = set() 

2146 commits = set() 

2147 queue = [] 

2148 queue.extend(heads) 

2149 while queue: 

2150 e = queue.pop(0) 

2151 if e in common: 

2152 bases.add(e) 

2153 elif e not in commits: 

2154 commits.add(e) 

2155 if e in shallow: 

2156 continue 

2157 cmt = store[e] 

2158 queue.extend(get_parents(cmt)) 

2159 return (commits, bases) 

2160 

2161 
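
Because _collect_ancestors only needs __getitem__ on the store and a parent-lookup callable, its behaviour can be illustrated with a plain dict standing in for the object store; the graph and SHAs below are invented:

from dulwich.object_store import _collect_ancestors

# c4 -> c3 -> c2 -> c1; the "store" maps each commit to its parent list
graph = {b"c4": [b"c3"], b"c3": [b"c2"], b"c2": [b"c1"], b"c1": []}

commits, bases = _collect_ancestors(
    graph,                        # store[sha] returns the parent list directly
    [b"c4"],                      # heads to walk from
    common=frozenset({b"c2"}),    # pretend the other side already has c2
    get_parents=lambda parent_list: parent_list,
)
print(commits)   # {b'c4', b'c3'}
print(bases)     # {b'c2'}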

2162def iter_tree_contents( 

2163 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2164): 

2165 """Iterate the contents of a tree and all subtrees. 

2166 

2167 Iteration is depth-first pre-order, as in e.g. os.walk. 

2168 

2169 Args: 

2170 tree_id: SHA1 of the tree. 

2171 include_trees: If True, include tree objects in the iteration. 

2172 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

2173 tree. 

2174 """ 

2175 if tree_id is None: 

2176 return 

2177 # This could be fairly easily generalized to >2 trees if we find a use 

2178 # case. 

2179 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2180 while todo: 

2181 entry = todo.pop() 

2182 if stat.S_ISDIR(entry.mode): 

2183 extra = [] 

2184 tree = store[entry.sha] 

2185 assert isinstance(tree, Tree) 

2186 for subentry in tree.iteritems(name_order=True): 

2187 extra.append(subentry.in_path(entry.path)) 

2188 todo.extend(reversed(extra)) 

2189 if not stat.S_ISDIR(entry.mode) or include_trees: 

2190 yield entry 

2191 

2192 
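
A short sketch of iter_tree_contents over a nested tree built in a MemoryObjectStore (paths and file contents invented for the example); entries come back in depth-first pre-order with full paths:

from dulwich.object_store import MemoryObjectStore, iter_tree_contents
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"content\n")
subtree = Tree()
subtree.add(b"inner.txt", 0o100644, blob.id)
root = Tree()
root.add(b"outer.txt", 0o100644, blob.id)
root.add(b"sub", 0o040000, subtree.id)
store.add_objects([(blob, None), (subtree, None), (root, None)])

for entry in iter_tree_contents(store, root.id):
    print(entry.path, oct(entry.mode))
# b'outer.txt' 0o100644
# b'sub/inner.txt' 0o100644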

2193def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2194 """Peel all tags from a SHA. 

2195 

2196 Args: 

2197 sha: The object SHA to peel. 

2198 Returns: A tuple of (unpeeled, peeled) objects: the original object and 

2199 the object reached after following all intermediate tags; if the 

2200 original SHA does not point to a tag, both are the same object. 

2201 """ 

2202 unpeeled = obj = store[sha] 

2203 obj_class = object_class(obj.type_name) 

2204 while obj_class is Tag: 

2205 assert isinstance(obj, Tag) 

2206 obj_class, sha = obj.object 

2207 obj = store[sha] 

2208 return unpeeled, obj
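
Finally, a minimal sketch of peel_sha: a tag pointing at another object peels back to that object, while a non-tag object peels to itself. The tag here points at a blob purely for brevity; all objects are invented for the example:

from dulwich.object_store import MemoryObjectStore, peel_sha
from dulwich.objects import Blob, Tag

store = MemoryObjectStore()
blob = Blob.from_string(b"data\n")
tag = Tag()
tag.name = b"v1.0"
tag.object = (Blob, blob.id)
tag.tagger = b"Example <example@example.com>"
tag.tag_time = 0
tag.tag_timezone = 0
tag.message = b"release tag"
store.add_objects([(blob, None), (tag, None)])

unpeeled, peeled = peel_sha(store, tag.id)
assert unpeeled.id == tag.id and peeled.id == blob.id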