Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%

1404 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set 

33from contextlib import suppress 

34from io import BytesIO 

35from pathlib import Path 

36from typing import ( 

37 TYPE_CHECKING, 

38 BinaryIO, 

39 Protocol, 

40) 

41 

42from .errors import NotTreeError 

43from .file import GitFile, _GitFile 

44from .objects import ( 

45 S_ISGITLINK, 

46 ZERO_SHA, 

47 Blob, 

48 Commit, 

49 ObjectID, 

50 ShaFile, 

51 Tag, 

52 Tree, 

53 TreeEntry, 

54 hex_to_filename, 

55 hex_to_sha, 

56 object_class, 

57 sha_to_hex, 

58 valid_hexsha, 

59) 

60from .pack import ( 

61 PACK_SPOOL_FILE_MAX_SIZE, 

62 ObjectContainer, 

63 Pack, 

64 PackData, 

65 PackedObjectContainer, 

66 PackFileDisappeared, 

67 PackHint, 

68 PackIndexer, 

69 PackInflater, 

70 PackStreamCopier, 

71 UnpackedObject, 

72 extend_pack, 

73 full_unpacked_object, 

74 generate_unpacked_objects, 

75 iter_sha1, 

76 load_pack_index_file, 

77 pack_objects_to_data, 

78 write_pack_data, 

79 write_pack_index, 

80) 

81from .protocol import DEPTH_INFINITE 

82from .refs import PEELED_TAG_SUFFIX, Ref 

83 

84if TYPE_CHECKING: 

85 from .bitmap import EWAHBitmap 

86 from .commit_graph import CommitGraph 

87 from .config import Config 

88 from .diff_tree import RenameDetector 

89 from .pack import Pack 

90 

91 

92class GraphWalker(Protocol): 

93 """Protocol for graph walker objects.""" 

94 

95 def __next__(self) -> bytes | None: 

96 """Return the next object SHA to visit.""" 

97 ... 

98 

99 def ack(self, sha: bytes) -> None: 

100 """Acknowledge that an object has been received.""" 

101 ... 

102 

103 def nak(self) -> None: 

104 """Nothing in common was found.""" 

105 ... 

106 

107 

108class ObjectReachabilityProvider(Protocol): 

109 """Protocol for computing object reachability queries. 

110 

111 This abstraction allows reachability computations to be backed by either 

112 naive graph traversal or optimized bitmap indexes, with a consistent interface. 

113 """ 

114 

115 def get_reachable_commits( 

116 self, 

117 heads: Iterable[bytes], 

118 exclude: Iterable[bytes] | None = None, 

119 shallow: Set[bytes] | None = None, 

120 ) -> set[bytes]: 

121 """Get all commits reachable from heads, excluding those in exclude. 

122 

123 Args: 

124 heads: Starting commit SHAs 

125 exclude: Commit SHAs to exclude (and their ancestors) 

126 shallow: Set of shallow commit boundaries (traversal stops here) 

127 

128 Returns: 

129 Set of commit SHAs reachable from heads but not from exclude 

130 """ 

131 ... 

132 

133 def get_reachable_objects( 

134 self, 

135 commits: Iterable[bytes], 

136 exclude_commits: Iterable[bytes] | None = None, 

137 ) -> set[bytes]: 

138 """Get all objects (commits + trees + blobs) reachable from commits. 

139 

140 Args: 

141 commits: Starting commit SHAs 

142 exclude_commits: Commits whose objects should be excluded 

143 

144 Returns: 

145 Set of all object SHAs (commits, trees, blobs, tags) 

146 """ 

147 ... 

148 

149 def get_tree_objects( 

150 self, 

151 tree_shas: Iterable[bytes], 

152 ) -> set[bytes]: 

153 """Get all trees and blobs reachable from the given trees. 

154 

155 Args: 

156 tree_shas: Starting tree SHAs 

157 

158 Returns: 

159 Set of tree and blob SHAs 

160 """ 

161 ... 

162 
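# Illustrative sketch of how a provider satisfying this protocol is meant to be
# queried (names such as `store`, `head` and `old_head` are hypothetical, not
# part of this module):
#
#   provider = store.get_reachability_provider()
#   commits = provider.get_reachable_commits([head], exclude=[old_head])
#   objects = provider.get_reachable_objects(commits)
#   # `objects` now holds every commit, tree, blob and tag SHA needed to ship
#   # the range old_head..head.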

163 

164INFODIR = "info" 

165PACKDIR = "pack" 

166 

167# use permissions consistent with Git; just readable by everyone 

168# TODO: should packs also be non-writable on Windows? if so, that 

169# would require some rather significant adjustments to the test suite

170PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

171 

172# Grace period for cleaning up temporary pack files (in seconds) 

173# Matches git's default of 2 weeks 

174DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

175 

176 

177def find_shallow( 

178 store: ObjectContainer, heads: Iterable[bytes], depth: int 

179) -> tuple[set[bytes], set[bytes]]: 

180 """Find shallow commits according to a given depth. 

181 

182 Args: 

183 store: An ObjectStore for looking up objects. 

184 heads: Iterable of head SHAs to start walking from. 

185 depth: The depth of ancestors to include. A depth of one includes 

186 only the heads themselves. 

187 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

188 considered shallow and unshallow according to the arguments. Note that 

189 these sets may overlap if a commit is reachable along multiple paths. 

190 """ 

191 parents: dict[bytes, list[bytes]] = {} 

192 commit_graph = store.get_commit_graph() 

193 

194 def get_parents(sha: bytes) -> list[bytes]: 

195 result = parents.get(sha, None) 

196 if not result: 

197 # Try to use commit graph first if available 

198 if commit_graph: 

199 graph_parents = commit_graph.get_parents(sha) 

200 if graph_parents is not None: 

201 result = graph_parents 

202 parents[sha] = result 

203 return result 

204 # Fall back to loading the object 

205 commit = store[sha] 

206 assert isinstance(commit, Commit) 

207 result = commit.parents 

208 parents[sha] = result 

209 return result 

210 

211 todo = [] # stack of (sha, depth) 

212 for head_sha in heads: 

213 obj = store[head_sha] 

214 # Peel tags if necessary 

215 while isinstance(obj, Tag): 

216 _, sha = obj.object 

217 obj = store[sha] 

218 if isinstance(obj, Commit): 

219 todo.append((obj.id, 1)) 

220 

221 not_shallow = set() 

222 shallow = set() 

223 while todo: 

224 sha, cur_depth = todo.pop() 

225 if cur_depth < depth: 

226 not_shallow.add(sha) 

227 new_depth = cur_depth + 1 

228 todo.extend((p, new_depth) for p in get_parents(sha)) 

229 else: 

230 shallow.add(sha) 

231 

232 return shallow, not_shallow 

233 
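# Usage sketch for find_shallow (illustrative; `store` and `head_sha` are
# hypothetical values, e.g. taken from a repository's refs):
#
#   shallow, not_shallow = find_shallow(store, [head_sha], depth=2)
#   # Commits sitting exactly at the depth cut-off end up in `shallow`;
#   # commits closer to the heads end up in `not_shallow`.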

234 

235def get_depth( 

236 store: ObjectContainer, 

237 head: bytes, 

238 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

239 max_depth: int | None = None, 

240) -> int: 

241 """Return the current available depth for the given head. 

242 

243 For commits with multiple parents, the largest possible depth will be 

244 returned. 

245 

246 Args: 

247 store: Object store to search in 

248 head: commit to start from 

249 get_parents: optional function for getting the parents of a commit 

250 max_depth: maximum depth to search 

251 """ 

252 if head not in store: 

253 return 0 

254 current_depth = 1 

255 queue = [(head, current_depth)] 

256 commit_graph = store.get_commit_graph() 

257 

258 while queue and (max_depth is None or current_depth < max_depth): 

259 e, depth = queue.pop(0) 

260 current_depth = max(current_depth, depth) 

261 

262 # Try to use commit graph for parent lookup if available 

263 parents = None 

264 if commit_graph: 

265 parents = commit_graph.get_parents(e) 

266 

267 if parents is None: 

268 # Fall back to loading the object 

269 cmt = store[e] 

270 if isinstance(cmt, Tag): 

271 _cls, sha = cmt.object 

272 cmt = store[sha] 

273 parents = get_parents(cmt) 

274 

275 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

276 return current_depth 

277 
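# Usage sketch for get_depth (illustrative; `store` and `head_sha` are
# hypothetical):
#
#   depth = get_depth(store, head_sha, max_depth=50)
#   # `depth` is the longest ancestor chain found starting at head_sha,
#   # capped at max_depth; 0 if head_sha is not present in the store.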

278 

279class PackContainer(Protocol): 

280 """Protocol for containers that can accept pack files.""" 

281 

282 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

283 """Add a new pack.""" 

284 

285 

286class BaseObjectStore: 

287 """Object store interface.""" 

288 

289 def determine_wants_all( 

290 self, refs: Mapping[Ref, ObjectID], depth: int | None = None 

291 ) -> list[ObjectID]: 

292 """Determine which objects are wanted based on refs.""" 

293 

294 def _want_deepen(sha: bytes) -> bool: 

295 if not depth: 

296 return False 

297 if depth == DEPTH_INFINITE: 

298 return True 

299 return depth > self._get_depth(sha) 

300 

301 return [ 

302 sha 

303 for (ref, sha) in refs.items() 

304 if (sha not in self or _want_deepen(sha)) 

305 and not ref.endswith(PEELED_TAG_SUFFIX) 

306 and not sha == ZERO_SHA 

307 ] 

308 
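# Usage sketch for determine_wants_all (illustrative; the refs mapping and SHAs
# are hypothetical):
#
#   refs = {b"refs/heads/main": b"1" * 40, b"refs/tags/v1^{}": b"2" * 40}
#   wants = store.determine_wants_all(refs)
#   # Peeled tag entries, the zero SHA, and SHAs already present in the store
#   # are skipped unless a deepen is requested via `depth`.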

309 def contains_loose(self, sha: bytes) -> bool: 

310 """Check if a particular object is present by SHA1 and is loose.""" 

311 raise NotImplementedError(self.contains_loose) 

312 

313 def contains_packed(self, sha: bytes) -> bool: 

314 """Check if a particular object is present by SHA1 and is packed.""" 

315 return False # Default implementation for stores that don't support packing 

316 

317 def __contains__(self, sha1: bytes) -> bool: 

318 """Check if a particular object is present by SHA1. 

319 

320 This method makes no distinction between loose and packed objects. 

321 """ 

322 return self.contains_loose(sha1) 

323 

324 @property 

325 def packs(self) -> list[Pack]: 

326 """Iterable of pack objects.""" 

327 raise NotImplementedError 

328 

329 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

330 """Obtain the raw text for an object. 

331 

332 Args: 

333 name: sha for the object. 

334 Returns: tuple with numeric type and object contents. 

335 """ 

336 raise NotImplementedError(self.get_raw) 

337 

338 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

339 """Obtain an object by SHA1.""" 

340 type_num, uncomp = self.get_raw(sha1) 

341 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

342 

343 def __iter__(self) -> Iterator[bytes]: 

344 """Iterate over the SHAs that are present in this store.""" 

345 raise NotImplementedError(self.__iter__) 

346 

347 def add_object(self, obj: ShaFile) -> None: 

348 """Add a single object to this object store.""" 

349 raise NotImplementedError(self.add_object) 

350 

351 def add_objects( 

352 self, 

353 objects: Sequence[tuple[ShaFile, str | None]], 

354 progress: Callable[..., None] | None = None, 

355 ) -> "Pack | None": 

356 """Add a set of objects to this object store. 

357 

358 Args: 

359 objects: Iterable over a list of (object, path) tuples 

360 progress: Optional progress callback 

361 """ 

362 raise NotImplementedError(self.add_objects) 

363 

364 def get_reachability_provider( 

365 self, prefer_bitmap: bool = True 

366 ) -> ObjectReachabilityProvider: 

367 """Get a reachability provider for this object store. 

368 

369 Returns an ObjectReachabilityProvider that can efficiently compute 

370 object reachability queries. Subclasses can override this to provide 

371 optimized implementations (e.g., using bitmap indexes). 

372 

373 Args: 

374 prefer_bitmap: Whether to prefer bitmap-based reachability if 

375 available. 

376 

377 Returns: 

378 ObjectReachabilityProvider instance 

379 """ 

380 return GraphTraversalReachability(self) 

381 

382 def tree_changes( 

383 self, 

384 source: bytes | None, 

385 target: bytes | None, 

386 want_unchanged: bool = False, 

387 include_trees: bool = False, 

388 change_type_same: bool = False, 

389 rename_detector: "RenameDetector | None" = None, 

390 paths: Sequence[bytes] | None = None, 

391 ) -> Iterator[ 

392 tuple[ 

393 tuple[bytes | None, bytes | None], 

394 tuple[int | None, int | None], 

395 tuple[bytes | None, bytes | None], 

396 ] 

397 ]: 

398 """Find the differences between the contents of two trees. 

399 

400 Args: 

401 source: SHA1 of the source tree 

402 target: SHA1 of the target tree 

403 want_unchanged: Whether unchanged files should be reported 

404 include_trees: Whether to include trees 

405 change_type_same: Whether to report files changing 

406 type in the same entry. 

407 rename_detector: RenameDetector object for detecting renames. 

408 paths: Optional list of paths to filter to (as bytes). 

409 Returns: Iterator over tuples with 

410 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

411 """ 

412 from .diff_tree import tree_changes 

413 

414 for change in tree_changes( 

415 self, 

416 source, 

417 target, 

418 want_unchanged=want_unchanged, 

419 include_trees=include_trees, 

420 change_type_same=change_type_same, 

421 rename_detector=rename_detector, 

422 paths=paths, 

423 ): 

424 old_path = change.old.path if change.old is not None else None 

425 new_path = change.new.path if change.new is not None else None 

426 old_mode = change.old.mode if change.old is not None else None 

427 new_mode = change.new.mode if change.new is not None else None 

428 old_sha = change.old.sha if change.old is not None else None 

429 new_sha = change.new.sha if change.new is not None else None 

430 yield ( 

431 (old_path, new_path), 

432 (old_mode, new_mode), 

433 (old_sha, new_sha), 

434 ) 

435 

436 def iter_tree_contents( 

437 self, tree_id: bytes, include_trees: bool = False 

438 ) -> Iterator[TreeEntry]: 

439 """Iterate the contents of a tree and all subtrees. 

440 

441 Iteration is depth-first pre-order, as in e.g. os.walk. 

442 

443 Args: 

444 tree_id: SHA1 of the tree. 

445 include_trees: If True, include tree objects in the iteration. 

446 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

447 tree. 

448 """ 

449 warnings.warn( 

450 "Please use dulwich.object_store.iter_tree_contents", 

451 DeprecationWarning, 

452 stacklevel=2, 

453 ) 

454 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

455 

456 def iterobjects_subset( 

457 self, shas: Iterable[bytes], *, allow_missing: bool = False 

458 ) -> Iterator[ShaFile]: 

459 """Iterate over a subset of objects in the store. 

460 

461 Args: 

462 shas: Iterable of object SHAs to retrieve 

463 allow_missing: If True, skip missing objects; if False, raise KeyError 

464 

465 Returns: 

466 Iterator of ShaFile objects 

467 

468 Raises: 

469 KeyError: If an object is missing and allow_missing is False 

470 """ 

471 for sha in shas: 

472 try: 

473 yield self[sha] 

474 except KeyError: 

475 if not allow_missing: 

476 raise 

477 
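# Usage sketch for iterobjects_subset (illustrative; `store` and the SHA list
# are hypothetical):
#
#   for obj in store.iterobjects_subset([sha1, sha2], allow_missing=True):
#       print(obj.type_name, obj.id)
#   # With allow_missing=False (the default), a missing SHA raises KeyError.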

478 def iter_unpacked_subset( 

479 self, 

480 shas: Iterable[bytes], 

481 include_comp: bool = False, 

482 allow_missing: bool = False, 

483 convert_ofs_delta: bool = True, 

484 ) -> "Iterator[UnpackedObject]": 

485 """Iterate over unpacked objects for a subset of SHAs. 

486 

487 Default implementation that converts ShaFile objects to UnpackedObject. 

488 Subclasses may override for more efficient unpacked access. 

489 

490 Args: 

491 shas: Iterable of object SHAs to retrieve 

492 include_comp: Whether to include compressed data (ignored in base 

493 implementation) 

494 allow_missing: If True, skip missing objects; if False, raise 

495 KeyError 

496 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in 

497 base implementation) 

498 

499 Returns: 

500 Iterator of UnpackedObject instances 

501 

502 Raises: 

503 KeyError: If an object is missing and allow_missing is False 

504 """ 

505 from .pack import UnpackedObject 

506 

507 for sha in shas: 

508 try: 

509 obj = self[sha] 

510 # Convert ShaFile to UnpackedObject 

511 unpacked = UnpackedObject( 

512 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id 

513 ) 

514 yield unpacked 

515 except KeyError: 

516 if not allow_missing: 

517 raise 

518 

519 def find_missing_objects( 

520 self, 

521 haves: Iterable[bytes], 

522 wants: Iterable[bytes], 

523 shallow: Set[bytes] | None = None, 

524 progress: Callable[..., None] | None = None, 

525 get_tagged: Callable[[], dict[bytes, bytes]] | None = None, 

526 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

527 ) -> Iterator[tuple[bytes, PackHint | None]]: 

528 """Find the missing objects required for a set of revisions. 

529 

530 Args: 

531 haves: Iterable over SHAs already in common. 

532 wants: Iterable over SHAs of objects to fetch. 

533 shallow: Set of shallow commit SHA1s to skip 

534 progress: Simple progress function that will be called with 

535 updated progress strings. 

536 get_tagged: Function that returns a dict of pointed-to sha -> 

537 tag sha for including tags. 

538 get_parents: Optional function for getting the parents of a 

539 commit. 

540 Returns: Iterator over (sha, pack hint) pairs.

541 """ 

542 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

543 finder = MissingObjectFinder( 

544 self, 

545 haves=haves, 

546 wants=wants, 

547 shallow=shallow, 

548 progress=progress, 

549 get_tagged=get_tagged, 

550 get_parents=get_parents, 

551 ) 

552 return iter(finder) 

553 

554 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

555 """Find which revisions this store has in common using graphwalker. 

556 

557 Args: 

558 graphwalker: A graphwalker object. 

559 Returns: List of SHAs that are in common 

560 """ 

561 haves = [] 

562 sha = next(graphwalker) 

563 while sha: 

564 if sha in self: 

565 haves.append(sha) 

566 graphwalker.ack(sha) 

567 sha = next(graphwalker) 

568 return haves 

569 
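# Usage sketch (illustrative): during fetch negotiation the serving side walks
# the client's graph walker until it finds SHAs it already has (`graphwalker`
# here is hypothetical):
#
#   common = store.find_common_revisions(graphwalker)
#   # Every SHA in `common` has been ack()ed on the graph walker.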

570 def generate_pack_data( 

571 self, 

572 have: Iterable[bytes], 

573 want: Iterable[bytes], 

574 *, 

575 shallow: Set[bytes] | None = None, 

576 progress: Callable[..., None] | None = None, 

577 ofs_delta: bool = True, 

578 ) -> tuple[int, Iterator[UnpackedObject]]: 

579 """Generate pack data objects for a set of wants/haves. 

580 

581 Args: 

582 have: List of SHA1s of objects that should not be sent 

583 want: List of SHA1s of objects that should be sent 

584 shallow: Set of shallow commit SHA1s to skip 

585 ofs_delta: Whether OFS deltas can be included 

586 progress: Optional progress reporting method 

587 """ 

588 # Note that the pack-specific implementation below is more efficient, 

589 # as it reuses deltas 

590 missing_objects = MissingObjectFinder( 

591 self, haves=have, wants=want, shallow=shallow, progress=progress 

592 ) 

593 object_ids = list(missing_objects) 

594 return pack_objects_to_data( 

595 [(self[oid], path) for oid, path in object_ids], 

596 ofs_delta=ofs_delta, 

597 progress=progress, 

598 ) 

599 

600 def peel_sha(self, sha: bytes) -> bytes: 

601 """Peel all tags from a SHA. 

602 

603 Args: 

604 sha: The object SHA to peel. 

605 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

606 intermediate tags; if the original ref does not point to a tag, 

607 this will equal the original SHA1. 

608 """ 

609 warnings.warn( 

610 "Please use dulwich.object_store.peel_sha()", 

611 DeprecationWarning, 

612 stacklevel=2, 

613 ) 

614 return peel_sha(self, sha)[1].id 

615 

616 def _get_depth( 

617 self, 

618 head: bytes, 

619 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

620 max_depth: int | None = None, 

621 ) -> int: 

622 """Return the current available depth for the given head. 

623 

624 For commits with multiple parents, the largest possible depth will be 

625 returned. 

626 

627 Args: 

628 head: commit to start from 

629 get_parents: optional function for getting the parents of a commit 

630 max_depth: maximum depth to search 

631 """ 

632 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

633 

634 def close(self) -> None: 

635 """Close any files opened by this object store.""" 

636 # Default implementation is a NO-OP 

637 

638 def prune(self, grace_period: int | None = None) -> None: 

639 """Prune/clean up this object store. 

640 

641 This includes removing orphaned temporary files and other 

642 housekeeping tasks. Default implementation is a NO-OP. 

643 

644 Args: 

645 grace_period: Grace period in seconds for removing temporary files. 

646 If None, uses the default grace period. 

647 """ 

648 # Default implementation is a NO-OP 

649 

650 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

651 """Iterate over all SHA1s that start with a given prefix. 

652 

653 The default implementation is a naive iteration over all objects. 

654 However, subclasses may override this method with more efficient 

655 implementations. 

656 """ 

657 for sha in self: 

658 if sha.startswith(prefix): 

659 yield sha 

660 
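# Usage sketch for iter_prefix (illustrative; resolving an abbreviated SHA):
#
#   matches = list(store.iter_prefix(b"deadbe"))
#   if len(matches) == 1:
#       full_sha = matches[0]
#   # More than one match means the abbreviation is ambiguous.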

661 def get_commit_graph(self) -> "CommitGraph | None": 

662 """Get the commit graph for this object store. 

663 

664 Returns: 

665 CommitGraph object if available, None otherwise 

666 """ 

667 return None 

668 

669 def write_commit_graph( 

670 self, refs: Sequence[bytes] | None = None, reachable: bool = True 

671 ) -> None: 

672 """Write a commit graph file for this object store. 

673 

674 Args: 

675 refs: List of refs to include. If None, includes all refs from object store. 

676 reachable: If True, includes all commits reachable from refs. 

677 If False, only includes the direct ref targets. 

678 

679 Note: 

680 The base implementation raises NotImplementedError. Subclasses should

681 override this method to provide commit graph writing functionality.

682 """ 

683 raise NotImplementedError(self.write_commit_graph) 

684 

685 def get_object_mtime(self, sha: bytes) -> float: 

686 """Get the modification time of an object. 

687 

688 Args: 

689 sha: SHA1 of the object 

690 

691 Returns: 

692 Modification time as seconds since epoch 

693 

694 Raises: 

695 KeyError: if the object is not found 

696 """ 

697 # Default implementation raises KeyError 

698 # Subclasses should override to provide actual mtime 

699 raise KeyError(sha) 

700 

701 

702class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer): 

703 """Object store that supports pack operations. 

704 

705 This is a base class for object stores that can handle pack files, 

706 including both disk-based and memory-based stores. 

707 """ 

708 

709 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

710 """Add a new pack to this object store. 

711 

712 Returns: Tuple of (file, commit_func, abort_func) 

713 """ 

714 raise NotImplementedError(self.add_pack) 

715 

716 def add_pack_data( 

717 self, 

718 count: int, 

719 unpacked_objects: Iterator["UnpackedObject"], 

720 progress: Callable[..., None] | None = None, 

721 ) -> "Pack | None": 

722 """Add pack data to this object store. 

723 

724 Args: 

725 count: Number of objects 

726 unpacked_objects: Iterator over unpacked objects 

727 progress: Optional progress callback 

728 """ 

729 raise NotImplementedError(self.add_pack_data) 

730 

731 def get_unpacked_object( 

732 self, sha1: bytes, *, include_comp: bool = False 

733 ) -> "UnpackedObject": 

734 """Get a raw unresolved object. 

735 

736 Args: 

737 sha1: SHA-1 hash of the object 

738 include_comp: Whether to include compressed data 

739 

740 Returns: 

741 UnpackedObject instance 

742 """ 

743 from .pack import UnpackedObject 

744 

745 obj = self[sha1] 

746 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks()) 

747 

748 def iterobjects_subset( 

749 self, shas: Iterable[bytes], *, allow_missing: bool = False 

750 ) -> Iterator[ShaFile]: 

751 """Iterate over a subset of objects. 

752 

753 Args: 

754 shas: Iterable of object SHAs to retrieve 

755 allow_missing: If True, skip missing objects 

756 

757 Returns: 

758 Iterator of ShaFile objects 

759 """ 

760 for sha in shas: 

761 try: 

762 yield self[sha] 

763 except KeyError: 

764 if not allow_missing: 

765 raise 

766 

767 

768class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer): 

769 """Object store that uses pack files for storage. 

770 

771 This class provides a base implementation for object stores that use 

772 Git pack files as their primary storage mechanism. It handles caching 

773 of open pack files and provides configuration for pack file operations. 

774 """ 

775 

776 def __init__( 

777 self, 

778 pack_compression_level: int = -1, 

779 pack_index_version: int | None = None, 

780 pack_delta_window_size: int | None = None, 

781 pack_window_memory: int | None = None, 

782 pack_delta_cache_size: int | None = None, 

783 pack_depth: int | None = None, 

784 pack_threads: int | None = None, 

785 pack_big_file_threshold: int | None = None, 

786 ) -> None: 

787 """Initialize a PackBasedObjectStore. 

788 

789 Args: 

790 pack_compression_level: Compression level for pack files (-1 to 9) 

791 pack_index_version: Pack index version to use 

792 pack_delta_window_size: Window size for delta compression 

793 pack_window_memory: Maximum memory to use for delta window 

794 pack_delta_cache_size: Cache size for delta operations 

795 pack_depth: Maximum depth for pack deltas 

796 pack_threads: Number of threads to use for packing 

797 pack_big_file_threshold: Threshold for treating files as "big" 

798 """ 

799 self._pack_cache: dict[str, Pack] = {} 

800 self.pack_compression_level = pack_compression_level 

801 self.pack_index_version = pack_index_version 

802 self.pack_delta_window_size = pack_delta_window_size 

803 self.pack_window_memory = pack_window_memory 

804 self.pack_delta_cache_size = pack_delta_cache_size 

805 self.pack_depth = pack_depth 

806 self.pack_threads = pack_threads 

807 self.pack_big_file_threshold = pack_big_file_threshold 

808 

809 def get_reachability_provider( 

810 self, 

811 prefer_bitmaps: bool = True, 

812 ) -> ObjectReachabilityProvider: 

813 """Get the best reachability provider for the object store. 

814 

815 Args: 

817 prefer_bitmaps: Whether to use bitmaps if available 

818 

819 Returns: 

820 ObjectReachabilityProvider implementation (either bitmap-accelerated 

821 or graph traversal) 

822 """ 

823 if prefer_bitmaps: 

824 # Check if any packs have bitmaps 

825 has_bitmap = False 

826 for pack in self.packs: 

827 try: 

828 # Try to access bitmap property 

829 if pack.bitmap is not None: 

830 has_bitmap = True 

831 break 

832 except FileNotFoundError: 

833 # Bitmap file doesn't exist for this pack 

834 continue 

835 

836 if has_bitmap: 

837 return BitmapReachability(self) 

838 

839 # Fall back to graph traversal 

840 return GraphTraversalReachability(self) 

841 

842 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

843 """Add a new pack to this object store.""" 

844 raise NotImplementedError(self.add_pack) 

845 

846 def add_pack_data( 

847 self, 

848 count: int, 

849 unpacked_objects: Iterator[UnpackedObject], 

850 progress: Callable[..., None] | None = None, 

851 ) -> "Pack | None": 

852 """Add pack data to this object store. 

853 

854 Args: 

855 count: Number of items to add 

856 unpacked_objects: Iterator of UnpackedObject instances 

857 progress: Optional progress callback 

858 """ 

859 if count == 0: 

860 # Don't bother writing an empty pack file 

861 return None 

862 f, commit, abort = self.add_pack() 

863 try: 

864 write_pack_data( 

865 f.write, 

866 unpacked_objects, 

867 num_records=count, 

868 progress=progress, 

869 compression_level=self.pack_compression_level, 

870 ) 

871 except BaseException: 

872 abort() 

873 raise 

874 else: 

875 return commit() 

876 

877 @property 

878 def alternates(self) -> list["BaseObjectStore"]: 

879 """Return list of alternate object stores.""" 

880 return [] 

881 

882 def contains_packed(self, sha: bytes) -> bool: 

883 """Check if a particular object is present by SHA1 and is packed. 

884 

885 This does not check alternates. 

886 """ 

887 for pack in self.packs: 

888 try: 

889 if sha in pack: 

890 return True 

891 except PackFileDisappeared: 

892 pass 

893 return False 

894 

895 def __contains__(self, sha: bytes) -> bool: 

896 """Check if a particular object is present by SHA1. 

897 

898 This method makes no distinction between loose and packed objects. 

899 """ 

900 if self.contains_packed(sha) or self.contains_loose(sha): 

901 return True 

902 for alternate in self.alternates: 

903 if sha in alternate: 

904 return True 

905 return False 

906 

907 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

908 """Add a newly appeared pack to the cache by path.""" 

909 prev_pack = self._pack_cache.get(base_name) 

910 if prev_pack is not pack: 

911 self._pack_cache[base_name] = pack 

912 if prev_pack: 

913 prev_pack.close() 

914 

915 def generate_pack_data( 

916 self, 

917 have: Iterable[bytes], 

918 want: Iterable[bytes], 

919 *, 

920 shallow: Set[bytes] | None = None, 

921 progress: Callable[..., None] | None = None, 

922 ofs_delta: bool = True, 

923 ) -> tuple[int, Iterator[UnpackedObject]]: 

924 """Generate pack data objects for a set of wants/haves. 

925 

926 Args: 

927 have: List of SHA1s of objects that should not be sent 

928 want: List of SHA1s of objects that should be sent 

929 shallow: Set of shallow commit SHA1s to skip 

930 ofs_delta: Whether OFS deltas can be included 

931 progress: Optional progress reporting method 

932 """ 

933 missing_objects = MissingObjectFinder( 

934 self, haves=have, wants=want, shallow=shallow, progress=progress 

935 ) 

936 remote_has = missing_objects.get_remote_has() 

937 object_ids = list(missing_objects) 

938 return len(object_ids), generate_unpacked_objects( 

939 self, 

940 object_ids, 

941 progress=progress, 

942 ofs_delta=ofs_delta, 

943 other_haves=remote_has, 

944 ) 

945 

946 def _clear_cached_packs(self) -> None: 

947 pack_cache = self._pack_cache 

948 self._pack_cache = {} 

949 while pack_cache: 

950 (_name, pack) = pack_cache.popitem() 

951 pack.close() 

952 

953 def _iter_cached_packs(self) -> Iterator[Pack]: 

954 return iter(self._pack_cache.values()) 

955 

956 def _update_pack_cache(self) -> list[Pack]: 

957 raise NotImplementedError(self._update_pack_cache) 

958 

959 def close(self) -> None: 

960 """Close the object store and release resources. 

961 

962 This method closes all cached pack files and frees associated resources. 

963 """ 

964 self._clear_cached_packs() 

965 

966 @property 

967 def packs(self) -> list[Pack]: 

968 """List with pack objects.""" 

969 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

970 

971 def count_pack_files(self) -> int: 

972 """Count the number of pack files. 

973 

974 Returns: 

975 Number of pack files (excluding those with .keep files) 

976 """ 

977 count = 0 

978 for pack in self.packs: 

979 # Check if there's a .keep file for this pack 

980 keep_path = pack._basename + ".keep" 

981 if not os.path.exists(keep_path): 

982 count += 1 

983 return count 

984 

985 def _iter_alternate_objects(self) -> Iterator[bytes]: 

986 """Iterate over the SHAs of all the objects in alternate stores.""" 

987 for alternate in self.alternates: 

988 yield from alternate 

989 

990 def _iter_loose_objects(self) -> Iterator[bytes]: 

991 """Iterate over the SHAs of all loose objects.""" 

992 raise NotImplementedError(self._iter_loose_objects) 

993 

994 def _get_loose_object(self, sha: bytes) -> ShaFile | None: 

995 raise NotImplementedError(self._get_loose_object) 

996 

997 def delete_loose_object(self, sha: bytes) -> None: 

998 """Delete a loose object. 

999 

1000 This method only handles loose objects. For packed objects, 

1001 use repack(exclude=...) to exclude them during repacking. 

1002 """ 

1003 raise NotImplementedError(self.delete_loose_object) 

1004 

1005 def _remove_pack(self, pack: "Pack") -> None: 

1006 raise NotImplementedError(self._remove_pack) 

1007 

1008 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

1009 """Pack loose objects. 

1010 

1011 Args: 

1012 progress: Optional progress reporting callback 

1013 

1014 Returns: Number of objects packed 

1015 """ 

1016 objects: list[tuple[ShaFile, None]] = [] 

1017 for sha in self._iter_loose_objects(): 

1018 obj = self._get_loose_object(sha) 

1019 if obj is not None: 

1020 objects.append((obj, None)) 

1021 self.add_objects(objects, progress=progress) 

1022 for obj, path in objects: 

1023 self.delete_loose_object(obj.id) 

1024 return len(objects) 

1025 

1026 def repack( 

1027 self, 

1028 exclude: Set[bytes] | None = None, 

1029 progress: Callable[[str], None] | None = None, 

1030 ) -> int: 

1031 """Repack the packs in this repository. 

1032 

1033 Note that this implementation is fairly naive and currently keeps all 

1034 objects in memory while it repacks. 

1035 

1036 Args: 

1037 exclude: Optional set of object SHAs to exclude from repacking 

1038 progress: Optional progress reporting callback 

1039 """ 

1040 if exclude is None: 

1041 exclude = set() 

1042 

1043 loose_objects = set() 

1044 excluded_loose_objects = set() 

1045 for sha in self._iter_loose_objects(): 

1046 if sha not in exclude: 

1047 obj = self._get_loose_object(sha) 

1048 if obj is not None: 

1049 loose_objects.add(obj) 

1050 else: 

1051 excluded_loose_objects.add(sha) 

1052 

1053 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

1054 old_packs = {p.name(): p for p in self.packs} 

1055 for name, pack in old_packs.items(): 

1056 objects.update( 

1057 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

1058 ) 

1059 

1060 # Only create a new pack if there are objects to pack 

1061 if objects: 

1062 # The name of the consolidated pack might match the name of a 

1063 # pre-existing pack. Take care not to remove the newly created 

1064 # consolidated pack. 

1065 consolidated = self.add_objects(list(objects), progress=progress) 

1066 if consolidated is not None: 

1067 old_packs.pop(consolidated.name(), None) 

1068 

1069 # Delete loose objects that were packed 

1070 for obj in loose_objects: 

1071 if obj is not None: 

1072 self.delete_loose_object(obj.id) 

1073 # Delete excluded loose objects 

1074 for sha in excluded_loose_objects: 

1075 self.delete_loose_object(sha) 

1076 for name, pack in old_packs.items(): 

1077 self._remove_pack(pack) 

1078 self._update_pack_cache() 

1079 return len(objects) 

1080 
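# Usage sketch for repack (illustrative; `unreachable` is a hypothetical set of
# SHAs computed elsewhere, e.g. by a garbage-collection pass):
#
#   packed = store.repack(exclude=unreachable)
#   # All loose and packed objects except those in `unreachable` are rewritten
#   # into a single new pack; old packs and packed loose objects are removed.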

1081 def generate_pack_bitmaps( 

1082 self, 

1083 refs: dict[bytes, bytes], 

1084 *, 

1085 commit_interval: int | None = None, 

1086 progress: Callable[[str], None] | None = None, 

1087 ) -> int: 

1088 """Generate bitmap indexes for all packs that don't have them. 

1089 

1090 This generates .bitmap files for packfiles, enabling fast reachability 

1091 queries. Equivalent to the bitmap generation part of 'git repack -b'. 

1092 

1093 Args: 

1094 refs: Dictionary of ref names to commit SHAs 

1095 commit_interval: Include every Nth commit in bitmap index (None for default) 

1096 progress: Optional progress reporting callback 

1097 

1098 Returns: 

1099 Number of bitmaps generated 

1100 """ 

1101 count = 0 

1102 for pack in self.packs: 

1103 pack.ensure_bitmap( 

1104 self, refs, commit_interval=commit_interval, progress=progress 

1105 ) 

1106 count += 1 

1107 

1108 # Update cache to pick up new bitmaps 

1109 self._update_pack_cache() 

1110 

1111 return count 

1112 
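# Usage sketch for generate_pack_bitmaps (illustrative; `main_sha` and the refs
# mapping are hypothetical and would normally come from the ref container):
#
#   refs = {b"refs/heads/main": main_sha}
#   store.generate_pack_bitmaps(refs, progress=print)
#   # Afterwards get_reachability_provider() may return a bitmap-backed provider.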

1113 def __iter__(self) -> Iterator[bytes]: 

1114 """Iterate over the SHAs that are present in this store.""" 

1115 self._update_pack_cache() 

1116 for pack in self._iter_cached_packs(): 

1117 try: 

1118 yield from pack 

1119 except PackFileDisappeared: 

1120 pass 

1121 yield from self._iter_loose_objects() 

1122 yield from self._iter_alternate_objects() 

1123 

1124 def contains_loose(self, sha: bytes) -> bool: 

1125 """Check if a particular object is present by SHA1 and is loose. 

1126 

1127 This does not check alternates. 

1128 """ 

1129 return self._get_loose_object(sha) is not None 

1130 

1131 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

1132 """Obtain the raw fulltext for an object. 

1133 

1134 Args: 

1135 name: sha for the object. 

1136 Returns: tuple with numeric type and object contents. 

1137 """ 

1138 if name == ZERO_SHA: 

1139 raise KeyError(name) 

1140 if len(name) == 40: 

1141 sha = hex_to_sha(name) 

1142 hexsha = name 

1143 elif len(name) == 20: 

1144 sha = name 

1145 hexsha = None 

1146 else: 

1147 raise AssertionError(f"Invalid object name {name!r}") 

1148 for pack in self._iter_cached_packs(): 

1149 try: 

1150 return pack.get_raw(sha) 

1151 except (KeyError, PackFileDisappeared): 

1152 pass 

1153 if hexsha is None: 

1154 hexsha = sha_to_hex(name) 

1155 ret = self._get_loose_object(hexsha) 

1156 if ret is not None: 

1157 return ret.type_num, ret.as_raw_string() 

1158 # Maybe something else has added a pack with the object 

1159 # in the mean time? 

1160 for pack in self._update_pack_cache(): 

1161 try: 

1162 return pack.get_raw(sha) 

1163 except KeyError: 

1164 pass 

1165 for alternate in self.alternates: 

1166 try: 

1167 return alternate.get_raw(hexsha) 

1168 except KeyError: 

1169 pass 

1170 raise KeyError(hexsha) 

1171 

1172 def iter_unpacked_subset( 

1173 self, 

1174 shas: Iterable[bytes], 

1175 include_comp: bool = False, 

1176 allow_missing: bool = False, 

1177 convert_ofs_delta: bool = True, 

1178 ) -> Iterator[UnpackedObject]: 

1179 """Iterate over a subset of objects, yielding UnpackedObject instances. 

1180 

1181 Args: 

1182 shas: Set of object SHAs to retrieve 

1183 include_comp: Whether to include compressed data 

1184 allow_missing: If True, skip missing objects; if False, raise KeyError 

1185 convert_ofs_delta: Whether to convert OFS_DELTA objects 

1186 

1187 Returns: 

1188 Iterator of UnpackedObject instances 

1189 

1190 Raises: 

1191 KeyError: If an object is missing and allow_missing is False 

1192 """ 

1193 todo: set[bytes] = set(shas) 

1194 for p in self._iter_cached_packs(): 

1195 for unpacked in p.iter_unpacked_subset( 

1196 todo, 

1197 include_comp=include_comp, 

1198 allow_missing=True, 

1199 convert_ofs_delta=convert_ofs_delta, 

1200 ): 

1201 yield unpacked 

1202 hexsha = sha_to_hex(unpacked.sha()) 

1203 todo.remove(hexsha) 

1204 # Maybe something else has added a pack with the object 

1205 # in the mean time? 

1206 for p in self._update_pack_cache(): 

1207 for unpacked in p.iter_unpacked_subset( 

1208 todo, 

1209 include_comp=include_comp, 

1210 allow_missing=True, 

1211 convert_ofs_delta=convert_ofs_delta, 

1212 ): 

1213 yield unpacked 

1214 hexsha = sha_to_hex(unpacked.sha()) 

1215 todo.remove(hexsha) 

1216 for alternate in self.alternates: 

1217 assert isinstance(alternate, PackBasedObjectStore) 

1218 for unpacked in alternate.iter_unpacked_subset( 

1219 todo, 

1220 include_comp=include_comp, 

1221 allow_missing=True, 

1222 convert_ofs_delta=convert_ofs_delta, 

1223 ): 

1224 yield unpacked 

1225 hexsha = sha_to_hex(unpacked.sha()) 

1226 todo.remove(hexsha) 

1227 

1228 def iterobjects_subset( 

1229 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1230 ) -> Iterator[ShaFile]: 

1231 """Iterate over a subset of objects in the store. 

1232 

1233 This method searches for objects in pack files, alternates, and loose storage. 

1234 

1235 Args: 

1236 shas: Iterable of object SHAs to retrieve 

1237 allow_missing: If True, skip missing objects; if False, raise KeyError 

1238 

1239 Returns: 

1240 Iterator of ShaFile objects 

1241 

1242 Raises: 

1243 KeyError: If an object is missing and allow_missing is False 

1244 """ 

1245 todo: set[bytes] = set(shas) 

1246 for p in self._iter_cached_packs(): 

1247 for o in p.iterobjects_subset(todo, allow_missing=True): 

1248 yield o 

1249 todo.remove(o.id) 

1250 # Maybe something else has added a pack with the object 

1251 # in the mean time? 

1252 for p in self._update_pack_cache(): 

1253 for o in p.iterobjects_subset(todo, allow_missing=True): 

1254 yield o 

1255 todo.remove(o.id) 

1256 for alternate in self.alternates: 

1257 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

1258 yield o 

1259 todo.remove(o.id) 

1260 for oid in todo: 

1261 loose_obj: ShaFile | None = self._get_loose_object(oid) 

1262 if loose_obj is not None: 

1263 yield loose_obj 

1264 elif not allow_missing: 

1265 raise KeyError(oid) 

1266 

1267 def get_unpacked_object( 

1268 self, sha1: bytes, *, include_comp: bool = False 

1269 ) -> UnpackedObject: 

1270 """Obtain the unpacked object. 

1271 

1272 Args: 

1273 sha1: sha for the object. 

1274 include_comp: Whether to include compression metadata. 

1275 """ 

1276 if sha1 == ZERO_SHA: 

1277 raise KeyError(sha1) 

1278 if len(sha1) == 40: 

1279 sha = hex_to_sha(sha1) 

1280 hexsha = sha1 

1281 elif len(sha1) == 20: 

1282 sha = sha1 

1283 hexsha = None 

1284 else: 

1285 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1286 for pack in self._iter_cached_packs(): 

1287 try: 

1288 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1289 except (KeyError, PackFileDisappeared): 

1290 pass 

1291 if hexsha is None: 

1292 hexsha = sha_to_hex(sha1) 

1293 # Maybe something else has added a pack with the object 

1294 # in the mean time? 

1295 for pack in self._update_pack_cache(): 

1296 try: 

1297 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1298 except KeyError: 

1299 pass 

1300 for alternate in self.alternates: 

1301 assert isinstance(alternate, PackBasedObjectStore) 

1302 try: 

1303 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1304 except KeyError: 

1305 pass 

1306 raise KeyError(hexsha) 

1307 

1308 def add_objects( 

1309 self, 

1310 objects: Sequence[tuple[ShaFile, str | None]], 

1311 progress: Callable[[str], None] | None = None, 

1312 ) -> "Pack | None": 

1313 """Add a set of objects to this object store. 

1314 

1315 Args: 

1316 objects: Iterable over (object, path) tuples, should support 

1317 __len__. 

1318 progress: Optional progress reporting function. 

1319 Returns: Pack object of the objects written. 

1320 """ 

1321 count = len(objects) 

1322 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1323 return self.add_pack_data(count, record_iter, progress=progress) 

1324 

1325 

1326class DiskObjectStore(PackBasedObjectStore): 

1327 """Git-style object store that exists on disk.""" 

1328 

1329 path: str | os.PathLike[str] 

1330 pack_dir: str | os.PathLike[str] 

1331 _alternates: "list[BaseObjectStore] | None" 

1332 _commit_graph: "CommitGraph | None" 

1333 

1334 def __init__( 

1335 self, 

1336 path: str | os.PathLike[str], 

1337 *, 

1338 loose_compression_level: int = -1, 

1339 pack_compression_level: int = -1, 

1340 pack_index_version: int | None = None, 

1341 pack_delta_window_size: int | None = None, 

1342 pack_window_memory: int | None = None, 

1343 pack_delta_cache_size: int | None = None, 

1344 pack_depth: int | None = None, 

1345 pack_threads: int | None = None, 

1346 pack_big_file_threshold: int | None = None, 

1347 fsync_object_files: bool = False, 

1348 pack_write_bitmaps: bool = False, 

1349 pack_write_bitmap_hash_cache: bool = True, 

1350 pack_write_bitmap_lookup_table: bool = True, 

1351 file_mode: int | None = None, 

1352 dir_mode: int | None = None, 

1353 ) -> None: 

1354 """Open an object store. 

1355 

1356 Args: 

1357 path: Path of the object store. 

1358 loose_compression_level: zlib compression level for loose objects 

1359 pack_compression_level: zlib compression level for pack objects 

1360 pack_index_version: pack index version to use (1, 2, or 3) 

1361 pack_delta_window_size: sliding window size for delta compression 

1362 pack_window_memory: memory limit for delta window operations 

1363 pack_delta_cache_size: size of cache for delta operations 

1364 pack_depth: maximum delta chain depth 

1365 pack_threads: number of threads for pack operations 

1366 pack_big_file_threshold: threshold for treating files as big 

1367 fsync_object_files: whether to fsync object files for durability 

1368 pack_write_bitmaps: whether to write bitmap indexes for packs 

1369 pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps 

1370 pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps 

1371 file_mode: File permission mask for shared repository 

1372 dir_mode: Directory permission mask for shared repository 

1373 """ 

1374 super().__init__( 

1375 pack_compression_level=pack_compression_level, 

1376 pack_index_version=pack_index_version, 

1377 pack_delta_window_size=pack_delta_window_size, 

1378 pack_window_memory=pack_window_memory, 

1379 pack_delta_cache_size=pack_delta_cache_size, 

1380 pack_depth=pack_depth, 

1381 pack_threads=pack_threads, 

1382 pack_big_file_threshold=pack_big_file_threshold, 

1383 ) 

1384 self.path = path 

1385 self.pack_dir = os.path.join(self.path, PACKDIR) 

1386 self._alternates = None 

1387 self.loose_compression_level = loose_compression_level 

1388 self.pack_compression_level = pack_compression_level 

1389 self.pack_index_version = pack_index_version 

1390 self.fsync_object_files = fsync_object_files 

1391 self.pack_write_bitmaps = pack_write_bitmaps 

1392 self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache 

1393 self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table 

1394 self.file_mode = file_mode 

1395 self.dir_mode = dir_mode 

1396 

1397 # Commit graph support - lazy loaded 

1398 self._commit_graph = None 

1399 self._use_commit_graph = True # Default to true 

1400 

1401 def __repr__(self) -> str: 

1402 """Return string representation of DiskObjectStore. 

1403 

1404 Returns: 

1405 String representation including the store path 

1406 """ 

1407 return f"<{self.__class__.__name__}({self.path!r})>" 

1408 

1409 @classmethod 

1410 def from_config( 

1411 cls, 

1412 path: str | os.PathLike[str], 

1413 config: "Config", 

1414 *, 

1415 file_mode: int | None = None, 

1416 dir_mode: int | None = None, 

1417 ) -> "DiskObjectStore": 

1418 """Create a DiskObjectStore from a configuration object. 

1419 

1420 Args: 

1421 path: Path to the object store directory 

1422 config: Configuration object to read settings from 

1423 file_mode: Optional file permission mask for shared repository 

1424 dir_mode: Optional directory permission mask for shared repository 

1425 

1426 Returns: 

1427 New DiskObjectStore instance configured according to config 

1428 """ 

1429 try: 

1430 default_compression_level = int( 

1431 config.get((b"core",), b"compression").decode() 

1432 ) 

1433 except KeyError: 

1434 default_compression_level = -1 

1435 try: 

1436 loose_compression_level = int( 

1437 config.get((b"core",), b"looseCompression").decode() 

1438 ) 

1439 except KeyError: 

1440 loose_compression_level = default_compression_level 

1441 try: 

1442 pack_compression_level = int( 

1443 config.get((b"core",), "packCompression").decode() 

1444 ) 

1445 except KeyError: 

1446 pack_compression_level = default_compression_level 

1447 try: 

1448 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1449 except KeyError: 

1450 pack_index_version = None 

1451 

1452 # Read pack configuration options 

1453 try: 

1454 pack_delta_window_size = int( 

1455 config.get((b"pack",), b"deltaWindowSize").decode() 

1456 ) 

1457 except KeyError: 

1458 pack_delta_window_size = None 

1459 try: 

1460 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1461 except KeyError: 

1462 pack_window_memory = None 

1463 try: 

1464 pack_delta_cache_size = int( 

1465 config.get((b"pack",), b"deltaCacheSize").decode() 

1466 ) 

1467 except KeyError: 

1468 pack_delta_cache_size = None 

1469 try: 

1470 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1471 except KeyError: 

1472 pack_depth = None 

1473 try: 

1474 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1475 except KeyError: 

1476 pack_threads = None 

1477 try: 

1478 pack_big_file_threshold = int( 

1479 config.get((b"pack",), b"bigFileThreshold").decode() 

1480 ) 

1481 except KeyError: 

1482 pack_big_file_threshold = None 

1483 

1484 # Read core.commitGraph setting 

1485 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1486 

1487 # Read core.fsyncObjectFiles setting 

1488 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False) 

1489 

1490 # Read bitmap settings 

1491 pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False) 

1492 pack_write_bitmap_hash_cache = config.get_boolean( 

1493 (b"pack",), b"writeBitmapHashCache", True 

1494 ) 

1495 pack_write_bitmap_lookup_table = config.get_boolean( 

1496 (b"pack",), b"writeBitmapLookupTable", True 

1497 ) 

1498 # Also check repack.writeBitmaps for backwards compatibility 

1499 if not pack_write_bitmaps: 

1500 pack_write_bitmaps = config.get_boolean( 

1501 (b"repack",), b"writeBitmaps", False 

1502 ) 

1503 

1504 instance = cls( 

1505 path, 

1506 loose_compression_level=loose_compression_level, 

1507 pack_compression_level=pack_compression_level, 

1508 pack_index_version=pack_index_version, 

1509 pack_delta_window_size=pack_delta_window_size, 

1510 pack_window_memory=pack_window_memory, 

1511 pack_delta_cache_size=pack_delta_cache_size, 

1512 pack_depth=pack_depth, 

1513 pack_threads=pack_threads, 

1514 pack_big_file_threshold=pack_big_file_threshold, 

1515 fsync_object_files=fsync_object_files, 

1516 pack_write_bitmaps=pack_write_bitmaps, 

1517 pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache, 

1518 pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table, 

1519 file_mode=file_mode, 

1520 dir_mode=dir_mode, 

1521 ) 

1522 instance._use_commit_graph = use_commit_graph 

1523 return instance 

1524 
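# Usage sketch for from_config (illustrative; the paths are hypothetical and
# ConfigFile is assumed to come from dulwich.config):
#
#   from dulwich.config import ConfigFile
#   config = ConfigFile.from_path(".git/config")
#   store = DiskObjectStore.from_config(".git/objects", config)
#   # core.compression, pack.* and bitmap settings are all picked up here.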

1525 @property 

1526 def alternates(self) -> list["BaseObjectStore"]: 

1527 """Get the list of alternate object stores. 

1528 

1529 Reads from .git/objects/info/alternates if not already cached. 

1530 

1531 Returns: 

1532 List of DiskObjectStore instances for alternate object directories 

1533 """ 

1534 if self._alternates is not None: 

1535 return self._alternates 

1536 self._alternates = [] 

1537 for path in self._read_alternate_paths(): 

1538 self._alternates.append(DiskObjectStore(path)) 

1539 return self._alternates 

1540 

1541 def _read_alternate_paths(self) -> Iterator[str]: 

1542 try: 

1543 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1544 except FileNotFoundError: 

1545 return 

1546 with f: 

1547 for line in f.readlines(): 

1548 line = line.rstrip(b"\n") 

1549 if line.startswith(b"#"): 

1550 continue 

1551 if os.path.isabs(line): 

1552 yield os.fsdecode(line) 

1553 else: 

1554 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1555 

1556 def add_alternate_path(self, path: str | os.PathLike[str]) -> None: 

1557 """Add an alternate path to this object store.""" 

1558 info_dir = os.path.join(self.path, INFODIR) 

1559 try: 

1560 os.mkdir(info_dir) 

1561 if self.dir_mode is not None: 

1562 os.chmod(info_dir, self.dir_mode) 

1563 except FileExistsError: 

1564 pass 

1565 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1566 mask = self.file_mode if self.file_mode is not None else 0o644 

1567 with GitFile(alternates_path, "wb", mask=mask) as f: 

1568 try: 

1569 orig_f = open(alternates_path, "rb") 

1570 except FileNotFoundError: 

1571 pass 

1572 else: 

1573 with orig_f: 

1574 f.write(orig_f.read()) 

1575 f.write(os.fsencode(path) + b"\n") 

1576 

1577 if not os.path.isabs(path): 

1578 path = os.path.join(self.path, path) 

1579 self.alternates.append(DiskObjectStore(path)) 

1580 

1581 def _update_pack_cache(self) -> list[Pack]: 

1582 """Read and iterate over new pack files and cache them.""" 

1583 try: 

1584 pack_dir_contents = os.listdir(self.pack_dir) 

1585 except FileNotFoundError: 

1586 self.close() 

1587 return [] 

1588 pack_files = set() 

1589 for name in pack_dir_contents: 

1590 if name.startswith("pack-") and name.endswith(".pack"): 

1591 # verify that idx exists first (otherwise the pack was not yet 

1592 # fully written) 

1593 idx_name = os.path.splitext(name)[0] + ".idx" 

1594 if idx_name in pack_dir_contents: 

1595 pack_name = name[: -len(".pack")] 

1596 pack_files.add(pack_name) 

1597 

1598 # Open newly appeared pack files 

1599 new_packs = [] 

1600 for f in pack_files: 

1601 if f not in self._pack_cache: 

1602 pack = Pack( 

1603 os.path.join(self.pack_dir, f), 

1604 delta_window_size=self.pack_delta_window_size, 

1605 window_memory=self.pack_window_memory, 

1606 delta_cache_size=self.pack_delta_cache_size, 

1607 depth=self.pack_depth, 

1608 threads=self.pack_threads, 

1609 big_file_threshold=self.pack_big_file_threshold, 

1610 ) 

1611 new_packs.append(pack) 

1612 self._pack_cache[f] = pack 

1613 # Remove disappeared pack files 

1614 for f in set(self._pack_cache) - pack_files: 

1615 self._pack_cache.pop(f).close() 

1616 return new_packs 

1617 

1618 def _get_shafile_path(self, sha: bytes) -> str: 

1619 # Check from object dir 

1620 return hex_to_filename(os.fspath(self.path), sha) 

1621 

1622 def _iter_loose_objects(self) -> Iterator[bytes]: 

1623 for base in os.listdir(self.path): 

1624 if len(base) != 2: 

1625 continue 

1626 for rest in os.listdir(os.path.join(self.path, base)): 

1627 sha = os.fsencode(base + rest) 

1628 if not valid_hexsha(sha): 

1629 continue 

1630 yield sha 

1631 

1632 def count_loose_objects(self) -> int: 

1633 """Count the number of loose objects in the object store. 

1634 

1635 Returns: 

1636 Number of loose objects 

1637 """ 

1638 count = 0 

1639 if not os.path.exists(self.path): 

1640 return 0 

1641 

1642 for i in range(256): 

1643 subdir = os.path.join(self.path, f"{i:02x}") 

1644 try: 

1645 count += len( 

1646 [ 

1647 name 

1648 for name in os.listdir(subdir) 

1649 if len(name) == 38 # 40 - 2 for the prefix 

1650 ] 

1651 ) 

1652 except FileNotFoundError: 

1653 # Directory may have been removed or is inaccessible 

1654 continue 

1655 

1656 return count 

1657 
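# Usage sketch (illustrative): a simple auto-gc style check built from the two
# counters defined on this class; 6700 and 50 mirror git's gc.auto and
# gc.autoPackLimit defaults (assumed values, not read from config here):
#
#   if store.count_loose_objects() > 6700 or store.count_pack_files() > 50:
#       store.pack_loose_objects()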

1658 def _get_loose_object(self, sha: bytes) -> ShaFile | None: 

1659 path = self._get_shafile_path(sha) 

1660 try: 

1661 return ShaFile.from_path(path) 

1662 except FileNotFoundError: 

1663 return None 

1664 

1665 def delete_loose_object(self, sha: bytes) -> None: 

1666 """Delete a loose object from disk. 

1667 

1668 Args: 

1669 sha: SHA1 of the object to delete 

1670 

1671 Raises: 

1672 FileNotFoundError: If the object file doesn't exist 

1673 """ 

1674 os.remove(self._get_shafile_path(sha)) 

1675 

1676 def get_object_mtime(self, sha: bytes) -> float: 

1677 """Get the modification time of an object. 

1678 

1679 Args: 

1680 sha: SHA1 of the object 

1681 

1682 Returns: 

1683 Modification time as seconds since epoch 

1684 

1685 Raises: 

1686 KeyError: if the object is not found 

1687 """ 

1688 # First check if it's a loose object 

1689 if self.contains_loose(sha): 

1690 path = self._get_shafile_path(sha) 

1691 try: 

1692 return os.path.getmtime(path) 

1693 except FileNotFoundError: 

1694 pass 

1695 

1696 # Check if it's in a pack file 

1697 for pack in self.packs: 

1698 try: 

1699 if sha in pack: 

1700 # Use the pack file's mtime for packed objects 

1701 pack_path = pack._data_path 

1702 try: 

1703 return os.path.getmtime(pack_path) 

1704 except (FileNotFoundError, AttributeError): 

1705 pass 

1706 except PackFileDisappeared: 

1707 pass 

1708 

1709 raise KeyError(sha) 

1710 

1711 def _remove_pack(self, pack: Pack) -> None: 

1712 try: 

1713 del self._pack_cache[os.path.basename(pack._basename)] 

1714 except KeyError: 

1715 pass 

1716 pack.close() 

1717 os.remove(pack.data.path) 

1718 if hasattr(pack.index, "path"): 

1719 os.remove(pack.index.path) 

1720 

1721 def _get_pack_basepath( 

1722 self, entries: Iterable[tuple[bytes, int, int | None]] 

1723 ) -> str: 

1724 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1725 # TODO: Handle self.pack_dir being bytes 

1726 suffix = suffix_bytes.decode("ascii") 

1727 return os.path.join(self.pack_dir, "pack-" + suffix) 

1728 

1729 def _complete_pack( 

1730 self, 

1731 f: BinaryIO, 

1732 path: str, 

1733 num_objects: int, 

1734 indexer: PackIndexer, 

1735 progress: Callable[..., None] | None = None, 

1736 refs: dict[bytes, bytes] | None = None, 

1737 ) -> Pack: 

1738 """Move a specific file containing a pack into the pack directory. 

1739 

1740 Note: The file should be on the same file system as the 

1741 packs directory. 

1742 

1743 Args: 

1744 f: Open file object for the pack. 

1745 path: Path to the pack file. 

1746 num_objects: Number of objects in the pack. 

1747 indexer: A PackIndexer for indexing the pack. 

1748 progress: Optional progress reporting function. 

1749 refs: Optional dictionary of refs for bitmap generation. 

1750 """ 

1751 entries = [] 

1752 for i, entry in enumerate(indexer): 

1753 if progress is not None: 

1754 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1755 entries.append(entry) 

1756 

1757 pack_sha, extra_entries = extend_pack( 

1758 f, 

1759 set(indexer.ext_refs()), 

1760 get_raw=self.get_raw, 

1761 compression_level=self.pack_compression_level, 

1762 progress=progress, 

1763 ) 

1764 f.flush() 

1765 if self.fsync_object_files: 

1766 try: 

1767 fileno = f.fileno() 

1768 except AttributeError as e: 

1769 raise OSError("fsync requested but file has no fileno()") from e 

1770 else: 

1771 os.fsync(fileno) 

1772 f.close() 

1773 

1774 entries.extend(extra_entries) 

1775 

1776 # Move the pack in. 

1777 entries.sort() 

1778 pack_base_name = self._get_pack_basepath(entries) 

1779 

1780 for pack in self.packs: 

1781 if pack._basename == pack_base_name: 

1782 return pack 

1783 

1784 target_pack_path = pack_base_name + ".pack" 

1785 target_index_path = pack_base_name + ".idx" 

1786 if sys.platform == "win32": 

1787 # Windows might have the target pack file lingering. Attempt 

1788 # removal, silently passing if the target does not exist. 

1789 with suppress(FileNotFoundError): 

1790 os.remove(target_pack_path) 

1791 os.rename(path, target_pack_path) 

1792 

1793 # Write the index. 

1794 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1795 with GitFile( 

1796 target_index_path, 

1797 "wb", 

1798 mask=mask, 

1799 fsync=self.fsync_object_files, 

1800 ) as index_file: 

1801 write_pack_index( 

1802 index_file, entries, pack_sha, version=self.pack_index_version 

1803 ) 

1804 

1805 # Generate bitmap if configured and refs are available 

1806 if self.pack_write_bitmaps and refs: 

1807 from .bitmap import generate_bitmap, write_bitmap 

1808 from .pack import load_pack_index_file 

1809 

1810 if progress: 

1811 progress("Generating bitmap index\r".encode("ascii")) 

1812 

1813 # Load the index we just wrote 

1814 with open(target_index_path, "rb") as idx_file: 

1815 pack_index = load_pack_index_file( 

1816 os.path.basename(target_index_path), idx_file 

1817 ) 

1818 

1819 # Generate the bitmap 

1820 bitmap = generate_bitmap( 

1821 pack_index=pack_index, 

1822 object_store=self, 

1823 refs=refs, 

1824 pack_checksum=pack_sha, 

1825 include_hash_cache=self.pack_write_bitmap_hash_cache, 

1826 include_lookup_table=self.pack_write_bitmap_lookup_table, 

1827 progress=lambda msg: progress(msg.encode("ascii")) 

1828 if progress and isinstance(msg, str) 

1829 else None, 

1830 ) 

1831 

1832 # Write the bitmap 

1833 target_bitmap_path = pack_base_name + ".bitmap" 

1834 write_bitmap(target_bitmap_path, bitmap) 

1835 

1836 if progress: 

1837 progress("Bitmap index written\r".encode("ascii")) 

1838 

1839 # Add the pack to the store and return it. 

1840 final_pack = Pack( 

1841 pack_base_name, 

1842 delta_window_size=self.pack_delta_window_size, 

1843 window_memory=self.pack_window_memory, 

1844 delta_cache_size=self.pack_delta_cache_size, 

1845 depth=self.pack_depth, 

1846 threads=self.pack_threads, 

1847 big_file_threshold=self.pack_big_file_threshold, 

1848 ) 

1849 final_pack.check_length_and_checksum() 

1850 self._add_cached_pack(pack_base_name, final_pack) 

1851 return final_pack 

1852 

1853 def add_thin_pack( 

1854 self, 

1855 read_all: Callable[[int], bytes], 

1856 read_some: Callable[[int], bytes] | None, 

1857 progress: Callable[..., None] | None = None, 

1858 ) -> "Pack": 

1859 """Add a new thin pack to this object store. 

1860 

1861 Thin packs are packs that contain deltas with parents that exist 

1862 outside the pack. They should never be placed in the object store 

1863 directly, and should always be indexed and completed as they are copied. 

1864 

1865 Args: 

1866 read_all: Read function that blocks until the number of 

1867 requested bytes are read. 

1868 read_some: Read function that returns at least one byte, but may 

1869 not return the number of bytes requested. 

1870 progress: Optional progress reporting function. 

1871 Returns: A Pack object pointing at the now-completed thin pack in the 

1872 objects/pack directory. 

1873 """ 

1874 import tempfile 

1875 

1876 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1877 with os.fdopen(fd, "w+b") as f: 

1878 os.chmod(path, PACK_MODE) 

1879 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type] 

1880 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type] 

1881 copier.verify(progress=progress) 

1882 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1883 

1884 def add_pack( 

1885 self, 

1886 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1887 """Add a new pack to this object store. 

1888 

1889 Returns: Fileobject to write to, a commit function to 

1890 call when the pack is finished and an abort 

1891 function. 

1892 """ 

1893 import tempfile 

1894 

1895 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1896 f = os.fdopen(fd, "w+b") 

1897 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1898 os.chmod(path, mask) 

1899 

1900 def commit() -> "Pack | None": 

1901 if f.tell() > 0: 

1902 f.seek(0) 

1903 

1904 with PackData(path, f) as pd: 

1905 indexer = PackIndexer.for_pack_data( 

1906 pd, 

1907 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

1908 ) 

1909 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

1910 else: 

1911 f.close() 

1912 os.remove(path) 

1913 return None 

1914 

1915 def abort() -> None: 

1916 f.close() 

1917 os.remove(path) 

1918 

1919 return f, commit, abort # type: ignore[return-value] 

1920 
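# Illustrative usage sketch (not part of the dulwich source): the add_pack()
# protocol hands back a file object plus commit/abort callables. Here, store
# stands for a DiskObjectStore instance and pack_bytes for a raw pack stream
# obtained elsewhere; both are hypothetical placeholders.
#
#     f, commit, abort = store.add_pack()
#     try:
#         f.write(pack_bytes)
#     except BaseException:
#         abort()
#         raise
#     else:
#         commit()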

1921 def add_object(self, obj: ShaFile) -> None: 

1922 """Add a single object to this object store. 

1923 

1924 Args: 

1925 obj: Object to add 

1926 """ 

1927 path = self._get_shafile_path(obj.id) 

1928 dir = os.path.dirname(path) 

1929 try: 

1930 os.mkdir(dir) 

1931 if self.dir_mode is not None: 

1932 os.chmod(dir, self.dir_mode) 

1933 except FileExistsError: 

1934 pass 

1935 if os.path.exists(path): 

1936 return # Already there, no need to write again 

1937 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1938 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f: 

1939 f.write( 

1940 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1941 ) 

1942 
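# Illustrative usage sketch (not part of the dulwich source): writing a single
# loose blob. store stands for a hypothetical DiskObjectStore instance.
#
#     from dulwich.objects import Blob
#
#     blob = Blob.from_string(b"hello world\n")
#     store.add_object(blob)
#     assert blob.id in store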

1943 @classmethod 

1944 def init( 

1945 cls, 

1946 path: str | os.PathLike[str], 

1947 *, 

1948 file_mode: int | None = None, 

1949 dir_mode: int | None = None, 

1950 ) -> "DiskObjectStore": 

1951 """Initialize a new disk object store. 

1952 

1953 Creates the necessary directory structure for a Git object store. 

1954 

1955 Args: 

1956 path: Path where the object store should be created 

1957 file_mode: Optional file permission mask for shared repository 

1958 dir_mode: Optional directory permission mask for shared repository 

1959 

1960 Returns: 

1961 New DiskObjectStore instance 

1962 """ 

1963 try: 

1964 os.mkdir(path) 

1965 if dir_mode is not None: 

1966 os.chmod(path, dir_mode) 

1967 except FileExistsError: 

1968 pass 

1969 info_path = os.path.join(path, "info") 

1970 pack_path = os.path.join(path, PACKDIR) 

1971 os.mkdir(info_path) 

1972 os.mkdir(pack_path) 

1973 if dir_mode is not None: 

1974 os.chmod(info_path, dir_mode) 

1975 os.chmod(pack_path, dir_mode) 

1976 return cls(path, file_mode=file_mode, dir_mode=dir_mode) 

1977 
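# Illustrative usage sketch (not part of the dulwich source): creating a fresh
# object store layout under a hypothetical directory.
#
#     store = DiskObjectStore.init("/tmp/example-objects")
#     # creates /tmp/example-objects plus its info/ and pack/ subdirectories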

1978 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]: 

1979 """Iterate over all object SHAs with the given prefix. 

1980 

1981 Args: 

1982 prefix: Hex prefix to search for (as bytes) 

1983 

1984 Returns: 

1985 Iterator of object SHAs (as bytes) matching the prefix 

1986 """ 

1987 if len(prefix) < 2: 

1988 yield from super().iter_prefix(prefix) 

1989 return 

1990 seen = set() 

1991 dir = prefix[:2].decode() 

1992 rest = prefix[2:].decode() 

1993 try: 

1994 for name in os.listdir(os.path.join(self.path, dir)): 

1995 if name.startswith(rest): 

1996 sha = os.fsencode(dir + name) 

1997 if sha not in seen: 

1998 seen.add(sha) 

1999 yield sha 

2000 except FileNotFoundError: 

2001 pass 

2002 

2003 for p in self.packs: 

2004 bin_prefix = ( 

2005 binascii.unhexlify(prefix) 

2006 if len(prefix) % 2 == 0 

2007 else binascii.unhexlify(prefix[:-1]) 

2008 ) 

2009 for sha in p.index.iter_prefix(bin_prefix): 

2010 sha = sha_to_hex(sha) 

2011 if sha.startswith(prefix) and sha not in seen: 

2012 seen.add(sha) 

2013 yield sha 

2014 for alternate in self.alternates: 

2015 for sha in alternate.iter_prefix(prefix): 

2016 if sha not in seen: 

2017 seen.add(sha) 

2018 yield sha 

2019 
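# Illustrative usage sketch (not part of the dulwich source): resolving an
# abbreviated object id. store is a hypothetical DiskObjectStore instance and
# the prefix is a made-up example.
#
#     matches = list(store.iter_prefix(b"deadbe"))
#     if len(matches) == 1:
#         full_sha = matches[0]  # full 40-character hex ObjectID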

2020 def get_commit_graph(self) -> "CommitGraph | None": 

2021 """Get the commit graph for this object store. 

2022 

2023 Returns: 

2024 CommitGraph object if available, None otherwise 

2025 """ 

2026 if not self._use_commit_graph: 

2027 return None 

2028 

2029 if self._commit_graph is None: 

2030 from .commit_graph import read_commit_graph 

2031 

2032 # Look for commit graph in our objects directory 

2033 graph_file = os.path.join(self.path, "info", "commit-graph") 

2034 if os.path.exists(graph_file): 

2035 self._commit_graph = read_commit_graph(graph_file) 

2036 return self._commit_graph 

2037 

2038 def write_commit_graph( 

2039 self, refs: Iterable[bytes] | None = None, reachable: bool = True 

2040 ) -> None: 

2041 """Write a commit graph file for this object store. 

2042 

2043 Args: 

2044 refs: List of refs to include. If None, includes all refs from object store. 

2045 reachable: If True, includes all commits reachable from refs. 

2046 If False, only includes the direct ref targets. 

2047 """ 

2048 from .commit_graph import get_reachable_commits 

2049 

2050 if refs is None: 

2051 # Get all commit objects from the object store 

2052 all_refs = [] 

2053 # Iterate through all objects to find commits 

2054 for sha in self: 

2055 try: 

2056 obj = self[sha] 

2057 if obj.type_name == b"commit": 

2058 all_refs.append(sha) 

2059 except KeyError: 

2060 continue 

2061 else: 

2062 # Use provided refs 

2063 all_refs = list(refs) 

2064 

2065 if not all_refs: 

2066 return # No commits to include 

2067 

2068 if reachable: 

2069 # Get all reachable commits 

2070 commit_ids = get_reachable_commits(self, all_refs) 

2071 else: 

2072 # Just use the direct ref targets - ensure they're hex ObjectIDs 

2073 commit_ids = [] 

2074 for ref in all_refs: 

2075 if isinstance(ref, bytes) and len(ref) == 40: 

2076 # Already hex ObjectID 

2077 commit_ids.append(ref) 

2078 elif isinstance(ref, bytes) and len(ref) == 20: 

2079 # Binary SHA, convert to hex ObjectID 

2080 from .objects import sha_to_hex 

2081 

2082 commit_ids.append(sha_to_hex(ref)) 

2083 else: 

2084 # Assume it's already correct format 

2085 commit_ids.append(ref) 

2086 

2087 if commit_ids: 

2088 # Write commit graph directly to our object store path 

2089 # Generate the commit graph 

2090 from .commit_graph import generate_commit_graph 

2091 

2092 graph = generate_commit_graph(self, commit_ids) 

2093 

2094 if graph.entries: 

2095 # Ensure the info directory exists 

2096 info_dir = os.path.join(self.path, "info") 

2097 os.makedirs(info_dir, exist_ok=True) 

2098 if self.dir_mode is not None: 

2099 os.chmod(info_dir, self.dir_mode) 

2100 

2101 # Write using GitFile for atomic operation 

2102 graph_path = os.path.join(info_dir, "commit-graph") 

2103 mask = self.file_mode if self.file_mode is not None else 0o644 

2104 with GitFile(graph_path, "wb", mask=mask) as f: 

2105 assert isinstance( 

2106 f, _GitFile 

2107 ) # GitFile in write mode always returns _GitFile 

2108 graph.write_to_file(f) 

2109 

2110 # Clear cached commit graph so it gets reloaded 

2111 self._commit_graph = None 

2112 

2113 def prune(self, grace_period: int | None = None) -> None: 

2114 """Prune/clean up this object store. 

2115 

2116 This removes temporary files that were left behind by interrupted 

2117 pack operations. These are files that start with ``tmp_pack_`` in the 

2118 repository directory or files with .pack extension but no corresponding 

2119 .idx file in the pack directory. 

2120 

2121 Args: 

2122 grace_period: Grace period in seconds for removing temporary files. 

2123 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

2124 """ 

2125 import glob 

2126 

2127 if grace_period is None: 

2128 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

2129 

2130 # Clean up tmp_pack_* files in the repository directory 

2131 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

2132 # Check if file is old enough (more than grace period) 

2133 mtime = os.path.getmtime(tmp_file) 

2134 if time.time() - mtime > grace_period: 

2135 os.remove(tmp_file) 

2136 

2137 # Clean up orphaned .pack files without corresponding .idx files 

2138 try: 

2139 pack_dir_contents = os.listdir(self.pack_dir) 

2140 except FileNotFoundError: 

2141 return 

2142 

2143 pack_files = {} 

2144 idx_files = set() 

2145 

2146 for name in pack_dir_contents: 

2147 if name.endswith(".pack"): 

2148 base_name = name[:-5] # Remove .pack extension 

2149 pack_files[base_name] = name 

2150 elif name.endswith(".idx"): 

2151 base_name = name[:-4] # Remove .idx extension 

2152 idx_files.add(base_name) 

2153 

2154 # Remove .pack files without corresponding .idx files 

2155 for base_name, pack_name in pack_files.items(): 

2156 if base_name not in idx_files: 

2157 pack_path = os.path.join(self.pack_dir, pack_name) 

2158 # Check if file is old enough (more than grace period) 

2159 mtime = os.path.getmtime(pack_path) 

2160 if time.time() - mtime > grace_period: 

2161 os.remove(pack_path) 

2162 

2163 

2164class MemoryObjectStore(PackCapableObjectStore): 

2165 """Object store that keeps all objects in memory.""" 

2166 

2167 def __init__(self) -> None: 

2168 """Initialize a MemoryObjectStore. 

2169 

2170 Creates an empty in-memory object store. 

2171 """ 

2172 super().__init__() 

2173 self._data: dict[bytes, ShaFile] = {} 

2174 self.pack_compression_level = -1 

2175 

2176 def _to_hexsha(self, sha: bytes) -> bytes: 

2177 if len(sha) == 40: 

2178 return sha 

2179 elif len(sha) == 20: 

2180 return sha_to_hex(sha) 

2181 else: 

2182 raise ValueError(f"Invalid sha {sha!r}") 

2183 

2184 def contains_loose(self, sha: bytes) -> bool: 

2185 """Check if a particular object is present by SHA1 and is loose.""" 

2186 return self._to_hexsha(sha) in self._data 

2187 

2188 def contains_packed(self, sha: bytes) -> bool: 

2189 """Check if a particular object is present by SHA1 and is packed.""" 

2190 return False 

2191 

2192 def __iter__(self) -> Iterator[bytes]: 

2193 """Iterate over the SHAs that are present in this store.""" 

2194 return iter(self._data.keys()) 

2195 

2196 @property 

2197 def packs(self) -> list[Pack]: 

2198 """List with pack objects.""" 

2199 return [] 

2200 

2201 def get_raw(self, name: ObjectID) -> tuple[int, bytes]: 

2202 """Obtain the raw text for an object. 

2203 

2204 Args: 

2205 name: sha for the object. 

2206 Returns: tuple with numeric type and object contents. 

2207 """ 

2208 obj = self[self._to_hexsha(name)] 

2209 return obj.type_num, obj.as_raw_string() 

2210 

2211 def __getitem__(self, name: ObjectID) -> ShaFile: 

2212 """Retrieve an object by SHA. 

2213 

2214 Args: 

2215 name: SHA of the object (as hex string or bytes) 

2216 

2217 Returns: 

2218 Copy of the ShaFile object 

2219 

2220 Raises: 

2221 KeyError: If the object is not found 

2222 """ 

2223 return self._data[self._to_hexsha(name)].copy() 

2224 

2225 def __delitem__(self, name: ObjectID) -> None: 

2226 """Delete an object from this store, for testing only.""" 

2227 del self._data[self._to_hexsha(name)] 

2228 

2229 def add_object(self, obj: ShaFile) -> None: 

2230 """Add a single object to this object store.""" 

2231 self._data[obj.id] = obj.copy() 

2232 

2233 def add_objects( 

2234 self, 

2235 objects: Iterable[tuple[ShaFile, str | None]], 

2236 progress: Callable[[str], None] | None = None, 

2237 ) -> None: 

2238 """Add a set of objects to this object store. 

2239 

2240 Args: 

2241 objects: Iterable over a list of (object, path) tuples 

2242 progress: Optional progress reporting function. 

2243 """ 

2244 for obj, path in objects: 

2245 self.add_object(obj) 

2246 
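# Illustrative usage sketch (not part of the dulwich source): an in-memory
# round trip, handy in tests.
#
#     from dulwich.objects import Blob
#
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b"data")
#     store.add_object(blob)
#     assert store[blob.id].as_raw_string() == b"data"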

2247 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2248 """Add a new pack to this object store. 

2249 

2250 Because this object store doesn't support packs, we extract and add the 

2251 individual objects. 

2252 

2253 Returns: Fileobject to write to, a commit function to call when 

2254 the pack is finished, and an abort function. 

2255 """ 

2256 from tempfile import SpooledTemporaryFile 

2257 

2258 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

2259 

2260 def commit() -> None: 

2261 size = f.tell() 

2262 if size > 0: 

2263 f.seek(0) 

2264 

2265 p = PackData.from_file(f, size) 

2266 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type] 

2267 self.add_object(obj) 

2268 p.close() 

2269 f.close() 

2270 else: 

2271 f.close() 

2272 

2273 def abort() -> None: 

2274 f.close() 

2275 

2276 return f, commit, abort # type: ignore[return-value] 

2277 

2278 def add_pack_data( 

2279 self, 

2280 count: int, 

2281 unpacked_objects: Iterator[UnpackedObject], 

2282 progress: Callable[[str], None] | None = None, 

2283 ) -> None: 

2284 """Add pack data to this object store. 

2285 

2286 Args: 

2287 count: Number of items to add 

2288 unpacked_objects: Iterator of UnpackedObject instances 

2289 progress: Optional progress reporting function. 

2290 """ 

2291 if count == 0: 

2292 return 

2293 

2294 # Since MemoryObjectStore doesn't support pack files, we need to 

2295 # extract individual objects. To handle deltas properly, we write 

2296 # to a temporary pack and then use PackInflater to resolve them. 

2297 f, commit, abort = self.add_pack() 

2298 try: 

2299 write_pack_data( 

2300 f.write, 

2301 unpacked_objects, 

2302 num_records=count, 

2303 progress=progress, 

2304 ) 

2305 except BaseException: 

2306 abort() 

2307 raise 

2308 else: 

2309 commit() 

2310 

2311 def add_thin_pack( 

2312 self, 

2313 read_all: Callable[[], bytes], 

2314 read_some: Callable[[int], bytes], 

2315 progress: Callable[[str], None] | None = None, 

2316 ) -> None: 

2317 """Add a new thin pack to this object store. 

2318 

2319 Thin packs are packs that contain deltas with parents that exist 

2320 outside the pack. Because this object store doesn't support packs, we 

2321 extract and add the individual objects. 

2322 

2323 Args: 

2324 read_all: Read function that blocks until the number of 

2325 requested bytes are read. 

2326 read_some: Read function that returns at least one byte, but may 

2327 not return the number of bytes requested. 

2328 progress: Optional progress reporting function. 

2329 """ 

2330 f, commit, abort = self.add_pack() 

2331 try: 

2332 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type] 

2333 copier.verify() 

2334 except BaseException: 

2335 abort() 

2336 raise 

2337 else: 

2338 commit() 

2339 

2340 

2341class ObjectIterator(Protocol): 

2342 """Interface for iterating over objects.""" 

2343 

2344 def iterobjects(self) -> Iterator[ShaFile]: 

2345 """Iterate over all objects. 

2346 

2347 Returns: 

2348 Iterator of ShaFile objects 

2349 """ 

2350 raise NotImplementedError(self.iterobjects) 

2351 

2352 

2353def tree_lookup_path( 

2354 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes 

2355) -> tuple[int, bytes]: 

2356 """Look up an object in a Git tree. 

2357 

2358 Args: 

2359 lookup_obj: Callback for retrieving object by SHA1 

2360 root_sha: SHA1 of the root tree 

2361 path: Path to lookup 

2362 Returns: A tuple of (mode, SHA) of the resulting path. 

2363 """ 

2364 tree = lookup_obj(root_sha) 

2365 if not isinstance(tree, Tree): 

2366 raise NotTreeError(root_sha) 

2367 return tree.lookup_path(lookup_obj, path) 

2368 
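# Illustrative usage sketch (not part of the dulwich source): resolving a path
# inside a commit's tree. store and commit are hypothetical placeholders for an
# object store and a Commit object.
#
#     mode, blob_sha = tree_lookup_path(
#         store.__getitem__, commit.tree, b"docs/README"
#     )
#     blob = store[blob_sha]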

2369 

2370def _collect_filetree_revs( 

2371 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

2372) -> None: 

2373 """Collect SHA1s of files and directories for specified tree. 

2374 

2375 Args: 

2376 obj_store: Object store to get objects by SHA from 

2377 tree_sha: tree reference to walk 

2378 kset: set to fill with references to files and directories 

2379 """ 

2380 filetree = obj_store[tree_sha] 

2381 assert isinstance(filetree, Tree) 

2382 for name, mode, sha in filetree.iteritems(): 

2383 assert mode is not None 

2384 assert sha is not None 

2385 if not S_ISGITLINK(mode) and sha not in kset: 

2386 kset.add(sha) 

2387 if stat.S_ISDIR(mode): 

2388 _collect_filetree_revs(obj_store, sha, kset) 

2389 

2390 

2391def _split_commits_and_tags( 

2392 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False 

2393) -> tuple[set[bytes], set[bytes], set[bytes]]: 

2394 """Split object id list into three lists with commit, tag, and other SHAs. 

2395 

2396 Commits referenced by tags are included in the commits 

2397 list as well. Only SHA1s known in this repository will get 

2398 through, and unless the ignore_unknown argument is True, a KeyError 

2399 is raised for any SHA1 missing from the repository. 

2400 

2401 Args: 

2402 obj_store: Object store to get objects by SHA1 from 

2403 lst: Collection of commit and tag SHAs 

2404 ignore_unknown: True to skip SHA1 missing in the repository 

2405 silently. 

2406 Returns: A tuple of (commits, tags, others) SHA1s 

2407 """ 

2408 commits: set[bytes] = set() 

2409 tags: set[bytes] = set() 

2410 others: set[bytes] = set() 

2411 for e in lst: 

2412 try: 

2413 o = obj_store[e] 

2414 except KeyError: 

2415 if not ignore_unknown: 

2416 raise 

2417 else: 

2418 if isinstance(o, Commit): 

2419 commits.add(e) 

2420 elif isinstance(o, Tag): 

2421 tags.add(e) 

2422 tagged = o.object[1] 

2423 c, t, os = _split_commits_and_tags( 

2424 obj_store, [tagged], ignore_unknown=ignore_unknown 

2425 ) 

2426 commits |= c 

2427 tags |= t 

2428 others |= os 

2429 else: 

2430 others.add(e) 

2431 return (commits, tags, others) 

2432 

2433 

2434class MissingObjectFinder: 

2435 """Find the objects missing from another object store. 

2436 

2437 Args: 

2438 object_store: Object store containing at least all objects to be 

2439 sent 

2440 haves: SHA1s of commits not to send (already present in target) 

2441 wants: SHA1s of commits to send 

2442 progress: Optional function to report progress to. 

2443 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2444 sha for including tags. 

2445 get_parents: Optional function for getting the parents of a commit. 

2446 """ 

2447 

2448 def __init__( 

2449 self, 

2450 object_store: BaseObjectStore, 

2451 haves: Iterable[bytes], 

2452 wants: Iterable[bytes], 

2453 *, 

2454 shallow: Set[bytes] | None = None, 

2455 progress: Callable[[bytes], None] | None = None, 

2456 get_tagged: Callable[[], dict[bytes, bytes]] | None = None, 

2457 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2458 ) -> None: 

2459 """Initialize a MissingObjectFinder. 

2460 

2461 Args: 

2462 object_store: Object store containing objects 

2463 haves: SHA1s of objects already present in target 

2464 wants: SHA1s of objects to send 

2465 shallow: Set of shallow commit SHA1s 

2466 progress: Optional progress reporting callback 

2467 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2468 get_parents: Function for getting commit parents 

2469 """ 

2470 self.object_store = object_store 

2471 if shallow is None: 

2472 shallow = set() 

2473 self._get_parents = get_parents 

2474 reachability = object_store.get_reachability_provider() 

2475 # process Commits and Tags differently 

2476 # Note, while haves may list commits/tags not available locally, 

2477 # and such SHAs would get filtered out by _split_commits_and_tags, 

2478 # wants must list only known SHAs; otherwise 

2479 # _split_commits_and_tags fails with KeyError 

2480 have_commits, have_tags, have_others = _split_commits_and_tags( 

2481 object_store, haves, ignore_unknown=True 

2482 ) 

2483 want_commits, want_tags, want_others = _split_commits_and_tags( 

2484 object_store, wants, ignore_unknown=False 

2485 ) 

2486 # all_ancestors is a set of commits that shall not be sent 

2487 # (complete repository up to 'haves') 

2488 all_ancestors = reachability.get_reachable_commits( 

2489 have_commits, exclude=None, shallow=shallow 

2490 ) 

2491 # all_missing - complete set of commits between haves and wants 

2492 # common_commits - boundary commits directly encountered when traversing wants 

2493 # We use _collect_ancestors here because we need the exact boundary behavior: 

2494 # commits that are in all_ancestors and directly reachable from wants, 

2495 # but we don't traverse past them. This is hard to express with the 

2496 # reachability abstraction alone. 

2497 missing_commits, common_commits = _collect_ancestors( 

2498 object_store, 

2499 want_commits, 

2500 frozenset(all_ancestors), 

2501 shallow=frozenset(shallow), 

2502 get_parents=self._get_parents, 

2503 ) 

2504 

2505 self.remote_has: set[bytes] = set() 

2506 # Now, fill sha_done with commits and revisions of 

2507 # files and directories known to be both locally 

2508 # and on target. Thus these commits and files 

2509 # won't get selected for fetch 

2510 for h in common_commits: 

2511 self.remote_has.add(h) 

2512 cmt = object_store[h] 

2513 assert isinstance(cmt, Commit) 

2514 # Get tree objects for this commit 

2515 tree_objects = reachability.get_tree_objects([cmt.tree]) 

2516 self.remote_has.update(tree_objects) 

2517 

2518 # record tags we have as visited, too 

2519 for t in have_tags: 

2520 self.remote_has.add(t) 

2521 self.sha_done = set(self.remote_has) 

2522 

2523 # in fact, what we 'want' is commits, tags, and others 

2524 # we've found missing 

2525 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = { 

2526 (w, None, Commit.type_num, False) for w in missing_commits 

2527 } 

2528 missing_tags = want_tags.difference(have_tags) 

2529 self.objects_to_send.update( 

2530 {(w, None, Tag.type_num, False) for w in missing_tags} 

2531 ) 

2532 missing_others = want_others.difference(have_others) 

2533 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2534 

2535 if progress is None: 

2536 self.progress: Callable[[bytes], None] = lambda x: None 

2537 else: 

2538 self.progress = progress 

2539 self._tagged = (get_tagged and get_tagged()) or {} 

2540 

2541 def get_remote_has(self) -> set[bytes]: 

2542 """Get the set of SHAs the remote has. 

2543 

2544 Returns: 

2545 Set of SHA1s that the remote side already has 

2546 """ 

2547 return self.remote_has 

2548 

2549 def add_todo( 

2550 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]] 

2551 ) -> None: 

2552 """Add objects to the todo list. 

2553 

2554 Args: 

2555 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2556 """ 

2557 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2558 

2559 def __next__(self) -> tuple[bytes, PackHint | None]: 

2560 """Get the next object to send. 

2561 

2562 Returns: 

2563 Tuple of (sha, pack_hint) 

2564 

2565 Raises: 

2566 StopIteration: When no more objects to send 

2567 """ 

2568 while True: 

2569 if not self.objects_to_send: 

2570 self.progress( 

2571 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2572 ) 

2573 raise StopIteration 

2574 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2575 if sha not in self.sha_done: 

2576 break 

2577 if not leaf: 

2578 o = self.object_store[sha] 

2579 if isinstance(o, Commit): 

2580 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2581 elif isinstance(o, Tree): 

2582 todos = [] 

2583 for n, m, s in o.iteritems(): 

2584 assert m is not None 

2585 assert n is not None 

2586 assert s is not None 

2587 if not S_ISGITLINK(m): 

2588 todos.append( 

2589 ( 

2590 s, 

2591 n, 

2592 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2593 not stat.S_ISDIR(m), 

2594 ) 

2595 ) 

2596 self.add_todo(todos) 

2597 elif isinstance(o, Tag): 

2598 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2599 if sha in self._tagged: 

2600 self.add_todo([(self._tagged[sha], None, None, True)]) 

2601 self.sha_done.add(sha) 

2602 if len(self.sha_done) % 1000 == 0: 

2603 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2604 if type_num is None: 

2605 pack_hint = None 

2606 else: 

2607 pack_hint = (type_num, name) 

2608 return (sha, pack_hint) 

2609 

2610 def __iter__(self) -> Iterator[tuple[bytes, PackHint | None]]: 

2611 """Return iterator over objects to send. 

2612 

2613 Returns: 

2614 Self (this class implements the iterator protocol) 

2615 """ 

2616 return self 

2617 

2618 

2619class ObjectStoreGraphWalker: 

2620 """Graph walker that finds what commits are missing from an object store.""" 

2621 

2622 heads: set[ObjectID] 

2623 """Revisions without descendants in the local repo.""" 

2624 

2625 get_parents: Callable[[ObjectID], list[ObjectID]] 

2626 """Function to retrieve parents in the local repo.""" 

2627 

2628 shallow: set[ObjectID] 

2629 

2630 def __init__( 

2631 self, 

2632 local_heads: Iterable[ObjectID], 

2633 get_parents: Callable[[ObjectID], list[ObjectID]], 

2634 shallow: set[ObjectID] | None = None, 

2635 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None] 

2636 | None = None, 

2637 ) -> None: 

2638 """Create a new instance. 

2639 

2640 Args: 

2641 local_heads: Heads to start search with 

2642 get_parents: Function for finding the parents of a SHA1. 

2643 shallow: Set of shallow commits. 

2644 update_shallow: Function to update shallow commits. 

2645 """ 

2646 self.heads = set(local_heads) 

2647 self.get_parents = get_parents 

2648 self.parents: dict[ObjectID, list[ObjectID] | None] = {} 

2649 if shallow is None: 

2650 shallow = set() 

2651 self.shallow = shallow 

2652 self.update_shallow = update_shallow 

2653 

2654 def nak(self) -> None: 

2655 """Nothing in common was found.""" 

2656 

2657 def ack(self, sha: ObjectID) -> None: 

2658 """Ack that a revision and its ancestors are present in the source.""" 

2659 if len(sha) != 40: 

2660 raise ValueError(f"unexpected sha {sha!r} received") 

2661 ancestors = {sha} 

2662 

2663 # stop if we run out of heads to remove 

2664 while self.heads: 

2665 for a in ancestors: 

2666 if a in self.heads: 

2667 self.heads.remove(a) 

2668 

2669 # collect all ancestors 

2670 new_ancestors = set() 

2671 for a in ancestors: 

2672 ps = self.parents.get(a) 

2673 if ps is not None: 

2674 new_ancestors.update(ps) 

2675 self.parents[a] = None 

2676 

2677 # no more ancestors; stop 

2678 if not new_ancestors: 

2679 break 

2680 

2681 ancestors = new_ancestors 

2682 

2683 def next(self) -> ObjectID | None: 

2684 """Iterate over ancestors of heads in the target.""" 

2685 if self.heads: 

2686 ret = self.heads.pop() 

2687 try: 

2688 ps = self.get_parents(ret) 

2689 except KeyError: 

2690 return None 

2691 self.parents[ret] = ps 

2692 self.heads.update([p for p in ps if p not in self.parents]) 

2693 return ret 

2694 return None 

2695 

2696 __next__ = next 

2697 
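# Illustrative usage sketch (not part of the dulwich source): walking local
# history to discover what a remote already has. store, local_heads and
# remote_known are hypothetical placeholders; local_heads would normally come
# from the refs container.
#
#     def get_parents(sha):
#         return store[sha].parents
#
#     walker = ObjectStoreGraphWalker(local_heads, get_parents)
#     sha = next(walker)        # candidate "have" to advertise
#     walker.ack(remote_known)  # remote confirmed it has this commit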

2698 

2699def commit_tree_changes( 

2700 object_store: BaseObjectStore, 

2701 tree: ObjectID | Tree, 

2702 changes: Sequence[tuple[bytes, int | None, bytes | None]], 

2703) -> ObjectID: 

2704 """Commit a specified set of changes to a tree structure. 

2705 

2706 This will apply a set of changes on top of an existing tree, storing new 

2707 objects in object_store. 

2708 

2709 changes are a list of tuples with (path, mode, object_sha). 

2710 Paths can be both blobs and trees. Setting the mode and 

2711 object sha to None deletes the path. 

2712 

2713 This method works especially well if there are only a small 

2714 number of changes to a big tree. For a large number of changes 

2715 to a large tree, use e.g. commit_tree. 

2716 

2717 Args: 

2718 object_store: Object store to store new objects in 

2719 and retrieve old ones from. 

2720 tree: Original tree root (SHA or Tree object) 

2721 changes: changes to apply 

2722 Returns: New tree root object 

2723 """ 

2724 # TODO(jelmer): Save up the objects and add them using .add_objects 

2725 # rather than with individual calls to .add_object. 

2726 # Handle both Tree object and SHA 

2727 if isinstance(tree, Tree): 

2728 tree_obj: Tree = tree 

2729 else: 

2730 sha_obj = object_store[tree] 

2731 assert isinstance(sha_obj, Tree) 

2732 tree_obj = sha_obj 

2733 nested_changes: dict[bytes, list[tuple[bytes, int | None, bytes | None]]] = {} 

2734 for path, new_mode, new_sha in changes: 

2735 try: 

2736 (dirname, subpath) = path.split(b"/", 1) 

2737 except ValueError: 

2738 if new_sha is None: 

2739 del tree_obj[path] 

2740 else: 

2741 assert new_mode is not None 

2742 tree_obj[path] = (new_mode, new_sha) 

2743 else: 

2744 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2745 for name, subchanges in nested_changes.items(): 

2746 try: 

2747 orig_subtree_id: bytes | Tree = tree_obj[name][1] 

2748 except KeyError: 

2749 # For new directories, pass an empty Tree object 

2750 orig_subtree_id = Tree() 

2751 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges) 

2752 subtree = object_store[subtree_id] 

2753 assert isinstance(subtree, Tree) 

2754 if len(subtree) == 0: 

2755 del tree_obj[name] 

2756 else: 

2757 tree_obj[name] = (stat.S_IFDIR, subtree.id) 

2758 object_store.add_object(tree_obj) 

2759 return tree_obj.id 

2760 
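# Illustrative usage sketch (not part of the dulwich source): replacing one
# file and deleting another on top of an existing tree. store and old_tree_id
# are hypothetical placeholders.
#
#     from dulwich.objects import Blob
#
#     blob = Blob.from_string(b"new contents\n")
#     store.add_object(blob)
#     new_tree_id = commit_tree_changes(
#         store,
#         old_tree_id,
#         [
#             (b"docs/README", 0o100644, blob.id),
#             (b"obsolete.txt", None, None),
#         ],
#     )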

2761 

2762class OverlayObjectStore(BaseObjectStore): 

2763 """Object store that can overlay multiple object stores.""" 

2764 

2765 def __init__( 

2766 self, 

2767 bases: list[BaseObjectStore], 

2768 add_store: BaseObjectStore | None = None, 

2769 ) -> None: 

2770 """Initialize an OverlayObjectStore. 

2771 

2772 Args: 

2773 bases: List of base object stores to overlay 

2774 add_store: Optional store to write new objects to 

2775 """ 

2776 self.bases = bases 

2777 self.add_store = add_store 

2778 

2779 def add_object(self, object: ShaFile) -> None: 

2780 """Add a single object to the store. 

2781 

2782 Args: 

2783 object: Object to add 

2784 

2785 Raises: 

2786 NotImplementedError: If no add_store was provided 

2787 """ 

2788 if self.add_store is None: 

2789 raise NotImplementedError(self.add_object) 

2790 return self.add_store.add_object(object) 

2791 

2792 def add_objects( 

2793 self, 

2794 objects: Sequence[tuple[ShaFile, str | None]], 

2795 progress: Callable[[str], None] | None = None, 

2796 ) -> Pack | None: 

2797 """Add multiple objects to the store. 

2798 

2799 Args: 

2800 objects: Iterator of objects to add 

2801 progress: Optional progress reporting callback 

2802 

2803 Raises: 

2804 NotImplementedError: If no add_store was provided 

2805 """ 

2806 if self.add_store is None: 

2807 raise NotImplementedError(self.add_object) 

2808 return self.add_store.add_objects(objects, progress) 

2809 

2810 @property 

2811 def packs(self) -> list[Pack]: 

2812 """Get the list of packs from all overlaid stores. 

2813 

2814 Returns: 

2815 Combined list of packs from all base stores 

2816 """ 

2817 ret = [] 

2818 for b in self.bases: 

2819 ret.extend(b.packs) 

2820 return ret 

2821 

2822 def __iter__(self) -> Iterator[ObjectID]: 

2823 """Iterate over all object SHAs in the overlaid stores. 

2824 

2825 Returns: 

2826 Iterator of object SHAs (deduped across stores) 

2827 """ 

2828 done = set() 

2829 for b in self.bases: 

2830 for o_id in b: 

2831 if o_id not in done: 

2832 yield o_id 

2833 done.add(o_id) 

2834 

2835 def iterobjects_subset( 

2836 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2837 ) -> Iterator[ShaFile]: 

2838 """Iterate over a subset of objects from the overlaid stores. 

2839 

2840 Args: 

2841 shas: Iterable of object SHAs to retrieve 

2842 allow_missing: If True, skip missing objects; if False, raise KeyError 

2843 

2844 Returns: 

2845 Iterator of ShaFile objects 

2846 

2847 Raises: 

2848 KeyError: If an object is missing and allow_missing is False 

2849 """ 

2850 todo = set(shas) 

2851 found: set[bytes] = set() 

2852 

2853 for b in self.bases: 

2854 # Create a copy of todo for each base to avoid modifying 

2855 # the set while iterating through it 

2856 current_todo = todo - found 

2857 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2858 yield o 

2859 found.add(o.id) 

2860 

2861 # Check for any remaining objects not found 

2862 missing = todo - found 

2863 if missing and not allow_missing: 

2864 raise KeyError(next(iter(missing))) 

2865 

2866 def iter_unpacked_subset( 

2867 self, 

2868 shas: Iterable[bytes], 

2869 include_comp: bool = False, 

2870 allow_missing: bool = False, 

2871 convert_ofs_delta: bool = True, 

2872 ) -> Iterator[UnpackedObject]: 

2873 """Iterate over unpacked objects from the overlaid stores. 

2874 

2875 Args: 

2876 shas: Iterable of object SHAs to retrieve 

2877 include_comp: Whether to include compressed data 

2878 allow_missing: If True, skip missing objects; if False, raise KeyError 

2879 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2880 

2881 Returns: 

2882 Iterator of unpacked objects 

2883 

2884 Raises: 

2885 KeyError: If an object is missing and allow_missing is False 

2886 """ 

2887 todo = set(shas) 

2888 for b in self.bases: 

2889 for o in b.iter_unpacked_subset( 

2890 todo, 

2891 include_comp=include_comp, 

2892 allow_missing=True, 

2893 convert_ofs_delta=convert_ofs_delta, 

2894 ): 

2895 yield o 

2896 todo.remove(o.sha()) 

2897 if todo and not allow_missing: 

2898 raise KeyError(next(iter(todo))) 

2899 

2900 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]: 

2901 """Get the raw object data from the overlaid stores. 

2902 

2903 Args: 

2904 sha_id: SHA of the object 

2905 

2906 Returns: 

2907 Tuple of (type_num, raw_data) 

2908 

2909 Raises: 

2910 KeyError: If object not found in any base store 

2911 """ 

2912 for b in self.bases: 

2913 try: 

2914 return b.get_raw(sha_id) 

2915 except KeyError: 

2916 pass 

2917 raise KeyError(sha_id) 

2918 

2919 def contains_packed(self, sha: bytes) -> bool: 

2920 """Check if an object is packed in any base store. 

2921 

2922 Args: 

2923 sha: SHA of the object 

2924 

2925 Returns: 

2926 True if object is packed in any base store 

2927 """ 

2928 for b in self.bases: 

2929 if b.contains_packed(sha): 

2930 return True 

2931 return False 

2932 

2933 def contains_loose(self, sha: bytes) -> bool: 

2934 """Check if an object is loose in any base store. 

2935 

2936 Args: 

2937 sha: SHA of the object 

2938 

2939 Returns: 

2940 True if object is loose in any base store 

2941 """ 

2942 for b in self.bases: 

2943 if b.contains_loose(sha): 

2944 return True 

2945 return False 

2946 

2947 

2948def read_packs_file(f: BinaryIO) -> Iterator[str]: 

2949 """Yield the packs listed in a packs file.""" 

2950 for line in f.read().splitlines(): 

2951 if not line: 

2952 continue 

2953 (kind, name) = line.split(b" ", 1) 

2954 if kind != b"P": 

2955 continue 

2956 yield os.fsdecode(name) 

2957 
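# Illustrative sketch (not part of the dulwich source): the objects/info/packs
# format this parser expects is one "P <name>" entry per line, so usage looks
# roughly like the following (the file path is a hypothetical placeholder).
#
#     with open("/path/to/repo/.git/objects/info/packs", "rb") as f:
#         for name in read_packs_file(f):
#             print(name)  # e.g. "pack-<sha>.pack"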

2958 

2959class BucketBasedObjectStore(PackBasedObjectStore): 

2960 """Object store implementation that uses a bucket store like S3 as backend.""" 

2961 

2962 def _iter_loose_objects(self) -> Iterator[bytes]: 

2963 """Iterate over the SHAs of all loose objects.""" 

2964 return iter([]) 

2965 

2966 def _get_loose_object(self, sha: bytes) -> None: 

2967 return None 

2968 

2969 def delete_loose_object(self, sha: bytes) -> None: 

2970 """Delete a loose object (no-op for bucket stores). 

2971 

2972 Bucket-based stores don't have loose objects, so this is a no-op. 

2973 

2974 Args: 

2975 sha: SHA of the object to delete 

2976 """ 

2977 # Doesn't exist.. 

2978 

2979 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

2980 """Pack loose objects. Returns number of objects packed. 

2981 

2982 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2983 

2984 Args: 

2985 progress: Optional progress reporting callback (ignored) 

2986 """ 

2987 return 0 

2988 

2989 def _remove_pack_by_name(self, name: str) -> None: 

2990 """Remove a pack by name. Subclasses should implement this.""" 

2991 raise NotImplementedError(self._remove_pack_by_name) 

2992 

2993 def _iter_pack_names(self) -> Iterator[str]: 

2994 raise NotImplementedError(self._iter_pack_names) 

2995 

2996 def _get_pack(self, name: str) -> Pack: 

2997 raise NotImplementedError(self._get_pack) 

2998 

2999 def _update_pack_cache(self) -> list[Pack]: 

3000 pack_files = set(self._iter_pack_names()) 

3001 

3002 # Open newly appeared pack files 

3003 new_packs = [] 

3004 for f in pack_files: 

3005 if f not in self._pack_cache: 

3006 pack = self._get_pack(f) 

3007 new_packs.append(pack) 

3008 self._pack_cache[f] = pack 

3009 # Remove disappeared pack files 

3010 for f in set(self._pack_cache) - pack_files: 

3011 self._pack_cache.pop(f).close() 

3012 return new_packs 

3013 

3014 def _upload_pack( 

3015 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO 

3016 ) -> None: 

3017 raise NotImplementedError 

3018 

3019 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

3020 """Add a new pack to this object store. 

3021 

3022 Returns: Fileobject to write to, a commit function to 

3023 call when the pack is finished and an abort 

3024 function. 

3025 """ 

3026 import tempfile 

3027 

3028 pf = tempfile.SpooledTemporaryFile( 

3029 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

3030 ) 

3031 

3032 def commit() -> Pack | None: 

3033 if pf.tell() == 0: 

3034 pf.close() 

3035 return None 

3036 

3037 pf.seek(0) 

3038 

3039 p = PackData(pf.name, pf) 

3040 entries = p.sorted_entries() 

3041 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

3042 idxf = tempfile.SpooledTemporaryFile( 

3043 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

3044 ) 

3045 checksum = p.get_stored_checksum() 

3046 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

3047 idxf.seek(0) 

3048 idx = load_pack_index_file(basename + ".idx", idxf) 

3049 for pack in self.packs: 

3050 if pack.get_stored_checksum() == p.get_stored_checksum(): 

3051 p.close() 

3052 idx.close() 

3053 pf.close() 

3054 idxf.close() 

3055 return pack 

3056 pf.seek(0) 

3057 idxf.seek(0) 

3058 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type] 

3059 final_pack = Pack.from_objects(p, idx) 

3060 self._add_cached_pack(basename, final_pack) 

3061 pf.close() 

3062 idxf.close() 

3063 return final_pack 

3064 

3065 return pf, commit, pf.close # type: ignore[return-value] 

3066 

3067 

3068def _collect_ancestors( 

3069 store: ObjectContainer, 

3070 heads: Iterable[ObjectID], 

3071 common: frozenset[ObjectID] = frozenset(), 

3072 shallow: frozenset[ObjectID] = frozenset(), 

3073 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

3074) -> tuple[set[ObjectID], set[ObjectID]]: 

3075 """Collect all ancestors of heads up to (excluding) those in common. 

3076 

3077 Args: 

3078 store: Object store to get commits from 

3079 heads: commits to start from 

3080 common: commits to end at, or empty set to walk repository 

3081 completely 

3082 shallow: Set of shallow commits 

3083 get_parents: Optional function for getting the parents of a 

3084 commit. 

3085 Returns: a tuple (A, B) where A - all commits reachable 

3086 from heads but not present in common, B - common (shared) elements 

3087 that are directly reachable from heads 

3088 """ 

3089 bases = set() 

3090 commits = set() 

3091 queue: list[ObjectID] = [] 

3092 queue.extend(heads) 

3093 

3094 # Try to use commit graph if available 

3095 commit_graph = store.get_commit_graph() 

3096 

3097 while queue: 

3098 e = queue.pop(0) 

3099 if e in common: 

3100 bases.add(e) 

3101 elif e not in commits: 

3102 commits.add(e) 

3103 if e in shallow: 

3104 continue 

3105 

3106 # Try to use commit graph for parent lookup 

3107 parents = None 

3108 if commit_graph: 

3109 parents = commit_graph.get_parents(e) 

3110 

3111 if parents is None: 

3112 # Fall back to loading the object 

3113 cmt = store[e] 

3114 assert isinstance(cmt, Commit) 

3115 parents = get_parents(cmt) 

3116 

3117 queue.extend(parents) 

3118 return (commits, bases) 

3119 

3120 

3121def iter_tree_contents( 

3122 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False 

3123) -> Iterator[TreeEntry]: 

3124 """Iterate the contents of a tree and all subtrees. 

3125 

3126 Iteration is depth-first pre-order, as in e.g. os.walk. 

3127 

3128 Args: 

3129 store: Object store to get trees from 

3130 tree_id: SHA1 of the tree. 

3131 include_trees: If True, include tree objects in the iteration. 

3132 

3133 Yields: TreeEntry namedtuples for all the objects in a tree. 

3134 """ 

3135 if tree_id is None: 

3136 return 

3137 # This could be fairly easily generalized to >2 trees if we find a use 

3138 # case. 

3139 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

3140 while todo: 

3141 entry = todo.pop() 

3142 assert entry.mode is not None 

3143 if stat.S_ISDIR(entry.mode): 

3144 extra = [] 

3145 assert entry.sha is not None 

3146 tree = store[entry.sha] 

3147 assert isinstance(tree, Tree) 

3148 for subentry in tree.iteritems(name_order=True): 

3149 assert entry.path is not None 

3150 extra.append(subentry.in_path(entry.path)) 

3151 todo.extend(reversed(extra)) 

3152 if not stat.S_ISDIR(entry.mode) or include_trees: 

3153 yield entry 

3154 
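# Illustrative usage sketch (not part of the dulwich source): listing every
# blob reachable from a commit's root tree. store and commit are hypothetical
# placeholders.
#
#     for entry in iter_tree_contents(store, commit.tree):
#         print(entry.path, oct(entry.mode), entry.sha)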

3155 

3156def iter_commit_contents( 

3157 store: ObjectContainer, 

3158 commit: Commit | bytes, 

3159 *, 

3160 include: Sequence[str | bytes | Path] | None = None, 

3161) -> Iterator[TreeEntry]: 

3162 """Iterate the contents of the repository at the specified commit. 

3163 

3164 This is a wrapper around iter_tree_contents() and 

3165 tree_lookup_path() to simplify the common task of getting the 

3166 contents of a repo at a particular commit. See also 

3167 dulwich.index.build_file_from_blob() for writing individual files 

3168 to disk. 

3169 

3170 Args: 

3171 store: Object store to get trees from 

3172 commit: Commit object, or SHA1 of a commit 

3173 include: if provided, only the entries whose paths are in the 

3174 list, or whose parent tree is in the list, will be 

3175 included. Note that duplicate or overlapping paths 

3176 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

3177 

3178 Yields: TreeEntry namedtuples for all matching files in a commit. 

3179 """ 

3180 sha = commit.id if isinstance(commit, Commit) else commit 

3181 if not isinstance(obj := store[sha], Commit): 

3182 raise TypeError( 

3183 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

3184 ) 

3185 commit = obj 

3186 encoding = commit.encoding or "utf-8" 

3187 include_bytes: list[bytes] = ( 

3188 [ 

3189 path if isinstance(path, bytes) else str(path).encode(encoding) 

3190 for path in include 

3191 ] 

3192 if include is not None 

3193 else [b""] 

3194 ) 

3195 

3196 for path in include_bytes: 

3197 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

3198 # Iterate all contained files if path points to a dir, otherwise just get that 

3199 # single file 

3200 if isinstance(store[obj_id], Tree): 

3201 for entry in iter_tree_contents(store, obj_id): 

3202 yield entry.in_path(path) 

3203 else: 

3204 yield TreeEntry(path, mode, obj_id) 

3205 

3206 

3207def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

3208 """Peel all tags from a SHA. 

3209 

3210 Args: 

3211 store: Object store to get objects from 

3212 sha: The object SHA to peel. 

3213 Returns: A tuple of (unpeeled, peeled) ShaFile objects, where peeled is 

3214 the object reached after following all intermediate tags; if the 

3215 original SHA does not point to a tag, both refer to the same object. 

3216 """ 

3217 unpeeled = obj = store[sha] 

3218 obj_class = object_class(obj.type_name) 

3219 while obj_class is Tag: 

3220 assert isinstance(obj, Tag) 

3221 obj_class, sha = obj.object 

3222 obj = store[sha] 

3223 return unpeeled, obj 

3224 
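# Illustrative usage sketch (not part of the dulwich source): following an
# annotated tag down to the commit it points at. store and tag_sha are
# hypothetical placeholders.
#
#     unpeeled, peeled = peel_sha(store, tag_sha)
#     # unpeeled is the Tag object itself, peeled is the tagged Commit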

3225 

3226class GraphTraversalReachability: 

3227 """Naive graph traversal implementation of ObjectReachabilityProvider. 

3228 

3229 This implementation wraps existing graph traversal functions 

3230 (_collect_ancestors, _collect_filetree_revs) to provide the standard 

3231 reachability interface without any performance optimizations. 

3232 """ 

3233 

3234 def __init__(self, object_store: BaseObjectStore) -> None: 

3235 """Initialize the graph traversal provider. 

3236 

3237 Args: 

3238 object_store: Object store to query 

3239 """ 

3240 self.store = object_store 

3241 

3242 def get_reachable_commits( 

3243 self, 

3244 heads: Iterable[bytes], 

3245 exclude: Iterable[bytes] | None = None, 

3246 shallow: Set[bytes] | None = None, 

3247 ) -> set[bytes]: 

3248 """Get all commits reachable from heads, excluding those in exclude. 

3249 

3250 Uses _collect_ancestors for commit traversal. 

3251 

3252 Args: 

3253 heads: Starting commit SHAs 

3254 exclude: Commit SHAs to exclude (and their ancestors) 

3255 shallow: Set of shallow commit boundaries 

3256 

3257 Returns: 

3258 Set of commit SHAs reachable from heads but not from exclude 

3259 """ 

3260 exclude_set = frozenset(exclude) if exclude else frozenset() 

3261 shallow_set = frozenset(shallow) if shallow else frozenset() 

3262 commits, _bases = _collect_ancestors( 

3263 self.store, heads, exclude_set, shallow_set 

3264 ) 

3265 return commits 

3266 

3267 def get_tree_objects( 

3268 self, 

3269 tree_shas: Iterable[bytes], 

3270 ) -> set[bytes]: 

3271 """Get all trees and blobs reachable from the given trees. 

3272 

3273 Uses _collect_filetree_revs for tree traversal. 

3274 

3275 Args: 

3276 tree_shas: Starting tree SHAs 

3277 

3278 Returns: 

3279 Set of tree and blob SHAs 

3280 """ 

3281 result: set[bytes] = set() 

3282 for tree_sha in tree_shas: 

3283 _collect_filetree_revs(self.store, tree_sha, result) 

3284 return result 

3285 

3286 def get_reachable_objects( 

3287 self, 

3288 commits: Iterable[bytes], 

3289 exclude_commits: Iterable[bytes] | None = None, 

3290 ) -> set[bytes]: 

3291 """Get all objects (commits + trees + blobs) reachable from commits. 

3292 

3293 Args: 

3294 commits: Starting commit SHAs 

3295 exclude_commits: Commits whose objects should be excluded 

3296 

3297 Returns: 

3298 Set of all object SHAs (commits, trees, blobs) 

3299 """ 

3300 commits_set = set(commits) 

3301 result = set(commits_set) 

3302 

3303 # Get trees for all commits 

3304 tree_shas = [] 

3305 for commit_sha in commits_set: 

3306 try: 

3307 commit = self.store[commit_sha] 

3308 if isinstance(commit, Commit): 

3309 tree_shas.append(commit.tree) 

3310 except KeyError: 

3311 # Commit not in store, skip 

3312 continue 

3313 

3314 # Collect all tree/blob objects 

3315 result.update(self.get_tree_objects(tree_shas)) 

3316 

3317 # Exclude objects from exclude_commits if needed 

3318 if exclude_commits: 

3319 exclude_objects = self.get_reachable_objects(exclude_commits, None) 

3320 result -= exclude_objects 

3321 

3322 return result 

3323 

3324 

3325class BitmapReachability: 

3326 """Bitmap-accelerated implementation of ObjectReachabilityProvider. 

3327 

3328 This implementation uses packfile bitmap indexes where available to 

3329 accelerate reachability queries. Falls back to graph traversal when 

3330 bitmaps don't cover the requested commits. 

3331 """ 

3332 

3333 def __init__(self, object_store: "PackBasedObjectStore") -> None: 

3334 """Initialize the bitmap provider. 

3335 

3336 Args: 

3337 object_store: Pack-based object store with bitmap support 

3338 """ 

3339 self.store = object_store 

3340 # Fallback to graph traversal for operations not yet optimized 

3341 self._fallback = GraphTraversalReachability(object_store) 

3342 

3343 def _combine_commit_bitmaps( 

3344 self, 

3345 commit_shas: set[bytes], 

3346 exclude_shas: set[bytes] | None = None, 

3347 ) -> tuple["EWAHBitmap", "Pack"] | None: 

3348 """Combine bitmaps for multiple commits using OR, with optional exclusion. 

3349 

3350 Args: 

3351 commit_shas: Set of commit SHAs to combine 

3352 exclude_shas: Optional set of commit SHAs to exclude 

3353 

3354 Returns: 

3355 Tuple of (combined_bitmap, pack) or None if bitmaps unavailable 

3356 """ 

3357 from .bitmap import find_commit_bitmaps 

3358 

3359 # Find bitmaps for the commits 

3360 commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs) 

3361 

3362 # If we can't find bitmaps for all commits, return None 

3363 if len(commit_bitmaps) < len(commit_shas): 

3364 return None 

3365 

3366 # Combine bitmaps using OR 

3367 combined_bitmap = None 

3368 result_pack = None 

3369 

3370 for commit_sha in commit_shas: 

3371 pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha] 

3372 commit_bitmap = pack_bitmap.get_bitmap(commit_sha) 

3373 

3374 if commit_bitmap is None: 

3375 return None 

3376 

3377 if combined_bitmap is None: 

3378 combined_bitmap = commit_bitmap 

3379 result_pack = pack 

3380 elif pack == result_pack: 

3381 # Same pack, can OR directly 

3382 combined_bitmap = combined_bitmap | commit_bitmap 

3383 else: 

3384 # Different packs, can't combine 

3385 return None 

3386 

3387 # Handle exclusions if provided 

3388 if exclude_shas and result_pack and combined_bitmap: 

3389 exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack]) 

3390 

3391 if len(exclude_bitmaps) == len(exclude_shas): 

3392 # All excludes have bitmaps, compute exclusion 

3393 exclude_combined = None 

3394 

3395 for commit_sha in exclude_shas: 

3396 _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha] 

3397 exclude_bitmap = pack_bitmap.get_bitmap(commit_sha) 

3398 

3399 if exclude_bitmap is None: 

3400 break 

3401 

3402 if exclude_combined is None: 

3403 exclude_combined = exclude_bitmap 

3404 else: 

3405 exclude_combined = exclude_combined | exclude_bitmap 

3406 

3407 # Subtract excludes using set difference 

3408 if exclude_combined: 

3409 combined_bitmap = combined_bitmap - exclude_combined 

3410 

3411 if combined_bitmap and result_pack: 

3412 return (combined_bitmap, result_pack) 

3413 return None 

3414 

3415 def get_reachable_commits( 

3416 self, 

3417 heads: Iterable[bytes], 

3418 exclude: Iterable[bytes] | None = None, 

3419 shallow: Set[bytes] | None = None, 

3420 ) -> set[bytes]: 

3421 """Get all commits reachable from heads using bitmaps where possible. 

3422 

3423 Args: 

3424 heads: Starting commit SHAs 

3425 exclude: Commit SHAs to exclude (and their ancestors) 

3426 shallow: Set of shallow commit boundaries 

3427 

3428 Returns: 

3429 Set of commit SHAs reachable from heads but not from exclude 

3430 """ 

3431 from .bitmap import bitmap_to_object_shas 

3432 

3433 # If shallow is specified, fall back to graph traversal 

3434 # (bitmaps don't support shallow boundaries well) 

3435 if shallow: 

3436 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3437 

3438 heads_set = set(heads) 

3439 exclude_set = set(exclude) if exclude else None 

3440 

3441 # Try to combine bitmaps 

3442 result = self._combine_commit_bitmaps(heads_set, exclude_set) 

3443 if result is None: 

3444 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3445 

3446 combined_bitmap, result_pack = result 

3447 

3448 # Convert bitmap to commit SHAs, filtering for commits only 

3449 pack_bitmap = result_pack.bitmap 

3450 if pack_bitmap is None: 

3451 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3452 commit_type_filter = pack_bitmap.commit_bitmap 

3453 return bitmap_to_object_shas( 

3454 combined_bitmap, result_pack.index, commit_type_filter 

3455 ) 

3456 

3457 def get_tree_objects( 

3458 self, 

3459 tree_shas: Iterable[bytes], 

3460 ) -> set[bytes]: 

3461 """Get all trees and blobs reachable from the given trees. 

3462 

3463 Args: 

3464 tree_shas: Starting tree SHAs 

3465 

3466 Returns: 

3467 Set of tree and blob SHAs 

3468 """ 

3469 # Tree traversal doesn't benefit much from bitmaps, use fallback 

3470 return self._fallback.get_tree_objects(tree_shas) 

3471 

3472 def get_reachable_objects( 

3473 self, 

3474 commits: Iterable[bytes], 

3475 exclude_commits: Iterable[bytes] | None = None, 

3476 ) -> set[bytes]: 

3477 """Get all objects reachable from commits using bitmaps. 

3478 

3479 Args: 

3480 commits: Starting commit SHAs 

3481 exclude_commits: Commits whose objects should be excluded 

3482 

3483 Returns: 

3484 Set of all object SHAs (commits, trees, blobs) 

3485 """ 

3486 from .bitmap import bitmap_to_object_shas 

3487 

3488 commits_set = set(commits) 

3489 exclude_set = set(exclude_commits) if exclude_commits else None 

3490 

3491 # Try to combine bitmaps 

3492 result = self._combine_commit_bitmaps(commits_set, exclude_set) 

3493 if result is None: 

3494 return self._fallback.get_reachable_objects(commits, exclude_commits) 

3495 

3496 combined_bitmap, result_pack = result 

3497 

3498 # Convert bitmap to all object SHAs (no type filter) 

3499 return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)