Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1618 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26__all__ = [ 

27 "DEFAULT_TEMPFILE_GRACE_PERIOD", 

28 "INFODIR", 

29 "PACKDIR", 

30 "PACK_MODE", 

31 "BaseObjectStore", 

32 "BitmapReachability", 

33 "BucketBasedObjectStore", 

34 "DiskObjectStore", 

35 "GraphTraversalReachability", 

36 "GraphWalker", 

37 "MemoryObjectStore", 

38 "MissingObjectFinder", 

39 "ObjectIterator", 

40 "ObjectReachabilityProvider", 

41 "ObjectStoreGraphWalker", 

42 "OverlayObjectStore", 

43 "PackBasedObjectStore", 

44 "PackCapableObjectStore", 

45 "PackContainer", 

46 "commit_tree_changes", 

47 "find_shallow", 

48 "get_depth", 

49 "iter_commit_contents", 

50 "iter_tree_contents", 

51 "peel_sha", 

52 "read_packs_file", 

53 "tree_lookup_path", 

54] 

55 

56import binascii 

57import os 

58import stat 

59import sys 

60import time 

61import warnings 

62from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set 

63from contextlib import suppress 

64from io import BytesIO 

65from pathlib import Path 

66from typing import ( 

67 TYPE_CHECKING, 

68 BinaryIO, 

69 Protocol, 

70 TypeVar, 

71 cast, 

72) 

73 

74if TYPE_CHECKING: 

75 from .object_format import ObjectFormat 

76 

77from .errors import NotTreeError 

78from .file import GitFile, _GitFile 

79from .midx import MultiPackIndex, load_midx 

80from .objects import ( 

81 S_ISGITLINK, 

82 Blob, 

83 Commit, 

84 ObjectID, 

85 RawObjectID, 

86 ShaFile, 

87 Tag, 

88 Tree, 

89 TreeEntry, 

90 hex_to_filename, 

91 hex_to_sha, 

92 object_class, 

93 sha_to_hex, 

94 valid_hexsha, 

95) 

96from .pack import ( 

97 PACK_SPOOL_FILE_MAX_SIZE, 

98 ObjectContainer, 

99 Pack, 

100 PackData, 

101 PackedObjectContainer, 

102 PackFileDisappeared, 

103 PackHint, 

104 PackIndexer, 

105 PackInflater, 

106 PackStreamCopier, 

107 UnpackedObject, 

108 extend_pack, 

109 full_unpacked_object, 

110 generate_unpacked_objects, 

111 iter_sha1, 

112 load_pack_index_file, 

113 pack_objects_to_data, 

114 write_pack_data, 

115 write_pack_index, 

116) 

117from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX 

118from .refs import Ref 

119 

120if TYPE_CHECKING: 

121 from .bitmap import EWAHBitmap 

122 from .commit_graph import CommitGraph 

123 from .config import Config 

124 from .diff_tree import RenameDetector 

125 from .pack import FilePackIndex, Pack 

126 

127 

128# Maximum number of times to rescan the pack directory after a pack file 

129# disappears between snapshot and lazy open (e.g. concurrent repack). 

130# Mirrors git's bounded reprepare_packed_git() retry. 

131_MAX_PACK_RESCAN_ATTEMPTS = 3 

132 

133_T = TypeVar("_T") 

134 

135 

class GraphWalker(Protocol):
    """Structural interface for objects that drive a graph walk.

    Any object exposing ``__next__``, ``ack`` and ``nak`` with the
    signatures below satisfies this protocol.
    """

    def __next__(self) -> ObjectID | None:
        """Produce the SHA of the next object to visit (may be None)."""

    def ack(self, sha: ObjectID) -> None:
        """Record that the object identified by ``sha`` has been received."""

    def nak(self) -> None:
        """Signal that nothing in common was found."""

150 

151 

class ObjectReachabilityProvider(Protocol):
    """Structural interface for answering object reachability queries.

    Implementations may be backed by naive graph traversal or by optimized
    bitmap indexes; callers see the same three methods either way.
    """

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Collect the commits reachable from ``heads`` but not from ``exclude``.

        Args:
          heads: Commit SHAs at which the walk starts
          exclude: Commit SHAs to leave out, together with their ancestors
          shallow: Shallow boundary commits; traversal stops at these

        Returns:
          Set of commit SHAs reachable from heads but not from exclude
        """

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Collect every object (commits, trees, blobs) reachable from commits.

        Args:
          commits: Commit SHAs at which the walk starts
          exclude_commits: Commits whose objects should be left out

        Returns:
          Set of all object SHAs (commits, trees, blobs, tags)
        """

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Collect all trees and blobs reachable from the given trees.

        Args:
          tree_shas: Tree SHAs at which the walk starts

        Returns:
          Set of tree and blob SHAs
        """

206 

207 

208INFODIR = "info" 

209PACKDIR = "pack" 

210 

211# use permissions consistent with Git; just readable by everyone 

212# TODO: should packs also be non-writable on Windows? if so, that 

213# would require some rather significant adjustments to the test suite 

214PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

215 

216# Grace period for cleaning up temporary pack files (in seconds) 

217# Matches git's default of 2 weeks 

218DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

219 

220 

def find_shallow(
    store: ObjectContainer, heads: Iterable[ObjectID], depth: int
) -> tuple[set[ObjectID], set[ObjectID]]:
    """Find shallow commits according to a given depth.

    Heads that are tags are peeled (through any number of nested tags) to the
    object they point at; heads that do not resolve to a commit are ignored.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
      considered shallow and unshallow according to the arguments. Note that
      these sets may overlap if a commit is reachable along multiple paths.
    """
    # Per-call cache of commit SHA -> parent SHAs.
    parents: dict[ObjectID, list[ObjectID]] = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha: ObjectID) -> list[ObjectID]:
        result = parents.get(sha)
        # Test against None, not truthiness: a root commit has an empty
        # parent list, which is a perfectly valid cached answer and must not
        # trigger another commit-graph/object lookup on every visit.
        if result is None:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            commit = store[sha]
            assert isinstance(commit, Commit)
            result = commit.parents
            parents[sha] = result
        return result

    todo: list[tuple[ObjectID, int]] = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel tags if necessary
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow: set[ObjectID] = set()
    shallow: set[ObjectID] = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            # Still above the cut-off: keep the commit and descend further.
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            # Reached the requested depth: this commit is a shallow boundary.
            shallow.add(sha)

    return shallow, not_shallow

277 

278 

def get_depth(
    store: ObjectContainer,
    head: ObjectID,
    get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    max_depth: int | None = None,
) -> int:
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to search in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit;
        only used when the commit graph cannot supply them
      max_depth: maximum depth to search
    Returns: 0 if ``head`` is not in ``store``, otherwise the depth reached.
    """
    from collections import deque

    if head not in store:
        return 0
    current_depth = 1
    # A deque gives O(1) removal from the front; list.pop(0) is O(n).
    queue = deque([(head, current_depth)])
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.popleft()
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            # Peel through every tag layer (as find_shallow does) so that a
            # tag pointing at another tag still reaches the tagged object.
            while isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        # Parents that are absent from the store are not followed.
        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth

321 

322 

class PackContainer(Protocol):
    """Structural interface for containers able to accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Begin adding a new pack.

        Returns: A triple of a file object and two zero-argument callables,
          as declared by the return annotation.
        """
        ...

328 

329 

class BaseObjectStore:
    """Object store interface.

    Several methods here (``contains_loose``, ``packs``, ``get_raw``,
    ``__iter__``, ``add_object``, ``add_objects``, ``write_commit_graph``)
    raise ``NotImplementedError`` and have to be provided by a subclass before
    they can be used; the remaining methods are built on top of them.
    """

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize object store.

        Args:
          object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
        """
        from .object_format import DEFAULT_OBJECT_FORMAT

        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT

    def determine_wants_all(
        self, refs: Mapping[Ref, ObjectID], depth: int | None = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs.

        A ref's SHA is wanted when it is missing from this store, or when it
        is present but ``depth`` asks for more history than is currently
        available for it. Refs whose name ends with ``PEELED_TAG_SUFFIX`` are
        never included.

        Args:
          refs: Mapping of ref name to the SHA it points at.
          depth: Requested depth; a falsy value disables deepening and
            ``DEPTH_INFINITE`` always requests it.
        Returns: List of wanted SHAs, in the iteration order of ``refs``.
        """

        def _want_deepen(sha: ObjectID) -> bool:
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            # Check the cheap name suffix first so that peeled entries never
            # trigger a store lookup or a depth computation.
            if not ref.endswith(PEELED_TAG_SUFFIX)
            and (sha not in self or _want_deepen(sha))
        ]

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        return False  # Default implementation for stores that don't support packing

    def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects:
        both ``contains_packed`` and ``contains_loose`` are consulted, so a
        subclass that only overrides ``contains_packed`` is still honoured.
        """
        if self.contains_packed(sha1):
            return True
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
        """Obtain an object by SHA1.

        The raw data comes from ``get_raw`` and is parsed with this store's
        ``object_format``.
        """
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(
            type_num, uncomp, sha=sha1, object_format=self.object_format
        )

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def get_reachability_provider(
        self, prefer_bitmap: bool = True
    ) -> ObjectReachabilityProvider:
        """Get a reachability provider for this object store.

        Returns an ObjectReachabilityProvider that can efficiently compute
        object reachability queries. Subclasses can override this to provide
        optimized implementations (e.g., using bitmap indexes).

        Args:
          prefer_bitmap: Whether to prefer bitmap-based reachability if
            available. Ignored by this base implementation, which always
            returns a graph-traversal provider.

        Returns:
          ObjectReachabilityProvider instance
        """
        return GraphTraversalReachability(self)

    def tree_changes(
        self,
        source: ObjectID | None,
        target: ObjectID | None,
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: "RenameDetector | None" = None,
        paths: Sequence[bytes] | None = None,
    ) -> Iterator[
        tuple[
            tuple[bytes | None, bytes | None],
            tuple[int | None, int | None],
            tuple[ObjectID | None, ObjectID | None],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
          (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            # Either side of a change may be absent; report None for all of
            # that side's fields in that case.
            old_path = change.old.path if change.old is not None else None
            new_path = change.new.path if change.new is not None else None
            old_mode = change.old.mode if change.old is not None else None
            new_mode = change.new.mode if change.new is not None else None
            old_sha = change.old.sha if change.old is not None else None
            new_sha = change.new.sha if change.new is not None else None
            yield (
                (old_path, new_path),
                (old_mode, new_mode),
                (old_sha, new_sha),
            )

    def iter_tree_contents(
        self, tree_id: ObjectID, include_trees: bool = False
    ) -> Iterator[TreeEntry]:
        """Iterate the contents of a tree and all subtrees.

        Deprecated: emits a DeprecationWarning and delegates to the
        module-level ``iter_tree_contents``.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
          tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> "Iterator[UnpackedObject]":
        """Iterate over unpacked objects for a subset of SHAs.

        Default implementation that converts ShaFile objects to UnpackedObject.
        Subclasses may override for more efficient unpacked access.

        Args:
          shas: Iterable of object SHAs to retrieve
          include_comp: Whether to include compressed data (ignored in base
            implementation)
          allow_missing: If True, skip missing objects; if False, raise
            KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
            base implementation)

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        from .pack import UnpackedObject

        for sha in shas:
            try:
                obj = self[sha]
                # Convert ShaFile to UnpackedObject
                unpacked = UnpackedObject(
                    obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
                )
                yield unpacked
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    ) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Find the missing objects required for a set of revisions.

        Deprecated: emits a DeprecationWarning and delegates to
        ``MissingObjectFinder``.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.
        """
        # stacklevel=2 attributes the warning to the caller, matching the
        # other deprecated wrappers in this class.
        warnings.warn(
            "Please use MissingObjectFinder(store)",
            DeprecationWarning,
            stacklevel=2,
        )
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
        """Find which revisions this store has in common using graphwalker.

        The walker is advanced until it yields a falsy value; every SHA that
        is present in this store is acknowledged via ``graphwalker.ack``.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        Returns: The result of ``pack_objects_to_data`` for the missing
          objects.
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Peel all tags from a SHA.

        Deprecated: emits a DeprecationWarning and delegates to the
        module-level ``peel_sha``.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
          intermediate tags; if the original ref does not point to a tag,
          this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: ObjectID,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
        max_depth: int | None = None,
    ) -> int:
        """Return the current available depth for the given head.

        Thin wrapper around the module-level ``get_depth`` using this store.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: int | None = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> "CommitGraph | None":
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise. This base
          implementation always returns None.
        """
        return None

    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Raises:
          NotImplementedError: Always, in this default implementation.
            Subclasses should override this method to provide commit graph
            writing functionality.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: ObjectID) -> float:
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)

755 

756 

class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
    """Base class for object stores that can handle pack files.

    Intended as the common parent of both disk-based and memory-based
    stores that support pack operations.
    """

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Start adding a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)

        Raises:
          NotImplementedError: Always; subclasses provide the implementation.
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator["UnpackedObject"],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Store pack data in this object store.

        Args:
          count: Number of objects
          unpacked_objects: Iterator over unpacked objects
          progress: Optional progress callback

        Raises:
          NotImplementedError: Always; subclasses provide the implementation.
        """
        raise NotImplementedError(self.add_pack_data)

    def get_unpacked_object(
        self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
    ) -> "UnpackedObject":
        """Fetch an object and wrap it as a raw unresolved object.

        Args:
          sha1: SHA-1 hash of the object
          include_comp: Whether to include compressed data (not used by
            this implementation)

        Returns:
          UnpackedObject instance
        """
        from .pack import UnpackedObject

        loaded = self[sha1]
        type_num = loaded.type_num
        chunks = loaded.as_raw_chunks()
        return UnpackedObject(type_num, sha=sha1, decomp_chunks=chunks)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Yield the objects for the requested SHAs.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; otherwise the
            KeyError from the lookup propagates

        Returns:
          Iterator of ShaFile objects
        """
        for wanted in shas:
            try:
                yield self[wanted]
            except KeyError:
                if allow_missing:
                    continue
                raise

821 

822 

823class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer): 

824 """Object store that uses pack files for storage. 

825 

826 This class provides a base implementation for object stores that use 

827 Git pack files as their primary storage mechanism. It handles caching 

828 of open pack files and provides configuration for pack file operations. 

829 """ 

830 

831 def __init__( 

832 self, 

833 pack_compression_level: int = -1, 

834 pack_index_version: int | None = None, 

835 pack_delta_window_size: int | None = None, 

836 pack_window_memory: int | None = None, 

837 pack_delta_cache_size: int | None = None, 

838 pack_depth: int | None = None, 

839 pack_threads: int | None = None, 

840 pack_big_file_threshold: int | None = None, 

841 *, 

842 packed_git_limit: int | None = None, 

843 delta_base_cache_limit: int | None = None, 

844 object_format: "ObjectFormat | None" = None, 

845 ) -> None: 

846 """Initialize a PackBasedObjectStore. 

847 

848 Args: 

849 pack_compression_level: Compression level for pack files (-1 to 9) 

850 pack_index_version: Pack index version to use 

851 pack_delta_window_size: Window size for delta compression 

852 pack_window_memory: Maximum memory to use for delta window 

853 pack_delta_cache_size: Cache size for delta operations 

854 pack_depth: Maximum depth for pack deltas 

855 pack_threads: Number of threads to use for packing 

856 pack_big_file_threshold: Threshold for treating files as "big" 

857 packed_git_limit: Maximum total bytes for mmapped pack files. 

858 When exceeded, least-recently-used packs are closed to free memory. 

859 delta_base_cache_limit: Maximum bytes for caching delta base objects. 

860 Controls memory used to cache resolved base objects during delta 

861 unpacking, corresponding to Git's core.deltaBaseCacheLimit. 

862 object_format: Hash algorithm to use 

863 """ 

864 super().__init__(object_format=object_format) 

865 self._pack_cache: dict[str, Pack] = {} 

866 self._pack_access_order: list[str] = [] 

867 self.packed_git_limit = packed_git_limit 

868 self.delta_base_cache_limit = delta_base_cache_limit 

869 self.pack_compression_level = pack_compression_level 

870 self.pack_index_version = pack_index_version 

871 self.pack_delta_window_size = pack_delta_window_size 

872 self.pack_window_memory = pack_window_memory 

873 self.pack_delta_cache_size = pack_delta_cache_size 

874 self.pack_depth = pack_depth 

875 self.pack_threads = pack_threads 

876 self.pack_big_file_threshold = pack_big_file_threshold 

877 

878 def get_reachability_provider( 

879 self, 

880 prefer_bitmaps: bool = True, 

881 ) -> ObjectReachabilityProvider: 

882 """Get the best reachability provider for the object store. 

883 

884 Args: 

885 prefer_bitmaps: Whether to use bitmaps if available 

886 

887 Returns: 

888 ObjectReachabilityProvider implementation (either bitmap-accelerated 

889 or graph traversal) 

890 """ 

891 if prefer_bitmaps: 

892 # Check if any packs have bitmaps 

893 has_bitmap = False 

894 for pack in self.packs: 

895 try: 

896 # Try to access bitmap property 

897 if pack.bitmap is not None: 

898 has_bitmap = True 

899 break 

900 except FileNotFoundError: 

901 # Bitmap file doesn't exist for this pack 

902 continue 

903 

904 if has_bitmap: 

905 return BitmapReachability(self) 

906 

907 # Fall back to graph traversal 

908 return GraphTraversalReachability(self) 

909 

910 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

911 """Add a new pack to this object store.""" 

912 raise NotImplementedError(self.add_pack) 

913 

914 def add_pack_data( 

915 self, 

916 count: int, 

917 unpacked_objects: Iterator[UnpackedObject], 

918 progress: Callable[..., None] | None = None, 

919 ) -> "Pack | None": 

920 """Add pack data to this object store. 

921 

922 Args: 

923 count: Number of items to add 

924 unpacked_objects: Iterator of UnpackedObject instances 

925 progress: Optional progress callback 

926 """ 

927 if count == 0: 

928 # Don't bother writing an empty pack file 

929 return None 

930 f, commit, abort = self.add_pack() 

931 try: 

932 write_pack_data( 

933 f.write, 

934 unpacked_objects, 

935 num_records=count, 

936 progress=progress, 

937 compression_level=self.pack_compression_level, 

938 object_format=self.object_format, 

939 ) 

940 except BaseException: 

941 abort() 

942 raise 

943 else: 

944 return commit() 

945 

946 @property 

947 def alternates(self) -> list["BaseObjectStore"]: 

948 """Return list of alternate object stores.""" 

949 return [] 

950 

951 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

952 """Check if a particular object is present by SHA1 and is packed. 

953 

954 This does not check alternates. 

955 """ 

956 

957 def lookup(p: "Pack") -> bool: 

958 if sha in p: 

959 return True 

960 raise KeyError 

961 

962 try: 

963 return self._lookup_in_packs(lookup) 

964 except KeyError: 

965 return False 

966 

def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
    """Tell whether the object with the given SHA is present.

    Packed and loose objects are treated alike; alternates are consulted
    last.
    """
    if self.contains_packed(sha):
        return True
    if self.contains_loose(sha):
        return True
    return any(sha in alternate for alternate in self.alternates)

978 

def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
    """Add a newly appeared pack to the cache by path.

    Args:
        base_name: Key under which the pack is stored in ``_pack_cache``.
        pack: Pack to cache.
    """
    prev_pack = self._pack_cache.get(base_name)
    if prev_pack is not pack:
        self._pack_cache[base_name] = pack
        # A different pack was cached under this key; release it.
        if prev_pack:
            prev_pack.close()
    # NOTE(review): nesting reconstructed from a listing that lost its
    # indentation -- confirm these two calls belong at function level.
    # Record the access for LRU tracking, then evict if over the limit.
    self._mark_pack_used(base_name)
    self._enforce_packed_git_limit()

988 

def generate_pack_data(
    self,
    have: Iterable[ObjectID],
    want: Iterable[ObjectID],
    *,
    shallow: Set[ObjectID] | None = None,
    progress: Callable[..., None] | None = None,
    ofs_delta: bool = True,
) -> tuple[int, Iterator[UnpackedObject]]:
    """Produce the pack contents needed to satisfy a set of wants/haves.

    Args:
      have: List of SHA1s of objects that should not be sent
      want: List of SHA1s of objects that should be sent
      shallow: Set of shallow commit SHA1s to skip
      ofs_delta: Whether OFS deltas can be included
      progress: Optional progress reporting method

    Returns:
      Tuple of (number of objects, iterator of unpacked objects).
    """
    finder = MissingObjectFinder(
        self, haves=have, wants=want, shallow=shallow, progress=progress
    )
    # Query the finder for the remote's objects before draining it.
    peer_objects = finder.get_remote_has()
    to_send = list(finder)
    unpacked = generate_unpacked_objects(
        self,
        to_send,
        progress=progress,
        ofs_delta=ofs_delta,
        other_haves=peer_objects,
    )
    return len(to_send), unpacked

1019 

def _clear_cached_packs(self) -> None:
    """Reset the pack cache and LRU order, closing every cached pack."""
    # Detach the old cache first so the store is already empty while the
    # packs are being closed.
    stale, self._pack_cache = self._pack_cache, {}
    self._pack_access_order = []
    while stale:
        stale.popitem()[1].close()

1027 

def _total_pack_mmap_size(self) -> int:
    """Add up ``mmap_size`` over every pack currently in the cache."""
    total = 0
    for cached in self._pack_cache.values():
        total = total + cached.mmap_size
    return total

1031 

def _mark_pack_used(self, pack_hash: str) -> None:
    """Move ``pack_hash`` to the most-recently-used end of the LRU order."""
    order = self._pack_access_order
    if pack_hash in order:
        order.remove(pack_hash)
    order.append(pack_hash)

1039 

def _enforce_packed_git_limit(self) -> None:
    """Close and drop least-recently-used packs while over the mmap limit.

    Does nothing when ``packed_git_limit`` is None.
    """
    if self.packed_git_limit is None:
        return
    while self._pack_access_order:
        if not self._total_pack_mmap_size() > self.packed_git_limit:
            break
        victim_key = self._pack_access_order.pop(0)
        victim = self._pack_cache.get(victim_key)
        if victim is None:
            # The order list named a pack that is no longer cached.
            continue
        victim.close()
        del self._pack_cache[victim_key]

1053 

def _iter_cached_packs(self) -> Iterator[Pack]:
    """Iterate over a snapshot of the packs currently in the cache."""
    # Copy the values so callers may mutate the cache while iterating.
    snapshot = [*self._pack_cache.values()]
    return iter(snapshot)

1056 

def _evict_pack(self, pack: "Pack | FilePackIndex") -> None:
    """Drop one pack from the cache once its backing file has vanished.

    ``pack`` is matched either against the cached ``Pack`` itself or
    against that pack's ``_idx`` reference, so a :class:`FilePackIndex`
    may be passed as well. Only the first match is evicted.
    """
    for key, cached in list(self._pack_cache.items()):
        if cached is not pack and cached._idx is not pack:
            continue
        del self._pack_cache[key]
        if key in self._pack_access_order:
            self._pack_access_order.remove(key)
        try:
            cached.close()
        except OSError:
            # Closing is best effort here.
            pass
        return

1076 

def _lookup_in_packs(self, lookup: "Callable[[Pack], _T]") -> "_T":
    """Run ``lookup(pack)`` against each cached pack and return the first hit.

    ``lookup`` should raise ``KeyError`` if the pack does not contain the
    target. ``PackFileDisappeared`` from a concurrent ``git repack`` /
    ``gc --auto`` is caught: the stale pack is evicted, the pack
    directory is rescanned, and the search retries — bounded, mirroring
    git's ``reprepare_packed_git()``. If no cached pack has the object
    the pack directory is rescanned once to pick up any newly-arrived
    packs (e.g. another writer just landed one). ``KeyError`` is raised
    if no pack — old or new — has the object.

    Args:
        lookup: Callable applied to each cached pack in turn.

    Returns:
        The value returned by the first ``lookup`` call that does not raise.

    Raises:
        KeyError: If no pack yields a result within the bounded attempts.
    """
    # NOTE(review): nesting below is reconstructed from a listing that lost
    # its indentation -- confirm against the original file.
    # Becomes True once the pack directory has been rescanned in this call.
    rescanned = False
    for _attempt in range(_MAX_PACK_RESCAN_ATTEMPTS):
        disappeared = False
        # Iterate over a snapshot: _evict_pack() mutates the cache.
        for pack_hash, pack in list(self._pack_cache.items()):
            try:
                result = lookup(pack)
            except KeyError:
                continue
            except PackFileDisappeared as exc:
                self._evict_pack(exc.obj)
                disappeared = True
                continue
            # Hit: record the access for LRU tracking before returning.
            self._mark_pack_used(pack_hash)
            self._enforce_packed_git_limit()
            return result
        if disappeared:
            # At least one pack vanished mid-search; rescan and retry.
            self._update_pack_cache()
            rescanned = True
            continue
        if not rescanned:
            # Maybe another process just landed a pack with the object.
            if self._update_pack_cache():
                rescanned = True
                continue
        break
    raise KeyError

1115 

def _update_pack_cache(self) -> list[Pack]:
    """Refresh the pack cache; concrete stores must override this.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self._update_pack_cache)

1118 

def close(self) -> None:
    """Release the resources held by this object store.

    Delegates to ``_clear_cached_packs()``, which closes every cached pack
    and empties the cache, so calling this repeatedly is safe.
    """
    self._clear_cached_packs()

1126 

def __del__(self) -> None:
    """Warn if the object store is being deleted with unclosed packs.

    When packs are still cached a ``ResourceWarning`` is emitted and the
    store is closed.
    """
    # __del__ also runs for instances whose __init__ raised before the
    # cache attribute was assigned, so look it up defensively instead of
    # failing with AttributeError during finalization.
    open_packs = getattr(self, "_pack_cache", None)
    if not open_packs:
        return
    import warnings

    warnings.warn(
        f"ObjectStore {self!r} was destroyed with {len(open_packs)} "
        "unclosed pack(s). Please call close() explicitly.",
        ResourceWarning,
        stacklevel=2,
    )
    self.close()

1139 

@property
def packs(self) -> list[Pack]:
    """List with pack objects.

    The already-cached packs come first, followed by whatever
    ``_update_pack_cache()`` returns.
    """
    # Snapshot the cache before refreshing it, matching evaluation order.
    already_open = list(self._iter_cached_packs())
    newly_found = list(self._update_pack_cache())
    return already_open + newly_found

1144 

def count_pack_files(self) -> int:
    """Count the packs that are not protected by a ``.keep`` file.

    Returns:
        Number of pack files (excluding those with .keep files)
    """
    return sum(
        1 for pack in self.packs if not os.path.exists(pack._basename + ".keep")
    )

1158 

def _iter_alternate_objects(self) -> Iterator[ObjectID]:
    """Yield every object SHA from each alternate store, in order."""
    for alt_store in self.alternates:
        yield from alt_store

1163 

def _iter_loose_objects(self) -> Iterator[ObjectID]:
    """Iterate over the SHAs of all loose objects.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self._iter_loose_objects)

1167 

def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
    """Load a loose object by SHA; concrete stores must override this.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self._get_loose_object)

1170 

def delete_loose_object(self, sha: ObjectID) -> None:
    """Remove a loose object from the store.

    Only loose objects are handled here. To drop packed objects, pass them
    to ``repack(exclude=...)`` so they are left out while repacking.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self.delete_loose_object)

1178 

def _remove_pack(self, pack: "Pack") -> None:
    """Remove a pack from the store; concrete stores must override this.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError(self._remove_pack)

1181 

def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
    """Move every loose object into a pack.

    Args:
      progress: Optional progress reporting callback

    Returns: Number of objects packed
    """
    loaded = (self._get_loose_object(sha) for sha in self._iter_loose_objects())
    # Objects that could not be loaded (None) are left alone.
    objects: list[tuple[ShaFile, None]] = [
        (obj, None) for obj in loaded if obj is not None
    ]
    self.add_objects(objects, progress=progress)
    for packed, _path in objects:
        self.delete_loose_object(packed.id)
    return len(objects)

1199 

def repack(
    self,
    exclude: Set[bytes] | None = None,
    progress: Callable[[str], None] | None = None,
) -> int:
    """Consolidate loose objects and existing packs into a single pack.

    Note that this implementation is fairly naive and currently keeps all
    objects in memory while it repacks.

    Args:
      exclude: Optional set of object SHAs to exclude from repacking
      progress: Optional progress reporting callback

    Returns:
      Number of objects written to the consolidated pack.
    """
    if exclude is None:
        exclude = set()

    kept_loose = set()
    dropped_loose = set()
    for sha in self._iter_loose_objects():
        if sha in exclude:
            dropped_loose.add(sha)
            continue
        loaded = self._get_loose_object(sha)
        if loaded is not None:
            kept_loose.add(loaded)

    objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in kept_loose}
    old_packs = {p.name(): p for p in self.packs}
    for existing in old_packs.values():
        objects.update(
            (obj, None) for obj in existing.iterobjects() if obj.id not in exclude
        )

    # Only create a new pack if there are objects to pack
    if objects:
        # The consolidated pack may end up with the same name as one of the
        # pre-existing packs; make sure it is not removed below.
        consolidated = self.add_objects(list(objects), progress=progress)
        if consolidated is not None:
            old_packs.pop(consolidated.name(), None)

    # Loose objects are gone either way: packed ones and excluded ones.
    for obj in kept_loose:
        self.delete_loose_object(obj.id)
    for sha in dropped_loose:
        self.delete_loose_object(sha)
    for superseded in old_packs.values():
        self._remove_pack(superseded)
    self._update_pack_cache()
    return len(objects)

1254 

def generate_pack_bitmaps(
    self,
    refs: dict[Ref, ObjectID],
    *,
    commit_interval: int | None = None,
    progress: Callable[[str], None] | None = None,
) -> int:
    """Ensure every pack in the store has a bitmap index.

    Calls ``ensure_bitmap`` on each pack, which enables fast reachability
    queries. Equivalent to the bitmap generation part of 'git repack -b'.

    Args:
        refs: Dictionary of ref names to commit SHAs
        commit_interval: Include every Nth commit in bitmap index (None for default)
        progress: Optional progress reporting callback

    Returns:
        Number of packs processed (one per ``ensure_bitmap`` call).
    """
    processed = 0
    for processed, pack in enumerate(self.packs, start=1):
        pack.ensure_bitmap(
            self, refs, commit_interval=commit_interval, progress=progress
        )

    # Update cache to pick up new bitmaps
    self._update_pack_cache()

    return processed

1286 

def __iter__(self) -> Iterator[ObjectID]:
    """Yield the SHA of every object in this store.

    Packed objects come first (after refreshing the pack cache), then
    loose objects, then objects from alternates.
    """
    self._update_pack_cache()
    for cached in self._iter_cached_packs():
        try:
            yield from cached
        except PackFileDisappeared as gone:
            # The pack vanished underneath us; drop it and move on.
            self._evict_pack(gone.obj)
    yield from self._iter_loose_objects()
    yield from self._iter_alternate_objects()

1297 

def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
    """Tell whether the object with the given SHA exists as a loose object.

    This does not check alternates.
    """
    loose = self._get_loose_object(sha)
    return loose is not None

1304 

def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
    """Fetch the raw fulltext of an object.

    Packs are searched first, then loose objects, then alternates.

    Args:
      name: sha for the object.
    Returns: tuple with numeric type and object contents.

    Raises:
      AssertionError: If ``name`` has neither the hex nor the binary length.
      KeyError: If the object cannot be found anywhere.
    """
    fmt = self.object_format
    name_len = len(name)
    sha: RawObjectID
    if name_len == fmt.hex_length:
        sha = hex_to_sha(ObjectID(name))
        hexsha = name
    elif name_len == fmt.oid_length:
        sha = RawObjectID(name)
        hexsha = None
    else:
        raise AssertionError(f"Invalid object name {name!r}")

    def _from_pack(p):
        return p.get_raw(sha)

    try:
        return self._lookup_in_packs(_from_pack)
    except KeyError:
        pass
    # Loose objects and alternates are addressed by hex sha.
    if hexsha is None:
        hexsha = sha_to_hex(sha)
    loose = self._get_loose_object(hexsha)
    if loose is not None:
        return loose.type_num, loose.as_raw_string()
    for alternate in self.alternates:
        try:
            return alternate.get_raw(hexsha)
        except KeyError:
            continue
    raise KeyError(hexsha)

1336 

def iter_unpacked_subset(
    self,
    shas: Iterable[ObjectID | RawObjectID],
    include_comp: bool = False,
    allow_missing: bool = False,
    convert_ofs_delta: bool = True,
) -> Iterator[UnpackedObject]:
    """Yield UnpackedObject instances for a subset of objects.

    Cached packs are searched first, then packs found by a rescan, then
    alternates. Each source is always queried with ``allow_missing=True``.

    Args:
      shas: Set of object SHAs to retrieve
      include_comp: Whether to include compressed data
      allow_missing: If True, skip missing objects; if False, raise KeyError
      convert_ofs_delta: Whether to convert OFS_DELTA objects

    Returns:
      Iterator of UnpackedObject instances

    Raises:
      KeyError: If an object is missing and allow_missing is False
    """
    todo: set[ObjectID | RawObjectID] = set(shas)

    def _drain(source):
        # Yield what ``source`` has and tick each object off the todo set.
        for unpacked in source.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            todo.remove(sha_to_hex(unpacked.sha()))

    def _drain_packs(pack_iter):
        for p in pack_iter:
            try:
                yield from _drain(p)
            except PackFileDisappeared as exc:
                self._evict_pack(exc.obj)

    yield from _drain_packs(self._iter_cached_packs())
    # Maybe something else has added a pack with the object
    # in the mean time?
    yield from _drain_packs(self._update_pack_cache())
    for alternate in self.alternates:
        assert isinstance(alternate, PackBasedObjectStore)
        yield from _drain(alternate)

1398 

def iterobjects_subset(
    self, shas: Iterable[ObjectID], *, allow_missing: bool = False
) -> Iterator[ShaFile]:
    """Yield the objects for a subset of SHAs.

    Sources are consulted in this order: cached packs, packs found by a
    rescan, alternates, and finally loose storage.

    Args:
      shas: Iterable of object SHAs to retrieve
      allow_missing: If True, skip missing objects; if False, raise KeyError

    Returns:
      Iterator of ShaFile objects

    Raises:
      KeyError: If an object is missing and allow_missing is False
    """
    todo: set[ObjectID] = set(shas)

    def _drain(source):
        # Yield what ``source`` has and tick each object off the todo set.
        for found in source.iterobjects_subset(todo, allow_missing=True):
            yield found
            todo.remove(found.id)

    def _drain_packs(pack_iter):
        for p in pack_iter:
            try:
                yield from _drain(p)
            except PackFileDisappeared as exc:
                self._evict_pack(exc.obj)

    yield from _drain_packs(self._iter_cached_packs())
    # Maybe something else has added a pack with the object
    # in the mean time?
    yield from _drain_packs(self._update_pack_cache())
    for alternate in self.alternates:
        yield from _drain(alternate)
    # Whatever is still outstanding must be loose, or is missing.
    for oid in todo:
        loose_obj: ShaFile | None = self._get_loose_object(oid)
        if loose_obj is not None:
            yield loose_obj
        elif not allow_missing:
            raise KeyError(oid)

1443 

def get_unpacked_object(
    self, sha1: bytes, *, include_comp: bool = False
) -> UnpackedObject:
    """Fetch an object in unpacked form.

    Local packs are searched first, then alternates.

    Args:
      sha1: sha for the object.
      include_comp: Whether to include compression metadata.

    Raises:
      AssertionError: If ``sha1`` has neither the hex nor the binary length.
      KeyError: If no pack, local or alternate, has the object.
    """
    fmt = self.object_format
    given_len = len(sha1)
    if given_len == fmt.hex_length:
        sha = hex_to_sha(cast(ObjectID, sha1))
        hexsha = cast(ObjectID, sha1)
    elif given_len == fmt.oid_length:
        sha = cast(RawObjectID, sha1)
        hexsha = None
    else:
        raise AssertionError(f"Invalid object sha1 {sha1!r}")

    def _from_pack(p):
        return p.get_unpacked_object(sha, include_comp=include_comp)

    try:
        return self._lookup_in_packs(_from_pack)
    except KeyError:
        pass
    # Alternates are queried by hex sha.
    if hexsha is None:
        hexsha = sha_to_hex(sha)
    for alternate in self.alternates:
        assert isinstance(alternate, PackBasedObjectStore)
        try:
            return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
        except KeyError:
            continue
    raise KeyError(hexsha)

1476 

def add_objects(
    self,
    objects: Sequence[tuple[ShaFile, str | None]],
    progress: Callable[[str], None] | None = None,
) -> "Pack | None":
    """Write a collection of objects into this object store as a pack.

    Args:
      objects: Iterable over (object, path) tuples, should support
        __len__.
      progress: Optional progress reporting function.
    Returns: Pack object of the objects written.
    """
    # Paths are ignored; each object is converted lazily as the pack writer
    # consumes the generator.
    records = (full_unpacked_object(obj) for obj, _path in objects)
    return self.add_pack_data(len(objects), records, progress=progress)

1493 

1494 

1495class DiskObjectStore(PackBasedObjectStore): 

1496 """Git-style object store that exists on disk.""" 

1497 

1498 path: str | os.PathLike[str] 

1499 pack_dir: str | os.PathLike[str] 

1500 _alternates: "list[BaseObjectStore] | None" 

1501 _commit_graph: "CommitGraph | None" 

1502 

def __init__(
    self,
    path: str | os.PathLike[str],
    *,
    loose_compression_level: int = -1,
    pack_compression_level: int = -1,
    pack_index_version: int | None = None,
    pack_delta_window_size: int | None = None,
    pack_window_memory: int | None = None,
    pack_delta_cache_size: int | None = None,
    pack_depth: int | None = None,
    pack_threads: int | None = None,
    pack_big_file_threshold: int | None = None,
    packed_git_limit: int | None = None,
    delta_base_cache_limit: int | None = None,
    fsync_object_files: bool = False,
    pack_write_bitmaps: bool = False,
    pack_write_bitmap_hash_cache: bool = True,
    pack_write_bitmap_lookup_table: bool = True,
    file_mode: int | None = None,
    dir_mode: int | None = None,
    object_format: "ObjectFormat | None" = None,
) -> None:
    """Set up an on-disk object store rooted at ``path``.

    Args:
      path: Path of the object store.
      loose_compression_level: zlib compression level for loose objects
      pack_compression_level: zlib compression level for pack objects
      pack_index_version: pack index version to use (1, 2, or 3)
      pack_delta_window_size: sliding window size for delta compression
      pack_window_memory: memory limit for delta window operations
      pack_delta_cache_size: size of cache for delta operations
      pack_depth: maximum delta chain depth
      pack_threads: number of threads for pack operations
      pack_big_file_threshold: threshold for treating files as big
      packed_git_limit: maximum total bytes for mmapped pack files
      delta_base_cache_limit: maximum bytes for delta base object cache
      fsync_object_files: whether to fsync object files for durability
      pack_write_bitmaps: whether to write bitmap indexes for packs
      pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
      pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
      file_mode: File permission mask for shared repository
      dir_mode: Directory permission mask for shared repository
      object_format: Hash algorithm to use (SHA1 or SHA256)
    """
    # Import here to avoid circular dependency
    from .object_format import DEFAULT_OBJECT_FORMAT

    # A falsy object_format falls back to the default format.
    effective_format = object_format if object_format else DEFAULT_OBJECT_FORMAT
    super().__init__(
        pack_compression_level=pack_compression_level,
        pack_index_version=pack_index_version,
        pack_delta_window_size=pack_delta_window_size,
        pack_window_memory=pack_window_memory,
        pack_delta_cache_size=pack_delta_cache_size,
        pack_depth=pack_depth,
        pack_threads=pack_threads,
        pack_big_file_threshold=pack_big_file_threshold,
        packed_git_limit=packed_git_limit,
        delta_base_cache_limit=delta_base_cache_limit,
        object_format=effective_format,
    )

    # Locations
    self.path = path
    self.pack_dir = os.path.join(self.path, PACKDIR)
    # Alternates are read lazily on first access.
    self._alternates = None

    # Compression and index settings
    self.loose_compression_level = loose_compression_level
    self.pack_compression_level = pack_compression_level
    self.pack_index_version = pack_index_version

    # Durability and bitmap settings
    self.fsync_object_files = fsync_object_files
    self.pack_write_bitmaps = pack_write_bitmaps
    self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
    self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table

    # Permission masks
    self.file_mode = file_mode
    self.dir_mode = dir_mode

    # Commit graph support - lazy loaded, enabled by default
    self._commit_graph = None
    self._use_commit_graph = True

    # Multi-pack-index support - lazy loaded, enabled by default
    self._midx: MultiPackIndex | None = None
    self._use_midx = True

1585 

def __repr__(self) -> str:
    """Describe this store by class name and path.

    Returns:
      String representation including the store path
    """
    cls_name = self.__class__.__name__
    return f"<{cls_name}({self.path!r})>"

1593 

@classmethod
def from_config(
    cls,
    path: str | os.PathLike[str],
    config: "Config",
    *,
    file_mode: int | None = None,
    dir_mode: int | None = None,
) -> "DiskObjectStore":
    """Create a DiskObjectStore from a configuration object.

    Args:
      path: Path to the object store directory
      config: Configuration object to read settings from
      file_mode: Optional file permission mask for shared repository
      dir_mode: Optional directory permission mask for shared repository

    Returns:
      New DiskObjectStore instance configured according to config

    Raises:
      ValueError: If an integer setting is present but not a valid integer.
    """
    # Imported locally, as in the rest of this class.
    from .object_format import get_object_format

    def _int_option(
        section: bytes, name: bytes, default: "int | None"
    ) -> "int | None":
        """Return ``section.name`` parsed as an int, or *default* if unset."""
        try:
            return int(config.get((section,), name).decode())
        except KeyError:
            return default

    # core.compression is the fallback for both specific compression levels.
    default_compression_level = _int_option(b"core", b"compression", -1)
    loose_compression_level = _int_option(
        b"core", b"looseCompression", default_compression_level
    )
    # The option name is passed as bytes, like every other lookup here.
    pack_compression_level = _int_option(
        b"core", b"packCompression", default_compression_level
    )
    pack_index_version = _int_option(b"pack", b"indexVersion", None)

    # Read pack configuration options
    pack_delta_window_size = _int_option(b"pack", b"deltaWindowSize", None)
    pack_window_memory = _int_option(b"pack", b"windowMemory", None)
    pack_delta_cache_size = _int_option(b"pack", b"deltaCacheSize", None)
    pack_depth = _int_option(b"pack", b"depth", None)
    pack_threads = _int_option(b"pack", b"threads", None)
    pack_big_file_threshold = _int_option(b"pack", b"bigFileThreshold", None)

    # Read core.packedGitLimit and core.deltaBaseCacheLimit settings
    packed_git_limit = _int_option(b"core", b"packedGitLimit", None)
    delta_base_cache_limit = _int_option(b"core", b"deltaBaseCacheLimit", None)

    # Read core.commitGraph setting
    use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

    # Read core.multiPackIndex setting
    use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)

    # Read core.fsyncObjectFiles setting
    fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)

    # Read bitmap settings
    pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
    pack_write_bitmap_hash_cache = config.get_boolean(
        (b"pack",), b"writeBitmapHashCache", True
    )
    pack_write_bitmap_lookup_table = config.get_boolean(
        (b"pack",), b"writeBitmapLookupTable", True
    )
    # Also check repack.writeBitmaps for backwards compatibility
    if not pack_write_bitmaps:
        pack_write_bitmaps = config.get_boolean(
            (b"repack",), b"writeBitmaps", False
        )

    # Get hash algorithm from config. The object format is only looked up
    # when core.repositoryformatversion is 1; otherwise it stays None.
    object_format = None
    try:
        try:
            version = int(config.get((b"core",), b"repositoryformatversion"))
        except KeyError:
            version = 0
        if version == 1:
            try:
                object_format_name = config.get((b"extensions",), b"objectformat")
            except KeyError:
                object_format_name = b"sha1"
            object_format = get_object_format(object_format_name.decode("ascii"))
    except (KeyError, ValueError):
        # Unparseable version or unknown format: leave object_format unset.
        pass

    instance = cls(
        path,
        loose_compression_level=loose_compression_level,
        pack_compression_level=pack_compression_level,
        pack_index_version=pack_index_version,
        pack_delta_window_size=pack_delta_window_size,
        pack_window_memory=pack_window_memory,
        pack_delta_cache_size=pack_delta_cache_size,
        pack_depth=pack_depth,
        pack_threads=pack_threads,
        pack_big_file_threshold=pack_big_file_threshold,
        packed_git_limit=packed_git_limit,
        delta_base_cache_limit=delta_base_cache_limit,
        fsync_object_files=fsync_object_files,
        pack_write_bitmaps=pack_write_bitmaps,
        pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
        pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
        file_mode=file_mode,
        dir_mode=dir_mode,
        object_format=object_format,
    )
    # These two switches are not constructor arguments.
    instance._use_commit_graph = use_commit_graph
    instance._use_midx = use_midx
    return instance

1748 

@property
def alternates(self) -> list["BaseObjectStore"]:
    """Alternate object stores of this store, loaded on first access.

    The paths come from ``_read_alternate_paths()``; the resulting list is
    cached on the instance.

    Returns:
      List of DiskObjectStore instances for alternate object directories
    """
    if self._alternates is None:
        # Publish the list before filling it, one store per path.
        self._alternates = []
        for alt_path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(alt_path))
    return self._alternates

1764 

def _read_alternate_paths(self) -> Iterator[str]:
    """Yield the alternate object directories listed in ``info/alternates``.

    Lines starting with ``#`` and blank lines are skipped. Relative entries
    are resolved against this store's path. Yields nothing when the file
    does not exist.
    """
    try:
        f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
    except FileNotFoundError:
        return
    with f:
        for line in f.readlines():
            line = line.rstrip(b"\n")
            # A blank line would otherwise be joined onto self.path and make
            # this store an alternate of itself.
            if not line or line.startswith(b"#"):
                continue
            if os.path.isabs(line):
                yield os.fsdecode(line)
            else:
                yield os.fsdecode(os.path.join(os.fsencode(self.path), line))

1779 

def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
    """Add an alternate path to this object store.

    The path is appended to ``info/alternates`` (creating the info
    directory if needed) and, when the alternates list is already loaded,
    a store for it is added to that list.

    Args:
      path: Alternate object directory; a relative path is resolved
        against this store's path.
    """
    info_dir = os.path.join(self.path, INFODIR)
    try:
        os.mkdir(info_dir)
        if self.dir_mode is not None:
            os.chmod(info_dir, self.dir_mode)
    except FileExistsError:
        pass
    alternates_path = os.path.join(self.path, INFODIR, "alternates")
    mask = self.file_mode if self.file_mode is not None else 0o644
    with GitFile(alternates_path, "wb", mask=mask) as f:
        # Copy the existing entries, if any, before appending the new one.
        try:
            orig_f = open(alternates_path, "rb")
        except FileNotFoundError:
            pass
        else:
            with orig_f:
                f.write(orig_f.read())
        f.write(os.fsencode(path) + b"\n")

    # Only extend an already-loaded list. If alternates have not been read
    # yet, the first access reads the file written above, which already
    # contains the new path; appending as well would register it twice.
    if self._alternates is not None:
        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self._alternates.append(DiskObjectStore(path))

1804 

def _update_pack_cache(self) -> list[Pack]:
    """Synchronize the pack cache with the pack directory.

    Packs that appeared on disk are opened and cached; cached packs whose
    files are gone are closed and dropped. A pack counts only when both
    its ``.pack`` and ``.idx`` files are present.

    Returns:
      The newly opened packs (empty if the pack directory does not exist).
    """
    try:
        # A set makes the per-pack .idx membership test constant time.
        entries = set(os.listdir(self.pack_dir))
    except FileNotFoundError:
        return []
    pack_files = set()
    for name in entries:
        if name.startswith("pack-") and name.endswith(".pack"):
            # verify that idx exists first (otherwise the pack was not yet
            # fully written)
            idx_name = os.path.splitext(name)[0] + ".idx"
            if idx_name in entries:
                # Extract just the hash (remove "pack-" prefix and ".pack" suffix)
                pack_files.add(name[len("pack-") : -len(".pack")])

    # Open newly appeared pack files
    new_packs = []
    for pack_hash in pack_files:
        if pack_hash not in self._pack_cache:
            pack = Pack(
                os.path.join(self.pack_dir, "pack-" + pack_hash),
                object_format=self.object_format,
                delta_window_size=self.pack_delta_window_size,
                window_memory=self.pack_window_memory,
                delta_cache_size=self.pack_delta_cache_size,
                depth=self.pack_depth,
                threads=self.pack_threads,
                big_file_threshold=self.pack_big_file_threshold,
                delta_base_cache_limit=self.delta_base_cache_limit,
            )
            new_packs.append(pack)
            self._pack_cache[pack_hash] = pack
            self._mark_pack_used(pack_hash)
    # Remove disappeared pack files
    for f in set(self._pack_cache) - pack_files:
        self._pack_cache.pop(f).close()
        try:
            self._pack_access_order.remove(f)
        except ValueError:
            pass
    self._enforce_packed_git_limit()
    return new_packs

1849 

def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str:
    """Return the path of the loose-object file for ``sha`` in this store."""
    store_root = os.fspath(self.path)
    return hex_to_filename(store_root, sha)

1853 

def _iter_loose_objects(self) -> Iterator[ObjectID]:
    """Yield the SHA of every loose object found under the store path.

    Only two-character directory entries are scanned; names that do not
    form a valid hex SHA are ignored.
    """
    for fanout in os.listdir(self.path):
        if len(fanout) == 2:
            for tail in os.listdir(os.path.join(self.path, fanout)):
                candidate = os.fsencode(fanout + tail)
                if valid_hexsha(candidate):
                    yield ObjectID(candidate)

1863 

def count_loose_objects(self) -> int:
    """Count the loose objects stored under this object store's path.

    A file counts when its name has the expected length: the hex SHA
    length minus the two characters used for the directory name.

    Returns:
      Number of loose objects
    """
    expected_len = self.object_format.hex_length - 2
    if not os.path.exists(self.path):
        return 0

    total = 0
    for fanout in range(256):
        bucket = os.path.join(self.path, f"{fanout:02x}")
        try:
            names = os.listdir(bucket)
        except FileNotFoundError:
            # No directory for this prefix.
            continue
        total += sum(1 for name in names if len(name) == expected_len)
    return total

1888 

def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
    """Load the loose object identified by ``sha``.

    Returns:
      The parsed object, or None when no loose file exists for it.
    """
    path = self._get_shafile_path(sha)
    try:
        # ShaFile.from_path is handed a hex ID, so an ID of binary length
        # is converted first; anything else is treated as already hex.
        is_binary = len(sha) == self.object_format.oid_length
        hex_sha: ObjectID = (
            sha_to_hex(RawObjectID(sha)) if is_binary else ObjectID(sha)
        )
        return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
    except FileNotFoundError:
        return None

1901 

def delete_loose_object(self, sha: ObjectID) -> None:
    """Remove the loose-object file for ``sha`` from disk.

    Args:
      sha: ID of the object to delete

    Raises:
      FileNotFoundError: If the object file doesn't exist
    """
    loose_path = self._get_shafile_path(sha)
    os.remove(loose_path)

1912 

def get_object_mtime(self, sha: ObjectID) -> float:
    """Return the modification time of the file that stores an object.

    For a loose object this is the mtime of its own file; for a packed
    object it is the mtime of the containing pack's data file.

    Args:
      sha: ID of the object

    Returns:
      Modification time as seconds since epoch

    Raises:
      KeyError: if the object is not found
    """
    if self.contains_loose(sha):
        loose_path = self._get_shafile_path(sha)
        try:
            return os.path.getmtime(loose_path)
        except FileNotFoundError:
            # The loose file went away after the check; try the packs.
            pass

    for pack in self.packs:
        try:
            if sha not in pack:
                continue
            pack_path = pack._data_path
            try:
                return os.path.getmtime(pack_path)
            except (FileNotFoundError, AttributeError):
                pass
        except PackFileDisappeared:
            # Pack vanished while being consulted; keep looking.
            pass

    raise KeyError(sha)

1947 

1948 def _remove_pack(self, pack: Pack) -> None: 

1949 # _pack_cache is keyed by bare pack hash; pack._basename ends in 

1950 # "pack-<hash>", so drop the "pack-" prefix to match. 

1951 basename = os.path.basename(pack._basename) 

1952 assert basename.startswith("pack-"), f"unexpected pack basename {basename!r}" 

1953 pack_hash = basename[len("pack-") :] 

1954 self._pack_cache.pop(pack_hash, None) 

1955 try: 

1956 self._pack_access_order.remove(pack_hash) 

1957 except ValueError: 

1958 pass 

1959 # Store paths before closing to avoid re-opening files on Windows 

1960 data_path = pack._data_path 

1961 idx_path = pack._idx_path 

1962 pack.close() 

1963 os.remove(data_path) 

1964 if os.path.exists(idx_path): 

1965 os.remove(idx_path) 

1966 

def _get_pack_basepath(
    self, entries: Iterable[tuple[bytes, int, int | None]]
) -> str:
    """Return the extension-less path ``<pack_dir>/pack-<digest>`` for a pack.

    The digest is computed by ``iter_sha1`` over the first element of
    every entry.
    """
    object_ids = (entry[0] for entry in entries)
    # TODO: Handle self.pack_dir being bytes
    digest_hex = iter_sha1(object_ids).decode("ascii")
    return os.path.join(self.pack_dir, "pack-" + digest_hex)

1974 

def _complete_pack(
    self,
    f: BinaryIO,
    path: str,
    num_objects: int,
    indexer: PackIndexer,
    progress: Callable[..., None] | None = None,
    refs: dict[Ref, ObjectID] | None = None,
) -> Pack:
    """Move a specific file containing a pack into the pack directory.

    The pack is completed via ``extend_pack``, flushed and closed, renamed
    to its final ``pack-<hash>.pack`` name, indexed, optionally given a
    bitmap, validated, and finally registered in the pack cache.

    Note: The file should be on the same file system as the
        packs directory.

    Args:
      f: Open file object for the pack. It is closed by this method.
      path: Path to the pack file.
      num_objects: Number of objects in the pack (used only in progress
        messages).
      indexer: A PackIndexer for indexing the pack.
      progress: Optional progress reporting function.
      refs: Optional dictionary of refs for bitmap generation.

    Returns:
      The already-known pack with the same base name if there is one,
      otherwise the newly added Pack.

    Raises:
      OSError: If fsync is requested but ``f`` has no ``fileno()``.
    """
    entries = []
    for i, entry in enumerate(indexer):
        if progress is not None:
            progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
        entries.append(entry)

    # extend_pack is handed the indexer's external refs and get_raw, and
    # reports the pack checksum plus entries for whatever it added.
    pack_sha, extra_entries = extend_pack(
        f,
        set(indexer.ext_refs()),
        get_raw=self.get_raw,
        compression_level=self.pack_compression_level,
        progress=progress,
        object_format=self.object_format,
    )
    f.flush()
    if self.fsync_object_files:
        try:
            fileno = f.fileno()
        except AttributeError as e:
            raise OSError("fsync requested but file has no fileno()") from e
        else:
            os.fsync(fileno)
    # Close before renaming so no handle is open on the file being moved.
    f.close()

    entries.extend(extra_entries)

    # Move the pack in.
    entries.sort()
    pack_base_name = self._get_pack_basepath(entries)

    # An identical pack is already present: reuse it.
    # NOTE(review): on this path the temporary file at ``path`` is neither
    # renamed nor removed here - confirm that callers / prune() clean it up.
    for pack in self.packs:
        if pack._basename == pack_base_name:
            return pack

    target_pack_path = pack_base_name + ".pack"
    target_index_path = pack_base_name + ".idx"
    if sys.platform == "win32":
        # Windows might have the target pack file lingering.  Attempt
        # removal, silently passing if the target does not exist.
        with suppress(FileNotFoundError):
            os.remove(target_pack_path)
    os.rename(path, target_pack_path)

    # Write the index.
    mask = self.file_mode if self.file_mode is not None else PACK_MODE
    with GitFile(
        target_index_path,
        "wb",
        mask=mask,
        fsync=self.fsync_object_files,
    ) as index_file:
        write_pack_index(
            index_file, entries, pack_sha, version=self.pack_index_version
        )

    # Generate bitmap if configured and refs are available
    if self.pack_write_bitmaps and refs:
        from .bitmap import generate_bitmap, write_bitmap
        from .pack import load_pack_index_file

        if progress:
            progress("Generating bitmap index\r".encode("ascii"))

        # Load the index we just wrote
        with open(target_index_path, "rb") as idx_file:
            pack_index = load_pack_index_file(
                os.path.basename(target_index_path),
                idx_file,
                self.object_format,
            )

        # Generate the bitmap
        bitmap = generate_bitmap(
            pack_index=pack_index,
            object_store=self,
            refs=refs,
            pack_checksum=pack_sha,
            include_hash_cache=self.pack_write_bitmap_hash_cache,
            include_lookup_table=self.pack_write_bitmap_lookup_table,
            # Forward str messages to the caller's progress callback as
            # ASCII bytes; anything else is dropped.
            progress=lambda msg: (
                progress(msg.encode("ascii"))
                if progress and isinstance(msg, str)
                else None
            ),
        )

        # Write the bitmap
        target_bitmap_path = pack_base_name + ".bitmap"
        write_bitmap(target_bitmap_path, bitmap)

        if progress:
            progress("Bitmap index written\r".encode("ascii"))

    # Add the pack to the store and return it.
    final_pack = Pack(
        pack_base_name,
        object_format=self.object_format,
        delta_window_size=self.pack_delta_window_size,
        window_memory=self.pack_window_memory,
        delta_cache_size=self.pack_delta_cache_size,
        depth=self.pack_depth,
        threads=self.pack_threads,
        big_file_threshold=self.pack_big_file_threshold,
        delta_base_cache_limit=self.delta_base_cache_limit,
    )
    try:
        final_pack.check_length_and_checksum()
        # Materialise every object so payloads that fail to parse
        # (e.g. tree entries with garbage modes) are rejected rather
        # than silently landed on disk.  MemoryObjectStore already
        # validates ingested objects this way via PackInflater; without
        # the same check DiskObjectStore was strictly weaker.
        for _obj in PackInflater.for_pack_data(
            final_pack.data, resolve_ext_ref=self.get_raw
        ):
            pass
    except BaseException:
        # Validation failed (or was interrupted): remove everything that
        # was written for this pack, then re-raise.
        final_pack.close()
        with suppress(FileNotFoundError):
            os.remove(target_pack_path)
        with suppress(FileNotFoundError):
            os.remove(target_index_path)
        if self.pack_write_bitmaps and refs:
            with suppress(FileNotFoundError):
                os.remove(pack_base_name + ".bitmap")
        raise
    # Extract just the hash from pack_base_name (/path/to/pack-HASH -> HASH)
    pack_hash = os.path.basename(pack_base_name)[len("pack-") :]
    self._add_cached_pack(pack_hash, final_pack)
    return final_pack

2127 

def add_thin_pack(
    self,
    read_all: Callable[[int], bytes],
    read_some: Callable[[int], bytes] | None,
    progress: Callable[..., None] | None = None,
) -> "Pack":
    """Add a new thin pack to this object store.

    Thin packs are packs that contain deltas with parents that exist
    outside the pack. They should never be placed in the object store
    directly, and always indexed and completed as they are copied.

    The incoming stream is copied into a ``tmp_pack_*`` file inside the
    object directory, verified, and then handed to ``_complete_pack``.
    If anything fails along the way the temporary file is removed before
    the exception propagates.

    Args:
      read_all: Read function that blocks until the number of
        requested bytes are read.
      read_some: Read function that returns at least one byte, but may
        not return the number of bytes requested.
      progress: Optional progress reporting function.
    Returns: A Pack object pointing at the now-completed thin pack in the
        objects/pack directory.
    """
    import tempfile

    fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
    # Honour the configured file mode, exactly as add_pack() and
    # add_object() do, so thin packs do not end up with different
    # permissions than every other file written by this store.
    mask = self.file_mode if self.file_mode is not None else PACK_MODE
    try:
        with os.fdopen(fd, "w+b") as f:
            os.chmod(path, mask)
            indexer = PackIndexer(
                f,
                self.object_format.hash_func,
                resolve_ext_ref=self.get_raw,
            )
            copier = PackStreamCopier(
                self.object_format.hash_func,
                read_all,
                read_some,
                f,
                delta_iter=indexer,  # type: ignore[arg-type]
            )
            copier.verify(progress=progress)
            return self._complete_pack(
                f, path, len(copier), indexer, progress=progress
            )
    except BaseException:
        # The file object is closed by now.  Best-effort removal of the
        # temporary file: it may already have been renamed into place or
        # cleaned up by _complete_pack, and a cleanup problem must never
        # mask the original error.
        try:
            os.remove(path)
        except OSError:
            pass
        raise

2168 

def add_pack(
    self,
) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
    """Start adding a new pack to this object store.

    A temporary ``.pack`` file is created in the pack directory for the
    caller to write into.

    Returns: Fileobject to write to, a commit function to
        call when the pack is finished and an abort
        function.
    """
    import tempfile

    fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
    f = os.fdopen(fd, "w+b")
    mask = PACK_MODE if self.file_mode is None else self.file_mode
    os.chmod(path, mask)

    def _discard() -> None:
        # Close and delete the temporary file.
        f.close()
        os.remove(path)

    def commit() -> "Pack | None":
        # Nothing was written: throw the empty file away.
        if f.tell() <= 0:
            _discard()
            return None

        f.seek(0)
        with PackData(path, file=f, object_format=self.object_format) as pd:
            indexer = PackIndexer.for_pack_data(
                pd,
                resolve_ext_ref=self.get_raw,
            )
            return self._complete_pack(f, path, len(pd), indexer)  # type: ignore[arg-type]

    def abort() -> None:
        _discard()

    return f, commit, abort  # type: ignore[return-value]

2205 

def add_object(self, obj: ShaFile) -> None:
    """Write a single object to this store as a loose object.

    Nothing is written when a file for the object already exists.

    Args:
      obj: Object to add
    """
    # The ID is computed with this store's object format.
    obj_id = ObjectID(obj.get_id(self.object_format))
    path = self._get_shafile_path(obj_id)
    fanout_dir = os.path.dirname(path)
    try:
        os.mkdir(fanout_dir)
        # Apply the configured directory mode to freshly created
        # directories only.
        if self.dir_mode is not None:
            os.chmod(fanout_dir, self.dir_mode)
    except FileExistsError:
        pass

    if os.path.exists(path):
        # Already there, no need to write again
        return

    mask = PACK_MODE if self.file_mode is None else self.file_mode
    with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as out:
        out.write(
            obj.as_legacy_object(compression_level=self.loose_compression_level)
        )

2229 

@classmethod
def init(
    cls,
    path: str | os.PathLike[str],
    *,
    file_mode: int | None = None,
    dir_mode: int | None = None,
    object_format: "ObjectFormat | None" = None,
) -> "DiskObjectStore":
    """Create the on-disk layout for a new object store and open it.

    The top-level directory may already exist; its ``info`` and pack
    sub-directories are always created.

    Args:
      path: Path where the object store should be created
      file_mode: Optional file permission mask for shared repository
      dir_mode: Optional directory permission mask for shared repository
      object_format: Hash algorithm to use (SHA1 or SHA256)

    Returns:
      New DiskObjectStore instance
    """
    try:
        os.mkdir(path)
        if dir_mode is not None:
            os.chmod(path, dir_mode)
    except FileExistsError:
        # An existing top-level directory is accepted as-is.
        pass

    subdirs = (os.path.join(path, "info"), os.path.join(path, PACKDIR))
    for subdir in subdirs:
        os.mkdir(subdir)
    if dir_mode is not None:
        for subdir in subdirs:
            os.chmod(subdir, dir_mode)

    return cls(
        path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format
    )

2268 

def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
    """Iterate over all object SHAs with the given prefix.

    Loose objects are listed first, then packs, then alternates; each
    SHA is yielded at most once.

    Args:
      prefix: Hex prefix to search for (as bytes)

    Returns:
      Iterator of object SHAs (as ObjectID) matching the prefix
    """
    # Too short to pick a fan-out directory: defer to the parent class.
    if len(prefix) < 2:
        yield from super().iter_prefix(prefix)
        return

    seen = set()
    fanout = prefix[:2].decode()
    remainder = prefix[2:].decode()
    try:
        for name in os.listdir(os.path.join(self.path, fanout)):
            if not name.startswith(remainder):
                continue
            sha = ObjectID(os.fsencode(fanout + name))
            if sha in seen:
                continue
            seen.add(sha)
            yield sha
    except FileNotFoundError:
        pass

    for pack in self.packs:
        # The pack index is queried with whole bytes only, so an odd
        # trailing hex digit is dropped here and re-checked below.
        whole_bytes_hex = prefix if len(prefix) % 2 == 0 else prefix[:-1]
        bin_prefix = binascii.unhexlify(whole_bytes_hex)
        for bin_sha in pack.index.iter_prefix(bin_prefix):
            sha = sha_to_hex(bin_sha)
            if not sha.startswith(prefix) or sha in seen:
                continue
            seen.add(sha)
            yield sha

    for alternate in self.alternates:
        for sha in alternate.iter_prefix(prefix):
            if sha in seen:
                continue
            seen.add(sha)
            yield sha

2310 

def get_commit_graph(self) -> "CommitGraph | None":
    """Return the commit graph of this object store, loading it on demand.

    Returns:
      CommitGraph object if available, None otherwise
    """
    if not self._use_commit_graph:
        return None
    if self._commit_graph is not None:
        return self._commit_graph

    from .commit_graph import read_commit_graph

    # The graph file lives in the "info" directory of the object store.
    graph_file = os.path.join(self.path, "info", "commit-graph")
    if os.path.exists(graph_file):
        self._commit_graph = read_commit_graph(graph_file)
    return self._commit_graph

2328 

def get_midx(self) -> MultiPackIndex | None:
    """Return the multi-pack-index of this object store, loading it on demand.

    Returns:
      MultiPackIndex object if available, None otherwise

    Raises:
      ValueError: If MIDX file is corrupt
      OSError: If MIDX file cannot be read
    """
    if not self._use_midx:
        return None
    if self._midx is not None:
        return self._midx

    # The MIDX file lives in the pack directory.
    midx_file = os.path.join(self.pack_dir, "multi-pack-index")
    if os.path.exists(midx_file):
        self._midx = load_midx(midx_file)
    return self._midx

2348 

2349 def _get_pack_by_name(self, pack_name: str) -> Pack: 

2350 """Get a pack referenced by a multi-pack-index entry. 

2351 

2352 Args: 

2353 pack_name: Pack file name as stored in the MIDX, of the form 

2354 ``pack-<hash>.idx``. 

2355 

2356 Returns: 

2357 Pack object 

2358 

2359 Raises: 

2360 KeyError: If pack doesn't exist 

2361 """ 

2362 assert pack_name.startswith("pack-") and pack_name.endswith(".idx"), ( 

2363 f"unexpected MIDX pack name {pack_name!r}" 

2364 ) 

2365 pack_hash = pack_name[len("pack-") : -len(".idx")] 

2366 

2367 try: 

2368 return self._pack_cache[pack_hash] 

2369 except KeyError: 

2370 pass 

2371 

2372 pack_path = os.path.join(self.pack_dir, "pack-" + pack_hash) 

2373 if not os.path.exists(pack_path + ".pack"): 

2374 raise KeyError(f"Pack {pack_name} not found") 

2375 

2376 pack = Pack( 

2377 pack_path, 

2378 object_format=self.object_format, 

2379 delta_window_size=self.pack_delta_window_size, 

2380 window_memory=self.pack_window_memory, 

2381 delta_cache_size=self.pack_delta_cache_size, 

2382 depth=self.pack_depth, 

2383 threads=self.pack_threads, 

2384 big_file_threshold=self.pack_big_file_threshold, 

2385 delta_base_cache_limit=self.delta_base_cache_limit, 

2386 ) 

2387 self._pack_cache[pack_hash] = pack 

2388 self._mark_pack_used(pack_hash) 

2389 return pack 

2390 

def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
    """Check if a particular object is present by SHA and is packed.

    The MIDX is consulted first when available; otherwise (or on a miss)
    the individual pack indexes are checked by the parent class.

    Args:
      sha: SHA of the object (20/32 bytes binary or 40/64 bytes hex)

    Returns:
      True if the object is in a pack file
    """
    # Convert hex to binary once up front: the MIDX lookup is done with
    # the binary form, and the per-pack fallback receives it too.
    if len(sha) == self.object_format.hex_length:
        sha = hex_to_sha(cast(ObjectID, sha))

    midx = self.get_midx()
    if midx is None or sha not in midx:
        # Fall back to checking individual packs
        return super().contains_packed(sha)
    return True

2415 

def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
    """Obtain the raw fulltext for an object.

    The MIDX, when available, is used to locate the containing pack
    directly; otherwise the parent implementation performs the lookup.

    Args:
      name: SHA for the object (20/32 bytes binary or 40/64 bytes hex)

    Returns:
      Tuple with numeric type and object contents

    Raises:
      KeyError: If object not found
    """
    name_len = len(name)
    sha: RawObjectID
    if name_len in (40, 64):
        # Hex ID (SHA1 or SHA256): convert to binary.
        sha = hex_to_sha(cast(ObjectID, name))
    elif name_len in (20, 32):
        # Already a binary ID (SHA1 or SHA256).
        sha = RawObjectID(name)
    else:
        raise AssertionError(f"Invalid object name {name!r}")

    midx = self.get_midx()
    located = midx.object_offset(sha) if midx is not None else None
    if located is not None:
        pack_name, _offset = located
        try:
            return self._get_pack_by_name(pack_name).get_raw(sha)
        except (KeyError, PackFileDisappeared):
            # Pack disappeared or object not found: use the standard lookup.
            pass

    return super().get_raw(name)

2457 

def write_midx(self) -> bytes:
    """Write a multi-pack-index file covering the packs of this store.

    Returns:
      Checksum of the written MIDX file, or 20 zero bytes when there is
      no pack to index (in which case nothing is written).

    Raises:
      OSError: If the pack directory doesn't exist or MIDX can't be written
    """
    from .midx import write_midx_file

    midx_path = os.path.join(self.pack_dir, "multi-pack-index")
    pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
    for pack in self.packs:
        try:
            entries = list(pack.index.iterentries())
        except PackFileDisappeared as exc:
            # The pack vanished while being read (e.g. a concurrent
            # ``git repack``); evict it and index the remaining packs.
            self._evict_pack(exc.obj)
        else:
            idx_name = os.path.basename(pack._basename) + ".idx"
            pack_entries.append((idx_name, entries))

    if pack_entries:
        return write_midx_file(midx_path, pack_entries)
    return b"\x00" * 20

2485 

def write_commit_graph(
    self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
) -> None:
    """Write a commit graph file for this object store.

    Args:
      refs: Commit IDs to start from. If None, every commit object found
        in the object store is used.
      reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.
    """
    from .commit_graph import get_reachable_commits

    if refs is not None:
        all_refs = list(refs)
    else:
        # No starting points given: scan the whole store for commits.
        all_refs = []
        for sha in self:
            try:
                obj = self[sha]
                if obj.type_name == b"commit":
                    all_refs.append(sha)
            except KeyError:
                continue

    if not all_refs:
        return  # No commits to include

    if reachable:
        commit_ids = get_reachable_commits(self, all_refs)
    else:
        # Use the refs themselves, converting binary IDs to hex and
        # leaving everything else untouched.
        hex_len = self.object_format.hex_length
        raw_len = self.object_format.oid_length
        commit_ids = []
        for ref in all_refs:
            is_raw = (
                isinstance(ref, bytes)
                and len(ref) != hex_len
                and len(ref) == raw_len
            )
            if is_raw:
                from .objects import sha_to_hex

                commit_ids.append(sha_to_hex(RawObjectID(ref)))
            else:
                commit_ids.append(ref)

    if not commit_ids:
        return

    from .commit_graph import generate_commit_graph

    graph = generate_commit_graph(self, commit_ids)

    if graph.entries:
        # Make sure the info directory exists before writing into it.
        info_dir = os.path.join(self.path, "info")
        os.makedirs(info_dir, exist_ok=True)
        if self.dir_mode is not None:
            os.chmod(info_dir, self.dir_mode)

        graph_path = os.path.join(info_dir, "commit-graph")
        mask = 0o644 if self.file_mode is None else self.file_mode
        with GitFile(graph_path, "wb", mask=mask) as out:
            # GitFile in write mode always returns _GitFile
            assert isinstance(out, _GitFile)
            graph.write_to_file(out)

    # Drop the cached commit graph so it gets reloaded on next access.
    self._commit_graph = None

2562 

def prune(self, grace_period: int | None = None) -> None:
    """Prune/clean up this object store.

    This removes temporary files that were left behind by interrupted
    pack operations.  These are files that start with ``tmp_pack_`` in the
    repository directory or files with .pack extension but no corresponding
    .idx file in the pack directory.

    Files that disappear while pruning is in progress (for example a
    temporary pack that a concurrent writer has just renamed into place)
    are silently skipped.

    Args:
      grace_period: Grace period in seconds for removing temporary files.
                   If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
    """
    import glob

    if grace_period is None:
        grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
    max_age = grace_period

    def _remove_if_stale(file_path: str) -> None:
        # Delete file_path when it is older than the grace period.  The
        # file may vanish between being listed and being examined, which
        # is not an error for a clean-up pass.
        try:
            if time.time() - os.path.getmtime(file_path) > max_age:
                os.remove(file_path)
        except FileNotFoundError:
            pass

    # Clean up tmp_pack_* files in the repository directory
    for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
        _remove_if_stale(tmp_file)

    # Clean up orphaned .pack files without corresponding .idx files
    try:
        pack_dir_contents = os.listdir(self.pack_dir)
    except FileNotFoundError:
        return

    idx_bases = {
        name[: -len(".idx")] for name in pack_dir_contents if name.endswith(".idx")
    }
    for name in pack_dir_contents:
        if name.endswith(".pack") and name[: -len(".pack")] not in idx_bases:
            _remove_if_stale(os.path.join(self.pack_dir, name))

2612 

def close(self) -> None:
    """Close the object store and release its resources.

    Closes the loaded MIDX (if any) and the loaded alternates (if any),
    then lets the parent class close the pack files.  Both references
    are reset to None afterwards, so repeated calls skip them.
    """
    midx = self._midx
    if midx is not None:
        midx.close()
        self._midx = None

    alternates = self._alternates
    if alternates is not None:
        for alternate in alternates:
            alternate.close()
        self._alternates = None

    # The parent class takes care of the pack files.
    super().close()

2632 

2633 

class MemoryObjectStore(PackCapableObjectStore):
    """Object store that keeps all objects in an in-process dictionary."""

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Create an empty in-memory object store.

        Args:
            object_format: Hash algorithm to use (defaults to SHA1)
        """
        super().__init__(object_format=object_format)
        # Maps hex object ID -> stored object.
        self._data: dict[ObjectID, ShaFile] = {}
        self.pack_compression_level = -1

    def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Normalise ``sha`` to hex form; raise ValueError on a bad length."""
        sha_len = len(sha)
        if sha_len == self.object_format.hex_length:
            return cast(ObjectID, sha)
        if sha_len == self.object_format.oid_length:
            return sha_to_hex(cast(RawObjectID, sha))
        raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Return True if the object is stored here (all objects are loose)."""
        hex_sha = self._to_hexsha(sha)
        return hex_sha in self._data

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Always False: this store never holds packed objects."""
        return False

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data)

    @property
    def packs(self) -> list[Pack]:
        """List of pack objects; always empty for this store."""
        return []

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        hex_sha = self._to_hexsha(name)
        obj = self[hex_sha]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile:
        """Retrieve an object by SHA.

        Args:
          name: SHA of the object (hex or binary form)

        Returns:
          Copy of the ShaFile object

        Raises:
          KeyError: If the object is not found
        """
        stored = self._data[self._to_hexsha(name)]
        return stored.copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        hex_sha = self._to_hexsha(name)
        del self._data[hex_sha]

    def add_object(self, obj: ShaFile) -> None:
        """Store a copy of ``obj`` under its ID."""
        self._data[obj.id] = obj.copy()

    def add_objects(
        self,
        objects: Iterable[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress reporting function (not used here).
        """
        for obj, _path in objects:
            self.add_object(obj)

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to, a commit function to call when
            the pack is finished, and an abort function.
        """
        from tempfile import SpooledTemporaryFile

        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            size = f.tell()
            if size <= 0:
                # Nothing was written.
                f.close()
                return

            f.seek(0)
            pack_data = PackData.from_file(f, self.object_format, size)
            try:
                # Verify the trailing pack checksum before extracting
                # objects.  Extraction walks objects by offset and never
                # reads the trailer, so without this check a pack whose
                # final bytes were lost would be accepted silently.
                # ``add_thin_pack`` gets the same guarantee from
                # ``PackStreamCopier.verify``.
                pack_data.check()
                for obj in PackInflater.for_pack_data(pack_data, self.get_raw):
                    self.add_object(obj)
            finally:
                pack_data.close()
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort  # type: ignore[return-value]

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add pack data to this object store.

        Args:
          count: Number of items to add
          unpacked_objects: Iterator of UnpackedObject instances
          progress: Optional progress reporting function.
        """
        if count == 0:
            return

        # This store cannot keep pack files, so the objects are written
        # to a temporary pack and then extracted one by one by commit(),
        # which lets PackInflater resolve any deltas.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                object_format=self.object_format,
            )
        except BaseException:
            abort()
            raise
        commit()

    def add_thin_pack(
        self,
        read_all: Callable[[int], bytes],
        read_some: Callable[[int], bytes] | None,
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
          read_all: Read function that blocks until the number of
            requested bytes are read.
          read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
          progress: Optional progress reporting function.
        """
        f, commit, abort = self.add_pack()
        try:
            PackStreamCopier(
                self.object_format.hash_func,
                read_all,
                read_some,
                f,
            ).verify()
        except BaseException:
            abort()
            raise
        commit()

2829 

2830 

class ObjectIterator(Protocol):
    """Protocol for anything that can enumerate the objects it holds."""

    def iterobjects(self) -> Iterator[ShaFile]:
        """Produce every object held by the implementer.

        Returns:
            Iterator of ShaFile objects

        Raises:
            NotImplementedError: Always, unless overridden.
        """
        raise NotImplementedError(self.iterobjects)

2841 

2842 

def tree_lookup_path(
    lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile],
    root_sha: ObjectID | RawObjectID,
    path: bytes,
) -> tuple[int, ObjectID]:
    """Resolve a path inside a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup

    Returns: A tuple of (mode, SHA) of the resulting path.

    Raises:
        NotTreeError: If root_sha does not resolve to a Tree.
    """
    root = lookup_obj(root_sha)
    if isinstance(root, Tree):
        # The tree object performs the actual walk, fetching subtrees
        # through the same callback.
        return root.lookup_path(lookup_obj, path)
    raise NotTreeError(root_sha)

2860 

2861 

def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Add the SHA1s of everything below a tree to ``kset``.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories;
            modified in place
    """
    tree = obj_store[tree_sha]
    assert isinstance(tree, Tree)
    for _name, entry_mode, entry_sha in tree.iteritems():
        assert entry_mode is not None
        assert entry_sha is not None
        # Gitlink entries are not recorded, and anything already in kset
        # is not descended into a second time.
        if S_ISGITLINK(entry_mode) or entry_sha in kset:
            continue
        kset.add(entry_sha)
        if stat.S_ISDIR(entry_mode):
            _collect_filetree_revs(obj_store, entry_sha, kset)

2881 

2882 

2883def _split_commits_and_tags( 

2884 obj_store: ObjectContainer, 

2885 lst: Iterable[ObjectID], 

2886 *, 

2887 unknown: str = "error", 

2888) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]: 

2889 """Split object id list into three lists with commit, tag, and other SHAs. 

2890 

2891 Commits referenced by tags are included into commits 

2892 list as well. Only SHA1s known in this repository will get 

2893 through, controlled by the unknown parameter. 

2894 

2895 Args: 

2896 obj_store: Object store to get objects by SHA1 from 

2897 lst: Collection of commit and tag SHAs 

2898 unknown: How to handle unknown objects: "error", "warn", or "ignore" 

2899 Returns: A tuple of (commits, tags, others) SHA1s 

2900 """ 

2901 import logging 

2902 

2903 if unknown not in ("error", "warn", "ignore"): 

2904 raise ValueError( 

2905 f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}" 

2906 ) 

2907 

2908 commits: set[ObjectID] = set() 

2909 tags: set[ObjectID] = set() 

2910 others: set[ObjectID] = set() 

2911 for e in lst: 

2912 try: 

2913 o = obj_store[e] 

2914 except KeyError: 

2915 if unknown == "error": 

2916 raise 

2917 elif unknown == "warn": 

2918 logging.warning( 

2919 "Object %s not found in object store", e.decode("ascii") 

2920 ) 

2921 # else: ignore 

2922 else: 

2923 if isinstance(o, Commit): 

2924 commits.add(e) 

2925 elif isinstance(o, Tag): 

2926 tags.add(e) 

2927 tagged = o.object[1] 

2928 c, t, os = _split_commits_and_tags(obj_store, [tagged], unknown=unknown) 

2929 commits |= c 

2930 tags |= t 

2931 others |= os 

2932 else: 

2933 others.add(e) 

2934 return (commits, tags, others) 

2935 

2936 

class MissingObjectFinder:
    """Iterator over the objects that are missing from another object store.

    Args:
        object_store: Object store containing at least all objects to be
            sent
        haves: SHA1s of commits not to send (already present in target)
        wants: SHA1s of commits to send
        progress: Optional function to report progress to.
        get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(
        self,
        object_store: BaseObjectStore,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[[bytes], None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
    ) -> None:
        """Work out the initial set of objects to send.

        Args:
            object_store: Object store containing objects
            haves: SHA1s of objects already present in target
            wants: SHA1s of objects to send
            shallow: Set of shallow commit SHA1s
            progress: Optional progress reporting callback
            get_tagged: Function returning dict of pointed-to sha -> tag sha
            get_parents: Function for getting commit parents

        Raises:
            KeyError: If one of ``wants`` is not in the object store.
        """
        self.object_store = object_store
        if shallow is None:
            shallow = set()
        self._get_parents = get_parents
        reachability = object_store.get_reachability_provider()

        # Commits and tags are processed differently. Unknown haves are
        # silently dropped (the other side may know objects we do not);
        # unknown wants are an error.
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, unknown="ignore"
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, unknown="error"
        )

        # Commits that shall not be sent: everything reachable from haves.
        all_ancestors = reachability.get_reachable_commits(
            have_commits, exclude=None, shallow=shallow
        )

        # missing_commits: commits between haves and wants.
        # common_commits: boundary commits met directly while walking wants.
        # _collect_ancestors is used because it records members of
        # all_ancestors that are reached from wants without walking past
        # them, which the reachability abstraction alone does not express.
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            frozenset(all_ancestors),
            shallow=frozenset(shallow),
            get_parents=self._get_parents,
        )

        # Boundary commits and the trees/blobs below them are known to be on
        # both sides, so they must never be selected for sending.
        self.remote_has: set[ObjectID] = set()
        for boundary in common_commits:
            self.remote_has.add(boundary)
            boundary_commit = object_store[boundary]
            assert isinstance(boundary_commit, Commit)
            self.remote_has.update(
                reachability.get_tree_objects([boundary_commit.tree])
            )

        # Tags the other side has count as visited, too.
        self.remote_has.update(have_tags)
        self.sha_done = set(self.remote_has)

        # What is actually wanted: the missing commits, tags and others.
        self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
            (commit_sha, None, Commit.type_num, False)
            for commit_sha in missing_commits
        }
        self.objects_to_send.update(
            {(tag_sha, None, Tag.type_num, False) for tag_sha in want_tags - have_tags}
        )
        self.objects_to_send.update(
            {(other_sha, None, None, False) for other_sha in want_others - have_others}
        )

        self.progress: Callable[[bytes], None] = (
            progress if progress is not None else (lambda x: None)
        )
        tagged = get_tagged() if get_tagged else None
        self._tagged = tagged or {}

    def get_remote_has(self) -> set[ObjectID]:
        """Return the SHAs known to be present on the remote side.

        Returns:
            Set of SHA1s that the remote side already has
        """
        return self.remote_has

    def add_todo(
        self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
    ) -> None:
        """Queue entries whose SHA has not been handled yet.

        Args:
            entries: Iterable of tuples (sha, name, type_num, is_leaf)
        """
        fresh = [entry for entry in entries if entry[0] not in self.sha_done]
        self.objects_to_send.update(fresh)

    def __next__(self) -> tuple[ObjectID, PackHint | None]:
        """Return the next object to send.

        Returns:
            Tuple of (sha, pack_hint)

        Raises:
            StopIteration: When no more objects to send
        """
        # Pop queued entries until one turns up that was not handled before.
        while self.objects_to_send:
            sha, name, type_num, leaf = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        else:
            self.progress(
                f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
            )
            raise StopIteration

        if not leaf:
            # Non-leaf entries may reference further objects; queue those.
            obj = self.object_store[sha]
            if isinstance(obj, Commit):
                self.add_todo([(obj.tree, b"", Tree.type_num, False)])
            elif isinstance(obj, Tree):
                children = []
                for entry_name, entry_mode, entry_sha in obj.iteritems():
                    assert entry_mode is not None
                    assert entry_name is not None
                    assert entry_sha is not None
                    if S_ISGITLINK(entry_mode):
                        continue
                    hint_type = (
                        Blob.type_num if stat.S_ISREG(entry_mode) else Tree.type_num
                    )
                    children.append(
                        (
                            entry_sha,
                            entry_name,
                            hint_type,
                            not stat.S_ISDIR(entry_mode),
                        )
                    )
                self.add_todo(children)
            elif isinstance(obj, Tag):
                self.add_todo(
                    [(obj.object[1], None, obj.object[0].type_num, False)]
                )

        # Also queue the tag registered for this object in the get_tagged map.
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, None, True)])

        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))

        pack_hint = None if type_num is None else (type_num, name)
        return (sha, pack_hint)

    def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Return the iterator over objects to send.

        Returns:
            Self (this class implements the iterator protocol)
        """
        return self

3118 

3119 

class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    heads: set[ObjectID]
    """Revisions without descendants in the local repo."""

    get_parents: Callable[[ObjectID], list[ObjectID]]
    """Function to retrieve parents in the local repo."""

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents: Callable[[ObjectID], list[ObjectID]],
        shallow: set[ObjectID] | None = None,
        update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
        | None = None,
    ) -> None:
        """Set up a walker starting at the given heads.

        Args:
            local_heads: Heads to start search with
            get_parents: Function for finding the parents of a SHA1.
            shallow: Set of shallow commits.
            update_shallow: Function to update shallow commits.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Parents of every commit handed out by next(); an entry is reset to
        # None once ack() has consumed it.
        self.parents: dict[ObjectID, list[ObjectID] | None] = {}
        self.shallow = set() if shallow is None else shallow
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source.

        Raises:
            ValueError: If ``sha`` is not 40 bytes long.
        """
        if len(sha) != 40:
            # TODO: support SHA256
            raise ValueError(f"unexpected sha {sha!r} received")

        frontier = {sha}
        # Stop as soon as there are no heads left to remove.
        while self.heads:
            self.heads.difference_update(frontier)

            # Step one generation back using only the parents already
            # recorded by next().
            older: set[ObjectID] = set()
            for commit in frontier:
                recorded = self.parents.get(commit)
                if recorded is not None:
                    older.update(recorded)
                self.parents[commit] = None

            if not older:
                # No more ancestors known; nothing further to drop.
                break
            frontier = older

    def next(self) -> ObjectID | None:
        """Iterate over ancestors of heads in the target.

        Returns:
            A head that has not been handed out yet, or None when there
            are no heads left or the popped head's parents cannot be
            looked up.
        """
        if not self.heads:
            return None
        head = self.heads.pop()
        try:
            head_parents = self.get_parents(head)
        except KeyError:
            return None
        self.parents[head] = head_parents
        self.heads.update([p for p in head_parents if p not in self.parents])
        return head

    __next__ = next

3199 

3200 

def commit_tree_changes(
    object_store: BaseObjectStore,
    tree: ObjectID | Tree,
    changes: Sequence[tuple[bytes, int | None, ObjectID | None]],
) -> ObjectID:
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. A change whose object sha is None
    deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
        object_store: Object store to store new objects in
            and retrieve old ones from.
        tree: Original tree root (SHA or Tree object). A Tree object
            passed here is modified in place.
        changes: changes to apply

    Returns: ID of the new tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    if isinstance(tree, Tree):
        root: Tree = tree
    else:
        loaded = object_store[tree]
        assert isinstance(loaded, Tree)
        root = loaded

    # Changes that address this level are applied right away; changes below
    # a subdirectory are grouped by the first path component.
    per_directory: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {}
    for path, new_mode, new_sha in changes:
        dirname, separator, subpath = path.partition(b"/")
        if separator:
            per_directory.setdefault(dirname, []).append((subpath, new_mode, new_sha))
        elif new_sha is None:
            del root[path]
        else:
            assert new_mode is not None
            root[path] = (new_mode, new_sha)

    for name, subchanges in per_directory.items():
        try:
            original_subtree: ObjectID | Tree = root[name][1]
        except KeyError:
            # For new directories, start from an empty Tree object.
            original_subtree = Tree()
        new_subtree = object_store[
            commit_tree_changes(object_store, original_subtree, subchanges)
        ]
        assert isinstance(new_subtree, Tree)
        if len(new_subtree) == 0:
            # A subtree left without entries is removed from its parent.
            del root[name]
        else:
            root[name] = (stat.S_IFDIR, new_subtree.id)

    object_store.add_object(root)
    return root.id

3262 

3263 

class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores.

    Reads are answered from the base stores in the order given; writes go
    to the optional ``add_store``.
    """

    def __init__(
        self,
        bases: list[BaseObjectStore],
        add_store: BaseObjectStore | None = None,
    ) -> None:
        """Initialize an OverlayObjectStore.

        Args:
            bases: List of base object stores to overlay
            add_store: Optional store to write new objects to

        Raises:
            ValueError: If stores have different hash algorithms
        """
        from .object_format import verify_same_object_format

        # Verify all stores use the same hash algorithm. The add_store is
        # tested with ``is not None`` (as in add_object/add_objects) so that
        # it is checked regardless of its truth value.
        store_algorithms = [store.object_format for store in bases]
        if add_store is not None:
            store_algorithms.append(add_store.object_format)

        object_format = verify_same_object_format(*store_algorithms)

        super().__init__(object_format=object_format)
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object: ShaFile) -> None:
        """Add a single object to the store.

        Args:
            object: Object to add

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> Pack | None:
        """Add multiple objects to the store.

        Args:
            objects: Sequence of (object, path) tuples to add
            progress: Optional progress reporting callback

        Returns:
            Whatever the add_store's add_objects returns.

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Report the method that was actually called.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self) -> list[Pack]:
        """Get the list of packs from all overlaid stores.

        Returns:
            Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
            Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[ObjectID] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of unpacked objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo: set[ObjectID | RawObjectID] = set(shas)
        for b in self.bases:
            # Hand each base its own snapshot: ``todo`` shrinks while the
            # base's iterator is still running, and a base that walks the
            # set lazily would otherwise fail with "Set changed size during
            # iteration" (same precaution as in iterobjects_subset).
            for o in b.iter_unpacked_subset(
                set(todo),
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                # NOTE(review): assumes o.sha() has the same form as the ids
                # passed in ``shas``; a mismatch raises KeyError here - confirm.
                todo.remove(o.sha())
        if todo and not allow_missing:
            raise KeyError(next(iter(todo)))

    def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]:
        """Get the raw object data from the overlaid stores.

        Args:
            sha_id: SHA of the object

        Returns:
            Tuple of (type_num, raw_data)

        Raises:
            KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                # Not in this base; try the next one.
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is packed in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is loose in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False

3461 

3462 

def read_packs_file(f: BinaryIO) -> Iterator[str]:
    """Yield the packs listed in a packs file.

    Args:
        f: Binary file object; it is read to the end in one call.

    Yields: The name following each ``P `` marker, decoded with
        os.fsdecode. Empty lines and lines with another marker are skipped.
    """
    for raw_line in f.read().splitlines():
        if raw_line:
            marker, pack_name = raw_line.split(b" ", 1)
            if marker == b"P":
                yield os.fsdecode(pack_name)

3472 

3473 

class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend."""

    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects (always empty here)."""
        return iter([])

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None:
        """Return None: this store never has loose objects."""
        return None

    def delete_loose_object(self, sha: ObjectID) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
            sha: SHA of the object to delete
        """
        # Doesn't exist..

    def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
        """Pack loose objects. Returns number of objects packed.

        BucketBasedObjectStore doesn't support loose objects, so this is a no-op.

        Args:
            progress: Optional progress reporting callback (ignored)
        """
        return 0

    def _remove_pack_by_name(self, name: str) -> None:
        """Remove a pack by name. Subclasses should implement this."""
        raise NotImplementedError(self._remove_pack_by_name)

    def _iter_pack_names(self) -> Iterator[str]:
        """Iterate over the names of the stored packs; not implemented here."""
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name: str) -> Pack:
        """Open the pack with the given name; not implemented here."""
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self) -> list[Pack]:
        """Synchronise the pack cache with the names from _iter_pack_names.

        Returns:
            The packs that were newly opened by this call.
        """
        current_names = set(self._iter_pack_names())

        # Open newly appeared pack files
        opened = []
        for name in current_names:
            if name in self._pack_cache:
                continue
            pack = self._get_pack(name)
            opened.append(pack)
            self._pack_cache[name] = pack

        # Remove disappeared pack files
        for stale in set(self._pack_cache) - current_names:
            self._pack_cache.pop(stale).close()
        return opened

    def _upload_pack(
        self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
    ) -> None:
        """Upload a pack and its index; not implemented here."""
        raise NotImplementedError

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        spool = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit() -> Pack | None:
            # Nothing was written: discard the spool file.
            if spool.tell() == 0:
                spool.close()
                return None

            spool.seek(0)

            pack_data = PackData(spool.name, file=spool, object_format=self.object_format)
            entries = pack_data.sorted_entries()
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")

            # Build the index in a second spool file and load it back.
            index_spool = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = pack_data.get_stored_checksum()
            write_pack_index(
                index_spool, entries, checksum, version=self.pack_index_version
            )
            index_spool.seek(0)
            index = load_pack_index_file(
                f"{basename}.idx", index_spool, self.object_format
            )

            # A pack with the same stored checksum is already present:
            # reuse it and drop everything that was just built.
            duplicate = next(
                (
                    pack
                    for pack in self.packs
                    if pack.get_stored_checksum() == pack_data.get_stored_checksum()
                ),
                None,
            )
            if duplicate is not None:
                pack_data.close()
                index.close()
                spool.close()
                index_spool.close()
                return duplicate

            spool.seek(0)
            index_spool.seek(0)
            self._upload_pack(basename, spool, index_spool)  # type: ignore[arg-type]
            final_pack = Pack.from_objects(pack_data, index)
            self._add_cached_pack(basename, final_pack)
            spool.close()
            index_spool.close()
            return final_pack

        return spool, commit, spool.close  # type: ignore[return-value]

3581 

3582 

3583def _collect_ancestors( 

3584 store: ObjectContainer, 

3585 heads: Iterable[ObjectID], 

3586 common: frozenset[ObjectID] = frozenset(), 

3587 shallow: frozenset[ObjectID] = frozenset(), 

3588 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents, 

3589) -> tuple[set[ObjectID], set[ObjectID]]: 

3590 """Collect all ancestors of heads up to (excluding) those in common. 

3591 

3592 Args: 

3593 store: Object store to get commits from 

3594 heads: commits to start from 

3595 common: commits to end at, or empty set to walk repository 

3596 completely 

3597 shallow: Set of shallow commits 

3598 get_parents: Optional function for getting the parents of a 

3599 commit. 

3600 Returns: a tuple (A, B) where A - all commits reachable 

3601 from heads but not present in common, B - common (shared) elements 

3602 that are directly reachable from heads 

3603 """ 

3604 bases = set() 

3605 commits = set() 

3606 queue: list[ObjectID] = [] 

3607 queue.extend(heads) 

3608 

3609 # Try to use commit graph if available 

3610 commit_graph = store.get_commit_graph() 

3611 

3612 while queue: 

3613 e = queue.pop(0) 

3614 if e in common: 

3615 bases.add(e) 

3616 elif e not in commits: 

3617 commits.add(e) 

3618 if e in shallow: 

3619 continue 

3620 

3621 # Try to use commit graph for parent lookup 

3622 parents = None 

3623 if commit_graph: 

3624 parents = commit_graph.get_parents(e) 

3625 

3626 if parents is None: 

3627 # Fall back to loading the object 

3628 cmt = store[e] 

3629 assert isinstance(cmt, Commit) 

3630 parents = get_parents(cmt) 

3631 

3632 queue.extend(parents) 

3633 return (commits, bases) 

3634 

3635 

def iter_tree_contents(
    store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
) -> Iterator[TreeEntry]:
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
        store: Object store to get trees from
        tree_id: SHA1 of the tree. Nothing is yielded when this is None.
        include_trees: If True, include tree objects in the iteration.

    Yields: TreeEntry namedtuples for all the objects in a tree.
    """
    if tree_id is None:
        return

    pending = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while pending:
        current = pending.pop()
        assert current.mode is not None
        is_dir = stat.S_ISDIR(current.mode)
        if is_dir:
            assert current.sha is not None
            subtree = store[current.sha]
            assert isinstance(subtree, Tree)
            children = []
            for child in subtree.iteritems(name_order=True):
                assert current.path is not None
                children.append(child.in_path(current.path))
            # The stack pops from the end, so push children in reverse to
            # visit them in name order.
            children.reverse()
            pending.extend(children)
        if include_trees or not is_dir:
            yield current

3669 

3670 

def iter_commit_contents(
    store: ObjectContainer,
    commit: Commit | ObjectID | RawObjectID,
    *,
    include: Sequence[str | bytes | Path] | None = None,
) -> Iterator[TreeEntry]:
    """Iterate the contents of the repository at the specified commit.

    This is a wrapper around iter_tree_contents() and
    tree_lookup_path() to simplify the common task of getting the
    contents of a repo at a particular commit. See also
    dulwich.index.build_file_from_blob() for writing individual files
    to disk.

    Args:
        store: Object store to get trees from
        commit: Commit object, or SHA1 of a commit
        include: if provided, only the entries whose paths are in the
            list, or whose parent tree is in the list, will be
            included. Note that duplicate or overlapping paths
            (e.g. ["foo", "foo/bar"]) may result in duplicate entries

    Yields: TreeEntry namedtuples for all matching files in a commit.

    Raises:
        TypeError: If the id does not resolve to a Commit in ``store``.
    """
    if isinstance(commit, Commit):
        sha = commit.id
    else:
        sha = commit
    # The commit is always re-read from the store, even when a Commit
    # object was passed in.
    resolved = store[sha]
    if not isinstance(resolved, Commit):
        raise TypeError(
            f"{sha.decode('ascii')} should be ID of a Commit, but is {type(resolved)}"
        )

    # Non-bytes include paths are encoded with the commit's own encoding.
    encoding = resolved.encoding or "utf-8"
    if include is None:
        wanted: list[bytes] = [b""]
    else:
        wanted = [
            item if isinstance(item, bytes) else str(item).encode(encoding)
            for item in include
        ]

    for path in wanted:
        mode, obj_id = tree_lookup_path(store.__getitem__, resolved.tree, path)
        # Iterate all contained files if path points to a dir, otherwise just
        # get that single file
        if isinstance(store[obj_id], Tree):
            for entry in iter_tree_contents(store, obj_id):
                yield entry.in_path(path)
        else:
            yield TreeEntry(path, mode, obj_id)

3720 

3721 

def peel_sha(
    store: ObjectContainer, sha: ObjectID | RawObjectID
) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
        store: Object store to get objects from
        sha: The object SHA to peel.

    Returns: A tuple (unpeeled, peeled): the object ``sha`` refers to, and
        the object reached after following all intermediate tags. If
        ``sha`` does not point to a tag, both are the same object.
    """
    original = store[sha]
    current = original
    kind = object_class(current.type_name)
    while kind is Tag:
        assert isinstance(current, Tag)
        # A tag records the class and id of the object it points at.
        kind, target = current.object
        current = store[target]
    return original, current

3741 

3742 

3743class GraphTraversalReachability: 

3744 """Naive graph traversal implementation of ObjectReachabilityProvider. 

3745 

3746 This implementation wraps existing graph traversal functions 

3747 (_collect_ancestors, _collect_filetree_revs) to provide the standard 

3748 reachability interface without any performance optimizations. 

3749 """ 

3750 

3751 def __init__(self, object_store: BaseObjectStore) -> None: 

3752 """Initialize the graph traversal provider. 

3753 

3754 Args: 

3755 object_store: Object store to query 

3756 """ 

3757 self.store = object_store 

3758 

def get_reachable_commits(
    self,
    heads: Iterable[ObjectID],
    exclude: Iterable[ObjectID] | None = None,
    shallow: Set[ObjectID] | None = None,
) -> set[ObjectID]:
    """Get the commits reachable from heads, stopping at excluded commits.

    Delegates to _collect_ancestors, passing ``exclude`` as the set of
    commits at which the walk ends.

    Args:
        heads: Starting commit SHAs
        exclude: Commit SHAs at which traversal stops
        shallow: Set of shallow commit boundaries

    Returns:
        Set of commit SHAs collected from heads, without the excluded ones
    """
    # None and empty inputs both become an empty frozenset.
    stop_at = frozenset(exclude or ())
    boundaries = frozenset(shallow or ())
    reachable, _ = _collect_ancestors(self.store, heads, stop_at, boundaries)
    return reachable

3783 

3784 def get_tree_objects( 

3785 self, 

3786 tree_shas: Iterable[ObjectID], 

3787 ) -> set[ObjectID]: 

3788 """Get all trees and blobs reachable from the given trees. 

3789 

3790 Uses _collect_filetree_revs for tree traversal. 

3791 

3792 Args: 

3793 tree_shas: Starting tree SHAs 

3794 

3795 Returns: 

3796 Set of tree and blob SHAs 

3797 """ 

3798 result: set[ObjectID] = set() 

3799 for tree_sha in tree_shas: 

3800 _collect_filetree_revs(self.store, tree_sha, result) 

3801 return result 

3802 

3803 def get_reachable_objects( 

3804 self, 

3805 commits: Iterable[ObjectID], 

3806 exclude_commits: Iterable[ObjectID] | None = None, 

3807 ) -> set[ObjectID]: 

3808 """Get all objects (commits + trees + blobs) reachable from commits. 

3809 

3810 Args: 

3811 commits: Starting commit SHAs 

3812 exclude_commits: Commits whose objects should be excluded 

3813 

3814 Returns: 

3815 Set of all object SHAs (commits, trees, blobs) 

3816 """ 

3817 commits_set = set(commits) 

3818 result = set(commits_set) 

3819 

3820 # Get trees for all commits 

3821 tree_shas = [] 

3822 for commit_sha in commits_set: 

3823 try: 

3824 commit = self.store[commit_sha] 

3825 if isinstance(commit, Commit): 

3826 tree_shas.append(commit.tree) 

3827 except KeyError: 

3828 # Commit not in store, skip 

3829 continue 

3830 

3831 # Collect all tree/blob objects 

3832 result.update(self.get_tree_objects(tree_shas)) 

3833 

3834 # Exclude objects from exclude_commits if needed 

3835 if exclude_commits: 

3836 exclude_objects = self.get_reachable_objects(exclude_commits, None) 

3837 result -= exclude_objects 

3838 

3839 return result 

3840 

3841 

class BitmapReachability:
    """Bitmap-accelerated implementation of ObjectReachabilityProvider.

    This implementation uses packfile bitmap indexes where available to
    accelerate reachability queries. It hands the query to a
    GraphTraversalReachability instance when bitmaps don't cover the
    requested commits.
    """

    def __init__(self, object_store: "PackBasedObjectStore") -> None:
        """Initialize the bitmap provider.

        Args:
          object_store: Pack-based object store with bitmap support
        """
        self.store = object_store
        # Fallback to graph traversal for operations not yet optimized
        self._fallback = GraphTraversalReachability(object_store)

    def _combine_commit_bitmaps(
        self,
        commit_shas: set[ObjectID],
        exclude_shas: set[ObjectID] | None = None,
    ) -> tuple["EWAHBitmap", "Pack"] | None:
        """Combine bitmaps for multiple commits using OR, with optional exclusion.

        Exclusion is best-effort: it is applied only when every SHA in
        ``exclude_shas`` has a bitmap entry in the pack chosen for
        ``commit_shas``. Otherwise the combined bitmap is returned without
        any exclusion.

        Args:
          commit_shas: Set of commit SHAs to combine
          exclude_shas: Optional set of commit SHAs to exclude

        Returns:
          Tuple of (combined_bitmap, pack), or None when a bitmap is missing
          for any commit in ``commit_shas``, when the commits' bitmaps live
          in more than one pack, or when the final bitmap or pack is falsy
          (which includes an empty ``commit_shas``).
        """
        from .bitmap import find_commit_bitmaps

        # Find bitmaps for the commits
        commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)

        # If we can't find bitmaps for all commits, return None
        if len(commit_bitmaps) < len(commit_shas):
            return None

        # Combine bitmaps using OR
        combined_bitmap = None
        result_pack = None

        for commit_sha in commit_shas:
            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)

            if commit_bitmap is None:
                return None

            if combined_bitmap is None:
                combined_bitmap = commit_bitmap
                result_pack = pack
            elif pack == result_pack:
                # Same pack, can OR directly
                combined_bitmap = combined_bitmap | commit_bitmap
            else:
                # Different packs, can't combine
                return None

        # Handle exclusions if provided
        # NOTE(review): this guard and the final check use truthiness rather
        # than ``is not None``; if the bitmap or pack type defines
        # ``__len__``/``__bool__``, an empty one is treated as unavailable.
        if exclude_shas and result_pack and combined_bitmap:
            exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])

            if len(exclude_bitmaps) == len(exclude_shas):
                # All excludes have bitmaps, compute exclusion
                exclude_combined = None

                for commit_sha in exclude_shas:
                    _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)

                    if exclude_bitmap is None:
                        # NOTE(review): stops accumulating, but whatever was
                        # OR-ed so far is still subtracted below, so the
                        # exclusion may be partial.
                        break

                    if exclude_combined is None:
                        exclude_combined = exclude_bitmap
                    else:
                        exclude_combined = exclude_combined | exclude_bitmap

                # Subtract excludes using set difference
                if exclude_combined:
                    combined_bitmap = combined_bitmap - exclude_combined

        if combined_bitmap and result_pack:
            return (combined_bitmap, result_pack)
        return None

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads using bitmaps where possible.

        Args:
          heads: Starting commit SHAs
          exclude: Commit SHAs to exclude (and their ancestors)
          shallow: Set of shallow commit boundaries

        Returns:
          Set of commit SHAs reachable from heads but not from exclude
        """
        from .bitmap import bitmap_to_object_shas

        # If shallow is specified, fall back to graph traversal
        # (bitmaps don't support shallow boundaries well)
        if shallow:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        # Materialize the inputs once. ``heads`` and ``exclude`` may be
        # one-shot iterators, so every later use, including the fallback
        # calls below, must go through these sets, not the original arguments.
        heads_set = set(heads)
        exclude_set = set(exclude) if exclude else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(heads_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_commits(
                heads_set, exclude_set, shallow
            )

        combined_bitmap, result_pack = result

        # Convert bitmap to commit SHAs, filtering for commits only
        pack_bitmap = result_pack.bitmap
        if pack_bitmap is None:
            return self._fallback.get_reachable_commits(
                heads_set, exclude_set, shallow
            )
        commit_type_filter = pack_bitmap.commit_bitmap
        return bitmap_to_object_shas(
            combined_bitmap, result_pack.index, commit_type_filter
        )

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
          tree_shas: Starting tree SHAs

        Returns:
          Set of tree and blob SHAs
        """
        # Tree traversal doesn't benefit much from bitmaps, use fallback
        return self._fallback.get_tree_objects(tree_shas)

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects reachable from commits using bitmaps.

        Args:
          commits: Starting commit SHAs
          exclude_commits: Commits whose objects should be excluded

        Returns:
          Set of all object SHAs (commits, trees, blobs)
        """
        from .bitmap import bitmap_to_object_shas

        # Materialize the inputs once so that a one-shot iterator is not
        # handed to the fallback after it has already been exhausted here.
        commits_set = set(commits)
        exclude_set = set(exclude_commits) if exclude_commits else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(commits_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_objects(commits_set, exclude_set)

        combined_bitmap, result_pack = result

        # Convert bitmap to all object SHAs (no type filter)
        return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)