Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1475 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26__all__ = [ 

27 "DEFAULT_TEMPFILE_GRACE_PERIOD", 

28 "INFODIR", 

29 "PACKDIR", 

30 "PACK_MODE", 

31 "BaseObjectStore", 

32 "BitmapReachability", 

33 "BucketBasedObjectStore", 

34 "DiskObjectStore", 

35 "GraphTraversalReachability", 

36 "GraphWalker", 

37 "MemoryObjectStore", 

38 "MissingObjectFinder", 

39 "ObjectIterator", 

40 "ObjectReachabilityProvider", 

41 "ObjectStoreGraphWalker", 

42 "OverlayObjectStore", 

43 "PackBasedObjectStore", 

44 "PackCapableObjectStore", 

45 "PackContainer", 

46 "commit_tree_changes", 

47 "find_shallow", 

48 "get_depth", 

49 "iter_commit_contents", 

50 "iter_tree_contents", 

51 "peel_sha", 

52 "read_packs_file", 

53 "tree_lookup_path", 

54] 

55 

56import binascii 

57import os 

58import stat 

59import sys 

60import time 

61import warnings 

62from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set 

63from contextlib import suppress 

64from io import BytesIO 

65from pathlib import Path 

66from typing import ( 

67 TYPE_CHECKING, 

68 BinaryIO, 

69 Protocol, 

70 cast, 

71) 

72 

73from .errors import NotTreeError 

74from .file import GitFile, _GitFile 

75from .midx import MultiPackIndex, load_midx 

76from .objects import ( 

77 S_ISGITLINK, 

78 ZERO_SHA, 

79 Blob, 

80 Commit, 

81 ObjectID, 

82 RawObjectID, 

83 ShaFile, 

84 Tag, 

85 Tree, 

86 TreeEntry, 

87 hex_to_filename, 

88 hex_to_sha, 

89 object_class, 

90 sha_to_hex, 

91 valid_hexsha, 

92) 

93from .pack import ( 

94 PACK_SPOOL_FILE_MAX_SIZE, 

95 ObjectContainer, 

96 Pack, 

97 PackData, 

98 PackedObjectContainer, 

99 PackFileDisappeared, 

100 PackHint, 

101 PackIndexer, 

102 PackInflater, 

103 PackStreamCopier, 

104 UnpackedObject, 

105 extend_pack, 

106 full_unpacked_object, 

107 generate_unpacked_objects, 

108 iter_sha1, 

109 load_pack_index_file, 

110 pack_objects_to_data, 

111 write_pack_data, 

112 write_pack_index, 

113) 

114from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX 

115from .refs import Ref 

116 

117if TYPE_CHECKING: 

118 from .bitmap import EWAHBitmap 

119 from .commit_graph import CommitGraph 

120 from .config import Config 

121 from .diff_tree import RenameDetector 

122 from .pack import Pack 

123 

124 

125class GraphWalker(Protocol): 

126 """Protocol for graph walker objects.""" 

127 

128 def __next__(self) -> ObjectID | None: 

129 """Return the next object SHA to visit.""" 

130 ... 

131 

132 def ack(self, sha: ObjectID) -> None: 

133 """Acknowledge that an object has been received.""" 

134 ... 

135 

136 def nak(self) -> None: 

137 """Nothing in common was found.""" 

138 ... 

139 

140 

141class ObjectReachabilityProvider(Protocol): 

142 """Protocol for computing object reachability queries. 

143 

144 This abstraction allows reachability computations to be backed by either 

145 naive graph traversal or optimized bitmap indexes, with a consistent interface. 

146 """ 

147 

148 def get_reachable_commits( 

149 self, 

150 heads: Iterable[ObjectID], 

151 exclude: Iterable[ObjectID] | None = None, 

152 shallow: Set[ObjectID] | None = None, 

153 ) -> set[ObjectID]: 

154 """Get all commits reachable from heads, excluding those in exclude. 

155 

156 Args: 

157 heads: Starting commit SHAs 

158 exclude: Commit SHAs to exclude (and their ancestors) 

159 shallow: Set of shallow commit boundaries (traversal stops here) 

160 

161 Returns: 

162 Set of commit SHAs reachable from heads but not from exclude 

163 """ 

164 ... 

165 

166 def get_reachable_objects( 

167 self, 

168 commits: Iterable[ObjectID], 

169 exclude_commits: Iterable[ObjectID] | None = None, 

170 ) -> set[ObjectID]: 

171 """Get all objects (commits + trees + blobs) reachable from commits. 

172 

173 Args: 

174 commits: Starting commit SHAs 

175 exclude_commits: Commits whose objects should be excluded 

176 

177 Returns: 

178 Set of all object SHAs (commits, trees, blobs, tags) 

179 """ 

180 ... 

181 

182 def get_tree_objects( 

183 self, 

184 tree_shas: Iterable[ObjectID], 

185 ) -> set[ObjectID]: 

186 """Get all trees and blobs reachable from the given trees. 

187 

188 Args: 

189 tree_shas: Starting tree SHAs 

190 

191 Returns: 

192 Set of tree and blob SHAs 

193 """ 

194 ... 

195 

196 

197INFODIR = "info" 

198PACKDIR = "pack" 

199 

200# use permissions consistent with Git; just readable by everyone 

201# TODO: should packs also be non-writable on Windows? if so, that 

202# would requite some rather significant adjustments to the test suite 

203PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

204 

205# Grace period for cleaning up temporary pack files (in seconds) 

206# Matches git's default of 2 weeks 

207DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

208 

209 

210def find_shallow( 

211 store: ObjectContainer, heads: Iterable[ObjectID], depth: int 

212) -> tuple[set[ObjectID], set[ObjectID]]: 

213 """Find shallow commits according to a given depth. 

214 

215 Args: 

216 store: An ObjectStore for looking up objects. 

217 heads: Iterable of head SHAs to start walking from. 

218 depth: The depth of ancestors to include. A depth of one includes 

219 only the heads themselves. 

220 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

221 considered shallow and unshallow according to the arguments. Note that 

222 these sets may overlap if a commit is reachable along multiple paths. 

223 """ 

224 parents: dict[ObjectID, list[ObjectID]] = {} 

225 commit_graph = store.get_commit_graph() 

226 

227 def get_parents(sha: ObjectID) -> list[ObjectID]: 

228 result = parents.get(sha, None) 

229 if not result: 

230 # Try to use commit graph first if available 

231 if commit_graph: 

232 graph_parents = commit_graph.get_parents(sha) 

233 if graph_parents is not None: 

234 result = graph_parents 

235 parents[sha] = result 

236 return result 

237 # Fall back to loading the object 

238 commit = store[sha] 

239 assert isinstance(commit, Commit) 

240 result = commit.parents 

241 parents[sha] = result 

242 return result 

243 

244 todo = [] # stack of (sha, depth) 

245 for head_sha in heads: 

246 obj = store[head_sha] 

247 # Peel tags if necessary 

248 while isinstance(obj, Tag): 

249 _, sha = obj.object 

250 obj = store[sha] 

251 if isinstance(obj, Commit): 

252 todo.append((obj.id, 1)) 

253 

254 not_shallow = set() 

255 shallow = set() 

256 while todo: 

257 sha, cur_depth = todo.pop() 

258 if cur_depth < depth: 

259 not_shallow.add(sha) 

260 new_depth = cur_depth + 1 

261 todo.extend((p, new_depth) for p in get_parents(sha)) 

262 else: 

263 shallow.add(sha) 

264 

265 return shallow, not_shallow 

266 

267 

268def get_depth( 

269 store: ObjectContainer, 

270 head: ObjectID, 

271 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents, 

272 max_depth: int | None = None, 

273) -> int: 

274 """Return the current available depth for the given head. 

275 

276 For commits with multiple parents, the largest possible depth will be 

277 returned. 

278 

279 Args: 

280 store: Object store to search in 

281 head: commit to start from 

282 get_parents: optional function for getting the parents of a commit 

283 max_depth: maximum depth to search 

284 """ 

285 if head not in store: 

286 return 0 

287 current_depth = 1 

288 queue = [(head, current_depth)] 

289 commit_graph = store.get_commit_graph() 

290 

291 while queue and (max_depth is None or current_depth < max_depth): 

292 e, depth = queue.pop(0) 

293 current_depth = max(current_depth, depth) 

294 

295 # Try to use commit graph for parent lookup if available 

296 parents = None 

297 if commit_graph: 

298 parents = commit_graph.get_parents(e) 

299 

300 if parents is None: 

301 # Fall back to loading the object 

302 cmt = store[e] 

303 if isinstance(cmt, Tag): 

304 _cls, sha = cmt.object 

305 cmt = store[sha] 

306 parents = get_parents(cmt) 

307 

308 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

309 return current_depth 

310 

311 

312class PackContainer(Protocol): 

313 """Protocol for containers that can accept pack files.""" 

314 

315 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

316 """Add a new pack.""" 

317 

318 

319class BaseObjectStore: 

320 """Object store interface.""" 

321 

322 def determine_wants_all( 

323 self, refs: Mapping[Ref, ObjectID], depth: int | None = None 

324 ) -> list[ObjectID]: 

325 """Determine which objects are wanted based on refs.""" 

326 

327 def _want_deepen(sha: ObjectID) -> bool: 

328 if not depth: 

329 return False 

330 if depth == DEPTH_INFINITE: 

331 return True 

332 return depth > self._get_depth(sha) 

333 

334 return [ 

335 sha 

336 for (ref, sha) in refs.items() 

337 if (sha not in self or _want_deepen(sha)) 

338 and not ref.endswith(PEELED_TAG_SUFFIX) 

339 and not sha == ZERO_SHA 

340 ] 

341 

342 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

343 """Check if a particular object is present by SHA1 and is loose.""" 

344 raise NotImplementedError(self.contains_loose) 

345 

346 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

347 """Check if a particular object is present by SHA1 and is packed.""" 

348 return False # Default implementation for stores that don't support packing 

349 

350 def __contains__(self, sha1: ObjectID | RawObjectID) -> bool: 

351 """Check if a particular object is present by SHA1. 

352 

353 This method makes no distinction between loose and packed objects. 

354 """ 

355 return self.contains_loose(sha1) 

356 

357 @property 

358 def packs(self) -> list[Pack]: 

359 """Iterable of pack objects.""" 

360 raise NotImplementedError 

361 

362 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

363 """Obtain the raw text for an object. 

364 

365 Args: 

366 name: sha for the object. 

367 Returns: tuple with numeric type and object contents. 

368 """ 

369 raise NotImplementedError(self.get_raw) 

370 

371 def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile: 

372 """Obtain an object by SHA1.""" 

373 type_num, uncomp = self.get_raw(sha1) 

374 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

375 

376 def __iter__(self) -> Iterator[ObjectID]: 

377 """Iterate over the SHAs that are present in this store.""" 

378 raise NotImplementedError(self.__iter__) 

379 

380 def add_object(self, obj: ShaFile) -> None: 

381 """Add a single object to this object store.""" 

382 raise NotImplementedError(self.add_object) 

383 

384 def add_objects( 

385 self, 

386 objects: Sequence[tuple[ShaFile, str | None]], 

387 progress: Callable[..., None] | None = None, 

388 ) -> "Pack | None": 

389 """Add a set of objects to this object store. 

390 

391 Args: 

392 objects: Iterable over a list of (object, path) tuples 

393 progress: Optional progress callback 

394 """ 

395 raise NotImplementedError(self.add_objects) 

396 

397 def get_reachability_provider( 

398 self, prefer_bitmap: bool = True 

399 ) -> ObjectReachabilityProvider: 

400 """Get a reachability provider for this object store. 

401 

402 Returns an ObjectReachabilityProvider that can efficiently compute 

403 object reachability queries. Subclasses can override this to provide 

404 optimized implementations (e.g., using bitmap indexes). 

405 

406 Args: 

407 prefer_bitmap: Whether to prefer bitmap-based reachability if 

408 available. 

409 

410 Returns: 

411 ObjectReachabilityProvider instance 

412 """ 

413 return GraphTraversalReachability(self) 

414 

415 def tree_changes( 

416 self, 

417 source: ObjectID | None, 

418 target: ObjectID | None, 

419 want_unchanged: bool = False, 

420 include_trees: bool = False, 

421 change_type_same: bool = False, 

422 rename_detector: "RenameDetector | None" = None, 

423 paths: Sequence[bytes] | None = None, 

424 ) -> Iterator[ 

425 tuple[ 

426 tuple[bytes | None, bytes | None], 

427 tuple[int | None, int | None], 

428 tuple[ObjectID | None, ObjectID | None], 

429 ] 

430 ]: 

431 """Find the differences between the contents of two trees. 

432 

433 Args: 

434 source: SHA1 of the source tree 

435 target: SHA1 of the target tree 

436 want_unchanged: Whether unchanged files should be reported 

437 include_trees: Whether to include trees 

438 change_type_same: Whether to report files changing 

439 type in the same entry. 

440 rename_detector: RenameDetector object for detecting renames. 

441 paths: Optional list of paths to filter to (as bytes). 

442 Returns: Iterator over tuples with 

443 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

444 """ 

445 from .diff_tree import tree_changes 

446 

447 for change in tree_changes( 

448 self, 

449 source, 

450 target, 

451 want_unchanged=want_unchanged, 

452 include_trees=include_trees, 

453 change_type_same=change_type_same, 

454 rename_detector=rename_detector, 

455 paths=paths, 

456 ): 

457 old_path = change.old.path if change.old is not None else None 

458 new_path = change.new.path if change.new is not None else None 

459 old_mode = change.old.mode if change.old is not None else None 

460 new_mode = change.new.mode if change.new is not None else None 

461 old_sha = change.old.sha if change.old is not None else None 

462 new_sha = change.new.sha if change.new is not None else None 

463 yield ( 

464 (old_path, new_path), 

465 (old_mode, new_mode), 

466 (old_sha, new_sha), 

467 ) 

468 

469 def iter_tree_contents( 

470 self, tree_id: ObjectID, include_trees: bool = False 

471 ) -> Iterator[TreeEntry]: 

472 """Iterate the contents of a tree and all subtrees. 

473 

474 Iteration is depth-first pre-order, as in e.g. os.walk. 

475 

476 Args: 

477 tree_id: SHA1 of the tree. 

478 include_trees: If True, include tree objects in the iteration. 

479 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

480 tree. 

481 """ 

482 warnings.warn( 

483 "Please use dulwich.object_store.iter_tree_contents", 

484 DeprecationWarning, 

485 stacklevel=2, 

486 ) 

487 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

488 

489 def iterobjects_subset( 

490 self, shas: Iterable[ObjectID], *, allow_missing: bool = False 

491 ) -> Iterator[ShaFile]: 

492 """Iterate over a subset of objects in the store. 

493 

494 Args: 

495 shas: Iterable of object SHAs to retrieve 

496 allow_missing: If True, skip missing objects; if False, raise KeyError 

497 

498 Returns: 

499 Iterator of ShaFile objects 

500 

501 Raises: 

502 KeyError: If an object is missing and allow_missing is False 

503 """ 

504 for sha in shas: 

505 try: 

506 yield self[sha] 

507 except KeyError: 

508 if not allow_missing: 

509 raise 

510 

511 def iter_unpacked_subset( 

512 self, 

513 shas: Iterable[ObjectID | RawObjectID], 

514 include_comp: bool = False, 

515 allow_missing: bool = False, 

516 convert_ofs_delta: bool = True, 

517 ) -> "Iterator[UnpackedObject]": 

518 """Iterate over unpacked objects for a subset of SHAs. 

519 

520 Default implementation that converts ShaFile objects to UnpackedObject. 

521 Subclasses may override for more efficient unpacked access. 

522 

523 Args: 

524 shas: Iterable of object SHAs to retrieve 

525 include_comp: Whether to include compressed data (ignored in base 

526 implementation) 

527 allow_missing: If True, skip missing objects; if False, raise 

528 KeyError 

529 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in 

530 base implementation) 

531 

532 Returns: 

533 Iterator of UnpackedObject instances 

534 

535 Raises: 

536 KeyError: If an object is missing and allow_missing is False 

537 """ 

538 from .pack import UnpackedObject 

539 

540 for sha in shas: 

541 try: 

542 obj = self[sha] 

543 # Convert ShaFile to UnpackedObject 

544 unpacked = UnpackedObject( 

545 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id 

546 ) 

547 yield unpacked 

548 except KeyError: 

549 if not allow_missing: 

550 raise 

551 

552 def find_missing_objects( 

553 self, 

554 haves: Iterable[ObjectID], 

555 wants: Iterable[ObjectID], 

556 shallow: Set[ObjectID] | None = None, 

557 progress: Callable[..., None] | None = None, 

558 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None, 

559 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents, 

560 ) -> Iterator[tuple[ObjectID, PackHint | None]]: 

561 """Find the missing objects required for a set of revisions. 

562 

563 Args: 

564 haves: Iterable over SHAs already in common. 

565 wants: Iterable over SHAs of objects to fetch. 

566 shallow: Set of shallow commit SHA1s to skip 

567 progress: Simple progress function that will be called with 

568 updated progress strings. 

569 get_tagged: Function that returns a dict of pointed-to sha -> 

570 tag sha for including tags. 

571 get_parents: Optional function for getting the parents of a 

572 commit. 

573 Returns: Iterator over (sha, path) pairs. 

574 """ 

575 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

576 finder = MissingObjectFinder( 

577 self, 

578 haves=haves, 

579 wants=wants, 

580 shallow=shallow, 

581 progress=progress, 

582 get_tagged=get_tagged, 

583 get_parents=get_parents, 

584 ) 

585 return iter(finder) 

586 

587 def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]: 

588 """Find which revisions this store has in common using graphwalker. 

589 

590 Args: 

591 graphwalker: A graphwalker object. 

592 Returns: List of SHAs that are in common 

593 """ 

594 haves = [] 

595 sha = next(graphwalker) 

596 while sha: 

597 if sha in self: 

598 haves.append(sha) 

599 graphwalker.ack(sha) 

600 sha = next(graphwalker) 

601 return haves 

602 

603 def generate_pack_data( 

604 self, 

605 have: Iterable[ObjectID], 

606 want: Iterable[ObjectID], 

607 *, 

608 shallow: Set[ObjectID] | None = None, 

609 progress: Callable[..., None] | None = None, 

610 ofs_delta: bool = True, 

611 ) -> tuple[int, Iterator[UnpackedObject]]: 

612 """Generate pack data objects for a set of wants/haves. 

613 

614 Args: 

615 have: List of SHA1s of objects that should not be sent 

616 want: List of SHA1s of objects that should be sent 

617 shallow: Set of shallow commit SHA1s to skip 

618 ofs_delta: Whether OFS deltas can be included 

619 progress: Optional progress reporting method 

620 """ 

621 # Note that the pack-specific implementation below is more efficient, 

622 # as it reuses deltas 

623 missing_objects = MissingObjectFinder( 

624 self, haves=have, wants=want, shallow=shallow, progress=progress 

625 ) 

626 object_ids = list(missing_objects) 

627 return pack_objects_to_data( 

628 [(self[oid], path) for oid, path in object_ids], 

629 ofs_delta=ofs_delta, 

630 progress=progress, 

631 ) 

632 

633 def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID: 

634 """Peel all tags from a SHA. 

635 

636 Args: 

637 sha: The object SHA to peel. 

638 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

639 intermediate tags; if the original ref does not point to a tag, 

640 this will equal the original SHA1. 

641 """ 

642 warnings.warn( 

643 "Please use dulwich.object_store.peel_sha()", 

644 DeprecationWarning, 

645 stacklevel=2, 

646 ) 

647 return peel_sha(self, sha)[1].id 

648 

649 def _get_depth( 

650 self, 

651 head: ObjectID, 

652 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents, 

653 max_depth: int | None = None, 

654 ) -> int: 

655 """Return the current available depth for the given head. 

656 

657 For commits with multiple parents, the largest possible depth will be 

658 returned. 

659 

660 Args: 

661 head: commit to start from 

662 get_parents: optional function for getting the parents of a commit 

663 max_depth: maximum depth to search 

664 """ 

665 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

666 

667 def close(self) -> None: 

668 """Close any files opened by this object store.""" 

669 # Default implementation is a NO-OP 

670 

671 def prune(self, grace_period: int | None = None) -> None: 

672 """Prune/clean up this object store. 

673 

674 This includes removing orphaned temporary files and other 

675 housekeeping tasks. Default implementation is a NO-OP. 

676 

677 Args: 

678 grace_period: Grace period in seconds for removing temporary files. 

679 If None, uses the default grace period. 

680 """ 

681 # Default implementation is a NO-OP 

682 

683 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

684 """Iterate over all SHA1s that start with a given prefix. 

685 

686 The default implementation is a naive iteration over all objects. 

687 However, subclasses may override this method with more efficient 

688 implementations. 

689 """ 

690 for sha in self: 

691 if sha.startswith(prefix): 

692 yield sha 

693 

694 def get_commit_graph(self) -> "CommitGraph | None": 

695 """Get the commit graph for this object store. 

696 

697 Returns: 

698 CommitGraph object if available, None otherwise 

699 """ 

700 return None 

701 

702 def write_commit_graph( 

703 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True 

704 ) -> None: 

705 """Write a commit graph file for this object store. 

706 

707 Args: 

708 refs: List of refs to include. If None, includes all refs from object store. 

709 reachable: If True, includes all commits reachable from refs. 

710 If False, only includes the direct ref targets. 

711 

712 Note: 

713 Default implementation does nothing. Subclasses should override 

714 this method to provide commit graph writing functionality. 

715 """ 

716 raise NotImplementedError(self.write_commit_graph) 

717 

718 def get_object_mtime(self, sha: ObjectID) -> float: 

719 """Get the modification time of an object. 

720 

721 Args: 

722 sha: SHA1 of the object 

723 

724 Returns: 

725 Modification time as seconds since epoch 

726 

727 Raises: 

728 KeyError: if the object is not found 

729 """ 

730 # Default implementation raises KeyError 

731 # Subclasses should override to provide actual mtime 

732 raise KeyError(sha) 

733 

734 

735class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer): 

736 """Object store that supports pack operations. 

737 

738 This is a base class for object stores that can handle pack files, 

739 including both disk-based and memory-based stores. 

740 """ 

741 

742 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

743 """Add a new pack to this object store. 

744 

745 Returns: Tuple of (file, commit_func, abort_func) 

746 """ 

747 raise NotImplementedError(self.add_pack) 

748 

749 def add_pack_data( 

750 self, 

751 count: int, 

752 unpacked_objects: Iterator["UnpackedObject"], 

753 progress: Callable[..., None] | None = None, 

754 ) -> "Pack | None": 

755 """Add pack data to this object store. 

756 

757 Args: 

758 count: Number of objects 

759 unpacked_objects: Iterator over unpacked objects 

760 progress: Optional progress callback 

761 """ 

762 raise NotImplementedError(self.add_pack_data) 

763 

764 def get_unpacked_object( 

765 self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False 

766 ) -> "UnpackedObject": 

767 """Get a raw unresolved object. 

768 

769 Args: 

770 sha1: SHA-1 hash of the object 

771 include_comp: Whether to include compressed data 

772 

773 Returns: 

774 UnpackedObject instance 

775 """ 

776 from .pack import UnpackedObject 

777 

778 obj = self[sha1] 

779 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks()) 

780 

781 def iterobjects_subset( 

782 self, shas: Iterable[ObjectID], *, allow_missing: bool = False 

783 ) -> Iterator[ShaFile]: 

784 """Iterate over a subset of objects. 

785 

786 Args: 

787 shas: Iterable of object SHAs to retrieve 

788 allow_missing: If True, skip missing objects 

789 

790 Returns: 

791 Iterator of ShaFile objects 

792 """ 

793 for sha in shas: 

794 try: 

795 yield self[sha] 

796 except KeyError: 

797 if not allow_missing: 

798 raise 

799 

800 

801class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer): 

802 """Object store that uses pack files for storage. 

803 

804 This class provides a base implementation for object stores that use 

805 Git pack files as their primary storage mechanism. It handles caching 

806 of open pack files and provides configuration for pack file operations. 

807 """ 

808 

809 def __init__( 

810 self, 

811 pack_compression_level: int = -1, 

812 pack_index_version: int | None = None, 

813 pack_delta_window_size: int | None = None, 

814 pack_window_memory: int | None = None, 

815 pack_delta_cache_size: int | None = None, 

816 pack_depth: int | None = None, 

817 pack_threads: int | None = None, 

818 pack_big_file_threshold: int | None = None, 

819 ) -> None: 

820 """Initialize a PackBasedObjectStore. 

821 

822 Args: 

823 pack_compression_level: Compression level for pack files (-1 to 9) 

824 pack_index_version: Pack index version to use 

825 pack_delta_window_size: Window size for delta compression 

826 pack_window_memory: Maximum memory to use for delta window 

827 pack_delta_cache_size: Cache size for delta operations 

828 pack_depth: Maximum depth for pack deltas 

829 pack_threads: Number of threads to use for packing 

830 pack_big_file_threshold: Threshold for treating files as "big" 

831 """ 

832 self._pack_cache: dict[str, Pack] = {} 

833 self.pack_compression_level = pack_compression_level 

834 self.pack_index_version = pack_index_version 

835 self.pack_delta_window_size = pack_delta_window_size 

836 self.pack_window_memory = pack_window_memory 

837 self.pack_delta_cache_size = pack_delta_cache_size 

838 self.pack_depth = pack_depth 

839 self.pack_threads = pack_threads 

840 self.pack_big_file_threshold = pack_big_file_threshold 

841 

842 def get_reachability_provider( 

843 self, 

844 prefer_bitmaps: bool = True, 

845 ) -> ObjectReachabilityProvider: 

846 """Get the best reachability provider for the object store. 

847 

848 Args: 

849 prefer_bitmaps: Whether to use bitmaps if available 

850 

851 Returns: 

852 ObjectReachabilityProvider implementation (either bitmap-accelerated 

853 or graph traversal) 

854 """ 

855 if prefer_bitmaps: 

856 # Check if any packs have bitmaps 

857 has_bitmap = False 

858 for pack in self.packs: 

859 try: 

860 # Try to access bitmap property 

861 if pack.bitmap is not None: 

862 has_bitmap = True 

863 break 

864 except FileNotFoundError: 

865 # Bitmap file doesn't exist for this pack 

866 continue 

867 

868 if has_bitmap: 

869 return BitmapReachability(self) 

870 

871 # Fall back to graph traversal 

872 return GraphTraversalReachability(self) 

873 

874 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

875 """Add a new pack to this object store.""" 

876 raise NotImplementedError(self.add_pack) 

877 

878 def add_pack_data( 

879 self, 

880 count: int, 

881 unpacked_objects: Iterator[UnpackedObject], 

882 progress: Callable[..., None] | None = None, 

883 ) -> "Pack | None": 

884 """Add pack data to this object store. 

885 

886 Args: 

887 count: Number of items to add 

888 unpacked_objects: Iterator of UnpackedObject instances 

889 progress: Optional progress callback 

890 """ 

891 if count == 0: 

892 # Don't bother writing an empty pack file 

893 return None 

894 f, commit, abort = self.add_pack() 

895 try: 

896 write_pack_data( 

897 f.write, 

898 unpacked_objects, 

899 num_records=count, 

900 progress=progress, 

901 compression_level=self.pack_compression_level, 

902 ) 

903 except BaseException: 

904 abort() 

905 raise 

906 else: 

907 return commit() 

908 

909 @property 

910 def alternates(self) -> list["BaseObjectStore"]: 

911 """Return list of alternate object stores.""" 

912 return [] 

913 

914 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

915 """Check if a particular object is present by SHA1 and is packed. 

916 

917 This does not check alternates. 

918 """ 

919 for pack in self.packs: 

920 try: 

921 if sha in pack: 

922 return True 

923 except PackFileDisappeared: 

924 pass 

925 return False 

926 

927 def __contains__(self, sha: ObjectID | RawObjectID) -> bool: 

928 """Check if a particular object is present by SHA1. 

929 

930 This method makes no distinction between loose and packed objects. 

931 """ 

932 if self.contains_packed(sha) or self.contains_loose(sha): 

933 return True 

934 for alternate in self.alternates: 

935 if sha in alternate: 

936 return True 

937 return False 

938 

939 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

940 """Add a newly appeared pack to the cache by path.""" 

941 prev_pack = self._pack_cache.get(base_name) 

942 if prev_pack is not pack: 

943 self._pack_cache[base_name] = pack 

944 if prev_pack: 

945 prev_pack.close() 

946 

947 def generate_pack_data( 

948 self, 

949 have: Iterable[ObjectID], 

950 want: Iterable[ObjectID], 

951 *, 

952 shallow: Set[ObjectID] | None = None, 

953 progress: Callable[..., None] | None = None, 

954 ofs_delta: bool = True, 

955 ) -> tuple[int, Iterator[UnpackedObject]]: 

956 """Generate pack data objects for a set of wants/haves. 

957 

958 Args: 

959 have: List of SHA1s of objects that should not be sent 

960 want: List of SHA1s of objects that should be sent 

961 shallow: Set of shallow commit SHA1s to skip 

962 ofs_delta: Whether OFS deltas can be included 

963 progress: Optional progress reporting method 

964 """ 

965 missing_objects = MissingObjectFinder( 

966 self, haves=have, wants=want, shallow=shallow, progress=progress 

967 ) 

968 remote_has = missing_objects.get_remote_has() 

969 object_ids = list(missing_objects) 

970 return len(object_ids), generate_unpacked_objects( 

971 self, 

972 object_ids, 

973 progress=progress, 

974 ofs_delta=ofs_delta, 

975 other_haves=remote_has, 

976 ) 

977 

978 def _clear_cached_packs(self) -> None: 

979 pack_cache = self._pack_cache 

980 self._pack_cache = {} 

981 while pack_cache: 

982 (_name, pack) = pack_cache.popitem() 

983 pack.close() 

984 

985 def _iter_cached_packs(self) -> Iterator[Pack]: 

986 return iter(self._pack_cache.values()) 

987 

988 def _update_pack_cache(self) -> list[Pack]: 

989 raise NotImplementedError(self._update_pack_cache) 

990 

991 def close(self) -> None: 

992 """Close the object store and release resources. 

993 

994 This method closes all cached pack files and frees associated resources. 

995 """ 

996 self._clear_cached_packs() 

997 

998 @property 

999 def packs(self) -> list[Pack]: 

1000 """List with pack objects.""" 

1001 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

1002 

1003 def count_pack_files(self) -> int: 

1004 """Count the number of pack files. 

1005 

1006 Returns: 

1007 Number of pack files (excluding those with .keep files) 

1008 """ 

1009 count = 0 

1010 for pack in self.packs: 

1011 # Check if there's a .keep file for this pack 

1012 keep_path = pack._basename + ".keep" 

1013 if not os.path.exists(keep_path): 

1014 count += 1 

1015 return count 

1016 

1017 def _iter_alternate_objects(self) -> Iterator[ObjectID]: 

1018 """Iterate over the SHAs of all the objects in alternate stores.""" 

1019 for alternate in self.alternates: 

1020 yield from alternate 

1021 

1022 def _iter_loose_objects(self) -> Iterator[ObjectID]: 

1023 """Iterate over the SHAs of all loose objects.""" 

1024 raise NotImplementedError(self._iter_loose_objects) 

1025 

1026 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None: 

1027 raise NotImplementedError(self._get_loose_object) 

1028 

1029 def delete_loose_object(self, sha: ObjectID) -> None: 

1030 """Delete a loose object. 

1031 

1032 This method only handles loose objects. For packed objects, 

1033 use repack(exclude=...) to exclude them during repacking. 

1034 """ 

1035 raise NotImplementedError(self.delete_loose_object) 

1036 

1037 def _remove_pack(self, pack: "Pack") -> None: 

1038 raise NotImplementedError(self._remove_pack) 

1039 

1040 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

1041 """Pack loose objects. 

1042 

1043 Args: 

1044 progress: Optional progress reporting callback 

1045 

1046 Returns: Number of objects packed 

1047 """ 

1048 objects: list[tuple[ShaFile, None]] = [] 

1049 for sha in self._iter_loose_objects(): 

1050 obj = self._get_loose_object(sha) 

1051 if obj is not None: 

1052 objects.append((obj, None)) 

1053 self.add_objects(objects, progress=progress) 

1054 for obj, path in objects: 

1055 self.delete_loose_object(obj.id) 

1056 return len(objects) 

1057 

1058 def repack( 

1059 self, 

1060 exclude: Set[bytes] | None = None, 

1061 progress: Callable[[str], None] | None = None, 

1062 ) -> int: 

1063 """Repack the packs in this repository. 

1064 

1065 Note that this implementation is fairly naive and currently keeps all 

1066 objects in memory while it repacks. 

1067 

1068 Args: 

1069 exclude: Optional set of object SHAs to exclude from repacking 

1070 progress: Optional progress reporting callback 

1071 """ 

1072 if exclude is None: 

1073 exclude = set() 

1074 

1075 loose_objects = set() 

1076 excluded_loose_objects = set() 

1077 for sha in self._iter_loose_objects(): 

1078 if sha not in exclude: 

1079 obj = self._get_loose_object(sha) 

1080 if obj is not None: 

1081 loose_objects.add(obj) 

1082 else: 

1083 excluded_loose_objects.add(sha) 

1084 

1085 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

1086 old_packs = {p.name(): p for p in self.packs} 

1087 for name, pack in old_packs.items(): 

1088 objects.update( 

1089 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

1090 ) 

1091 

1092 # Only create a new pack if there are objects to pack 

1093 if objects: 

1094 # The name of the consolidated pack might match the name of a 

1095 # pre-existing pack. Take care not to remove the newly created 

1096 # consolidated pack. 

1097 consolidated = self.add_objects(list(objects), progress=progress) 

1098 if consolidated is not None: 

1099 old_packs.pop(consolidated.name(), None) 

1100 

1101 # Delete loose objects that were packed 

1102 for obj in loose_objects: 

1103 if obj is not None: 

1104 self.delete_loose_object(obj.id) 

1105 # Delete excluded loose objects 

1106 for sha in excluded_loose_objects: 

1107 self.delete_loose_object(sha) 

1108 for name, pack in old_packs.items(): 

1109 self._remove_pack(pack) 

1110 self._update_pack_cache() 

1111 return len(objects) 

1112 

1113 def generate_pack_bitmaps( 

1114 self, 

1115 refs: dict[Ref, ObjectID], 

1116 *, 

1117 commit_interval: int | None = None, 

1118 progress: Callable[[str], None] | None = None, 

1119 ) -> int: 

1120 """Generate bitmap indexes for all packs that don't have them. 

1121 

1122 This generates .bitmap files for packfiles, enabling fast reachability 

1123 queries. Equivalent to the bitmap generation part of 'git repack -b'. 

1124 

1125 Args: 

1126 refs: Dictionary of ref names to commit SHAs 

1127 commit_interval: Include every Nth commit in bitmap index (None for default) 

1128 progress: Optional progress reporting callback 

1129 

1130 Returns: 

1131 Number of bitmaps generated 

1132 """ 

1133 count = 0 

1134 for pack in self.packs: 

1135 pack.ensure_bitmap( 

1136 self, refs, commit_interval=commit_interval, progress=progress 

1137 ) 

1138 count += 1 

1139 

1140 # Update cache to pick up new bitmaps 

1141 self._update_pack_cache() 

1142 

1143 return count 

1144 

1145 def __iter__(self) -> Iterator[ObjectID]: 

1146 """Iterate over the SHAs that are present in this store.""" 

1147 self._update_pack_cache() 

1148 for pack in self._iter_cached_packs(): 

1149 try: 

1150 yield from pack 

1151 except PackFileDisappeared: 

1152 pass 

1153 yield from self._iter_loose_objects() 

1154 yield from self._iter_alternate_objects() 

1155 

1156 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

1157 """Check if a particular object is present by SHA1 and is loose. 

1158 

1159 This does not check alternates. 

1160 """ 

1161 return self._get_loose_object(sha) is not None 

1162 

1163 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

1164 """Obtain the raw fulltext for an object. 

1165 

1166 Args: 

1167 name: sha for the object. 

1168 Returns: tuple with numeric type and object contents. 

1169 """ 

1170 if name == ZERO_SHA: 

1171 raise KeyError(name) 

1172 if len(name) == 40: 

1173 sha = hex_to_sha(cast(ObjectID, name)) 

1174 hexsha = cast(ObjectID, name) 

1175 elif len(name) == 20: 

1176 sha = cast(RawObjectID, name) 

1177 hexsha = None 

1178 else: 

1179 raise AssertionError(f"Invalid object name {name!r}") 

1180 for pack in self._iter_cached_packs(): 

1181 try: 

1182 return pack.get_raw(sha) 

1183 except (KeyError, PackFileDisappeared): 

1184 pass 

1185 if hexsha is None: 

1186 hexsha = sha_to_hex(sha) 

1187 ret = self._get_loose_object(hexsha) 

1188 if ret is not None: 

1189 return ret.type_num, ret.as_raw_string() 

1190 # Maybe something else has added a pack with the object 

1191 # in the mean time? 

1192 for pack in self._update_pack_cache(): 

1193 try: 

1194 return pack.get_raw(sha) 

1195 except KeyError: 

1196 pass 

1197 for alternate in self.alternates: 

1198 try: 

1199 return alternate.get_raw(hexsha) 

1200 except KeyError: 

1201 pass 

1202 raise KeyError(hexsha) 

1203 

1204 def iter_unpacked_subset( 

1205 self, 

1206 shas: Iterable[ObjectID | RawObjectID], 

1207 include_comp: bool = False, 

1208 allow_missing: bool = False, 

1209 convert_ofs_delta: bool = True, 

1210 ) -> Iterator[UnpackedObject]: 

1211 """Iterate over a subset of objects, yielding UnpackedObject instances. 

1212 

1213 Args: 

1214 shas: Set of object SHAs to retrieve 

1215 include_comp: Whether to include compressed data 

1216 allow_missing: If True, skip missing objects; if False, raise KeyError 

1217 convert_ofs_delta: Whether to convert OFS_DELTA objects 

1218 

1219 Returns: 

1220 Iterator of UnpackedObject instances 

1221 

1222 Raises: 

1223 KeyError: If an object is missing and allow_missing is False 

1224 """ 

1225 todo: set[ObjectID | RawObjectID] = set(shas) 

1226 for p in self._iter_cached_packs(): 

1227 for unpacked in p.iter_unpacked_subset( 

1228 todo, 

1229 include_comp=include_comp, 

1230 allow_missing=True, 

1231 convert_ofs_delta=convert_ofs_delta, 

1232 ): 

1233 yield unpacked 

1234 hexsha = sha_to_hex(unpacked.sha()) 

1235 todo.remove(hexsha) 

1236 # Maybe something else has added a pack with the object 

1237 # in the mean time? 

1238 for p in self._update_pack_cache(): 

1239 for unpacked in p.iter_unpacked_subset( 

1240 todo, 

1241 include_comp=include_comp, 

1242 allow_missing=True, 

1243 convert_ofs_delta=convert_ofs_delta, 

1244 ): 

1245 yield unpacked 

1246 hexsha = sha_to_hex(unpacked.sha()) 

1247 todo.remove(hexsha) 

1248 for alternate in self.alternates: 

1249 assert isinstance(alternate, PackBasedObjectStore) 

1250 for unpacked in alternate.iter_unpacked_subset( 

1251 todo, 

1252 include_comp=include_comp, 

1253 allow_missing=True, 

1254 convert_ofs_delta=convert_ofs_delta, 

1255 ): 

1256 yield unpacked 

1257 hexsha = sha_to_hex(unpacked.sha()) 

1258 todo.remove(hexsha) 

1259 

1260 def iterobjects_subset( 

1261 self, shas: Iterable[ObjectID], *, allow_missing: bool = False 

1262 ) -> Iterator[ShaFile]: 

1263 """Iterate over a subset of objects in the store. 

1264 

1265 This method searches for objects in pack files, alternates, and loose storage. 

1266 

1267 Args: 

1268 shas: Iterable of object SHAs to retrieve 

1269 allow_missing: If True, skip missing objects; if False, raise KeyError 

1270 

1271 Returns: 

1272 Iterator of ShaFile objects 

1273 

1274 Raises: 

1275 KeyError: If an object is missing and allow_missing is False 

1276 """ 

1277 todo: set[ObjectID] = set(shas) 

1278 for p in self._iter_cached_packs(): 

1279 for o in p.iterobjects_subset(todo, allow_missing=True): 

1280 yield o 

1281 todo.remove(o.id) 

1282 # Maybe something else has added a pack with the object 

1283 # in the mean time? 

1284 for p in self._update_pack_cache(): 

1285 for o in p.iterobjects_subset(todo, allow_missing=True): 

1286 yield o 

1287 todo.remove(o.id) 

1288 for alternate in self.alternates: 

1289 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

1290 yield o 

1291 todo.remove(o.id) 

1292 for oid in todo: 

1293 loose_obj: ShaFile | None = self._get_loose_object(oid) 

1294 if loose_obj is not None: 

1295 yield loose_obj 

1296 elif not allow_missing: 

1297 raise KeyError(oid) 

1298 

1299 def get_unpacked_object( 

1300 self, sha1: bytes, *, include_comp: bool = False 

1301 ) -> UnpackedObject: 

1302 """Obtain the unpacked object. 

1303 

1304 Args: 

1305 sha1: sha for the object. 

1306 include_comp: Whether to include compression metadata. 

1307 """ 

1308 if sha1 == ZERO_SHA: 

1309 raise KeyError(sha1) 

1310 if len(sha1) == 40: 

1311 sha = hex_to_sha(cast(ObjectID, sha1)) 

1312 hexsha = cast(ObjectID, sha1) 

1313 elif len(sha1) == 20: 

1314 sha = cast(RawObjectID, sha1) 

1315 hexsha = None 

1316 else: 

1317 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1318 for pack in self._iter_cached_packs(): 

1319 try: 

1320 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1321 except (KeyError, PackFileDisappeared): 

1322 pass 

1323 if hexsha is None: 

1324 hexsha = sha_to_hex(sha) 

1325 # Maybe something else has added a pack with the object 

1326 # in the mean time? 

1327 for pack in self._update_pack_cache(): 

1328 try: 

1329 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1330 except KeyError: 

1331 pass 

1332 for alternate in self.alternates: 

1333 assert isinstance(alternate, PackBasedObjectStore) 

1334 try: 

1335 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1336 except KeyError: 

1337 pass 

1338 raise KeyError(hexsha) 

1339 

1340 def add_objects( 

1341 self, 

1342 objects: Sequence[tuple[ShaFile, str | None]], 

1343 progress: Callable[[str], None] | None = None, 

1344 ) -> "Pack | None": 

1345 """Add a set of objects to this object store. 

1346 

1347 Args: 

1348 objects: Iterable over (object, path) tuples, should support 

1349 __len__. 

1350 progress: Optional progress reporting function. 

1351 Returns: Pack object of the objects written. 

1352 """ 

1353 count = len(objects) 

1354 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1355 return self.add_pack_data(count, record_iter, progress=progress) 

1356 

1357 

1358class DiskObjectStore(PackBasedObjectStore): 

1359 """Git-style object store that exists on disk.""" 

1360 

1361 path: str | os.PathLike[str] 

1362 pack_dir: str | os.PathLike[str] 

1363 _alternates: "list[BaseObjectStore] | None" 

1364 _commit_graph: "CommitGraph | None" 

1365 

1366 def __init__( 

1367 self, 

1368 path: str | os.PathLike[str], 

1369 *, 

1370 loose_compression_level: int = -1, 

1371 pack_compression_level: int = -1, 

1372 pack_index_version: int | None = None, 

1373 pack_delta_window_size: int | None = None, 

1374 pack_window_memory: int | None = None, 

1375 pack_delta_cache_size: int | None = None, 

1376 pack_depth: int | None = None, 

1377 pack_threads: int | None = None, 

1378 pack_big_file_threshold: int | None = None, 

1379 fsync_object_files: bool = False, 

1380 pack_write_bitmaps: bool = False, 

1381 pack_write_bitmap_hash_cache: bool = True, 

1382 pack_write_bitmap_lookup_table: bool = True, 

1383 file_mode: int | None = None, 

1384 dir_mode: int | None = None, 

1385 ) -> None: 

1386 """Open an object store. 

1387 

1388 Args: 

1389 path: Path of the object store. 

1390 loose_compression_level: zlib compression level for loose objects 

1391 pack_compression_level: zlib compression level for pack objects 

1392 pack_index_version: pack index version to use (1, 2, or 3) 

1393 pack_delta_window_size: sliding window size for delta compression 

1394 pack_window_memory: memory limit for delta window operations 

1395 pack_delta_cache_size: size of cache for delta operations 

1396 pack_depth: maximum delta chain depth 

1397 pack_threads: number of threads for pack operations 

1398 pack_big_file_threshold: threshold for treating files as big 

1399 fsync_object_files: whether to fsync object files for durability 

1400 pack_write_bitmaps: whether to write bitmap indexes for packs 

1401 pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps 

1402 pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps 

1403 file_mode: File permission mask for shared repository 

1404 dir_mode: Directory permission mask for shared repository 

1405 """ 

1406 super().__init__( 

1407 pack_compression_level=pack_compression_level, 

1408 pack_index_version=pack_index_version, 

1409 pack_delta_window_size=pack_delta_window_size, 

1410 pack_window_memory=pack_window_memory, 

1411 pack_delta_cache_size=pack_delta_cache_size, 

1412 pack_depth=pack_depth, 

1413 pack_threads=pack_threads, 

1414 pack_big_file_threshold=pack_big_file_threshold, 

1415 ) 

1416 self.path = path 

1417 self.pack_dir = os.path.join(self.path, PACKDIR) 

1418 self._alternates = None 

1419 self.loose_compression_level = loose_compression_level 

1420 self.pack_compression_level = pack_compression_level 

1421 self.pack_index_version = pack_index_version 

1422 self.fsync_object_files = fsync_object_files 

1423 self.pack_write_bitmaps = pack_write_bitmaps 

1424 self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache 

1425 self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table 

1426 self.file_mode = file_mode 

1427 self.dir_mode = dir_mode 

1428 

1429 # Commit graph support - lazy loaded 

1430 self._commit_graph = None 

1431 self._use_commit_graph = True # Default to true 

1432 

1433 # Multi-pack-index support - lazy loaded 

1434 self._midx: MultiPackIndex | None = None 

1435 self._use_midx = True # Default to true 

1436 

1437 def __repr__(self) -> str: 

1438 """Return string representation of DiskObjectStore. 

1439 

1440 Returns: 

1441 String representation including the store path 

1442 """ 

1443 return f"<{self.__class__.__name__}({self.path!r})>" 

1444 

1445 @classmethod 

1446 def from_config( 

1447 cls, 

1448 path: str | os.PathLike[str], 

1449 config: "Config", 

1450 *, 

1451 file_mode: int | None = None, 

1452 dir_mode: int | None = None, 

1453 ) -> "DiskObjectStore": 

1454 """Create a DiskObjectStore from a configuration object. 

1455 

1456 Args: 

1457 path: Path to the object store directory 

1458 config: Configuration object to read settings from 

1459 file_mode: Optional file permission mask for shared repository 

1460 dir_mode: Optional directory permission mask for shared repository 

1461 

1462 Returns: 

1463 New DiskObjectStore instance configured according to config 

1464 """ 

1465 try: 

1466 default_compression_level = int( 

1467 config.get((b"core",), b"compression").decode() 

1468 ) 

1469 except KeyError: 

1470 default_compression_level = -1 

1471 try: 

1472 loose_compression_level = int( 

1473 config.get((b"core",), b"looseCompression").decode() 

1474 ) 

1475 except KeyError: 

1476 loose_compression_level = default_compression_level 

1477 try: 

1478 pack_compression_level = int( 

1479 config.get((b"core",), "packCompression").decode() 

1480 ) 

1481 except KeyError: 

1482 pack_compression_level = default_compression_level 

1483 try: 

1484 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1485 except KeyError: 

1486 pack_index_version = None 

1487 

1488 # Read pack configuration options 

1489 try: 

1490 pack_delta_window_size = int( 

1491 config.get((b"pack",), b"deltaWindowSize").decode() 

1492 ) 

1493 except KeyError: 

1494 pack_delta_window_size = None 

1495 try: 

1496 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1497 except KeyError: 

1498 pack_window_memory = None 

1499 try: 

1500 pack_delta_cache_size = int( 

1501 config.get((b"pack",), b"deltaCacheSize").decode() 

1502 ) 

1503 except KeyError: 

1504 pack_delta_cache_size = None 

1505 try: 

1506 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1507 except KeyError: 

1508 pack_depth = None 

1509 try: 

1510 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1511 except KeyError: 

1512 pack_threads = None 

1513 try: 

1514 pack_big_file_threshold = int( 

1515 config.get((b"pack",), b"bigFileThreshold").decode() 

1516 ) 

1517 except KeyError: 

1518 pack_big_file_threshold = None 

1519 

1520 # Read core.commitGraph setting 

1521 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1522 

1523 # Read core.multiPackIndex setting 

1524 use_midx = config.get_boolean((b"core",), b"multiPackIndex", True) 

1525 

1526 # Read core.fsyncObjectFiles setting 

1527 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False) 

1528 

1529 # Read bitmap settings 

1530 pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False) 

1531 pack_write_bitmap_hash_cache = config.get_boolean( 

1532 (b"pack",), b"writeBitmapHashCache", True 

1533 ) 

1534 pack_write_bitmap_lookup_table = config.get_boolean( 

1535 (b"pack",), b"writeBitmapLookupTable", True 

1536 ) 

1537 # Also check repack.writeBitmaps for backwards compatibility 

1538 if not pack_write_bitmaps: 

1539 pack_write_bitmaps = config.get_boolean( 

1540 (b"repack",), b"writeBitmaps", False 

1541 ) 

1542 

1543 instance = cls( 

1544 path, 

1545 loose_compression_level=loose_compression_level, 

1546 pack_compression_level=pack_compression_level, 

1547 pack_index_version=pack_index_version, 

1548 pack_delta_window_size=pack_delta_window_size, 

1549 pack_window_memory=pack_window_memory, 

1550 pack_delta_cache_size=pack_delta_cache_size, 

1551 pack_depth=pack_depth, 

1552 pack_threads=pack_threads, 

1553 pack_big_file_threshold=pack_big_file_threshold, 

1554 fsync_object_files=fsync_object_files, 

1555 pack_write_bitmaps=pack_write_bitmaps, 

1556 pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache, 

1557 pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table, 

1558 file_mode=file_mode, 

1559 dir_mode=dir_mode, 

1560 ) 

1561 instance._use_commit_graph = use_commit_graph 

1562 instance._use_midx = use_midx 

1563 return instance 

1564 

1565 @property 

1566 def alternates(self) -> list["BaseObjectStore"]: 

1567 """Get the list of alternate object stores. 

1568 

1569 Reads from .git/objects/info/alternates if not already cached. 

1570 

1571 Returns: 

1572 List of DiskObjectStore instances for alternate object directories 

1573 """ 

1574 if self._alternates is not None: 

1575 return self._alternates 

1576 self._alternates = [] 

1577 for path in self._read_alternate_paths(): 

1578 self._alternates.append(DiskObjectStore(path)) 

1579 return self._alternates 

1580 

1581 def _read_alternate_paths(self) -> Iterator[str]: 

1582 try: 

1583 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1584 except FileNotFoundError: 

1585 return 

1586 with f: 

1587 for line in f.readlines(): 

1588 line = line.rstrip(b"\n") 

1589 if line.startswith(b"#"): 

1590 continue 

1591 if os.path.isabs(line): 

1592 yield os.fsdecode(line) 

1593 else: 

1594 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1595 

1596 def add_alternate_path(self, path: str | os.PathLike[str]) -> None: 

1597 """Add an alternate path to this object store.""" 

1598 info_dir = os.path.join(self.path, INFODIR) 

1599 try: 

1600 os.mkdir(info_dir) 

1601 if self.dir_mode is not None: 

1602 os.chmod(info_dir, self.dir_mode) 

1603 except FileExistsError: 

1604 pass 

1605 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1606 mask = self.file_mode if self.file_mode is not None else 0o644 

1607 with GitFile(alternates_path, "wb", mask=mask) as f: 

1608 try: 

1609 orig_f = open(alternates_path, "rb") 

1610 except FileNotFoundError: 

1611 pass 

1612 else: 

1613 with orig_f: 

1614 f.write(orig_f.read()) 

1615 f.write(os.fsencode(path) + b"\n") 

1616 

1617 if not os.path.isabs(path): 

1618 path = os.path.join(self.path, path) 

1619 self.alternates.append(DiskObjectStore(path)) 

1620 

1621 def _update_pack_cache(self) -> list[Pack]: 

1622 """Read and iterate over new pack files and cache them.""" 

1623 try: 

1624 pack_dir_contents = os.listdir(self.pack_dir) 

1625 except FileNotFoundError: 

1626 self.close() 

1627 return [] 

1628 pack_files = set() 

1629 for name in pack_dir_contents: 

1630 if name.startswith("pack-") and name.endswith(".pack"): 

1631 # verify that idx exists first (otherwise the pack was not yet 

1632 # fully written) 

1633 idx_name = os.path.splitext(name)[0] + ".idx" 

1634 if idx_name in pack_dir_contents: 

1635 pack_name = name[: -len(".pack")] 

1636 pack_files.add(pack_name) 

1637 

1638 # Open newly appeared pack files 

1639 new_packs = [] 

1640 for f in pack_files: 

1641 if f not in self._pack_cache: 

1642 pack = Pack( 

1643 os.path.join(self.pack_dir, f), 

1644 delta_window_size=self.pack_delta_window_size, 

1645 window_memory=self.pack_window_memory, 

1646 delta_cache_size=self.pack_delta_cache_size, 

1647 depth=self.pack_depth, 

1648 threads=self.pack_threads, 

1649 big_file_threshold=self.pack_big_file_threshold, 

1650 ) 

1651 new_packs.append(pack) 

1652 self._pack_cache[f] = pack 

1653 # Remove disappeared pack files 

1654 for f in set(self._pack_cache) - pack_files: 

1655 self._pack_cache.pop(f).close() 

1656 return new_packs 

1657 

1658 def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str: 

1659 # Check from object dir 

1660 return hex_to_filename(os.fspath(self.path), sha) 

1661 

1662 def _iter_loose_objects(self) -> Iterator[ObjectID]: 

1663 for base in os.listdir(self.path): 

1664 if len(base) != 2: 

1665 continue 

1666 for rest in os.listdir(os.path.join(self.path, base)): 

1667 sha = os.fsencode(base + rest) 

1668 if not valid_hexsha(sha): 

1669 continue 

1670 yield ObjectID(sha) 

1671 

1672 def count_loose_objects(self) -> int: 

1673 """Count the number of loose objects in the object store. 

1674 

1675 Returns: 

1676 Number of loose objects 

1677 """ 

1678 count = 0 

1679 if not os.path.exists(self.path): 

1680 return 0 

1681 

1682 for i in range(256): 

1683 subdir = os.path.join(self.path, f"{i:02x}") 

1684 try: 

1685 count += len( 

1686 [ 

1687 name 

1688 for name in os.listdir(subdir) 

1689 if len(name) == 38 # 40 - 2 for the prefix 

1690 ] 

1691 ) 

1692 except FileNotFoundError: 

1693 # Directory may have been removed or is inaccessible 

1694 continue 

1695 

1696 return count 

1697 

1698 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None: 

1699 path = self._get_shafile_path(sha) 

1700 try: 

1701 return ShaFile.from_path(path) 

1702 except FileNotFoundError: 

1703 return None 

1704 

1705 def delete_loose_object(self, sha: ObjectID) -> None: 

1706 """Delete a loose object from disk. 

1707 

1708 Args: 

1709 sha: SHA1 of the object to delete 

1710 

1711 Raises: 

1712 FileNotFoundError: If the object file doesn't exist 

1713 """ 

1714 os.remove(self._get_shafile_path(sha)) 

1715 

1716 def get_object_mtime(self, sha: ObjectID) -> float: 

1717 """Get the modification time of an object. 

1718 

1719 Args: 

1720 sha: SHA1 of the object 

1721 

1722 Returns: 

1723 Modification time as seconds since epoch 

1724 

1725 Raises: 

1726 KeyError: if the object is not found 

1727 """ 

1728 # First check if it's a loose object 

1729 if self.contains_loose(sha): 

1730 path = self._get_shafile_path(sha) 

1731 try: 

1732 return os.path.getmtime(path) 

1733 except FileNotFoundError: 

1734 pass 

1735 

1736 # Check if it's in a pack file 

1737 for pack in self.packs: 

1738 try: 

1739 if sha in pack: 

1740 # Use the pack file's mtime for packed objects 

1741 pack_path = pack._data_path 

1742 try: 

1743 return os.path.getmtime(pack_path) 

1744 except (FileNotFoundError, AttributeError): 

1745 pass 

1746 except PackFileDisappeared: 

1747 pass 

1748 

1749 raise KeyError(sha) 

1750 

1751 def _remove_pack(self, pack: Pack) -> None: 

1752 try: 

1753 del self._pack_cache[os.path.basename(pack._basename)] 

1754 except KeyError: 

1755 pass 

1756 pack.close() 

1757 os.remove(pack.data.path) 

1758 if hasattr(pack.index, "path"): 

1759 os.remove(pack.index.path) 

1760 

1761 def _get_pack_basepath( 

1762 self, entries: Iterable[tuple[bytes, int, int | None]] 

1763 ) -> str: 

1764 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1765 # TODO: Handle self.pack_dir being bytes 

1766 suffix = suffix_bytes.decode("ascii") 

1767 return os.path.join(self.pack_dir, "pack-" + suffix) 

1768 

1769 def _complete_pack( 

1770 self, 

1771 f: BinaryIO, 

1772 path: str, 

1773 num_objects: int, 

1774 indexer: PackIndexer, 

1775 progress: Callable[..., None] | None = None, 

1776 refs: dict[Ref, ObjectID] | None = None, 

1777 ) -> Pack: 

1778 """Move a specific file containing a pack into the pack directory. 

1779 

1780 Note: The file should be on the same file system as the 

1781 packs directory. 

1782 

1783 Args: 

1784 f: Open file object for the pack. 

1785 path: Path to the pack file. 

1786 num_objects: Number of objects in the pack. 

1787 indexer: A PackIndexer for indexing the pack. 

1788 progress: Optional progress reporting function. 

1789 refs: Optional dictionary of refs for bitmap generation. 

1790 """ 

1791 entries = [] 

1792 for i, entry in enumerate(indexer): 

1793 if progress is not None: 

1794 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1795 entries.append(entry) 

1796 

1797 pack_sha, extra_entries = extend_pack( 

1798 f, 

1799 set(indexer.ext_refs()), 

1800 get_raw=self.get_raw, 

1801 compression_level=self.pack_compression_level, 

1802 progress=progress, 

1803 ) 

1804 f.flush() 

1805 if self.fsync_object_files: 

1806 try: 

1807 fileno = f.fileno() 

1808 except AttributeError as e: 

1809 raise OSError("fsync requested but file has no fileno()") from e 

1810 else: 

1811 os.fsync(fileno) 

1812 f.close() 

1813 

1814 entries.extend(extra_entries) 

1815 

1816 # Move the pack in. 

1817 entries.sort() 

1818 pack_base_name = self._get_pack_basepath(entries) 

1819 

1820 for pack in self.packs: 

1821 if pack._basename == pack_base_name: 

1822 return pack 

1823 

1824 target_pack_path = pack_base_name + ".pack" 

1825 target_index_path = pack_base_name + ".idx" 

1826 if sys.platform == "win32": 

1827 # Windows might have the target pack file lingering. Attempt 

1828 # removal, silently passing if the target does not exist. 

1829 with suppress(FileNotFoundError): 

1830 os.remove(target_pack_path) 

1831 os.rename(path, target_pack_path) 

1832 

1833 # Write the index. 

1834 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1835 with GitFile( 

1836 target_index_path, 

1837 "wb", 

1838 mask=mask, 

1839 fsync=self.fsync_object_files, 

1840 ) as index_file: 

1841 write_pack_index( 

1842 index_file, entries, pack_sha, version=self.pack_index_version 

1843 ) 

1844 

1845 # Generate bitmap if configured and refs are available 

1846 if self.pack_write_bitmaps and refs: 

1847 from .bitmap import generate_bitmap, write_bitmap 

1848 from .pack import load_pack_index_file 

1849 

1850 if progress: 

1851 progress("Generating bitmap index\r".encode("ascii")) 

1852 

1853 # Load the index we just wrote 

1854 with open(target_index_path, "rb") as idx_file: 

1855 pack_index = load_pack_index_file( 

1856 os.path.basename(target_index_path), idx_file 

1857 ) 

1858 

1859 # Generate the bitmap 

1860 bitmap = generate_bitmap( 

1861 pack_index=pack_index, 

1862 object_store=self, 

1863 refs=refs, 

1864 pack_checksum=pack_sha, 

1865 include_hash_cache=self.pack_write_bitmap_hash_cache, 

1866 include_lookup_table=self.pack_write_bitmap_lookup_table, 

1867 progress=lambda msg: progress(msg.encode("ascii")) 

1868 if progress and isinstance(msg, str) 

1869 else None, 

1870 ) 

1871 

1872 # Write the bitmap 

1873 target_bitmap_path = pack_base_name + ".bitmap" 

1874 write_bitmap(target_bitmap_path, bitmap) 

1875 

1876 if progress: 

1877 progress("Bitmap index written\r".encode("ascii")) 

1878 

1879 # Add the pack to the store and return it. 

1880 final_pack = Pack( 

1881 pack_base_name, 

1882 delta_window_size=self.pack_delta_window_size, 

1883 window_memory=self.pack_window_memory, 

1884 delta_cache_size=self.pack_delta_cache_size, 

1885 depth=self.pack_depth, 

1886 threads=self.pack_threads, 

1887 big_file_threshold=self.pack_big_file_threshold, 

1888 ) 

1889 final_pack.check_length_and_checksum() 

1890 self._add_cached_pack(pack_base_name, final_pack) 

1891 return final_pack 

1892 

1893 def add_thin_pack( 

1894 self, 

1895 read_all: Callable[[int], bytes], 

1896 read_some: Callable[[int], bytes] | None, 

1897 progress: Callable[..., None] | None = None, 

1898 ) -> "Pack": 

1899 """Add a new thin pack to this object store. 

1900 

1901 Thin packs are packs that contain deltas with parents that exist 

1902 outside the pack. They should never be placed in the object store 

1903 directly, and always indexed and completed as they are copied. 

1904 

1905 Args: 

1906 read_all: Read function that blocks until the number of 

1907 requested bytes are read. 

1908 read_some: Read function that returns at least one byte, but may 

1909 not return the number of bytes requested. 

1910 progress: Optional progress reporting function. 

1911 Returns: A Pack object pointing at the now-completed thin pack in the 

1912 objects/pack directory. 

1913 """ 

1914 import tempfile 

1915 

1916 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1917 with os.fdopen(fd, "w+b") as f: 

1918 os.chmod(path, PACK_MODE) 

1919 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type] 

1920 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type] 

1921 copier.verify(progress=progress) 

1922 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1923 

1924 def add_pack( 

1925 self, 

1926 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1927 """Add a new pack to this object store. 

1928 

1929 Returns: Fileobject to write to, a commit function to 

1930 call when the pack is finished and an abort 

1931 function. 

1932 """ 

1933 import tempfile 

1934 

1935 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1936 f = os.fdopen(fd, "w+b") 

1937 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1938 os.chmod(path, mask) 

1939 

1940 def commit() -> "Pack | None": 

1941 if f.tell() > 0: 

1942 f.seek(0) 

1943 

1944 with PackData(path, f) as pd: 

1945 indexer = PackIndexer.for_pack_data( 

1946 pd, 

1947 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

1948 ) 

1949 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

1950 else: 

1951 f.close() 

1952 os.remove(path) 

1953 return None 

1954 

1955 def abort() -> None: 

1956 f.close() 

1957 os.remove(path) 

1958 

1959 return f, commit, abort # type: ignore[return-value] 

1960 

1961 def add_object(self, obj: ShaFile) -> None: 

1962 """Add a single object to this object store. 

1963 

1964 Args: 

1965 obj: Object to add 

1966 """ 

1967 path = self._get_shafile_path(obj.id) 

1968 dir = os.path.dirname(path) 

1969 try: 

1970 os.mkdir(dir) 

1971 if self.dir_mode is not None: 

1972 os.chmod(dir, self.dir_mode) 

1973 except FileExistsError: 

1974 pass 

1975 if os.path.exists(path): 

1976 return # Already there, no need to write again 

1977 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1978 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f: 

1979 f.write( 

1980 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1981 ) 

1982 

1983 @classmethod 

1984 def init( 

1985 cls, 

1986 path: str | os.PathLike[str], 

1987 *, 

1988 file_mode: int | None = None, 

1989 dir_mode: int | None = None, 

1990 ) -> "DiskObjectStore": 

1991 """Initialize a new disk object store. 

1992 

1993 Creates the necessary directory structure for a Git object store. 

1994 

1995 Args: 

1996 path: Path where the object store should be created 

1997 file_mode: Optional file permission mask for shared repository 

1998 dir_mode: Optional directory permission mask for shared repository 

1999 

2000 Returns: 

2001 New DiskObjectStore instance 

2002 """ 

2003 try: 

2004 os.mkdir(path) 

2005 if dir_mode is not None: 

2006 os.chmod(path, dir_mode) 

2007 except FileExistsError: 

2008 pass 

2009 info_path = os.path.join(path, "info") 

2010 pack_path = os.path.join(path, PACKDIR) 

2011 os.mkdir(info_path) 

2012 os.mkdir(pack_path) 

2013 if dir_mode is not None: 

2014 os.chmod(info_path, dir_mode) 

2015 os.chmod(pack_path, dir_mode) 

2016 return cls(path, file_mode=file_mode, dir_mode=dir_mode) 

2017 

2018 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

2019 """Iterate over all object SHAs with the given prefix. 

2020 

2021 Args: 

2022 prefix: Hex prefix to search for (as bytes) 

2023 

2024 Returns: 

2025 Iterator of object SHAs (as ObjectID) matching the prefix 

2026 """ 

2027 if len(prefix) < 2: 

2028 yield from super().iter_prefix(prefix) 

2029 return 

2030 seen = set() 

2031 dir = prefix[:2].decode() 

2032 rest = prefix[2:].decode() 

2033 try: 

2034 for name in os.listdir(os.path.join(self.path, dir)): 

2035 if name.startswith(rest): 

2036 sha = ObjectID(os.fsencode(dir + name)) 

2037 if sha not in seen: 

2038 seen.add(sha) 

2039 yield sha 

2040 except FileNotFoundError: 

2041 pass 

2042 

2043 for p in self.packs: 

2044 bin_prefix = ( 

2045 binascii.unhexlify(prefix) 

2046 if len(prefix) % 2 == 0 

2047 else binascii.unhexlify(prefix[:-1]) 

2048 ) 

2049 for bin_sha in p.index.iter_prefix(bin_prefix): 

2050 sha = sha_to_hex(bin_sha) 

2051 if sha.startswith(prefix) and sha not in seen: 

2052 seen.add(sha) 

2053 yield sha 

2054 for alternate in self.alternates: 

2055 for sha in alternate.iter_prefix(prefix): 

2056 if sha not in seen: 

2057 seen.add(sha) 

2058 yield sha 

2059 

2060 def get_commit_graph(self) -> "CommitGraph | None": 

2061 """Get the commit graph for this object store. 

2062 

2063 Returns: 

2064 CommitGraph object if available, None otherwise 

2065 """ 

2066 if not self._use_commit_graph: 

2067 return None 

2068 

2069 if self._commit_graph is None: 

2070 from .commit_graph import read_commit_graph 

2071 

2072 # Look for commit graph in our objects directory 

2073 graph_file = os.path.join(self.path, "info", "commit-graph") 

2074 if os.path.exists(graph_file): 

2075 self._commit_graph = read_commit_graph(graph_file) 

2076 return self._commit_graph 

2077 

2078 def get_midx(self) -> MultiPackIndex | None: 

2079 """Get the multi-pack-index for this object store. 

2080 

2081 Returns: 

2082 MultiPackIndex object if available, None otherwise 

2083 

2084 Raises: 

2085 ValueError: If MIDX file is corrupt 

2086 OSError: If MIDX file cannot be read 

2087 """ 

2088 if not self._use_midx: 

2089 return None 

2090 

2091 if self._midx is None: 

2092 # Look for MIDX in pack directory 

2093 midx_file = os.path.join(self.pack_dir, "multi-pack-index") 

2094 if os.path.exists(midx_file): 

2095 self._midx = load_midx(midx_file) 

2096 return self._midx 

2097 

2098 def _get_pack_by_name(self, pack_name: str) -> Pack: 

2099 """Get a pack by its base name. 

2100 

2101 Args: 

2102 pack_name: Base name of the pack (e.g., 'pack-abc123.pack' or 'pack-abc123.idx') 

2103 

2104 Returns: 

2105 Pack object 

2106 

2107 Raises: 

2108 KeyError: If pack doesn't exist 

2109 """ 

2110 # Remove .pack or .idx extension if present 

2111 if pack_name.endswith(".pack"): 

2112 base_name = pack_name[:-5] 

2113 elif pack_name.endswith(".idx"): 

2114 base_name = pack_name[:-4] 

2115 else: 

2116 base_name = pack_name 

2117 

2118 # Check if already in cache 

2119 if base_name in self._pack_cache: 

2120 return self._pack_cache[base_name] 

2121 

2122 # Load the pack 

2123 pack_path = os.path.join(self.pack_dir, base_name) 

2124 if not os.path.exists(pack_path + ".pack"): 

2125 raise KeyError(f"Pack {pack_name} not found") 

2126 

2127 pack = Pack( 

2128 pack_path, 

2129 delta_window_size=self.pack_delta_window_size, 

2130 window_memory=self.pack_window_memory, 

2131 delta_cache_size=self.pack_delta_cache_size, 

2132 depth=self.pack_depth, 

2133 threads=self.pack_threads, 

2134 big_file_threshold=self.pack_big_file_threshold, 

2135 ) 

2136 self._pack_cache[base_name] = pack 

2137 return pack 

2138 

2139 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

2140 """Check if a particular object is present by SHA1 and is packed. 

2141 

2142 This checks the MIDX first if available, then falls back to checking 

2143 individual pack indexes. 

2144 

2145 Args: 

2146 sha: Binary SHA of the object 

2147 

2148 Returns: 

2149 True if the object is in a pack file 

2150 """ 

2151 # Check MIDX first for faster lookup 

2152 midx = self.get_midx() 

2153 if midx is not None and sha in midx: 

2154 return True 

2155 

2156 # Fall back to checking individual packs 

2157 return super().contains_packed(sha) 

2158 

2159 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

2160 """Obtain the raw fulltext for an object. 

2161 

2162 This uses the MIDX if available for faster lookups. 

2163 

2164 Args: 

2165 name: SHA for the object (20 bytes binary or 40 bytes hex) 

2166 

2167 Returns: 

2168 Tuple with numeric type and object contents 

2169 

2170 Raises: 

2171 KeyError: If object not found 

2172 """ 

2173 if name == ZERO_SHA: 

2174 raise KeyError(name) 

2175 

2176 sha: RawObjectID 

2177 if len(name) == 40: 

2178 # name is ObjectID (hex), convert to RawObjectID 

2179 sha = hex_to_sha(cast(ObjectID, name)) 

2180 elif len(name) == 20: 

2181 # name is already RawObjectID (binary) 

2182 sha = RawObjectID(name) 

2183 else: 

2184 raise AssertionError(f"Invalid object name {name!r}") 

2185 

2186 # Try MIDX first for faster lookup 

2187 midx = self.get_midx() 

2188 if midx is not None: 

2189 result = midx.object_offset(sha) 

2190 if result is not None: 

2191 pack_name, _offset = result 

2192 try: 

2193 pack = self._get_pack_by_name(pack_name) 

2194 return pack.get_raw(sha) 

2195 except (KeyError, PackFileDisappeared): 

2196 # Pack disappeared or object not found, fall through to standard lookup 

2197 pass 

2198 

2199 # Fall back to the standard implementation 

2200 return super().get_raw(name) 

2201 

2202 def write_midx(self) -> bytes: 

2203 """Write a multi-pack-index file for this object store. 

2204 

2205 Creates a MIDX file that indexes all pack files in the pack directory. 

2206 

2207 Returns: 

2208 SHA-1 checksum of the written MIDX file 

2209 

2210 Raises: 

2211 OSError: If the pack directory doesn't exist or MIDX can't be written 

2212 """ 

2213 from .midx import write_midx_file 

2214 

2215 # Get all pack files 

2216 packs = self.packs 

2217 if not packs: 

2218 # No packs to index 

2219 return b"\x00" * 20 

2220 

2221 # Collect entries from all packs 

2222 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = [] 

2223 

2224 for pack in packs: 

2225 # Git stores .idx extension in MIDX, not .pack 

2226 pack_name = os.path.basename(pack._basename) + ".idx" 

2227 entries = list(pack.index.iterentries()) 

2228 pack_entries.append((pack_name, entries)) 

2229 

2230 # Write MIDX file 

2231 midx_path = os.path.join(self.pack_dir, "multi-pack-index") 

2232 return write_midx_file(midx_path, pack_entries) 

2233 

2234 def write_commit_graph( 

2235 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True 

2236 ) -> None: 

2237 """Write a commit graph file for this object store. 

2238 

2239 Args: 

2240 refs: List of refs to include. If None, includes all refs from object store. 

2241 reachable: If True, includes all commits reachable from refs. 

2242 If False, only includes the direct ref targets. 

2243 """ 

2244 from .commit_graph import get_reachable_commits 

2245 

2246 if refs is None: 

2247 # Get all commit objects from the object store 

2248 all_refs = [] 

2249 # Iterate through all objects to find commits 

2250 for sha in self: 

2251 try: 

2252 obj = self[sha] 

2253 if obj.type_name == b"commit": 

2254 all_refs.append(sha) 

2255 except KeyError: 

2256 continue 

2257 else: 

2258 # Use provided refs 

2259 all_refs = list(refs) 

2260 

2261 if not all_refs: 

2262 return # No commits to include 

2263 

2264 if reachable: 

2265 # Get all reachable commits 

2266 commit_ids = get_reachable_commits(self, all_refs) 

2267 else: 

2268 # Just use the direct ref targets (already ObjectIDs) 

2269 commit_ids = all_refs 

2270 

2271 if commit_ids: 

2272 # Write commit graph directly to our object store path 

2273 # Generate the commit graph 

2274 from .commit_graph import generate_commit_graph 

2275 

2276 graph = generate_commit_graph(self, commit_ids) 

2277 

2278 if graph.entries: 

2279 # Ensure the info directory exists 

2280 info_dir = os.path.join(self.path, "info") 

2281 os.makedirs(info_dir, exist_ok=True) 

2282 if self.dir_mode is not None: 

2283 os.chmod(info_dir, self.dir_mode) 

2284 

2285 # Write using GitFile for atomic operation 

2286 graph_path = os.path.join(info_dir, "commit-graph") 

2287 mask = self.file_mode if self.file_mode is not None else 0o644 

2288 with GitFile(graph_path, "wb", mask=mask) as f: 

2289 assert isinstance( 

2290 f, _GitFile 

2291 ) # GitFile in write mode always returns _GitFile 

2292 graph.write_to_file(f) 

2293 

2294 # Clear cached commit graph so it gets reloaded 

2295 self._commit_graph = None 

2296 

2297 def prune(self, grace_period: int | None = None) -> None: 

2298 """Prune/clean up this object store. 

2299 

2300 This removes temporary files that were left behind by interrupted 

2301 pack operations. These are files that start with ``tmp_pack_`` in the 

2302 repository directory or files with .pack extension but no corresponding 

2303 .idx file in the pack directory. 

2304 

2305 Args: 

2306 grace_period: Grace period in seconds for removing temporary files. 

2307 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

2308 """ 

2309 import glob 

2310 

2311 if grace_period is None: 

2312 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

2313 

2314 # Clean up tmp_pack_* files in the repository directory 

2315 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

2316 # Check if file is old enough (more than grace period) 

2317 mtime = os.path.getmtime(tmp_file) 

2318 if time.time() - mtime > grace_period: 

2319 os.remove(tmp_file) 

2320 

2321 # Clean up orphaned .pack files without corresponding .idx files 

2322 try: 

2323 pack_dir_contents = os.listdir(self.pack_dir) 

2324 except FileNotFoundError: 

2325 return 

2326 

2327 pack_files = {} 

2328 idx_files = set() 

2329 

2330 for name in pack_dir_contents: 

2331 if name.endswith(".pack"): 

2332 base_name = name[:-5] # Remove .pack extension 

2333 pack_files[base_name] = name 

2334 elif name.endswith(".idx"): 

2335 base_name = name[:-4] # Remove .idx extension 

2336 idx_files.add(base_name) 

2337 

2338 # Remove .pack files without corresponding .idx files 

2339 for base_name, pack_name in pack_files.items(): 

2340 if base_name not in idx_files: 

2341 pack_path = os.path.join(self.pack_dir, pack_name) 

2342 # Check if file is old enough (more than grace period) 

2343 mtime = os.path.getmtime(pack_path) 

2344 if time.time() - mtime > grace_period: 

2345 os.remove(pack_path) 

2346 

2347 def close(self) -> None: 

2348 """Close the object store and release resources. 

2349 

2350 This method closes all cached pack files, MIDX, and frees associated resources. 

2351 """ 

2352 # Close MIDX if it's loaded 

2353 if self._midx is not None: 

2354 self._midx.close() 

2355 self._midx = None 

2356 

2357 # Close alternates 

2358 if self._alternates is not None: 

2359 for alt in self._alternates: 

2360 alt.close() 

2361 self._alternates = None 

2362 

2363 # Call parent class close to handle pack files 

2364 super().close() 

2365 

2366 

2367class MemoryObjectStore(PackCapableObjectStore): 

2368 """Object store that keeps all objects in memory.""" 

2369 

2370 def __init__(self) -> None: 

2371 """Initialize a MemoryObjectStore. 

2372 

2373 Creates an empty in-memory object store. 

2374 """ 

2375 super().__init__() 

2376 self._data: dict[ObjectID, ShaFile] = {} 

2377 self.pack_compression_level = -1 

2378 

2379 def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID: 

2380 if len(sha) == 40: 

2381 return cast(ObjectID, sha) 

2382 elif len(sha) == 20: 

2383 return sha_to_hex(cast(RawObjectID, sha)) 

2384 else: 

2385 raise ValueError(f"Invalid sha {sha!r}") 

2386 

2387 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

2388 """Check if a particular object is present by SHA1 and is loose.""" 

2389 return self._to_hexsha(sha) in self._data 

2390 

2391 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

2392 """Check if a particular object is present by SHA1 and is packed.""" 

2393 return False 

2394 

2395 def __iter__(self) -> Iterator[ObjectID]: 

2396 """Iterate over the SHAs that are present in this store.""" 

2397 return iter(self._data.keys()) 

2398 

2399 @property 

2400 def packs(self) -> list[Pack]: 

2401 """List with pack objects.""" 

2402 return [] 

2403 

2404 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

2405 """Obtain the raw text for an object. 

2406 

2407 Args: 

2408 name: sha for the object. 

2409 Returns: tuple with numeric type and object contents. 

2410 """ 

2411 obj = self[self._to_hexsha(name)] 

2412 return obj.type_num, obj.as_raw_string() 

2413 

2414 def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile: 

2415 """Retrieve an object by SHA. 

2416 

2417 Args: 

2418 name: SHA of the object (as hex string or bytes) 

2419 

2420 Returns: 

2421 Copy of the ShaFile object 

2422 

2423 Raises: 

2424 KeyError: If the object is not found 

2425 """ 

2426 return self._data[self._to_hexsha(name)].copy() 

2427 

2428 def __delitem__(self, name: ObjectID) -> None: 

2429 """Delete an object from this store, for testing only.""" 

2430 del self._data[self._to_hexsha(name)] 

2431 

2432 def add_object(self, obj: ShaFile) -> None: 

2433 """Add a single object to this object store.""" 

2434 self._data[obj.id] = obj.copy() 

2435 

2436 def add_objects( 

2437 self, 

2438 objects: Iterable[tuple[ShaFile, str | None]], 

2439 progress: Callable[[str], None] | None = None, 

2440 ) -> None: 

2441 """Add a set of objects to this object store. 

2442 

2443 Args: 

2444 objects: Iterable over a list of (object, path) tuples 

2445 progress: Optional progress reporting function. 

2446 """ 

2447 for obj, path in objects: 

2448 self.add_object(obj) 

2449 

2450 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2451 """Add a new pack to this object store. 

2452 

2453 Because this object store doesn't support packs, we extract and add the 

2454 individual objects. 

2455 

2456 Returns: Fileobject to write to and a commit function to 

2457 call when the pack is finished. 

2458 """ 

2459 from tempfile import SpooledTemporaryFile 

2460 

2461 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

2462 

2463 def commit() -> None: 

2464 size = f.tell() 

2465 if size > 0: 

2466 f.seek(0) 

2467 

2468 p = PackData.from_file(f, size) 

2469 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type] 

2470 self.add_object(obj) 

2471 p.close() 

2472 f.close() 

2473 else: 

2474 f.close() 

2475 

2476 def abort() -> None: 

2477 f.close() 

2478 

2479 return f, commit, abort # type: ignore[return-value] 

2480 

2481 def add_pack_data( 

2482 self, 

2483 count: int, 

2484 unpacked_objects: Iterator[UnpackedObject], 

2485 progress: Callable[[str], None] | None = None, 

2486 ) -> None: 

2487 """Add pack data to this object store. 

2488 

2489 Args: 

2490 count: Number of items to add 

2491 unpacked_objects: Iterator of UnpackedObject instances 

2492 progress: Optional progress reporting function. 

2493 """ 

2494 if count == 0: 

2495 return 

2496 

2497 # Since MemoryObjectStore doesn't support pack files, we need to 

2498 # extract individual objects. To handle deltas properly, we write 

2499 # to a temporary pack and then use PackInflater to resolve them. 

2500 f, commit, abort = self.add_pack() 

2501 try: 

2502 write_pack_data( 

2503 f.write, 

2504 unpacked_objects, 

2505 num_records=count, 

2506 progress=progress, 

2507 ) 

2508 except BaseException: 

2509 abort() 

2510 raise 

2511 else: 

2512 commit() 

2513 

2514 def add_thin_pack( 

2515 self, 

2516 read_all: Callable[[], bytes], 

2517 read_some: Callable[[int], bytes], 

2518 progress: Callable[[str], None] | None = None, 

2519 ) -> None: 

2520 """Add a new thin pack to this object store. 

2521 

2522 Thin packs are packs that contain deltas with parents that exist 

2523 outside the pack. Because this object store doesn't support packs, we 

2524 extract and add the individual objects. 

2525 

2526 Args: 

2527 read_all: Read function that blocks until the number of 

2528 requested bytes are read. 

2529 read_some: Read function that returns at least one byte, but may 

2530 not return the number of bytes requested. 

2531 progress: Optional progress reporting function. 

2532 """ 

2533 f, commit, abort = self.add_pack() 

2534 try: 

2535 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type] 

2536 copier.verify() 

2537 except BaseException: 

2538 abort() 

2539 raise 

2540 else: 

2541 commit() 

2542 

2543 

2544class ObjectIterator(Protocol): 

2545 """Interface for iterating over objects.""" 

2546 

2547 def iterobjects(self) -> Iterator[ShaFile]: 

2548 """Iterate over all objects. 

2549 

2550 Returns: 

2551 Iterator of ShaFile objects 

2552 """ 

2553 raise NotImplementedError(self.iterobjects) 

2554 

2555 

2556def tree_lookup_path( 

2557 lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile], 

2558 root_sha: ObjectID | RawObjectID, 

2559 path: bytes, 

2560) -> tuple[int, ObjectID]: 

2561 """Look up an object in a Git tree. 

2562 

2563 Args: 

2564 lookup_obj: Callback for retrieving object by SHA1 

2565 root_sha: SHA1 of the root tree 

2566 path: Path to lookup 

2567 Returns: A tuple of (mode, SHA) of the resulting path. 

2568 """ 

2569 tree = lookup_obj(root_sha) 

2570 if not isinstance(tree, Tree): 

2571 raise NotTreeError(root_sha) 

2572 return tree.lookup_path(lookup_obj, path) 

2573 

2574 

2575def _collect_filetree_revs( 

2576 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

2577) -> None: 

2578 """Collect SHA1s of files and directories for specified tree. 

2579 

2580 Args: 

2581 obj_store: Object store to get objects by SHA from 

2582 tree_sha: tree reference to walk 

2583 kset: set to fill with references to files and directories 

2584 """ 

2585 filetree = obj_store[tree_sha] 

2586 assert isinstance(filetree, Tree) 

2587 for name, mode, sha in filetree.iteritems(): 

2588 assert mode is not None 

2589 assert sha is not None 

2590 if not S_ISGITLINK(mode) and sha not in kset: 

2591 kset.add(sha) 

2592 if stat.S_ISDIR(mode): 

2593 _collect_filetree_revs(obj_store, sha, kset) 

2594 

2595 

2596def _split_commits_and_tags( 

2597 obj_store: ObjectContainer, 

2598 lst: Iterable[ObjectID], 

2599 *, 

2600 unknown: str = "error", 

2601) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]: 

2602 """Split object id list into three lists with commit, tag, and other SHAs. 

2603 

2604 Commits referenced by tags are included into commits 

2605 list as well. Only SHA1s known in this repository will get 

2606 through, controlled by the unknown parameter. 

2607 

2608 Args: 

2609 obj_store: Object store to get objects by SHA1 from 

2610 lst: Collection of commit and tag SHAs 

2611 unknown: How to handle unknown objects: "error", "warn", or "ignore" 

2612 Returns: A tuple of (commits, tags, others) SHA1s 

2613 """ 

2614 import logging 

2615 

2616 if unknown not in ("error", "warn", "ignore"): 

2617 raise ValueError( 

2618 f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}" 

2619 ) 

2620 

2621 commits: set[ObjectID] = set() 

2622 tags: set[ObjectID] = set() 

2623 others: set[ObjectID] = set() 

2624 for e in lst: 

2625 try: 

2626 o = obj_store[e] 

2627 except KeyError: 

2628 if unknown == "error": 

2629 raise 

2630 elif unknown == "warn": 

2631 logging.warning( 

2632 "Object %s not found in object store", e.decode("ascii") 

2633 ) 

2634 # else: ignore 

2635 else: 

2636 if isinstance(o, Commit): 

2637 commits.add(e) 

2638 elif isinstance(o, Tag): 

2639 tags.add(e) 

2640 tagged = o.object[1] 

2641 c, t, os = _split_commits_and_tags(obj_store, [tagged], unknown=unknown) 

2642 commits |= c 

2643 tags |= t 

2644 others |= os 

2645 else: 

2646 others.add(e) 

2647 return (commits, tags, others) 

2648 

2649 

2650class MissingObjectFinder: 

2651 """Find the objects missing from another object store. 

2652 

2653 Args: 

2654 object_store: Object store containing at least all objects to be 

2655 sent 

2656 haves: SHA1s of commits not to send (already present in target) 

2657 wants: SHA1s of commits to send 

2658 progress: Optional function to report progress to. 

2659 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2660 sha for including tags. 

2661 get_parents: Optional function for getting the parents of a commit. 

2662 """ 

2663 

2664 def __init__( 

2665 self, 

2666 object_store: BaseObjectStore, 

2667 haves: Iterable[ObjectID], 

2668 wants: Iterable[ObjectID], 

2669 *, 

2670 shallow: Set[ObjectID] | None = None, 

2671 progress: Callable[[bytes], None] | None = None, 

2672 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None, 

2673 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents, 

2674 ) -> None: 

2675 """Initialize a MissingObjectFinder. 

2676 

2677 Args: 

2678 object_store: Object store containing objects 

2679 haves: SHA1s of objects already present in target 

2680 wants: SHA1s of objects to send 

2681 shallow: Set of shallow commit SHA1s 

2682 progress: Optional progress reporting callback 

2683 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2684 get_parents: Function for getting commit parents 

2685 """ 

2686 self.object_store = object_store 

2687 if shallow is None: 

2688 shallow = set() 

2689 self._get_parents = get_parents 

2690 reachability = object_store.get_reachability_provider() 

2691 # process Commits and Tags differently 

2692 # haves may list commits/tags not available locally (silently ignore them). 

2693 # wants should only contain valid SHAs (fail fast if not). 

2694 have_commits, have_tags, have_others = _split_commits_and_tags( 

2695 object_store, haves, unknown="ignore" 

2696 ) 

2697 want_commits, want_tags, want_others = _split_commits_and_tags( 

2698 object_store, wants, unknown="error" 

2699 ) 

2700 # all_ancestors is a set of commits that shall not be sent 

2701 # (complete repository up to 'haves') 

2702 all_ancestors = reachability.get_reachable_commits( 

2703 have_commits, exclude=None, shallow=shallow 

2704 ) 

2705 # all_missing - complete set of commits between haves and wants 

2706 # common_commits - boundary commits directly encountered when traversing wants 

2707 # We use _collect_ancestors here because we need the exact boundary behavior: 

2708 # commits that are in all_ancestors and directly reachable from wants, 

2709 # but we don't traverse past them. This is hard to express with the 

2710 # reachability abstraction alone. 

2711 missing_commits, common_commits = _collect_ancestors( 

2712 object_store, 

2713 want_commits, 

2714 frozenset(all_ancestors), 

2715 shallow=frozenset(shallow), 

2716 get_parents=self._get_parents, 

2717 ) 

2718 

2719 self.remote_has: set[ObjectID] = set() 

2720 # Now, fill sha_done with commits and revisions of 

2721 # files and directories known to be both locally 

2722 # and on target. Thus these commits and files 

2723 # won't get selected for fetch 

2724 for h in common_commits: 

2725 self.remote_has.add(h) 

2726 cmt = object_store[h] 

2727 assert isinstance(cmt, Commit) 

2728 # Get tree objects for this commit 

2729 tree_objects = reachability.get_tree_objects([cmt.tree]) 

2730 self.remote_has.update(tree_objects) 

2731 

2732 # record tags we have as visited, too 

2733 for t in have_tags: 

2734 self.remote_has.add(t) 

2735 self.sha_done = set(self.remote_has) 

2736 

2737 # in fact, what we 'want' is commits, tags, and others 

2738 # we've found missing 

2739 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = { 

2740 (w, None, Commit.type_num, False) for w in missing_commits 

2741 } 

2742 missing_tags = want_tags.difference(have_tags) 

2743 self.objects_to_send.update( 

2744 {(w, None, Tag.type_num, False) for w in missing_tags} 

2745 ) 

2746 missing_others = want_others.difference(have_others) 

2747 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2748 

2749 if progress is None: 

2750 self.progress: Callable[[bytes], None] = lambda x: None 

2751 else: 

2752 self.progress = progress 

2753 self._tagged = (get_tagged and get_tagged()) or {} 

2754 

2755 def get_remote_has(self) -> set[ObjectID]: 

2756 """Get the set of SHAs the remote has. 

2757 

2758 Returns: 

2759 Set of SHA1s that the remote side already has 

2760 """ 

2761 return self.remote_has 

2762 

2763 def add_todo( 

2764 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]] 

2765 ) -> None: 

2766 """Add objects to the todo list. 

2767 

2768 Args: 

2769 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2770 """ 

2771 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2772 

2773 def __next__(self) -> tuple[ObjectID, PackHint | None]: 

2774 """Get the next object to send. 

2775 

2776 Returns: 

2777 Tuple of (sha, pack_hint) 

2778 

2779 Raises: 

2780 StopIteration: When no more objects to send 

2781 """ 

2782 while True: 

2783 if not self.objects_to_send: 

2784 self.progress( 

2785 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2786 ) 

2787 raise StopIteration 

2788 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2789 if sha not in self.sha_done: 

2790 break 

2791 if not leaf: 

2792 o = self.object_store[sha] 

2793 if isinstance(o, Commit): 

2794 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2795 elif isinstance(o, Tree): 

2796 todos = [] 

2797 for n, m, s in o.iteritems(): 

2798 assert m is not None 

2799 assert n is not None 

2800 assert s is not None 

2801 if not S_ISGITLINK(m): 

2802 todos.append( 

2803 ( 

2804 s, 

2805 n, 

2806 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2807 not stat.S_ISDIR(m), 

2808 ) 

2809 ) 

2810 self.add_todo(todos) 

2811 elif isinstance(o, Tag): 

2812 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2813 if sha in self._tagged: 

2814 self.add_todo([(self._tagged[sha], None, None, True)]) 

2815 self.sha_done.add(sha) 

2816 if len(self.sha_done) % 1000 == 0: 

2817 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2818 if type_num is None: 

2819 pack_hint = None 

2820 else: 

2821 pack_hint = (type_num, name) 

2822 return (sha, pack_hint) 

2823 

2824 def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]: 

2825 """Return iterator over objects to send. 

2826 

2827 Returns: 

2828 Self (this class implements the iterator protocol) 

2829 """ 

2830 return self 

2831 

2832 

2833class ObjectStoreGraphWalker: 

2834 """Graph walker that finds what commits are missing from an object store.""" 

2835 

2836 heads: set[ObjectID] 

2837 """Revisions without descendants in the local repo.""" 

2838 

2839 get_parents: Callable[[ObjectID], list[ObjectID]] 

2840 """Function to retrieve parents in the local repo.""" 

2841 

2842 shallow: set[ObjectID] 

2843 

2844 def __init__( 

2845 self, 

2846 local_heads: Iterable[ObjectID], 

2847 get_parents: Callable[[ObjectID], list[ObjectID]], 

2848 shallow: set[ObjectID] | None = None, 

2849 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None] 

2850 | None = None, 

2851 ) -> None: 

2852 """Create a new instance. 

2853 

2854 Args: 

2855 local_heads: Heads to start search with 

2856 get_parents: Function for finding the parents of a SHA1. 

2857 shallow: Set of shallow commits. 

2858 update_shallow: Function to update shallow commits. 

2859 """ 

2860 self.heads = set(local_heads) 

2861 self.get_parents = get_parents 

2862 self.parents: dict[ObjectID, list[ObjectID] | None] = {} 

2863 if shallow is None: 

2864 shallow = set() 

2865 self.shallow = shallow 

2866 self.update_shallow = update_shallow 

2867 

2868 def nak(self) -> None: 

2869 """Nothing in common was found.""" 

2870 

2871 def ack(self, sha: ObjectID) -> None: 

2872 """Ack that a revision and its ancestors are present in the source.""" 

2873 if len(sha) != 40: 

2874 raise ValueError(f"unexpected sha {sha!r} received") 

2875 ancestors = {sha} 

2876 

2877 # stop if we run out of heads to remove 

2878 while self.heads: 

2879 for a in ancestors: 

2880 if a in self.heads: 

2881 self.heads.remove(a) 

2882 

2883 # collect all ancestors 

2884 new_ancestors = set() 

2885 for a in ancestors: 

2886 ps = self.parents.get(a) 

2887 if ps is not None: 

2888 new_ancestors.update(ps) 

2889 self.parents[a] = None 

2890 

2891 # no more ancestors; stop 

2892 if not new_ancestors: 

2893 break 

2894 

2895 ancestors = new_ancestors 

2896 

2897 def next(self) -> ObjectID | None: 

2898 """Iterate over ancestors of heads in the target.""" 

2899 if self.heads: 

2900 ret = self.heads.pop() 

2901 try: 

2902 ps = self.get_parents(ret) 

2903 except KeyError: 

2904 return None 

2905 self.parents[ret] = ps 

2906 self.heads.update([p for p in ps if p not in self.parents]) 

2907 return ret 

2908 return None 

2909 

2910 __next__ = next 

2911 

2912 

2913def commit_tree_changes( 

2914 object_store: BaseObjectStore, 

2915 tree: ObjectID | Tree, 

2916 changes: Sequence[tuple[bytes, int | None, ObjectID | None]], 

2917) -> ObjectID: 

2918 """Commit a specified set of changes to a tree structure. 

2919 

2920 This will apply a set of changes on top of an existing tree, storing new 

2921 objects in object_store. 

2922 

2923 changes are a list of tuples with (path, mode, object_sha). 

2924 Paths can be both blobs and trees. See the mode and 

2925 object sha to None deletes the path. 

2926 

2927 This method works especially well if there are only a small 

2928 number of changes to a big tree. For a large number of changes 

2929 to a large tree, use e.g. commit_tree. 

2930 

2931 Args: 

2932 object_store: Object store to store new objects in 

2933 and retrieve old ones from. 

2934 tree: Original tree root (SHA or Tree object) 

2935 changes: changes to apply 

2936 Returns: New tree root object 

2937 """ 

2938 # TODO(jelmer): Save up the objects and add them using .add_objects 

2939 # rather than with individual calls to .add_object. 

2940 # Handle both Tree object and SHA 

2941 if isinstance(tree, Tree): 

2942 tree_obj: Tree = tree 

2943 else: 

2944 sha_obj = object_store[tree] 

2945 assert isinstance(sha_obj, Tree) 

2946 tree_obj = sha_obj 

2947 nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {} 

2948 for path, new_mode, new_sha in changes: 

2949 try: 

2950 (dirname, subpath) = path.split(b"/", 1) 

2951 except ValueError: 

2952 if new_sha is None: 

2953 del tree_obj[path] 

2954 else: 

2955 assert new_mode is not None 

2956 tree_obj[path] = (new_mode, new_sha) 

2957 else: 

2958 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2959 for name, subchanges in nested_changes.items(): 

2960 try: 

2961 orig_subtree_id: ObjectID | Tree = tree_obj[name][1] 

2962 except KeyError: 

2963 # For new directories, pass an empty Tree object 

2964 orig_subtree_id = Tree() 

2965 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges) 

2966 subtree = object_store[subtree_id] 

2967 assert isinstance(subtree, Tree) 

2968 if len(subtree) == 0: 

2969 del tree_obj[name] 

2970 else: 

2971 tree_obj[name] = (stat.S_IFDIR, subtree.id) 

2972 object_store.add_object(tree_obj) 

2973 return tree_obj.id 

2974 

2975 

2976class OverlayObjectStore(BaseObjectStore): 

2977 """Object store that can overlay multiple object stores.""" 

2978 

2979 def __init__( 

2980 self, 

2981 bases: list[BaseObjectStore], 

2982 add_store: BaseObjectStore | None = None, 

2983 ) -> None: 

2984 """Initialize an OverlayObjectStore. 

2985 

2986 Args: 

2987 bases: List of base object stores to overlay 

2988 add_store: Optional store to write new objects to 

2989 """ 

2990 self.bases = bases 

2991 self.add_store = add_store 

2992 

2993 def add_object(self, object: ShaFile) -> None: 

2994 """Add a single object to the store. 

2995 

2996 Args: 

2997 object: Object to add 

2998 

2999 Raises: 

3000 NotImplementedError: If no add_store was provided 

3001 """ 

3002 if self.add_store is None: 

3003 raise NotImplementedError(self.add_object) 

3004 return self.add_store.add_object(object) 

3005 

3006 def add_objects( 

3007 self, 

3008 objects: Sequence[tuple[ShaFile, str | None]], 

3009 progress: Callable[[str], None] | None = None, 

3010 ) -> Pack | None: 

3011 """Add multiple objects to the store. 

3012 

3013 Args: 

3014 objects: Iterator of objects to add 

3015 progress: Optional progress reporting callback 

3016 

3017 Raises: 

3018 NotImplementedError: If no add_store was provided 

3019 """ 

3020 if self.add_store is None: 

3021 raise NotImplementedError(self.add_object) 

3022 return self.add_store.add_objects(objects, progress) 

3023 

3024 @property 

3025 def packs(self) -> list[Pack]: 

3026 """Get the list of packs from all overlaid stores. 

3027 

3028 Returns: 

3029 Combined list of packs from all base stores 

3030 """ 

3031 ret = [] 

3032 for b in self.bases: 

3033 ret.extend(b.packs) 

3034 return ret 

3035 

3036 def __iter__(self) -> Iterator[ObjectID]: 

3037 """Iterate over all object SHAs in the overlaid stores. 

3038 

3039 Returns: 

3040 Iterator of object SHAs (deduped across stores) 

3041 """ 

3042 done = set() 

3043 for b in self.bases: 

3044 for o_id in b: 

3045 if o_id not in done: 

3046 yield o_id 

3047 done.add(o_id) 

3048 

3049 def iterobjects_subset( 

3050 self, shas: Iterable[ObjectID], *, allow_missing: bool = False 

3051 ) -> Iterator[ShaFile]: 

3052 """Iterate over a subset of objects from the overlaid stores. 

3053 

3054 Args: 

3055 shas: Iterable of object SHAs to retrieve 

3056 allow_missing: If True, skip missing objects; if False, raise KeyError 

3057 

3058 Returns: 

3059 Iterator of ShaFile objects 

3060 

3061 Raises: 

3062 KeyError: If an object is missing and allow_missing is False 

3063 """ 

3064 todo = set(shas) 

3065 found: set[ObjectID] = set() 

3066 

3067 for b in self.bases: 

3068 # Create a copy of todo for each base to avoid modifying 

3069 # the set while iterating through it 

3070 current_todo = todo - found 

3071 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

3072 yield o 

3073 found.add(o.id) 

3074 

3075 # Check for any remaining objects not found 

3076 missing = todo - found 

3077 if missing and not allow_missing: 

3078 raise KeyError(next(iter(missing))) 

3079 

3080 def iter_unpacked_subset( 

3081 self, 

3082 shas: Iterable[ObjectID | RawObjectID], 

3083 include_comp: bool = False, 

3084 allow_missing: bool = False, 

3085 convert_ofs_delta: bool = True, 

3086 ) -> Iterator[UnpackedObject]: 

3087 """Iterate over unpacked objects from the overlaid stores. 

3088 

3089 Args: 

3090 shas: Iterable of object SHAs to retrieve 

3091 include_comp: Whether to include compressed data 

3092 allow_missing: If True, skip missing objects; if False, raise KeyError 

3093 convert_ofs_delta: Whether to convert OFS_DELTA objects 

3094 

3095 Returns: 

3096 Iterator of unpacked objects 

3097 

3098 Raises: 

3099 KeyError: If an object is missing and allow_missing is False 

3100 """ 

3101 todo: set[ObjectID | RawObjectID] = set(shas) 

3102 for b in self.bases: 

3103 for o in b.iter_unpacked_subset( 

3104 todo, 

3105 include_comp=include_comp, 

3106 allow_missing=True, 

3107 convert_ofs_delta=convert_ofs_delta, 

3108 ): 

3109 yield o 

3110 todo.remove(o.sha()) 

3111 if todo and not allow_missing: 

3112 raise KeyError(next(iter(todo))) 

3113 

3114 def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]: 

3115 """Get the raw object data from the overlaid stores. 

3116 

3117 Args: 

3118 sha_id: SHA of the object 

3119 

3120 Returns: 

3121 Tuple of (type_num, raw_data) 

3122 

3123 Raises: 

3124 KeyError: If object not found in any base store 

3125 """ 

3126 for b in self.bases: 

3127 try: 

3128 return b.get_raw(sha_id) 

3129 except KeyError: 

3130 pass 

3131 raise KeyError(sha_id) 

3132 

3133 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

3134 """Check if an object is packed in any base store. 

3135 

3136 Args: 

3137 sha: SHA of the object 

3138 

3139 Returns: 

3140 True if object is packed in any base store 

3141 """ 

3142 for b in self.bases: 

3143 if b.contains_packed(sha): 

3144 return True 

3145 return False 

3146 

3147 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

3148 """Check if an object is loose in any base store. 

3149 

3150 Args: 

3151 sha: SHA of the object 

3152 

3153 Returns: 

3154 True if object is loose in any base store 

3155 """ 

3156 for b in self.bases: 

3157 if b.contains_loose(sha): 

3158 return True 

3159 return False 

3160 

3161 

3162def read_packs_file(f: BinaryIO) -> Iterator[str]: 

3163 """Yield the packs listed in a packs file.""" 

3164 for line in f.read().splitlines(): 

3165 if not line: 

3166 continue 

3167 (kind, name) = line.split(b" ", 1) 

3168 if kind != b"P": 

3169 continue 

3170 yield os.fsdecode(name) 

3171 

3172 

3173class BucketBasedObjectStore(PackBasedObjectStore): 

3174 """Object store implementation that uses a bucket store like S3 as backend.""" 

3175 

3176 def _iter_loose_objects(self) -> Iterator[ObjectID]: 

3177 """Iterate over the SHAs of all loose objects.""" 

3178 return iter([]) 

3179 

3180 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None: 

3181 return None 

3182 

3183 def delete_loose_object(self, sha: ObjectID) -> None: 

3184 """Delete a loose object (no-op for bucket stores). 

3185 

3186 Bucket-based stores don't have loose objects, so this is a no-op. 

3187 

3188 Args: 

3189 sha: SHA of the object to delete 

3190 """ 

3191 # Doesn't exist.. 

3192 

3193 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

3194 """Pack loose objects. Returns number of objects packed. 

3195 

3196 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

3197 

3198 Args: 

3199 progress: Optional progress reporting callback (ignored) 

3200 """ 

3201 return 0 

3202 

3203 def _remove_pack_by_name(self, name: str) -> None: 

3204 """Remove a pack by name. Subclasses should implement this.""" 

3205 raise NotImplementedError(self._remove_pack_by_name) 

3206 

3207 def _iter_pack_names(self) -> Iterator[str]: 

3208 raise NotImplementedError(self._iter_pack_names) 

3209 

3210 def _get_pack(self, name: str) -> Pack: 

3211 raise NotImplementedError(self._get_pack) 

3212 

3213 def _update_pack_cache(self) -> list[Pack]: 

3214 pack_files = set(self._iter_pack_names()) 

3215 

3216 # Open newly appeared pack files 

3217 new_packs = [] 

3218 for f in pack_files: 

3219 if f not in self._pack_cache: 

3220 pack = self._get_pack(f) 

3221 new_packs.append(pack) 

3222 self._pack_cache[f] = pack 

3223 # Remove disappeared pack files 

3224 for f in set(self._pack_cache) - pack_files: 

3225 self._pack_cache.pop(f).close() 

3226 return new_packs 

3227 

3228 def _upload_pack( 

3229 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO 

3230 ) -> None: 

3231 raise NotImplementedError 

3232 

3233 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

3234 """Add a new pack to this object store. 

3235 

3236 Returns: Fileobject to write to, a commit function to 

3237 call when the pack is finished and an abort 

3238 function. 

3239 """ 

3240 import tempfile 

3241 

3242 pf = tempfile.SpooledTemporaryFile( 

3243 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

3244 ) 

3245 

3246 def commit() -> Pack | None: 

3247 if pf.tell() == 0: 

3248 pf.close() 

3249 return None 

3250 

3251 pf.seek(0) 

3252 

3253 p = PackData(pf.name, pf) 

3254 entries = p.sorted_entries() 

3255 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

3256 idxf = tempfile.SpooledTemporaryFile( 

3257 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

3258 ) 

3259 checksum = p.get_stored_checksum() 

3260 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

3261 idxf.seek(0) 

3262 idx = load_pack_index_file(basename + ".idx", idxf) 

3263 for pack in self.packs: 

3264 if pack.get_stored_checksum() == p.get_stored_checksum(): 

3265 p.close() 

3266 idx.close() 

3267 pf.close() 

3268 idxf.close() 

3269 return pack 

3270 pf.seek(0) 

3271 idxf.seek(0) 

3272 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type] 

3273 final_pack = Pack.from_objects(p, idx) 

3274 self._add_cached_pack(basename, final_pack) 

3275 pf.close() 

3276 idxf.close() 

3277 return final_pack 

3278 

3279 return pf, commit, pf.close # type: ignore[return-value] 

3280 

3281 

3282def _collect_ancestors( 

3283 store: ObjectContainer, 

3284 heads: Iterable[ObjectID], 

3285 common: frozenset[ObjectID] = frozenset(), 

3286 shallow: frozenset[ObjectID] = frozenset(), 

3287 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents, 

3288) -> tuple[set[ObjectID], set[ObjectID]]: 

3289 """Collect all ancestors of heads up to (excluding) those in common. 

3290 

3291 Args: 

3292 store: Object store to get commits from 

3293 heads: commits to start from 

3294 common: commits to end at, or empty set to walk repository 

3295 completely 

3296 shallow: Set of shallow commits 

3297 get_parents: Optional function for getting the parents of a 

3298 commit. 

3299 Returns: a tuple (A, B) where A - all commits reachable 

3300 from heads but not present in common, B - common (shared) elements 

3301 that are directly reachable from heads 

3302 """ 

3303 bases = set() 

3304 commits = set() 

3305 queue: list[ObjectID] = [] 

3306 queue.extend(heads) 

3307 

3308 # Try to use commit graph if available 

3309 commit_graph = store.get_commit_graph() 

3310 

3311 while queue: 

3312 e = queue.pop(0) 

3313 if e in common: 

3314 bases.add(e) 

3315 elif e not in commits: 

3316 commits.add(e) 

3317 if e in shallow: 

3318 continue 

3319 

3320 # Try to use commit graph for parent lookup 

3321 parents = None 

3322 if commit_graph: 

3323 parents = commit_graph.get_parents(e) 

3324 

3325 if parents is None: 

3326 # Fall back to loading the object 

3327 cmt = store[e] 

3328 assert isinstance(cmt, Commit) 

3329 parents = get_parents(cmt) 

3330 

3331 queue.extend(parents) 

3332 return (commits, bases) 

3333 

3334 

3335def iter_tree_contents( 

3336 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False 

3337) -> Iterator[TreeEntry]: 

3338 """Iterate the contents of a tree and all subtrees. 

3339 

3340 Iteration is depth-first pre-order, as in e.g. os.walk. 

3341 

3342 Args: 

3343 store: Object store to get trees from 

3344 tree_id: SHA1 of the tree. 

3345 include_trees: If True, include tree objects in the iteration. 

3346 

3347 Yields: TreeEntry namedtuples for all the objects in a tree. 

3348 """ 

3349 if tree_id is None: 

3350 return 

3351 # This could be fairly easily generalized to >2 trees if we find a use 

3352 # case. 

3353 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

3354 while todo: 

3355 entry = todo.pop() 

3356 assert entry.mode is not None 

3357 if stat.S_ISDIR(entry.mode): 

3358 extra = [] 

3359 assert entry.sha is not None 

3360 tree = store[entry.sha] 

3361 assert isinstance(tree, Tree) 

3362 for subentry in tree.iteritems(name_order=True): 

3363 assert entry.path is not None 

3364 extra.append(subentry.in_path(entry.path)) 

3365 todo.extend(reversed(extra)) 

3366 if not stat.S_ISDIR(entry.mode) or include_trees: 

3367 yield entry 

3368 

3369 

3370def iter_commit_contents( 

3371 store: ObjectContainer, 

3372 commit: Commit | ObjectID | RawObjectID, 

3373 *, 

3374 include: Sequence[str | bytes | Path] | None = None, 

3375) -> Iterator[TreeEntry]: 

3376 """Iterate the contents of the repository at the specified commit. 

3377 

3378 This is a wrapper around iter_tree_contents() and 

3379 tree_lookup_path() to simplify the common task of getting the 

3380 contest of a repo at a particular commit. See also 

3381 dulwich.index.build_file_from_blob() for writing individual files 

3382 to disk. 

3383 

3384 Args: 

3385 store: Object store to get trees from 

3386 commit: Commit object, or SHA1 of a commit 

3387 include: if provided, only the entries whose paths are in the 

3388 list, or whose parent tree is in the list, will be 

3389 included. Note that duplicate or overlapping paths 

3390 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

3391 

3392 Yields: TreeEntry namedtuples for all matching files in a commit. 

3393 """ 

3394 sha = commit.id if isinstance(commit, Commit) else commit 

3395 if not isinstance(obj := store[sha], Commit): 

3396 raise TypeError( 

3397 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

3398 ) 

3399 commit = obj 

3400 encoding = commit.encoding or "utf-8" 

3401 include_bytes: list[bytes] = ( 

3402 [ 

3403 path if isinstance(path, bytes) else str(path).encode(encoding) 

3404 for path in include 

3405 ] 

3406 if include is not None 

3407 else [b""] 

3408 ) 

3409 

3410 for path in include_bytes: 

3411 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

3412 # Iterate all contained files if path points to a dir, otherwise just get that 

3413 # single file 

3414 if isinstance(store[obj_id], Tree): 

3415 for entry in iter_tree_contents(store, obj_id): 

3416 yield entry.in_path(path) 

3417 else: 

3418 yield TreeEntry(path, mode, obj_id) 

3419 

3420 

3421def peel_sha( 

3422 store: ObjectContainer, sha: ObjectID | RawObjectID 

3423) -> tuple[ShaFile, ShaFile]: 

3424 """Peel all tags from a SHA. 

3425 

3426 Args: 

3427 store: Object store to get objects from 

3428 sha: The object SHA to peel. 

3429 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

3430 intermediate tags; if the original ref does not point to a tag, 

3431 this will equal the original SHA1. 

3432 """ 

3433 unpeeled = obj = store[sha] 

3434 obj_class = object_class(obj.type_name) 

3435 while obj_class is Tag: 

3436 assert isinstance(obj, Tag) 

3437 obj_class, sha = obj.object 

3438 obj = store[sha] 

3439 return unpeeled, obj 

3440 

3441 

3442class GraphTraversalReachability: 

3443 """Naive graph traversal implementation of ObjectReachabilityProvider. 

3444 

3445 This implementation wraps existing graph traversal functions 

3446 (_collect_ancestors, _collect_filetree_revs) to provide the standard 

3447 reachability interface without any performance optimizations. 

3448 """ 

3449 

3450 def __init__(self, object_store: BaseObjectStore) -> None: 

3451 """Initialize the graph traversal provider. 

3452 

3453 Args: 

3454 object_store: Object store to query 

3455 """ 

3456 self.store = object_store 

3457 

3458 def get_reachable_commits( 

3459 self, 

3460 heads: Iterable[ObjectID], 

3461 exclude: Iterable[ObjectID] | None = None, 

3462 shallow: Set[ObjectID] | None = None, 

3463 ) -> set[ObjectID]: 

3464 """Get all commits reachable from heads, excluding those in exclude. 

3465 

3466 Uses _collect_ancestors for commit traversal. 

3467 

3468 Args: 

3469 heads: Starting commit SHAs 

3470 exclude: Commit SHAs to exclude (and their ancestors) 

3471 shallow: Set of shallow commit boundaries 

3472 

3473 Returns: 

3474 Set of commit SHAs reachable from heads but not from exclude 

3475 """ 

3476 exclude_set = frozenset(exclude) if exclude else frozenset() 

3477 shallow_set = frozenset(shallow) if shallow else frozenset() 

3478 commits, _bases = _collect_ancestors( 

3479 self.store, heads, exclude_set, shallow_set 

3480 ) 

3481 return commits 

3482 

3483 def get_tree_objects( 

3484 self, 

3485 tree_shas: Iterable[ObjectID], 

3486 ) -> set[ObjectID]: 

3487 """Get all trees and blobs reachable from the given trees. 

3488 

3489 Uses _collect_filetree_revs for tree traversal. 

3490 

3491 Args: 

3492 tree_shas: Starting tree SHAs 

3493 

3494 Returns: 

3495 Set of tree and blob SHAs 

3496 """ 

3497 result: set[ObjectID] = set() 

3498 for tree_sha in tree_shas: 

3499 _collect_filetree_revs(self.store, tree_sha, result) 

3500 return result 

3501 

3502 def get_reachable_objects( 

3503 self, 

3504 commits: Iterable[ObjectID], 

3505 exclude_commits: Iterable[ObjectID] | None = None, 

3506 ) -> set[ObjectID]: 

3507 """Get all objects (commits + trees + blobs) reachable from commits. 

3508 

3509 Args: 

3510 commits: Starting commit SHAs 

3511 exclude_commits: Commits whose objects should be excluded 

3512 

3513 Returns: 

3514 Set of all object SHAs (commits, trees, blobs) 

3515 """ 

3516 commits_set = set(commits) 

3517 result = set(commits_set) 

3518 

3519 # Get trees for all commits 

3520 tree_shas = [] 

3521 for commit_sha in commits_set: 

3522 try: 

3523 commit = self.store[commit_sha] 

3524 if isinstance(commit, Commit): 

3525 tree_shas.append(commit.tree) 

3526 except KeyError: 

3527 # Commit not in store, skip 

3528 continue 

3529 

3530 # Collect all tree/blob objects 

3531 result.update(self.get_tree_objects(tree_shas)) 

3532 

3533 # Exclude objects from exclude_commits if needed 

3534 if exclude_commits: 

3535 exclude_objects = self.get_reachable_objects(exclude_commits, None) 

3536 result -= exclude_objects 

3537 

3538 return result 

3539 

3540 

3541class BitmapReachability: 

3542 """Bitmap-accelerated implementation of ObjectReachabilityProvider. 

3543 

3544 This implementation uses packfile bitmap indexes where available to 

3545 accelerate reachability queries. Falls back to graph traversal when 

3546 bitmaps don't cover the requested commits. 

3547 """ 

3548 

3549 def __init__(self, object_store: "PackBasedObjectStore") -> None: 

3550 """Initialize the bitmap provider. 

3551 

3552 Args: 

3553 object_store: Pack-based object store with bitmap support 

3554 """ 

3555 self.store = object_store 

3556 # Fallback to graph traversal for operations not yet optimized 

3557 self._fallback = GraphTraversalReachability(object_store) 

3558 

3559 def _combine_commit_bitmaps( 

3560 self, 

3561 commit_shas: set[ObjectID], 

3562 exclude_shas: set[ObjectID] | None = None, 

3563 ) -> tuple["EWAHBitmap", "Pack"] | None: 

3564 """Combine bitmaps for multiple commits using OR, with optional exclusion. 

3565 

3566 Args: 

3567 commit_shas: Set of commit SHAs to combine 

3568 exclude_shas: Optional set of commit SHAs to exclude 

3569 

3570 Returns: 

3571 Tuple of (combined_bitmap, pack) or None if bitmaps unavailable 

3572 """ 

3573 from .bitmap import find_commit_bitmaps 

3574 

3575 # Find bitmaps for the commits 

3576 commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs) 

3577 

3578 # If we can't find bitmaps for all commits, return None 

3579 if len(commit_bitmaps) < len(commit_shas): 

3580 return None 

3581 

3582 # Combine bitmaps using OR 

3583 combined_bitmap = None 

3584 result_pack = None 

3585 

3586 for commit_sha in commit_shas: 

3587 pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha] 

3588 commit_bitmap = pack_bitmap.get_bitmap(commit_sha) 

3589 

3590 if commit_bitmap is None: 

3591 return None 

3592 

3593 if combined_bitmap is None: 

3594 combined_bitmap = commit_bitmap 

3595 result_pack = pack 

3596 elif pack == result_pack: 

3597 # Same pack, can OR directly 

3598 combined_bitmap = combined_bitmap | commit_bitmap 

3599 else: 

3600 # Different packs, can't combine 

3601 return None 

3602 

3603 # Handle exclusions if provided 

3604 if exclude_shas and result_pack and combined_bitmap: 

3605 exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack]) 

3606 

3607 if len(exclude_bitmaps) == len(exclude_shas): 

3608 # All excludes have bitmaps, compute exclusion 

3609 exclude_combined = None 

3610 

3611 for commit_sha in exclude_shas: 

3612 _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha] 

3613 exclude_bitmap = pack_bitmap.get_bitmap(commit_sha) 

3614 

3615 if exclude_bitmap is None: 

3616 break 

3617 

3618 if exclude_combined is None: 

3619 exclude_combined = exclude_bitmap 

3620 else: 

3621 exclude_combined = exclude_combined | exclude_bitmap 

3622 

3623 # Subtract excludes using set difference 

3624 if exclude_combined: 

3625 combined_bitmap = combined_bitmap - exclude_combined 

3626 

3627 if combined_bitmap and result_pack: 

3628 return (combined_bitmap, result_pack) 

3629 return None 

3630 

3631 def get_reachable_commits( 

3632 self, 

3633 heads: Iterable[ObjectID], 

3634 exclude: Iterable[ObjectID] | None = None, 

3635 shallow: Set[ObjectID] | None = None, 

3636 ) -> set[ObjectID]: 

3637 """Get all commits reachable from heads using bitmaps where possible. 

3638 

3639 Args: 

3640 heads: Starting commit SHAs 

3641 exclude: Commit SHAs to exclude (and their ancestors) 

3642 shallow: Set of shallow commit boundaries 

3643 

3644 Returns: 

3645 Set of commit SHAs reachable from heads but not from exclude 

3646 """ 

3647 from .bitmap import bitmap_to_object_shas 

3648 

3649 # If shallow is specified, fall back to graph traversal 

3650 # (bitmaps don't support shallow boundaries well) 

3651 if shallow: 

3652 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3653 

3654 heads_set = set(heads) 

3655 exclude_set = set(exclude) if exclude else None 

3656 

3657 # Try to combine bitmaps 

3658 result = self._combine_commit_bitmaps(heads_set, exclude_set) 

3659 if result is None: 

3660 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3661 

3662 combined_bitmap, result_pack = result 

3663 

3664 # Convert bitmap to commit SHAs, filtering for commits only 

3665 pack_bitmap = result_pack.bitmap 

3666 if pack_bitmap is None: 

3667 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3668 commit_type_filter = pack_bitmap.commit_bitmap 

3669 return bitmap_to_object_shas( 

3670 combined_bitmap, result_pack.index, commit_type_filter 

3671 ) 

3672 

3673 def get_tree_objects( 

3674 self, 

3675 tree_shas: Iterable[ObjectID], 

3676 ) -> set[ObjectID]: 

3677 """Get all trees and blobs reachable from the given trees. 

3678 

3679 Args: 

3680 tree_shas: Starting tree SHAs 

3681 

3682 Returns: 

3683 Set of tree and blob SHAs 

3684 """ 

3685 # Tree traversal doesn't benefit much from bitmaps, use fallback 

3686 return self._fallback.get_tree_objects(tree_shas) 

3687 

3688 def get_reachable_objects( 

3689 self, 

3690 commits: Iterable[ObjectID], 

3691 exclude_commits: Iterable[ObjectID] | None = None, 

3692 ) -> set[ObjectID]: 

3693 """Get all objects reachable from commits using bitmaps. 

3694 

3695 Args: 

3696 commits: Starting commit SHAs 

3697 exclude_commits: Commits whose objects should be excluded 

3698 

3699 Returns: 

3700 Set of all object SHAs (commits, trees, blobs) 

3701 """ 

3702 from .bitmap import bitmap_to_object_shas 

3703 

3704 commits_set = set(commits) 

3705 exclude_set = set(exclude_commits) if exclude_commits else None 

3706 

3707 # Try to combine bitmaps 

3708 result = self._combine_commit_bitmaps(commits_set, exclude_set) 

3709 if result is None: 

3710 return self._fallback.get_reachable_objects(commits, exclude_commits) 

3711 

3712 combined_bitmap, result_pack = result 

3713 

3714 # Convert bitmap to all object SHAs (no type filter) 

3715 return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)