
# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
# and others
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#


"""Git object store interfaces and implementation."""

__all__ = [
    "DEFAULT_TEMPFILE_GRACE_PERIOD",
    "INFODIR",
    "PACKDIR",
    "PACK_MODE",
    "BaseObjectStore",
    "BitmapReachability",
    "BucketBasedObjectStore",
    "DiskObjectStore",
    "GraphTraversalReachability",
    "GraphWalker",
    "MemoryObjectStore",
    "MissingObjectFinder",
    "ObjectIterator",
    "ObjectReachabilityProvider",
    "ObjectStoreGraphWalker",
    "OverlayObjectStore",
    "PackBasedObjectStore",
    "PackCapableObjectStore",
    "PackContainer",
    "commit_tree_changes",
    "find_shallow",
    "get_depth",
    "iter_commit_contents",
    "iter_tree_contents",
    "peel_sha",
    "read_packs_file",
    "tree_lookup_path",
]

import binascii
import os
import stat
import sys
import time
import warnings
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
from contextlib import suppress
from io import BytesIO
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    BinaryIO,
    Protocol,
    cast,
)

if TYPE_CHECKING:
    from .object_format import ObjectFormat

from .errors import NotTreeError
from .file import GitFile, _GitFile
from .midx import MultiPackIndex, load_midx
from .objects import (
    S_ISGITLINK,
    Blob,
    Commit,
    ObjectID,
    RawObjectID,
    ShaFile,
    Tag,
    Tree,
    TreeEntry,
    hex_to_filename,
    hex_to_sha,
    object_class,
    sha_to_hex,
    valid_hexsha,
)
from .pack import (
    PACK_SPOOL_FILE_MAX_SIZE,
    ObjectContainer,
    Pack,
    PackData,
    PackedObjectContainer,
    PackFileDisappeared,
    PackHint,
    PackIndexer,
    PackInflater,
    PackStreamCopier,
    UnpackedObject,
    extend_pack,
    full_unpacked_object,
    generate_unpacked_objects,
    iter_sha1,
    load_pack_index_file,
    pack_objects_to_data,
    write_pack_data,
    write_pack_index,
)
from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX
from .refs import Ref

if TYPE_CHECKING:
    from .bitmap import EWAHBitmap
    from .commit_graph import CommitGraph
    from .config import Config
    from .diff_tree import RenameDetector
    from .pack import Pack


class GraphWalker(Protocol):
    """Protocol for graph walker objects."""

    def __next__(self) -> ObjectID | None:
        """Return the next object SHA to visit."""
        ...

    def ack(self, sha: ObjectID) -> None:
        """Acknowledge that an object has been received."""
        ...

    def nak(self) -> None:
        """Nothing in common was found."""
        ...
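

# Illustrative sketch (not part of dulwich's API): a minimal object that
# satisfies the GraphWalker protocol. It proposes candidate SHAs from a fixed
# list and records which ones the peer acknowledged; real walkers traverse
# the commit graph instead of a list.
def _example_graph_walker(candidates: list[ObjectID]) -> GraphWalker:
    class ListGraphWalker:
        def __init__(self, candidates: list[ObjectID]) -> None:
            self._candidates = iter(candidates)
            self.acked: list[ObjectID] = []

        def __next__(self) -> ObjectID | None:
            # Exhausting the candidates yields None, signalling that there
            # are no more proposals.
            return next(self._candidates, None)

        def ack(self, sha: ObjectID) -> None:
            self.acked.append(sha)

        def nak(self) -> None:
            pass

    return ListGraphWalker(candidates)

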

class ObjectReachabilityProvider(Protocol):
    """Protocol for computing object reachability queries.

    This abstraction allows reachability computations to be backed by either
    naive graph traversal or optimized bitmap indexes, with a consistent interface.
    """

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries (traversal stops here)

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        ...

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs, tags)
        """
        ...

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        ...
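

# Illustrative sketch (hypothetical helper, not part of dulwich): how a
# caller would combine the two protocol methods to compute the set of objects
# a fetch of `heads` must transfer to a client that already has `haves`.
def _example_reachability(
    store: "BaseObjectStore",
    heads: Iterable[ObjectID],
    haves: Iterable[ObjectID],
) -> set[ObjectID]:
    haves = set(haves)
    provider = store.get_reachability_provider()
    # First narrow down to the commits the client is missing ...
    commits = provider.get_reachable_commits(heads, exclude=haves)
    # ... then expand to every tree and blob hanging off those commits.
    return provider.get_reachable_objects(commits, exclude_commits=haves)

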

INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks



def find_shallow(
    store: ObjectContainer, heads: Iterable[ObjectID], depth: int
) -> tuple[set[ObjectID], set[ObjectID]]:
    """Find shallow commits according to a given depth.

    Args:
        store: An ObjectStore for looking up objects.
        heads: Iterable of head SHAs to start walking from.
        depth: The depth of ancestors to include. A depth of one includes
            only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
        considered shallow and unshallow according to the arguments. Note that
        these sets may overlap if a commit is reachable along multiple paths.
    """
    parents: dict[ObjectID, list[ObjectID]] = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha: ObjectID) -> list[ObjectID]:
        result = parents.get(sha, None)
        if not result:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            commit = store[sha]
            assert isinstance(commit, Commit)
            result = commit.parents
            parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel tags if necessary
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            shallow.add(sha)

    return shallow, not_shallow
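

# Illustrative sketch (not shipped with dulwich): find_shallow() on a linear
# three-commit history. With depth=2 the head stays "not shallow" and its
# parent becomes the shallow boundary; the grandparent is never visited.
# Uses the in-memory store defined later in this module.
def _example_find_shallow() -> None:
    store = MemoryObjectStore()

    def make_commit(parent: "Commit | None") -> Commit:
        tree = Tree()
        store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        if parent is not None:
            commit.parents = [parent.id]
        commit.author = commit.committer = b"Example <example@example.com>"
        commit.author_time = commit.commit_time = 0
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"example"
        store.add_object(commit)
        return commit

    c1 = make_commit(None)
    c2 = make_commit(c1)
    c3 = make_commit(c2)
    shallow, not_shallow = find_shallow(store, [c3.id], depth=2)
    assert shallow == {c2.id}
    assert not_shallow == {c3.id}

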

def get_depth(
    store: ObjectContainer,
    head: ObjectID,
    get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    max_depth: int | None = None,
) -> int:
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
        store: Object store to search in
        head: commit to start from
        get_parents: optional function for getting the parents of a commit
        max_depth: maximum depth to search
    """
    if head not in store:
        return 0
    current_depth = 1
    queue = [(head, current_depth)]
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.pop(0)
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            if isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth
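

# Illustrative sketch (not shipped with dulwich): on a linear history such as
# the one built in _example_find_shallow, depth grows by one per generation,
# and max_depth caps the traversal without visiting older history.
def _example_get_depth(store: ObjectContainer, head: ObjectID) -> None:
    # A present head always contributes at least depth 1 ...
    assert get_depth(store, head) >= 1
    # ... and max_depth bounds the answer for deep histories.
    assert get_depth(store, head, max_depth=2) <= 2

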

class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack."""
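

# Illustrative sketch (not part of dulwich): a minimal in-memory
# PackContainer. Callers stream pack bytes into the returned file object,
# then invoke commit() on success or abort() on failure.
def _example_pack_container() -> PackContainer:
    class BufferPackContainer:
        def __init__(self) -> None:
            self.committed: list[bytes] = []

        def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
            buf = BytesIO()

            def commit() -> None:
                # Keep the finished pack bytes around.
                self.committed.append(buf.getvalue())

            def abort() -> None:
                # Discard whatever was written so far.
                buf.close()

            return buf, commit, abort

    return BufferPackContainer()

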

class BaseObjectStore:
    """Object store interface."""

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize object store.

        Args:
            object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
        """
        from .object_format import DEFAULT_OBJECT_FORMAT

        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT

    def determine_wants_all(
        self, refs: Mapping[Ref, ObjectID], depth: int | None = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs."""

        def _want_deepen(sha: ObjectID) -> bool:
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
        ]

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        return False  # Default implementation for stores that don't support packing

    def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(
            type_num, uncomp, sha=sha1, object_format=self.object_format
        )

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def get_reachability_provider(
        self, prefer_bitmap: bool = True
    ) -> ObjectReachabilityProvider:
        """Get a reachability provider for this object store.

        Returns an ObjectReachabilityProvider that can efficiently compute
        object reachability queries. Subclasses can override this to provide
        optimized implementations (e.g., using bitmap indexes).

        Args:
            prefer_bitmap: Whether to prefer bitmap-based reachability if
                available.

        Returns:
            ObjectReachabilityProvider instance
        """
        return GraphTraversalReachability(self)

    def tree_changes(
        self,
        source: ObjectID | None,
        target: ObjectID | None,
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: "RenameDetector | None" = None,
        paths: Sequence[bytes] | None = None,
    ) -> Iterator[
        tuple[
            tuple[bytes | None, bytes | None],
            tuple[int | None, int | None],
            tuple[ObjectID | None, ObjectID | None],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
            source: SHA1 of the source tree
            target: SHA1 of the target tree
            want_unchanged: Whether unchanged files should be reported
            include_trees: Whether to include trees
            change_type_same: Whether to report files changing
                type in the same entry.
            rename_detector: RenameDetector object for detecting renames.
            paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            old_path = change.old.path if change.old is not None else None
            new_path = change.new.path if change.new is not None else None
            old_mode = change.old.mode if change.old is not None else None
            new_mode = change.new.mode if change.new is not None else None
            old_sha = change.old.sha if change.old is not None else None
            new_sha = change.new.sha if change.new is not None else None
            yield (
                (old_path, new_path),
                (old_mode, new_mode),
                (old_sha, new_sha),
            )

    def iter_tree_contents(
        self, tree_id: ObjectID, include_trees: bool = False
    ) -> Iterator[TreeEntry]:
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Args:
            tree_id: SHA1 of the tree.
            include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> "Iterator[UnpackedObject]":
        """Iterate over unpacked objects for a subset of SHAs.

        Default implementation that converts ShaFile objects to UnpackedObject.
        Subclasses may override for more efficient unpacked access.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data (ignored in base
                implementation)
            allow_missing: If True, skip missing objects; if False, raise
                KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
                base implementation)

        Returns:
            Iterator of UnpackedObject instances

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        from .pack import UnpackedObject

        for sha in shas:
            try:
                obj = self[sha]
                # Convert ShaFile to UnpackedObject
                unpacked = UnpackedObject(
                    obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
                )
                yield unpacked
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    ) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Find the missing objects required for a set of revisions.

        Args:
            haves: Iterable over SHAs already in common.
            wants: Iterable over SHAs of objects to fetch.
            shallow: Set of shallow commit SHA1s to skip
            progress: Simple progress function that will be called with
                updated progress strings.
            get_tagged: Function that returns a dict of pointed-to sha ->
                tag sha for including tags.
            get_parents: Optional function for getting the parents of a
                commit.

        Returns: Iterator over (sha, pack hint) pairs.

        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
        """Find which revisions this store has in common using graphwalker.

        Args:
            graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
            have: List of SHA1s of objects that should not be sent
            want: List of SHA1s of objects that should be sent
            shallow: Set of shallow commit SHA1s to skip
            ofs_delta: Whether OFS deltas can be included
            progress: Optional progress reporting method
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Peel all tags from a SHA.

        Args:
            sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: ObjectID,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
        max_depth: int | None = None,
    ) -> int:
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
            head: commit to start from
            get_parents: optional function for getting the parents of a commit
            max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: int | None = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
            grace_period: Grace period in seconds for removing temporary files.
                If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> "CommitGraph | None":
        """Get the commit graph for this object store.

        Returns:
            CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
            refs: List of refs to include. If None, includes all refs from object store.
            reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.

        Note:
            The base implementation raises NotImplementedError. Subclasses
            should override this method to provide commit graph writing
            functionality.

        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: ObjectID) -> float:
        """Get the modification time of an object.

        Args:
            sha: SHA1 of the object

        Returns:
            Modification time as seconds since epoch

        Raises:
            KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)
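

# Illustrative sketch (not shipped with dulwich) of the mapping-style
# BaseObjectStore interface, using the in-memory implementation defined
# later in this module.
def _example_store_basics() -> None:
    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello\n")
    store.add_object(blob)
    assert blob.id in store
    assert store[blob.id].as_raw_string() == b"hello\n"
    # get_raw() returns the numeric type and the raw content.
    assert store.get_raw(blob.id) == (Blob.type_num, b"hello\n")

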

class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that supports pack operations.

    This is a base class for object stores that can handle pack files,
    including both disk-based and memory-based stores.
    """

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator["UnpackedObject"],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add pack data to this object store.

        Args:
            count: Number of objects
            unpacked_objects: Iterator over unpacked objects
            progress: Optional progress callback
        """
        raise NotImplementedError(self.add_pack_data)

    def get_unpacked_object(
        self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
    ) -> "UnpackedObject":
        """Get a raw unresolved object.

        Args:
            sha1: SHA-1 hash of the object
            include_comp: Whether to include compressed data

        Returns:
            UnpackedObject instance
        """
        from .pack import UnpackedObject

        obj = self[sha1]
        return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects

        Returns:
            Iterator of ShaFile objects
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise
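

# Illustrative sketch (hypothetical helper): the default
# get_unpacked_object() above wraps a regular object read, so the
# decompressed chunks reproduce the object's raw content.
def _example_unpacked_roundtrip(
    store: PackCapableObjectStore, sha: ObjectID
) -> None:
    unpacked = store.get_unpacked_object(sha)
    assert b"".join(unpacked.decomp_chunks) == store[sha].as_raw_string()

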

class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level: int = -1,
        pack_index_version: int | None = None,
        pack_delta_window_size: int | None = None,
        pack_window_memory: int | None = None,
        pack_delta_cache_size: int | None = None,
        pack_depth: int | None = None,
        pack_threads: int | None = None,
        pack_big_file_threshold: int | None = None,
        *,
        packed_git_limit: int | None = None,
        delta_base_cache_limit: int | None = None,
        object_format: "ObjectFormat | None" = None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
            pack_compression_level: Compression level for pack files (-1 to 9)
            pack_index_version: Pack index version to use
            pack_delta_window_size: Window size for delta compression
            pack_window_memory: Maximum memory to use for delta window
            pack_delta_cache_size: Cache size for delta operations
            pack_depth: Maximum depth for pack deltas
            pack_threads: Number of threads to use for packing
            pack_big_file_threshold: Threshold for treating files as "big"
            packed_git_limit: Maximum total bytes for mmapped pack files.
                When exceeded, least-recently-used packs are closed to free memory.
            delta_base_cache_limit: Maximum bytes for caching delta base objects.
                Controls memory used to cache resolved base objects during delta
                unpacking, corresponding to Git's core.deltaBaseCacheLimit.
            object_format: Hash algorithm to use
        """
        super().__init__(object_format=object_format)
        self._pack_cache: dict[str, Pack] = {}
        self._pack_access_order: list[str] = []
        self.packed_git_limit = packed_git_limit
        self.delta_base_cache_limit = delta_base_cache_limit
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

    def get_reachability_provider(
        self,
        prefer_bitmaps: bool = True,
    ) -> ObjectReachabilityProvider:
        """Get the best reachability provider for the object store.

        Args:
            prefer_bitmaps: Whether to use bitmaps if available

        Returns:
            ObjectReachabilityProvider implementation (either bitmap-accelerated
            or graph traversal)
        """
        if prefer_bitmaps:
            # Check if any packs have bitmaps
            has_bitmap = False
            for pack in self.packs:
                try:
                    # Try to access bitmap property
                    if pack.bitmap is not None:
                        has_bitmap = True
                        break
                except FileNotFoundError:
                    # Bitmap file doesn't exist for this pack
                    continue

            if has_bitmap:
                return BitmapReachability(self)

        # Fall back to graph traversal
        return GraphTraversalReachability(self)

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store."""
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator of UnpackedObject instances
            progress: Optional progress callback
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return None
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                compression_level=self.pack_compression_level,
                object_format=self.object_format,
            )
        except BaseException:
            abort()
            raise
        else:
            return commit()

    @property
    def alternates(self) -> list["BaseObjectStore"]:
        """Return list of alternate object stores."""
        return []

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                pass
        return False

    def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            if prev_pack:
                prev_pack.close()
        self._mark_pack_used(base_name)
        self._enforce_packed_git_limit()

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
            have: List of SHA1s of objects that should not be sent
            want: List of SHA1s of objects that should be sent
            shallow: Set of shallow commit SHA1s to skip
            ofs_delta: Whether OFS deltas can be included
            progress: Optional progress reporting method
        """
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            self,
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

    def _clear_cached_packs(self) -> None:
        pack_cache = self._pack_cache
        self._pack_cache = {}
        self._pack_access_order = []
        while pack_cache:
            (_name, pack) = pack_cache.popitem()
            pack.close()

    def _total_pack_mmap_size(self) -> int:
        """Return the total mmapped memory across all cached packs."""
        return sum(pack.mmap_size for pack in self._pack_cache.values())

    def _mark_pack_used(self, pack_hash: str) -> None:
        """Mark a pack as recently used for LRU tracking."""
        try:
            self._pack_access_order.remove(pack_hash)
        except ValueError:
            pass
        self._pack_access_order.append(pack_hash)

    def _enforce_packed_git_limit(self) -> None:
        """Evict least-recently-used packs if the memory limit is exceeded."""
        if self.packed_git_limit is None:
            return
        while (
            self._pack_access_order
            and self._total_pack_mmap_size() > self.packed_git_limit
        ):
            oldest = self._pack_access_order.pop(0)
            pack = self._pack_cache.get(oldest)
            if pack is not None:
                pack.close()
                del self._pack_cache[oldest]

    def _iter_cached_packs(self) -> Iterator[Pack]:
        return iter(self._pack_cache.values())

    def _update_pack_cache(self) -> list[Pack]:
        raise NotImplementedError(self._update_pack_cache)

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        Can be called multiple times safely.
        """
        self._clear_cached_packs()

    def __del__(self) -> None:
        """Warn if the object store is being deleted with unclosed packs."""
        if self._pack_cache:
            import warnings

            warnings.warn(
                f"ObjectStore {self!r} was destroyed with {len(self._pack_cache)} "
                "unclosed pack(s). Please call close() explicitly.",
                ResourceWarning,
                stacklevel=2,
            )
            self.close()

    @property
    def packs(self) -> list[Pack]:
        """List with pack objects."""
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())

    def count_pack_files(self) -> int:
        """Count the number of pack files.

        Returns:
            Number of pack files (excluding those with .keep files)
        """
        count = 0
        for pack in self.packs:
            # Check if there's a .keep file for this pack
            keep_path = pack._basename + ".keep"
            if not os.path.exists(keep_path):
                count += 1
        return count

    def _iter_alternate_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate

    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
        raise NotImplementedError(self._get_loose_object)

    def delete_loose_object(self, sha: ObjectID) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.
        """
        raise NotImplementedError(self.delete_loose_object)

    def _remove_pack(self, pack: "Pack") -> None:
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
        """Pack loose objects.

        Args:
            progress: Optional progress reporting callback

        Returns: Number of objects packed
        """
        objects: list[tuple[ShaFile, None]] = []
        for sha in self._iter_loose_objects():
            obj = self._get_loose_object(sha)
            if obj is not None:
                objects.append((obj, None))
        self.add_objects(objects, progress=progress)
        for obj, path in objects:
            self.delete_loose_object(obj.id)
        return len(objects)

    def repack(
        self,
        exclude: Set[bytes] | None = None,
        progress: Callable[[str], None] | None = None,
    ) -> int:
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.

        Args:
            exclude: Optional set of object SHAs to exclude from repacking
            progress: Optional progress reporting callback
        """
        if exclude is None:
            exclude = set()

        loose_objects = set()
        excluded_loose_objects = set()
        for sha in self._iter_loose_objects():
            if sha not in exclude:
                obj = self._get_loose_object(sha)
                if obj is not None:
                    loose_objects.add(obj)
            else:
                excluded_loose_objects.add(sha)

        objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update(
                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
            )

        # Only create a new pack if there are objects to pack
        if objects:
            # The name of the consolidated pack might match the name of a
            # pre-existing pack. Take care not to remove the newly created
            # consolidated pack.
            consolidated = self.add_objects(list(objects), progress=progress)
            if consolidated is not None:
                old_packs.pop(consolidated.name(), None)

        # Delete loose objects that were packed
        for obj in loose_objects:
            if obj is not None:
                self.delete_loose_object(obj.id)
        # Delete excluded loose objects
        for sha in excluded_loose_objects:
            self.delete_loose_object(sha)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def generate_pack_bitmaps(
        self,
        refs: dict[Ref, ObjectID],
        *,
        commit_interval: int | None = None,
        progress: Callable[[str], None] | None = None,
    ) -> int:
        """Generate bitmap indexes for all packs that don't have them.

        This generates .bitmap files for packfiles, enabling fast reachability
        queries. Equivalent to the bitmap generation part of 'git repack -b'.

        Args:
            refs: Dictionary of ref names to commit SHAs
            commit_interval: Include every Nth commit in bitmap index (None for default)
            progress: Optional progress reporting callback

        Returns:
            Number of bitmaps generated
        """
        count = 0
        for pack in self.packs:
            pack.ensure_bitmap(
                self, refs, commit_interval=commit_interval, progress=progress
            )
            count += 1

        # Update cache to pick up new bitmaps
        self._update_pack_cache()

        return count

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                yield from pack
            except PackFileDisappeared:
                pass
        yield from self._iter_loose_objects()
        yield from self._iter_alternate_objects()

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw fulltext for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        sha: RawObjectID
        if len(name) == self.object_format.hex_length:
            sha = hex_to_sha(ObjectID(name))
            hexsha = name
        elif len(name) == self.object_format.oid_length:
            sha = RawObjectID(name)
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        for pack_hash, pack in self._pack_cache.items():
            try:
                result = pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
            else:
                self._mark_pack_used(pack_hash)
                self._enforce_packed_git_limit()
                return result
        if hexsha is None:
            hexsha = sha_to_hex(sha)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()

        # Maybe something else has added a pack with the object
        # in the meantime?

        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
            shas: Set of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of UnpackedObject instances

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo: set[ObjectID | RawObjectID] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

        # Maybe something else has added a pack with the object
        # in the meantime?

        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            assert isinstance(alternate, PackBasedObjectStore)
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        This method searches for objects in pack files, alternates, and loose storage.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo: set[ObjectID] = set(shas)
        for p in self._iter_cached_packs():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)

        # Maybe something else has added a pack with the object
        # in the meantime?

        for p in self._update_pack_cache():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for alternate in self.alternates:
            for o in alternate.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for oid in todo:
            loose_obj: ShaFile | None = self._get_loose_object(oid)
            if loose_obj is not None:
                yield loose_obj
            elif not allow_missing:
                raise KeyError(oid)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
            sha1: sha for the object.
            include_comp: Whether to include compression metadata.
        """
        if len(sha1) == self.object_format.hex_length:
            sha = hex_to_sha(cast(ObjectID, sha1))
            hexsha = cast(ObjectID, sha1)
        elif len(sha1) == self.object_format.oid_length:
            sha = cast(RawObjectID, sha1)
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(sha)

        # Maybe something else has added a pack with the object
        # in the meantime?

        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            assert isinstance(alternate, PackBasedObjectStore)
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> "Pack | None":
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over (object, path) tuples, should support
                __len__.
            progress: Optional progress reporting function.
        Returns: Pack object of the objects written.
        """
        count = len(objects)
        record_iter = (full_unpacked_object(o) for (o, p) in objects)
        return self.add_pack_data(count, record_iter, progress=progress)
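

# Illustrative sketch (hypothetical helper): add_objects() on a pack-based
# store routes through add_pack_data(), so several objects added together
# land in a single new pack rather than as loose objects.
def _example_pack_store(store: PackBasedObjectStore) -> None:
    blobs = [Blob.from_string(data) for data in (b"one", b"two", b"three")]
    pack = store.add_objects([(blob, None) for blob in blobs])
    if pack is not None:
        assert all(store.contains_packed(blob.id) for blob in blobs)

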

class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    path: str | os.PathLike[str]
    pack_dir: str | os.PathLike[str]
    _alternates: "list[BaseObjectStore] | None"
    _commit_graph: "CommitGraph | None"

    def __init__(
        self,
        path: str | os.PathLike[str],
        *,
        loose_compression_level: int = -1,
        pack_compression_level: int = -1,
        pack_index_version: int | None = None,
        pack_delta_window_size: int | None = None,
        pack_window_memory: int | None = None,
        pack_delta_cache_size: int | None = None,
        pack_depth: int | None = None,
        pack_threads: int | None = None,
        pack_big_file_threshold: int | None = None,
        packed_git_limit: int | None = None,
        delta_base_cache_limit: int | None = None,
        fsync_object_files: bool = False,
        pack_write_bitmaps: bool = False,
        pack_write_bitmap_hash_cache: bool = True,
        pack_write_bitmap_lookup_table: bool = True,
        file_mode: int | None = None,
        dir_mode: int | None = None,
        object_format: "ObjectFormat | None" = None,
    ) -> None:
        """Open an object store.

        Args:
            path: Path of the object store.
            loose_compression_level: zlib compression level for loose objects
            pack_compression_level: zlib compression level for pack objects
            pack_index_version: pack index version to use (1, 2, or 3)
            pack_delta_window_size: sliding window size for delta compression
            pack_window_memory: memory limit for delta window operations
            pack_delta_cache_size: size of cache for delta operations
            pack_depth: maximum delta chain depth
            pack_threads: number of threads for pack operations
            pack_big_file_threshold: threshold for treating files as big
            packed_git_limit: maximum total bytes for mmapped pack files
            delta_base_cache_limit: maximum bytes for delta base object cache
            fsync_object_files: whether to fsync object files for durability
            pack_write_bitmaps: whether to write bitmap indexes for packs
            pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
            pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
            file_mode: File permission mask for shared repository
            dir_mode: Directory permission mask for shared repository
            object_format: Hash algorithm to use (SHA1 or SHA256)
        """
        # Import here to avoid circular dependency
        from .object_format import DEFAULT_OBJECT_FORMAT

        super().__init__(
            pack_compression_level=pack_compression_level,
            pack_index_version=pack_index_version,
            pack_delta_window_size=pack_delta_window_size,
            pack_window_memory=pack_window_memory,
            pack_delta_cache_size=pack_delta_cache_size,
            pack_depth=pack_depth,
            pack_threads=pack_threads,
            pack_big_file_threshold=pack_big_file_threshold,
            packed_git_limit=packed_git_limit,
            delta_base_cache_limit=delta_base_cache_limit,
            object_format=object_format if object_format else DEFAULT_OBJECT_FORMAT,
        )
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._alternates = None
        self.loose_compression_level = loose_compression_level
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.fsync_object_files = fsync_object_files
        self.pack_write_bitmaps = pack_write_bitmaps
        self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
        self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
        self.file_mode = file_mode
        self.dir_mode = dir_mode

        # Commit graph support - lazy loaded
        self._commit_graph = None
        self._use_commit_graph = True  # Default to true

        # Multi-pack-index support - lazy loaded
        self._midx: MultiPackIndex | None = None
        self._use_midx = True  # Default to true

    def __repr__(self) -> str:
        """Return string representation of DiskObjectStore.

        Returns:
            String representation including the store path
        """
        return f"<{self.__class__.__name__}({self.path!r})>"

    @classmethod
    def from_config(
        cls,
        path: str | os.PathLike[str],
        config: "Config",
        *,
        file_mode: int | None = None,
        dir_mode: int | None = None,
    ) -> "DiskObjectStore":
        """Create a DiskObjectStore from a configuration object.

        Args:
            path: Path to the object store directory
            config: Configuration object to read settings from
            file_mode: Optional file permission mask for shared repository
            dir_mode: Optional directory permission mask for shared repository

        Returns:
            New DiskObjectStore instance configured according to config
        """
        try:
            default_compression_level = int(
                config.get((b"core",), b"compression").decode()
            )
        except KeyError:
            default_compression_level = -1
        try:
            loose_compression_level = int(
                config.get((b"core",), b"looseCompression").decode()
            )
        except KeyError:
            loose_compression_level = default_compression_level

        try:
            pack_compression_level = int(
                config.get((b"core",), b"packCompression").decode()
            )
        except KeyError:
            pack_compression_level = default_compression_level

        try:
            pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
        except KeyError:
            pack_index_version = None

        # Read pack configuration options
        try:
            pack_delta_window_size = int(
                config.get((b"pack",), b"deltaWindowSize").decode()
            )
        except KeyError:
            pack_delta_window_size = None
        try:
            pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
        except KeyError:
            pack_window_memory = None
        try:
            pack_delta_cache_size = int(
                config.get((b"pack",), b"deltaCacheSize").decode()
            )
        except KeyError:
            pack_delta_cache_size = None
        try:
            pack_depth = int(config.get((b"pack",), b"depth").decode())
        except KeyError:
            pack_depth = None
        try:
            pack_threads = int(config.get((b"pack",), b"threads").decode())
        except KeyError:
            pack_threads = None
        try:
            pack_big_file_threshold = int(
                config.get((b"pack",), b"bigFileThreshold").decode()
            )
        except KeyError:
            pack_big_file_threshold = None

        # Read core.packedGitLimit setting
        try:
            packed_git_limit = int(config.get((b"core",), b"packedGitLimit").decode())
        except KeyError:
            packed_git_limit = None

        # Read core.deltaBaseCacheLimit setting
        try:
            delta_base_cache_limit = int(
                config.get((b"core",), b"deltaBaseCacheLimit").decode()
            )
        except KeyError:
            delta_base_cache_limit = None

        # Read core.commitGraph setting
        use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

        # Read core.multiPackIndex setting
        use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)

        # Read core.fsyncObjectFiles setting
        fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)

        # Read bitmap settings
        pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
        pack_write_bitmap_hash_cache = config.get_boolean(
            (b"pack",), b"writeBitmapHashCache", True
        )
        pack_write_bitmap_lookup_table = config.get_boolean(
            (b"pack",), b"writeBitmapLookupTable", True
        )
        # Also check repack.writeBitmaps for backwards compatibility
        if not pack_write_bitmaps:
            pack_write_bitmaps = config.get_boolean(
                (b"repack",), b"writeBitmaps", False
            )

        # Get hash algorithm from config
        from .object_format import get_object_format

        object_format = None
        try:
            try:
                version = int(config.get((b"core",), b"repositoryformatversion"))
            except KeyError:
                version = 0
            if version == 1:
                try:
                    object_format_name = config.get((b"extensions",), b"objectformat")
                except KeyError:
                    object_format_name = b"sha1"
                object_format = get_object_format(object_format_name.decode("ascii"))
        except (KeyError, ValueError):
            pass

        instance = cls(
            path,
            loose_compression_level=loose_compression_level,
            pack_compression_level=pack_compression_level,
            pack_index_version=pack_index_version,
            pack_delta_window_size=pack_delta_window_size,
            pack_window_memory=pack_window_memory,
            pack_delta_cache_size=pack_delta_cache_size,
            pack_depth=pack_depth,
            pack_threads=pack_threads,
            pack_big_file_threshold=pack_big_file_threshold,
            packed_git_limit=packed_git_limit,
            delta_base_cache_limit=delta_base_cache_limit,
            fsync_object_files=fsync_object_files,
            pack_write_bitmaps=pack_write_bitmaps,
            pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
            pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
            file_mode=file_mode,
            dir_mode=dir_mode,
            object_format=object_format,
        )
        instance._use_commit_graph = use_commit_graph
        instance._use_midx = use_midx
        return instance

1684 @property 

1685 def alternates(self) -> list["BaseObjectStore"]: 

1686 """Get the list of alternate object stores. 

1687 

1688 Reads from .git/objects/info/alternates if not already cached. 

1689 

1690 Returns: 

1691 List of DiskObjectStore instances for alternate object directories 

1692 """ 

1693 if self._alternates is not None: 

1694 return self._alternates 

1695 self._alternates = [] 

1696 for path in self._read_alternate_paths(): 

1697 self._alternates.append(DiskObjectStore(path)) 

1698 return self._alternates 

1699 

1700 def _read_alternate_paths(self) -> Iterator[str]: 

1701 try: 

1702 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1703 except FileNotFoundError: 

1704 return 

1705 with f: 

1706 for line in f.readlines(): 

1707 line = line.rstrip(b"\n") 

1708 if line.startswith(b"#"): 

1709 continue 

1710 if os.path.isabs(line): 

1711 yield os.fsdecode(line) 

1712 else: 

1713 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1714 
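# Illustrative sketch of the file parsed above: an objects/info/alternates
# file lists one object directory per line, lines starting with "#" are
# comments, and relative paths are resolved against this store's path
# (the paths below are hypothetical):
#
#   # shared object stores
#   /srv/git/shared.git/objects
#   ../../other-repo/.git/objects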

1715 def add_alternate_path(self, path: str | os.PathLike[str]) -> None: 

1716 """Add an alternate path to this object store.""" 

1717 info_dir = os.path.join(self.path, INFODIR) 

1718 try: 

1719 os.mkdir(info_dir) 

1720 if self.dir_mode is not None: 

1721 os.chmod(info_dir, self.dir_mode) 

1722 except FileExistsError: 

1723 pass 

1724 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1725 mask = self.file_mode if self.file_mode is not None else 0o644 

1726 with GitFile(alternates_path, "wb", mask=mask) as f: 

1727 try: 

1728 orig_f = open(alternates_path, "rb") 

1729 except FileNotFoundError: 

1730 pass 

1731 else: 

1732 with orig_f: 

1733 f.write(orig_f.read()) 

1734 f.write(os.fsencode(path) + b"\n") 

1735 

1736 if not os.path.isabs(path): 

1737 path = os.path.join(self.path, path) 

1738 self.alternates.append(DiskObjectStore(path)) 

1739 
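# Usage sketch (hypothetical paths): link a store to a shared object
# directory so lookups can fall through to it via the alternates chain.
#
#   store = DiskObjectStore("/srv/repo/.git/objects")
#   store.add_alternate_path("/srv/shared.git/objects")
#   # Objects that only exist in the shared store are now visible
#   # through `store`.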

1740 def _update_pack_cache(self) -> list[Pack]: 

1741 """Read and iterate over new pack files and cache them.""" 

1742 try: 

1743 pack_dir_contents = os.listdir(self.pack_dir) 

1744 except FileNotFoundError: 

1745 return [] 

1746 pack_files = set() 

1747 for name in pack_dir_contents: 

1748 if name.startswith("pack-") and name.endswith(".pack"): 

1749 # verify that idx exists first (otherwise the pack was not yet 

1750 # fully written) 

1751 idx_name = os.path.splitext(name)[0] + ".idx" 

1752 if idx_name in pack_dir_contents: 

1753 # Extract just the hash (remove "pack-" prefix and ".pack" suffix) 

1754 pack_hash = name[len("pack-") : -len(".pack")] 

1755 pack_files.add(pack_hash) 

1756 

1757 # Open newly appeared pack files 

1758 new_packs = [] 

1759 for pack_hash in pack_files: 

1760 if pack_hash not in self._pack_cache: 

1761 pack = Pack( 

1762 os.path.join(self.pack_dir, "pack-" + pack_hash), 

1763 object_format=self.object_format, 

1764 delta_window_size=self.pack_delta_window_size, 

1765 window_memory=self.pack_window_memory, 

1766 delta_cache_size=self.pack_delta_cache_size, 

1767 depth=self.pack_depth, 

1768 threads=self.pack_threads, 

1769 big_file_threshold=self.pack_big_file_threshold, 

1770 delta_base_cache_limit=self.delta_base_cache_limit, 

1771 ) 

1772 new_packs.append(pack) 

1773 self._pack_cache[pack_hash] = pack 

1774 self._mark_pack_used(pack_hash) 

1775 # Remove disappeared pack files 

1776 for f in set(self._pack_cache) - pack_files: 

1777 self._pack_cache.pop(f).close() 

1778 try: 

1779 self._pack_access_order.remove(f) 

1780 except ValueError: 

1781 pass 

1782 self._enforce_packed_git_limit() 

1783 return new_packs 

1784 

1785 def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str: 

1786 # Check from object dir 

1787 return hex_to_filename(os.fspath(self.path), sha) 

1788 

1789 def _iter_loose_objects(self) -> Iterator[ObjectID]: 

1790 for base in os.listdir(self.path): 

1791 if len(base) != 2: 

1792 continue 

1793 for rest in os.listdir(os.path.join(self.path, base)): 

1794 sha = os.fsencode(base + rest) 

1795 if not valid_hexsha(sha): 

1796 continue 

1797 yield ObjectID(sha) 

1798 

1799 def count_loose_objects(self) -> int: 

1800 """Count the number of loose objects in the object store. 

1801 

1802 Returns: 

1803 Number of loose objects 

1804 """ 

1805 # Calculate expected filename length for loose 

1806 # objects (excluding directory) 

1807 fn_length = self.object_format.hex_length - 2 

1808 count = 0 

1809 if not os.path.exists(self.path): 

1810 return 0 

1811 

1812 for i in range(256): 

1813 subdir = os.path.join(self.path, f"{i:02x}") 

1814 try: 

1815 count += len( 

1816 [name for name in os.listdir(subdir) if len(name) == fn_length] 

1817 ) 

1818 except FileNotFoundError: 

1819 # Directory may have been removed or is inaccessible 

1820 continue 

1821 

1822 return count 

1823 
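# The layout assumed by the count above: loose objects live under 256
# fan-out directories named after the first two hex digits of the SHA,
# with the remaining digits as the file name, so a valid loose-object
# file name is hex_length - 2 characters long. For a hypothetical SHA-1
# object id starting with "ab":
#
#   .git/objects/ab/<remaining 38 hex characters>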

1824 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None: 

1825 path = self._get_shafile_path(sha) 

1826 try: 

1827 # Load the object from path with SHA and hash algorithm from object store 

1828 # Convert to hex ObjectID if needed 

1829 if len(sha) == self.object_format.oid_length: 

1830 hex_sha: ObjectID = sha_to_hex(RawObjectID(sha)) 

1831 else: 

1832 hex_sha = ObjectID(sha) 

1833 return ShaFile.from_path(path, hex_sha, object_format=self.object_format) 

1834 except FileNotFoundError: 

1835 return None 

1836 

1837 def delete_loose_object(self, sha: ObjectID) -> None: 

1838 """Delete a loose object from disk. 

1839 

1840 Args: 

1841 sha: SHA1 of the object to delete 

1842 

1843 Raises: 

1844 FileNotFoundError: If the object file doesn't exist 

1845 """ 

1846 os.remove(self._get_shafile_path(sha)) 

1847 

1848 def get_object_mtime(self, sha: ObjectID) -> float: 

1849 """Get the modification time of an object. 

1850 

1851 Args: 

1852 sha: SHA1 of the object 

1853 

1854 Returns: 

1855 Modification time as seconds since epoch 

1856 

1857 Raises: 

1858 KeyError: if the object is not found 

1859 """ 

1860 # First check if it's a loose object 

1861 if self.contains_loose(sha): 

1862 path = self._get_shafile_path(sha) 

1863 try: 

1864 return os.path.getmtime(path) 

1865 except FileNotFoundError: 

1866 pass 

1867 

1868 # Check if it's in a pack file 

1869 for pack in self.packs: 

1870 try: 

1871 if sha in pack: 

1872 # Use the pack file's mtime for packed objects 

1873 pack_path = pack._data_path 

1874 try: 

1875 return os.path.getmtime(pack_path) 

1876 except (FileNotFoundError, AttributeError): 

1877 pass 

1878 except PackFileDisappeared: 

1879 pass 

1880 

1881 raise KeyError(sha) 

1882 

1883 def _remove_pack(self, pack: Pack) -> None: 

1884 try: 

1885 del self._pack_cache[os.path.basename(pack._basename)] 

1886 except KeyError: 

1887 pass 

1888 # Store paths before closing to avoid re-opening files on Windows 

1889 data_path = pack._data_path 

1890 idx_path = pack._idx_path 

1891 pack.close() 

1892 os.remove(data_path) 

1893 if os.path.exists(idx_path): 

1894 os.remove(idx_path) 

1895 

1896 def _get_pack_basepath( 

1897 self, entries: Iterable[tuple[bytes, int, int | None]] 

1898 ) -> str: 

1899 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1900 # TODO: Handle self.pack_dir being bytes 

1901 suffix = suffix_bytes.decode("ascii") 

1902 return os.path.join(self.pack_dir, "pack-" + suffix) 

1903 

1904 def _complete_pack( 

1905 self, 

1906 f: BinaryIO, 

1907 path: str, 

1908 num_objects: int, 

1909 indexer: PackIndexer, 

1910 progress: Callable[..., None] | None = None, 

1911 refs: dict[Ref, ObjectID] | None = None, 

1912 ) -> Pack: 

1913 """Move a specific file containing a pack into the pack directory. 

1914 

1915 Note: The file should be on the same file system as the 

1916 packs directory. 

1917 

1918 Args: 

1919 f: Open file object for the pack. 

1920 path: Path to the pack file. 

1921 num_objects: Number of objects in the pack. 

1922 indexer: A PackIndexer for indexing the pack. 

1923 progress: Optional progress reporting function. 

1924 refs: Optional dictionary of refs for bitmap generation. 

1925 """ 

1926 entries = [] 

1927 for i, entry in enumerate(indexer): 

1928 if progress is not None: 

1929 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1930 entries.append(entry) 

1931 

1932 pack_sha, extra_entries = extend_pack( 

1933 f, 

1934 set(indexer.ext_refs()), 

1935 get_raw=self.get_raw, 

1936 compression_level=self.pack_compression_level, 

1937 progress=progress, 

1938 object_format=self.object_format, 

1939 ) 

1940 f.flush() 

1941 if self.fsync_object_files: 

1942 try: 

1943 fileno = f.fileno() 

1944 except AttributeError as e: 

1945 raise OSError("fsync requested but file has no fileno()") from e 

1946 else: 

1947 os.fsync(fileno) 

1948 f.close() 

1949 

1950 entries.extend(extra_entries) 

1951 

1952 # Move the pack in. 

1953 entries.sort() 

1954 pack_base_name = self._get_pack_basepath(entries) 

1955 

1956 for pack in self.packs: 

1957 if pack._basename == pack_base_name: 

1958 return pack 

1959 

1960 target_pack_path = pack_base_name + ".pack" 

1961 target_index_path = pack_base_name + ".idx" 

1962 if sys.platform == "win32": 

1963 # Windows might have the target pack file lingering. Attempt 

1964 # removal, silently passing if the target does not exist. 

1965 with suppress(FileNotFoundError): 

1966 os.remove(target_pack_path) 

1967 os.rename(path, target_pack_path) 

1968 

1969 # Write the index. 

1970 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1971 with GitFile( 

1972 target_index_path, 

1973 "wb", 

1974 mask=mask, 

1975 fsync=self.fsync_object_files, 

1976 ) as index_file: 

1977 write_pack_index( 

1978 index_file, entries, pack_sha, version=self.pack_index_version 

1979 ) 

1980 

1981 # Generate bitmap if configured and refs are available 

1982 if self.pack_write_bitmaps and refs: 

1983 from .bitmap import generate_bitmap, write_bitmap 

1984 from .pack import load_pack_index_file 

1985 

1986 if progress: 

1987 progress("Generating bitmap index\r".encode("ascii")) 

1988 

1989 # Load the index we just wrote 

1990 with open(target_index_path, "rb") as idx_file: 

1991 pack_index = load_pack_index_file( 

1992 os.path.basename(target_index_path), 

1993 idx_file, 

1994 self.object_format, 

1995 ) 

1996 

1997 # Generate the bitmap 

1998 bitmap = generate_bitmap( 

1999 pack_index=pack_index, 

2000 object_store=self, 

2001 refs=refs, 

2002 pack_checksum=pack_sha, 

2003 include_hash_cache=self.pack_write_bitmap_hash_cache, 

2004 include_lookup_table=self.pack_write_bitmap_lookup_table, 

2005 progress=lambda msg: ( 

2006 progress(msg.encode("ascii")) 

2007 if progress and isinstance(msg, str) 

2008 else None 

2009 ), 

2010 ) 

2011 

2012 # Write the bitmap 

2013 target_bitmap_path = pack_base_name + ".bitmap" 

2014 write_bitmap(target_bitmap_path, bitmap) 

2015 

2016 if progress: 

2017 progress("Bitmap index written\r".encode("ascii")) 

2018 

2019 # Add the pack to the store and return it. 

2020 final_pack = Pack( 

2021 pack_base_name, 

2022 object_format=self.object_format, 

2023 delta_window_size=self.pack_delta_window_size, 

2024 window_memory=self.pack_window_memory, 

2025 delta_cache_size=self.pack_delta_cache_size, 

2026 depth=self.pack_depth, 

2027 threads=self.pack_threads, 

2028 big_file_threshold=self.pack_big_file_threshold, 

2029 delta_base_cache_limit=self.delta_base_cache_limit, 

2030 ) 

2031 final_pack.check_length_and_checksum() 

2032 # Extract just the hash from pack_base_name (/path/to/pack-HASH -> HASH) 

2033 pack_hash = os.path.basename(pack_base_name)[len("pack-") :] 

2034 self._add_cached_pack(pack_hash, final_pack) 

2035 return final_pack 

2036 

2037 def add_thin_pack( 

2038 self, 

2039 read_all: Callable[[int], bytes], 

2040 read_some: Callable[[int], bytes] | None, 

2041 progress: Callable[..., None] | None = None, 

2042 ) -> "Pack": 

2043 """Add a new thin pack to this object store. 

2044 

2045 Thin packs are packs that contain deltas with parents that exist 

2046 outside the pack. They should never be placed in the object store 

2047 directly; instead, they are indexed and completed as they are copied in. 

2048 

2049 Args: 

2050 read_all: Read function that blocks until the number of 

2051 requested bytes are read. 

2052 read_some: Read function that returns at least one byte, but may 

2053 not return the number of bytes requested. 

2054 progress: Optional progress reporting function. 

2055 Returns: A Pack object pointing at the now-completed thin pack in the 

2056 objects/pack directory. 

2057 """ 

2058 import tempfile 

2059 

2060 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

2061 with os.fdopen(fd, "w+b") as f: 

2062 os.chmod(path, PACK_MODE) 

2063 indexer = PackIndexer( 

2064 f, 

2065 self.object_format.hash_func, 

2066 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

2067 ) 

2068 copier = PackStreamCopier( 

2069 self.object_format.hash_func, 

2070 read_all, 

2071 read_some, 

2072 f, 

2073 delta_iter=indexer, # type: ignore[arg-type] 

2074 ) 

2075 copier.verify(progress=progress) 

2076 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

2077 

2078 def add_pack( 

2079 self, 

2080 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2081 """Add a new pack to this object store. 

2082 

2083 Returns: Fileobject to write to, a commit function to 

2084 call when the pack is finished and an abort 

2085 function. 

2086 """ 

2087 import tempfile 

2088 

2089 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

2090 f = os.fdopen(fd, "w+b") 

2091 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

2092 os.chmod(path, mask) 

2093 

2094 def commit() -> "Pack | None": 

2095 if f.tell() > 0: 

2096 f.seek(0) 

2097 

2098 with PackData(path, file=f, object_format=self.object_format) as pd: 

2099 indexer = PackIndexer.for_pack_data( 

2100 pd, 

2101 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

2102 ) 

2103 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

2104 else: 

2105 f.close() 

2106 os.remove(path) 

2107 return None 

2108 

2109 def abort() -> None: 

2110 f.close() 

2111 os.remove(path) 

2112 

2113 return f, commit, abort # type: ignore[return-value] 

2114 
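# Usage sketch for the add_pack() protocol above: write a complete pack
# stream to the returned file object, then either commit or abort
# (pack_bytes is a hypothetical, fully formed pack stream):
#
#   f, commit, abort = store.add_pack()
#   try:
#       f.write(pack_bytes)
#   except BaseException:
#       abort()
#       raise
#   else:
#       commit()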

2115 def add_object(self, obj: ShaFile) -> None: 

2116 """Add a single object to this object store. 

2117 

2118 Args: 

2119 obj: Object to add 

2120 """ 

2121 # Use the correct hash algorithm for the object ID 

2122 obj_id = ObjectID(obj.get_id(self.object_format)) 

2123 path = self._get_shafile_path(obj_id) 

2124 dir = os.path.dirname(path) 

2125 try: 

2126 os.mkdir(dir) 

2127 if self.dir_mode is not None: 

2128 os.chmod(dir, self.dir_mode) 

2129 except FileExistsError: 

2130 pass 

2131 if os.path.exists(path): 

2132 return # Already there, no need to write again 

2133 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

2134 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f: 

2135 f.write( 

2136 obj.as_legacy_object(compression_level=self.loose_compression_level) 

2137 ) 

2138 

2139 @classmethod 

2140 def init( 

2141 cls, 

2142 path: str | os.PathLike[str], 

2143 *, 

2144 file_mode: int | None = None, 

2145 dir_mode: int | None = None, 

2146 object_format: "ObjectFormat | None" = None, 

2147 ) -> "DiskObjectStore": 

2148 """Initialize a new disk object store. 

2149 

2150 Creates the necessary directory structure for a Git object store. 

2151 

2152 Args: 

2153 path: Path where the object store should be created 

2154 file_mode: Optional file permission mask for shared repository 

2155 dir_mode: Optional directory permission mask for shared repository 

2156 object_format: Hash algorithm to use (SHA1 or SHA256) 

2157 

2158 Returns: 

2159 New DiskObjectStore instance 

2160 """ 

2161 try: 

2162 os.mkdir(path) 

2163 if dir_mode is not None: 

2164 os.chmod(path, dir_mode) 

2165 except FileExistsError: 

2166 pass 

2167 info_path = os.path.join(path, "info") 

2168 pack_path = os.path.join(path, PACKDIR) 

2169 os.mkdir(info_path) 

2170 os.mkdir(pack_path) 

2171 if dir_mode is not None: 

2172 os.chmod(info_path, dir_mode) 

2173 os.chmod(pack_path, dir_mode) 

2174 return cls( 

2175 path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format 

2176 ) 

2177 

2178 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

2179 """Iterate over all object SHAs with the given prefix. 

2180 

2181 Args: 

2182 prefix: Hex prefix to search for (as bytes) 

2183 

2184 Returns: 

2185 Iterator of object SHAs (as ObjectID) matching the prefix 

2186 """ 

2187 if len(prefix) < 2: 

2188 yield from super().iter_prefix(prefix) 

2189 return 

2190 seen = set() 

2191 dir = prefix[:2].decode() 

2192 rest = prefix[2:].decode() 

2193 try: 

2194 for name in os.listdir(os.path.join(self.path, dir)): 

2195 if name.startswith(rest): 

2196 sha = ObjectID(os.fsencode(dir + name)) 

2197 if sha not in seen: 

2198 seen.add(sha) 

2199 yield sha 

2200 except FileNotFoundError: 

2201 pass 

2202 

2203 for p in self.packs: 

2204 bin_prefix = ( 

2205 binascii.unhexlify(prefix) 

2206 if len(prefix) % 2 == 0 

2207 else binascii.unhexlify(prefix[:-1]) 

2208 ) 

2209 for bin_sha in p.index.iter_prefix(bin_prefix): 

2210 sha = sha_to_hex(bin_sha) 

2211 if sha.startswith(prefix) and sha not in seen: 

2212 seen.add(sha) 

2213 yield sha 

2214 for alternate in self.alternates: 

2215 for sha in alternate.iter_prefix(prefix): 

2216 if sha not in seen: 

2217 seen.add(sha) 

2218 yield sha 

2219 
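# Usage sketch: resolving an abbreviated object id with iter_prefix
# (the prefix below is hypothetical):
#
#   matches = list(store.iter_prefix(b"abc12"))
#   if len(matches) == 1:
#       full_sha = matches[0]   # unambiguous abbreviation
#   elif matches:
#       ...                     # ambiguous; ask the caller for more digits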

2220 def get_commit_graph(self) -> "CommitGraph | None": 

2221 """Get the commit graph for this object store. 

2222 

2223 Returns: 

2224 CommitGraph object if available, None otherwise 

2225 """ 

2226 if not self._use_commit_graph: 

2227 return None 

2228 

2229 if self._commit_graph is None: 

2230 from .commit_graph import read_commit_graph 

2231 

2232 # Look for commit graph in our objects directory 

2233 graph_file = os.path.join(self.path, "info", "commit-graph") 

2234 if os.path.exists(graph_file): 

2235 self._commit_graph = read_commit_graph(graph_file) 

2236 return self._commit_graph 

2237 

2238 def get_midx(self) -> MultiPackIndex | None: 

2239 """Get the multi-pack-index for this object store. 

2240 

2241 Returns: 

2242 MultiPackIndex object if available, None otherwise 

2243 

2244 Raises: 

2245 ValueError: If MIDX file is corrupt 

2246 OSError: If MIDX file cannot be read 

2247 """ 

2248 if not self._use_midx: 

2249 return None 

2250 

2251 if self._midx is None: 

2252 # Look for MIDX in pack directory 

2253 midx_file = os.path.join(self.pack_dir, "multi-pack-index") 

2254 if os.path.exists(midx_file): 

2255 self._midx = load_midx(midx_file) 

2256 return self._midx 

2257 

2258 def _get_pack_by_name(self, pack_name: str) -> Pack: 

2259 """Get a pack by its base name. 

2260 

2261 Args: 

2262 pack_name: Name of the pack, with or without a .pack/.idx extension (e.g., 'pack-abc123.pack') 

2263 

2264 Returns: 

2265 Pack object 

2266 

2267 Raises: 

2268 KeyError: If pack doesn't exist 

2269 """ 

2270 # Remove .pack or .idx extension if present 

2271 if pack_name.endswith(".pack"): 

2272 base_name = pack_name[:-5] 

2273 elif pack_name.endswith(".idx"): 

2274 base_name = pack_name[:-4] 

2275 else: 

2276 base_name = pack_name 

2277 

2278 # Check if already in cache 

2279 if base_name in self._pack_cache: 

2280 return self._pack_cache[base_name] 

2281 

2282 # Load the pack 

2283 pack_path = os.path.join(self.pack_dir, base_name) 

2284 if not os.path.exists(pack_path + ".pack"): 

2285 raise KeyError(f"Pack {pack_name} not found") 

2286 

2287 pack = Pack( 

2288 pack_path, 

2289 object_format=self.object_format, 

2290 delta_window_size=self.pack_delta_window_size, 

2291 window_memory=self.pack_window_memory, 

2292 delta_cache_size=self.pack_delta_cache_size, 

2293 depth=self.pack_depth, 

2294 threads=self.pack_threads, 

2295 big_file_threshold=self.pack_big_file_threshold, 

2296 delta_base_cache_limit=self.delta_base_cache_limit, 

2297 ) 

2298 self._pack_cache[base_name] = pack 

2299 return pack 

2300 

2301 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

2302 """Check if a particular object is present by SHA1 and is packed. 

2303 

2304 This checks the MIDX first if available, then falls back to checking 

2305 individual pack indexes. 

2306 

2307 Args: 

2308 sha: Binary SHA of the object 

2309 

2310 Returns: 

2311 True if the object is in a pack file 

2312 """ 

2313 # Check MIDX first for faster lookup 

2314 midx = self.get_midx() 

2315 if midx is not None and sha in midx: 

2316 return True 

2317 

2318 # Fall back to checking individual packs 

2319 return super().contains_packed(sha) 

2320 

2321 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

2322 """Obtain the raw fulltext for an object. 

2323 

2324 This uses the MIDX if available for faster lookups. 

2325 

2326 Args: 

2327 name: SHA for the object (binary or hex; 20/40 bytes for SHA-1, 32/64 bytes for SHA-256) 

2328 

2329 Returns: 

2330 Tuple with numeric type and object contents 

2331 

2332 Raises: 

2333 KeyError: If object not found 

2334 """ 

2335 sha: RawObjectID 

2336 if len(name) in (40, 64): 

2337 # name is ObjectID (hex), convert to RawObjectID 

2338 # Support both SHA1 (40) and SHA256 (64) 

2339 sha = hex_to_sha(cast(ObjectID, name)) 

2340 elif len(name) in (20, 32): 

2341 # name is already RawObjectID (binary) 

2342 # Support both SHA1 (20) and SHA256 (32) 

2343 sha = RawObjectID(name) 

2344 else: 

2345 raise AssertionError(f"Invalid object name {name!r}") 

2346 

2347 # Try MIDX first for faster lookup 

2348 midx = self.get_midx() 

2349 if midx is not None: 

2350 result = midx.object_offset(sha) 

2351 if result is not None: 

2352 pack_name, _offset = result 

2353 try: 

2354 pack = self._get_pack_by_name(pack_name) 

2355 return pack.get_raw(sha) 

2356 except (KeyError, PackFileDisappeared): 

2357 # Pack disappeared or object not found, fall through to standard lookup 

2358 pass 

2359 

2360 # Fall back to the standard implementation 

2361 return super().get_raw(name) 

2362 

2363 def write_midx(self) -> bytes: 

2364 """Write a multi-pack-index file for this object store. 

2365 

2366 Creates a MIDX file that indexes all pack files in the pack directory. 

2367 

2368 Returns: 

2369 SHA-1 checksum of the written MIDX file 

2370 

2371 Raises: 

2372 OSError: If the pack directory doesn't exist or MIDX can't be written 

2373 """ 

2374 from .midx import write_midx_file 

2375 

2376 # Get all pack files 

2377 packs = self.packs 

2378 if not packs: 

2379 # No packs to index 

2380 return b"\x00" * 20 

2381 

2382 # Collect entries from all packs 

2383 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = [] 

2384 

2385 for pack in packs: 

2386 # Git stores .idx extension in MIDX, not .pack 

2387 pack_name = os.path.basename(pack._basename) + ".idx" 

2388 entries = list(pack.index.iterentries()) 

2389 pack_entries.append((pack_name, entries)) 

2390 

2391 # Write MIDX file 

2392 midx_path = os.path.join(self.pack_dir, "multi-pack-index") 

2393 return write_midx_file(midx_path, pack_entries) 

2394 
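# Usage sketch: index all current packs into a single
# pack/multi-pack-index file and read it back:
#
#   checksum = store.write_midx()   # returns the file checksum
#   midx = store.get_midx()        # loads it (unless core.multiPackIndex is off)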

2395 def write_commit_graph( 

2396 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True 

2397 ) -> None: 

2398 """Write a commit graph file for this object store. 

2399 

2400 Args: 

2401 refs: List of refs to include. If None, includes all refs from object store. 

2402 reachable: If True, includes all commits reachable from refs. 

2403 If False, only includes the direct ref targets. 

2404 """ 

2405 from .commit_graph import get_reachable_commits 

2406 

2407 if refs is None: 

2408 # Get all commit objects from the object store 

2409 all_refs = [] 

2410 # Iterate through all objects to find commits 

2411 for sha in self: 

2412 try: 

2413 obj = self[sha] 

2414 if obj.type_name == b"commit": 

2415 all_refs.append(sha) 

2416 except KeyError: 

2417 continue 

2418 else: 

2419 # Use provided refs 

2420 all_refs = list(refs) 

2421 

2422 if not all_refs: 

2423 return # No commits to include 

2424 

2425 if reachable: 

2426 # Get all reachable commits 

2427 commit_ids = get_reachable_commits(self, all_refs) 

2428 else: 

2429 # Just use the direct ref targets - ensure they're hex ObjectIDs 

2430 commit_ids = [] 

2431 for ref in all_refs: 

2432 if isinstance(ref, bytes) and len(ref) == self.object_format.hex_length: 

2433 # Already hex ObjectID 

2434 commit_ids.append(ref) 

2435 elif ( 

2436 isinstance(ref, bytes) and len(ref) == self.object_format.oid_length 

2437 ): 

2438 # Binary SHA, convert to hex ObjectID 

2439 from .objects import sha_to_hex 

2440 

2441 commit_ids.append(sha_to_hex(RawObjectID(ref))) 

2442 else: 

2443 # Assume it's already correct format 

2444 commit_ids.append(ref) 

2445 

2446 if commit_ids: 

2447 # Write commit graph directly to our object store path 

2448 # Generate the commit graph 

2449 from .commit_graph import generate_commit_graph 

2450 

2451 graph = generate_commit_graph(self, commit_ids) 

2452 

2453 if graph.entries: 

2454 # Ensure the info directory exists 

2455 info_dir = os.path.join(self.path, "info") 

2456 os.makedirs(info_dir, exist_ok=True) 

2457 if self.dir_mode is not None: 

2458 os.chmod(info_dir, self.dir_mode) 

2459 

2460 # Write using GitFile for atomic operation 

2461 graph_path = os.path.join(info_dir, "commit-graph") 

2462 mask = self.file_mode if self.file_mode is not None else 0o644 

2463 with GitFile(graph_path, "wb", mask=mask) as f: 

2464 assert isinstance( 

2465 f, _GitFile 

2466 ) # GitFile in write mode always returns _GitFile 

2467 graph.write_to_file(f) 

2468 

2469 # Clear cached commit graph so it gets reloaded 

2470 self._commit_graph = None 

2471 
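# Usage sketch: write a commit graph covering everything reachable from a
# branch head (head_sha is a hypothetical hex commit id):
#
#   store.write_commit_graph(refs=[head_sha], reachable=True)
#   graph = store.get_commit_graph()  # now backed by objects/info/commit-graph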

2472 def prune(self, grace_period: int | None = None) -> None: 

2473 """Prune/clean up this object store. 

2474 

2475 This removes temporary files that were left behind by interrupted 

2476 pack operations. These are files that start with ``tmp_pack_`` in the 

2477 repository directory or files with .pack extension but no corresponding 

2478 .idx file in the pack directory. 

2479 

2480 Args: 

2481 grace_period: Grace period in seconds for removing temporary files. 

2482 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

2483 """ 

2484 import glob 

2485 

2486 if grace_period is None: 

2487 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

2488 

2489 # Clean up tmp_pack_* files in the repository directory 

2490 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

2491 # Check if file is old enough (more than grace period) 

2492 mtime = os.path.getmtime(tmp_file) 

2493 if time.time() - mtime > grace_period: 

2494 os.remove(tmp_file) 

2495 

2496 # Clean up orphaned .pack files without corresponding .idx files 

2497 try: 

2498 pack_dir_contents = os.listdir(self.pack_dir) 

2499 except FileNotFoundError: 

2500 return 

2501 

2502 pack_files = {} 

2503 idx_files = set() 

2504 

2505 for name in pack_dir_contents: 

2506 if name.endswith(".pack"): 

2507 base_name = name[:-5] # Remove .pack extension 

2508 pack_files[base_name] = name 

2509 elif name.endswith(".idx"): 

2510 base_name = name[:-4] # Remove .idx extension 

2511 idx_files.add(base_name) 

2512 

2513 # Remove .pack files without corresponding .idx files 

2514 for base_name, pack_name in pack_files.items(): 

2515 if base_name not in idx_files: 

2516 pack_path = os.path.join(self.pack_dir, pack_name) 

2517 # Check if file is old enough (more than grace period) 

2518 mtime = os.path.getmtime(pack_path) 

2519 if time.time() - mtime > grace_period: 

2520 os.remove(pack_path) 

2521 

2522 def close(self) -> None: 

2523 """Close the object store and release resources. 

2524 

2525 This method closes all cached pack files, MIDX, and frees associated resources. 

2526 Can be called multiple times safely. 

2527 """ 

2528 # Close MIDX if it's loaded 

2529 if self._midx is not None: 

2530 self._midx.close() 

2531 self._midx = None 

2532 

2533 # Close alternates 

2534 if self._alternates is not None: 

2535 for alt in self._alternates: 

2536 alt.close() 

2537 self._alternates = None 

2538 

2539 # Call parent class close to handle pack files 

2540 super().close() 

2541 

2542 

2543class MemoryObjectStore(PackCapableObjectStore): 

2544 """Object store that keeps all objects in memory.""" 

2545 

2546 def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None: 

2547 """Initialize a MemoryObjectStore. 

2548 

2549 Creates an empty in-memory object store. 

2550 

2551 Args: 

2552 object_format: Hash algorithm to use (defaults to SHA1) 

2553 """ 

2554 super().__init__(object_format=object_format) 

2555 self._data: dict[ObjectID, ShaFile] = {} 

2556 self.pack_compression_level = -1 

2557 

2558 def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID: 

2559 if len(sha) == self.object_format.hex_length: 

2560 return cast(ObjectID, sha) 

2561 elif len(sha) == self.object_format.oid_length: 

2562 return sha_to_hex(cast(RawObjectID, sha)) 

2563 else: 

2564 raise ValueError(f"Invalid sha {sha!r}") 

2565 

2566 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

2567 """Check if a particular object is present by SHA1 and is loose.""" 

2568 return self._to_hexsha(sha) in self._data 

2569 

2570 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

2571 """Check if a particular object is present by SHA1 and is packed.""" 

2572 return False 

2573 

2574 def __iter__(self) -> Iterator[ObjectID]: 

2575 """Iterate over the SHAs that are present in this store.""" 

2576 return iter(self._data.keys()) 

2577 

2578 @property 

2579 def packs(self) -> list[Pack]: 

2580 """List with pack objects.""" 

2581 return [] 

2582 

2583 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

2584 """Obtain the raw text for an object. 

2585 

2586 Args: 

2587 name: sha for the object. 

2588 Returns: tuple with numeric type and object contents. 

2589 """ 

2590 obj = self[self._to_hexsha(name)] 

2591 return obj.type_num, obj.as_raw_string() 

2592 

2593 def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile: 

2594 """Retrieve an object by SHA. 

2595 

2596 Args: 

2597 name: SHA of the object (as hex string or bytes) 

2598 

2599 Returns: 

2600 Copy of the ShaFile object 

2601 

2602 Raises: 

2603 KeyError: If the object is not found 

2604 """ 

2605 return self._data[self._to_hexsha(name)].copy() 

2606 

2607 def __delitem__(self, name: ObjectID) -> None: 

2608 """Delete an object from this store, for testing only.""" 

2609 del self._data[self._to_hexsha(name)] 

2610 

2611 def add_object(self, obj: ShaFile) -> None: 

2612 """Add a single object to this object store.""" 

2613 self._data[obj.id] = obj.copy() 

2614 
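# Usage sketch: MemoryObjectStore round-trip with a blob (Blob is
# imported at the top of this module):
#
#   store = MemoryObjectStore()
#   blob = Blob.from_string(b"hello")
#   store.add_object(blob)
#   assert store[blob.id].as_raw_string() == b"hello"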

2615 def add_objects( 

2616 self, 

2617 objects: Iterable[tuple[ShaFile, str | None]], 

2618 progress: Callable[[str], None] | None = None, 

2619 ) -> None: 

2620 """Add a set of objects to this object store. 

2621 

2622 Args: 

2623 objects: Iterable over a list of (object, path) tuples 

2624 progress: Optional progress reporting function. 

2625 """ 

2626 for obj, path in objects: 

2627 self.add_object(obj) 

2628 

2629 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2630 """Add a new pack to this object store. 

2631 

2632 Because this object store doesn't support packs, we extract and add the 

2633 individual objects. 

2634 

2635 Returns: Fileobject to write to, a commit function to call 

2636 when the pack is finished, and an abort function. 

2637 """ 

2638 from tempfile import SpooledTemporaryFile 

2639 

2640 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

2641 

2642 def commit() -> None: 

2643 size = f.tell() 

2644 if size > 0: 

2645 f.seek(0) 

2646 

2647 p = PackData.from_file(f, self.object_format, size) 

2648 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type] 

2649 self.add_object(obj) 

2650 p.close() 

2651 f.close() 

2652 else: 

2653 f.close() 

2654 

2655 def abort() -> None: 

2656 f.close() 

2657 

2658 return f, commit, abort # type: ignore[return-value] 

2659 

2660 def add_pack_data( 

2661 self, 

2662 count: int, 

2663 unpacked_objects: Iterator[UnpackedObject], 

2664 progress: Callable[[str], None] | None = None, 

2665 ) -> None: 

2666 """Add pack data to this object store. 

2667 

2668 Args: 

2669 count: Number of items to add 

2670 unpacked_objects: Iterator of UnpackedObject instances 

2671 progress: Optional progress reporting function. 

2672 """ 

2673 if count == 0: 

2674 return 

2675 

2676 # Since MemoryObjectStore doesn't support pack files, we need to 

2677 # extract individual objects. To handle deltas properly, we write 

2678 # to a temporary pack and then use PackInflater to resolve them. 

2679 f, commit, abort = self.add_pack() 

2680 try: 

2681 write_pack_data( 

2682 f.write, 

2683 unpacked_objects, 

2684 num_records=count, 

2685 progress=progress, 

2686 object_format=self.object_format, 

2687 ) 

2688 except BaseException: 

2689 abort() 

2690 raise 

2691 else: 

2692 commit() 

2693 

2694 def add_thin_pack( 

2695 self, 

2696 read_all: Callable[[int], bytes], 

2697 read_some: Callable[[int], bytes] | None, 

2698 progress: Callable[[str], None] | None = None, 

2699 ) -> None: 

2700 """Add a new thin pack to this object store. 

2701 

2702 Thin packs are packs that contain deltas with parents that exist 

2703 outside the pack. Because this object store doesn't support packs, we 

2704 extract and add the individual objects. 

2705 

2706 Args: 

2707 read_all: Read function that blocks until the number of 

2708 requested bytes are read. 

2709 read_some: Read function that returns at least one byte, but may 

2710 not return the number of bytes requested. 

2711 progress: Optional progress reporting function. 

2712 """ 

2713 f, commit, abort = self.add_pack() 

2714 try: 

2715 copier = PackStreamCopier( 

2716 self.object_format.hash_func, 

2717 read_all, 

2718 read_some, 

2719 f, 

2720 ) 

2721 copier.verify() 

2722 except BaseException: 

2723 abort() 

2724 raise 

2725 else: 

2726 commit() 

2727 

2728 

2729class ObjectIterator(Protocol): 

2730 """Interface for iterating over objects.""" 

2731 

2732 def iterobjects(self) -> Iterator[ShaFile]: 

2733 """Iterate over all objects. 

2734 

2735 Returns: 

2736 Iterator of ShaFile objects 

2737 """ 

2738 raise NotImplementedError(self.iterobjects) 

2739 

2740 

2741def tree_lookup_path( 

2742 lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile], 

2743 root_sha: ObjectID | RawObjectID, 

2744 path: bytes, 

2745) -> tuple[int, ObjectID]: 

2746 """Look up an object in a Git tree. 

2747 

2748 Args: 

2749 lookup_obj: Callback for retrieving object by SHA1 

2750 root_sha: SHA1 of the root tree 

2751 path: Path to lookup 

2752 Returns: A tuple of (mode, SHA) of the resulting path. 

2753 """ 

2754 tree = lookup_obj(root_sha) 

2755 if not isinstance(tree, Tree): 

2756 raise NotTreeError(root_sha) 

2757 return tree.lookup_path(lookup_obj, path) 

2758 
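# Usage sketch: look up a nested path in a commit's tree. The lookup
# callback is just object retrieval by SHA (store and commit are
# hypothetical):
#
#   mode, sha = tree_lookup_path(store.__getitem__, commit.tree, b"docs/index.txt")
#   blob = store[sha]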

2759 

2760def _collect_filetree_revs( 

2761 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

2762) -> None: 

2763 """Collect SHA1s of files and directories for specified tree. 

2764 

2765 Args: 

2766 obj_store: Object store to get objects by SHA from 

2767 tree_sha: tree reference to walk 

2768 kset: set to fill with references to files and directories 

2769 """ 

2770 filetree = obj_store[tree_sha] 

2771 assert isinstance(filetree, Tree) 

2772 for name, mode, sha in filetree.iteritems(): 

2773 assert mode is not None 

2774 assert sha is not None 

2775 if not S_ISGITLINK(mode) and sha not in kset: 

2776 kset.add(sha) 

2777 if stat.S_ISDIR(mode): 

2778 _collect_filetree_revs(obj_store, sha, kset) 

2779 

2780 

2781def _split_commits_and_tags( 

2782 obj_store: ObjectContainer, 

2783 lst: Iterable[ObjectID], 

2784 *, 

2785 unknown: str = "error", 

2786) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]: 

2787 """Split object id list into three lists with commit, tag, and other SHAs. 

2788 

2789 Commits referenced by tags are included in the commits 

2790 list as well. Only SHAs known in this repository get through; 

2791 handling of unknown ones is controlled by the unknown parameter. 

2792 

2793 Args: 

2794 obj_store: Object store to get objects by SHA1 from 

2795 lst: Collection of commit and tag SHAs 

2796 unknown: How to handle unknown objects: "error", "warn", or "ignore" 

2797 Returns: A tuple of (commits, tags, others) SHA1s 

2798 """ 

2799 import logging 

2800 

2801 if unknown not in ("error", "warn", "ignore"): 

2802 raise ValueError( 

2803 f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}" 

2804 ) 

2805 

2806 commits: set[ObjectID] = set() 

2807 tags: set[ObjectID] = set() 

2808 others: set[ObjectID] = set() 

2809 for e in lst: 

2810 try: 

2811 o = obj_store[e] 

2812 except KeyError: 

2813 if unknown == "error": 

2814 raise 

2815 elif unknown == "warn": 

2816 logging.warning( 

2817 "Object %s not found in object store", e.decode("ascii") 

2818 ) 

2819 # else: ignore 

2820 else: 

2821 if isinstance(o, Commit): 

2822 commits.add(e) 

2823 elif isinstance(o, Tag): 

2824 tags.add(e) 

2825 tagged = o.object[1] 

2826 c, t, os = _split_commits_and_tags(obj_store, [tagged], unknown=unknown) 

2827 commits |= c 

2828 tags |= t 

2829 others |= os 

2830 else: 

2831 others.add(e) 

2832 return (commits, tags, others) 

2833 

2834 

2835class MissingObjectFinder: 

2836 """Find the objects missing from another object store. 

2837 

2838 Args: 

2839 object_store: Object store containing at least all objects to be 

2840 sent 

2841 haves: SHA1s of commits not to send (already present in target) 

2842 wants: SHA1s of commits to send 

2843 progress: Optional function to report progress to. 

2844 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2845 sha for including tags. 

2846 get_parents: Optional function for getting the parents of a commit. 

2847 """ 

2848 

2849 def __init__( 

2850 self, 

2851 object_store: BaseObjectStore, 

2852 haves: Iterable[ObjectID], 

2853 wants: Iterable[ObjectID], 

2854 *, 

2855 shallow: Set[ObjectID] | None = None, 

2856 progress: Callable[[bytes], None] | None = None, 

2857 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None, 

2858 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents, 

2859 ) -> None: 

2860 """Initialize a MissingObjectFinder. 

2861 

2862 Args: 

2863 object_store: Object store containing objects 

2864 haves: SHA1s of objects already present in target 

2865 wants: SHA1s of objects to send 

2866 shallow: Set of shallow commit SHA1s 

2867 progress: Optional progress reporting callback 

2868 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2869 get_parents: Function for getting commit parents 

2870 """ 

2871 self.object_store = object_store 

2872 if shallow is None: 

2873 shallow = set() 

2874 self._get_parents = get_parents 

2875 reachability = object_store.get_reachability_provider() 

2876 # process Commits and Tags differently 

2877 # haves may list commits/tags not available locally (silently ignore them). 

2878 # wants should only contain valid SHAs (fail fast if not). 

2879 have_commits, have_tags, have_others = _split_commits_and_tags( 

2880 object_store, haves, unknown="ignore" 

2881 ) 

2882 want_commits, want_tags, want_others = _split_commits_and_tags( 

2883 object_store, wants, unknown="error" 

2884 ) 

2885 # all_ancestors is a set of commits that shall not be sent 

2886 # (complete repository up to 'haves') 

2887 all_ancestors = reachability.get_reachable_commits( 

2888 have_commits, exclude=None, shallow=shallow 

2889 ) 

2890 # all_missing - complete set of commits between haves and wants 

2891 # common_commits - boundary commits directly encountered when traversing wants 

2892 # We use _collect_ancestors here because we need the exact boundary behavior: 

2893 # commits that are in all_ancestors and directly reachable from wants, 

2894 # but we don't traverse past them. This is hard to express with the 

2895 # reachability abstraction alone. 

2896 missing_commits, common_commits = _collect_ancestors( 

2897 object_store, 

2898 want_commits, 

2899 frozenset(all_ancestors), 

2900 shallow=frozenset(shallow), 

2901 get_parents=self._get_parents, 

2902 ) 

2903 

2904 self.remote_has: set[ObjectID] = set() 

2905 # Now, fill sha_done with commits and revisions of 

2906 # files and directories known to be both locally 

2907 # and on target. Thus these commits and files 

2908 # won't get selected for fetch 

2909 for h in common_commits: 

2910 self.remote_has.add(h) 

2911 cmt = object_store[h] 

2912 assert isinstance(cmt, Commit) 

2913 # Get tree objects for this commit 

2914 tree_objects = reachability.get_tree_objects([cmt.tree]) 

2915 self.remote_has.update(tree_objects) 

2916 

2917 # record tags we have as visited, too 

2918 for t in have_tags: 

2919 self.remote_has.add(t) 

2920 self.sha_done = set(self.remote_has) 

2921 

2922 # in fact, what we 'want' is commits, tags, and others 

2923 # we've found missing 

2924 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = { 

2925 (w, None, Commit.type_num, False) for w in missing_commits 

2926 } 

2927 missing_tags = want_tags.difference(have_tags) 

2928 self.objects_to_send.update( 

2929 {(w, None, Tag.type_num, False) for w in missing_tags} 

2930 ) 

2931 missing_others = want_others.difference(have_others) 

2932 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2933 

2934 if progress is None: 

2935 self.progress: Callable[[bytes], None] = lambda x: None 

2936 else: 

2937 self.progress = progress 

2938 self._tagged = (get_tagged and get_tagged()) or {} 

2939 

2940 def get_remote_has(self) -> set[ObjectID]: 

2941 """Get the set of SHAs the remote has. 

2942 

2943 Returns: 

2944 Set of SHA1s that the remote side already has 

2945 """ 

2946 return self.remote_has 

2947 

2948 def add_todo( 

2949 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]] 

2950 ) -> None: 

2951 """Add objects to the todo list. 

2952 

2953 Args: 

2954 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2955 """ 

2956 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2957 

2958 def __next__(self) -> tuple[ObjectID, PackHint | None]: 

2959 """Get the next object to send. 

2960 

2961 Returns: 

2962 Tuple of (sha, pack_hint) 

2963 

2964 Raises: 

2965 StopIteration: When no more objects to send 

2966 """ 

2967 while True: 

2968 if not self.objects_to_send: 

2969 self.progress( 

2970 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2971 ) 

2972 raise StopIteration 

2973 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2974 if sha not in self.sha_done: 

2975 break 

2976 if not leaf: 

2977 o = self.object_store[sha] 

2978 if isinstance(o, Commit): 

2979 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2980 elif isinstance(o, Tree): 

2981 todos = [] 

2982 for n, m, s in o.iteritems(): 

2983 assert m is not None 

2984 assert n is not None 

2985 assert s is not None 

2986 if not S_ISGITLINK(m): 

2987 todos.append( 

2988 ( 

2989 s, 

2990 n, 

2991 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2992 not stat.S_ISDIR(m), 

2993 ) 

2994 ) 

2995 self.add_todo(todos) 

2996 elif isinstance(o, Tag): 

2997 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2998 if sha in self._tagged: 

2999 self.add_todo([(self._tagged[sha], None, None, True)]) 

3000 self.sha_done.add(sha) 

3001 if len(self.sha_done) % 1000 == 0: 

3002 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

3003 if type_num is None: 

3004 pack_hint = None 

3005 else: 

3006 pack_hint = (type_num, name) 

3007 return (sha, pack_hint) 

3008 

3009 def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]: 

3010 """Return iterator over objects to send. 

3011 

3012 Returns: 

3013 Self (this class implements the iterator protocol) 

3014 """ 

3015 return self 

3016 

3017 

3018class ObjectStoreGraphWalker: 

3019 """Graph walker that finds what commits are missing from an object store.""" 

3020 

3021 heads: set[ObjectID] 

3022 """Revisions without descendants in the local repo.""" 

3023 

3024 get_parents: Callable[[ObjectID], list[ObjectID]] 

3025 """Function to retrieve parents in the local repo.""" 

3026 

3027 shallow: set[ObjectID] 

3028 

3029 def __init__( 

3030 self, 

3031 local_heads: Iterable[ObjectID], 

3032 get_parents: Callable[[ObjectID], list[ObjectID]], 

3033 shallow: set[ObjectID] | None = None, 

3034 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None] 

3035 | None = None, 

3036 ) -> None: 

3037 """Create a new instance. 

3038 

3039 Args: 

3040 local_heads: Heads to start search with 

3041 get_parents: Function for finding the parents of a SHA1. 

3042 shallow: Set of shallow commits. 

3043 update_shallow: Function to update shallow commits. 

3044 """ 

3045 self.heads = set(local_heads) 

3046 self.get_parents = get_parents 

3047 self.parents: dict[ObjectID, list[ObjectID] | None] = {} 

3048 if shallow is None: 

3049 shallow = set() 

3050 self.shallow = shallow 

3051 self.update_shallow = update_shallow 

3052 

3053 def nak(self) -> None: 

3054 """Nothing in common was found.""" 

3055 

3056 def ack(self, sha: ObjectID) -> None: 

3057 """Ack that a revision and its ancestors are present in the source.""" 

3058 if len(sha) != 40: 

3059 # TODO: support SHA256 

3060 raise ValueError(f"unexpected sha {sha!r} received") 

3061 ancestors = {sha} 

3062 

3063 # stop if we run out of heads to remove 

3064 while self.heads: 

3065 for a in ancestors: 

3066 if a in self.heads: 

3067 self.heads.remove(a) 

3068 

3069 # collect all ancestors 

3070 new_ancestors = set() 

3071 for a in ancestors: 

3072 ps = self.parents.get(a) 

3073 if ps is not None: 

3074 new_ancestors.update(ps) 

3075 self.parents[a] = None 

3076 

3077 # no more ancestors; stop 

3078 if not new_ancestors: 

3079 break 

3080 

3081 ancestors = new_ancestors 

3082 

3083 def next(self) -> ObjectID | None: 

3084 """Iterate over ancestors of heads in the target.""" 

3085 if self.heads: 

3086 ret = self.heads.pop() 

3087 try: 

3088 ps = self.get_parents(ret) 

3089 except KeyError: 

3090 return None 

3091 self.parents[ret] = ps 

3092 self.heads.update([p for p in ps if p not in self.parents]) 

3093 return ret 

3094 return None 

3095 

3096 __next__ = next 

3097 
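# Usage sketch of the walker protocol above, as a fetch negotiation might
# drive it (store and head are hypothetical):
#
#   def get_parents(sha):
#       return store[sha].parents
#
#   walker = ObjectStoreGraphWalker([head], get_parents)
#   sha = walker.next()   # propose a local commit to the remote
#   walker.ack(sha)       # remote has it: prune it and its ancestors
#   walker.nak()          # or: nothing in common was found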

3098 

3099def commit_tree_changes( 

3100 object_store: BaseObjectStore, 

3101 tree: ObjectID | Tree, 

3102 changes: Sequence[tuple[bytes, int | None, ObjectID | None]], 

3103) -> ObjectID: 

3104 """Commit a specified set of changes to a tree structure. 

3105 

3106 This will apply a set of changes on top of an existing tree, storing new 

3107 objects in object_store. 

3108 

3109 changes are a list of tuples with (path, mode, object_sha). 

3110 Paths can be both blobs and trees. Setting both the mode and 

3111 the object SHA of an entry to None deletes that path. 

3112 

3113 This method works especially well if there are only a small 

3114 number of changes to a big tree. For a large number of changes 

3115 to a large tree, use e.g. commit_tree. 

3116 

3117 Args: 

3118 object_store: Object store to store new objects in 

3119 and retrieve old ones from. 

3120 tree: Original tree root (SHA or Tree object) 

3121 changes: changes to apply 

3122 Returns: New tree root object 

3123 """ 

3124 # TODO(jelmer): Save up the objects and add them using .add_objects 

3125 # rather than with individual calls to .add_object. 

3126 # Handle both Tree object and SHA 

3127 if isinstance(tree, Tree): 

3128 tree_obj: Tree = tree 

3129 else: 

3130 sha_obj = object_store[tree] 

3131 assert isinstance(sha_obj, Tree) 

3132 tree_obj = sha_obj 

3133 nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {} 

3134 for path, new_mode, new_sha in changes: 

3135 try: 

3136 (dirname, subpath) = path.split(b"/", 1) 

3137 except ValueError: 

3138 if new_sha is None: 

3139 del tree_obj[path] 

3140 else: 

3141 assert new_mode is not None 

3142 tree_obj[path] = (new_mode, new_sha) 

3143 else: 

3144 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

3145 for name, subchanges in nested_changes.items(): 

3146 try: 

3147 orig_subtree_id: ObjectID | Tree = tree_obj[name][1] 

3148 except KeyError: 

3149 # For new directories, pass an empty Tree object 

3150 orig_subtree_id = Tree() 

3151 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges) 

3152 subtree = object_store[subtree_id] 

3153 assert isinstance(subtree, Tree) 

3154 if len(subtree) == 0: 

3155 del tree_obj[name] 

3156 else: 

3157 tree_obj[name] = (stat.S_IFDIR, subtree.id) 

3158 object_store.add_object(tree_obj) 

3159 return tree_obj.id 

3160 
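# Usage sketch: apply a small set of edits on top of an existing tree
# (root_sha and blob are hypothetical):
#
#   new_root = commit_tree_changes(store, root_sha, [
#       (b"docs/readme.txt", 0o100644, blob.id),  # add or replace a file
#       (b"old/unused.txt", None, None),          # delete a path
#   ])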

3161 

3162class OverlayObjectStore(BaseObjectStore): 

3163 """Object store that can overlay multiple object stores.""" 

3164 

3165 def __init__( 

3166 self, 

3167 bases: list[BaseObjectStore], 

3168 add_store: BaseObjectStore | None = None, 

3169 ) -> None: 

3170 """Initialize an OverlayObjectStore. 

3171 

3172 Args: 

3173 bases: List of base object stores to overlay 

3174 add_store: Optional store to write new objects to 

3175 

3176 Raises: 

3177 ValueError: If stores have different hash algorithms 

3178 """ 

3179 from .object_format import verify_same_object_format 

3180 

3181 # Verify all stores use the same hash algorithm 

3182 store_algorithms = [store.object_format for store in bases] 

3183 if add_store: 

3184 store_algorithms.append(add_store.object_format) 

3185 

3186 object_format = verify_same_object_format(*store_algorithms) 

3187 

3188 super().__init__(object_format=object_format) 

3189 self.bases = bases 

3190 self.add_store = add_store 

3191 

3192 def add_object(self, object: ShaFile) -> None: 

3193 """Add a single object to the store. 

3194 

3195 Args: 

3196 object: Object to add 

3197 

3198 Raises: 

3199 NotImplementedError: If no add_store was provided 

3200 """ 

3201 if self.add_store is None: 

3202 raise NotImplementedError(self.add_object) 

3203 return self.add_store.add_object(object) 

3204 

3205 def add_objects( 

3206 self, 

3207 objects: Sequence[tuple[ShaFile, str | None]], 

3208 progress: Callable[[str], None] | None = None, 

3209 ) -> Pack | None: 

3210 """Add multiple objects to the store. 

3211 

3212 Args: 

3213 objects: Sequence of (object, path) tuples to add 

3214 progress: Optional progress reporting callback 

3215 

3216 Raises: 

3217 NotImplementedError: If no add_store was provided 

3218 """ 

3219 if self.add_store is None: 

3220 raise NotImplementedError(self.add_objects) 

3221 return self.add_store.add_objects(objects, progress) 

3222 

3223 @property 

3224 def packs(self) -> list[Pack]: 

3225 """Get the list of packs from all overlaid stores. 

3226 

3227 Returns: 

3228 Combined list of packs from all base stores 

3229 """ 

3230 ret = [] 

3231 for b in self.bases: 

3232 ret.extend(b.packs) 

3233 return ret 

3234 

3235 def __iter__(self) -> Iterator[ObjectID]: 

3236 """Iterate over all object SHAs in the overlaid stores. 

3237 

3238 Returns: 

3239 Iterator of object SHAs (deduped across stores) 

3240 """ 

3241 done = set() 

3242 for b in self.bases: 

3243 for o_id in b: 

3244 if o_id not in done: 

3245 yield o_id 

3246 done.add(o_id) 

3247 

3248 def iterobjects_subset( 

3249 self, shas: Iterable[ObjectID], *, allow_missing: bool = False 

3250 ) -> Iterator[ShaFile]: 

3251 """Iterate over a subset of objects from the overlaid stores. 

3252 

3253 Args: 

3254 shas: Iterable of object SHAs to retrieve 

3255 allow_missing: If True, skip missing objects; if False, raise KeyError 

3256 

3257 Returns: 

3258 Iterator of ShaFile objects 

3259 

3260 Raises: 

3261 KeyError: If an object is missing and allow_missing is False 

3262 """ 

3263 todo = set(shas) 

3264 found: set[ObjectID] = set() 

3265 

3266 for b in self.bases: 

3267 # Create a copy of todo for each base to avoid modifying 

3268 # the set while iterating through it 

3269 current_todo = todo - found 

3270 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

3271 yield o 

3272 found.add(o.id) 

3273 

3274 # Check for any remaining objects not found 

3275 missing = todo - found 

3276 if missing and not allow_missing: 

3277 raise KeyError(next(iter(missing))) 

3278 

3279 def iter_unpacked_subset( 

3280 self, 

3281 shas: Iterable[ObjectID | RawObjectID], 

3282 include_comp: bool = False, 

3283 allow_missing: bool = False, 

3284 convert_ofs_delta: bool = True, 

3285 ) -> Iterator[UnpackedObject]: 

3286 """Iterate over unpacked objects from the overlaid stores. 

3287 

3288 Args: 

3289 shas: Iterable of object SHAs to retrieve 

3290 include_comp: Whether to include compressed data 

3291 allow_missing: If True, skip missing objects; if False, raise KeyError 

3292 convert_ofs_delta: Whether to convert OFS_DELTA objects 

3293 

3294 Returns: 

3295 Iterator of unpacked objects 

3296 

3297 Raises: 

3298 KeyError: If an object is missing and allow_missing is False 

3299 """ 

3300 todo: set[ObjectID | RawObjectID] = set(shas) 

3301 for b in self.bases: 

3302 for o in b.iter_unpacked_subset( 

3303 todo, 

3304 include_comp=include_comp, 

3305 allow_missing=True, 

3306 convert_ofs_delta=convert_ofs_delta, 

3307 ): 

3308 yield o 

3309 todo.remove(o.sha()) 

3310 if todo and not allow_missing: 

3311 raise KeyError(next(iter(todo))) 

3312 

3313 def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]: 

3314 """Get the raw object data from the overlaid stores. 

3315 

3316 Args: 

3317 sha_id: SHA of the object 

3318 

3319 Returns: 

3320 Tuple of (type_num, raw_data) 

3321 

3322 Raises: 

3323 KeyError: If object not found in any base store 

3324 """ 

3325 for b in self.bases: 

3326 try: 

3327 return b.get_raw(sha_id) 

3328 except KeyError: 

3329 pass 

3330 raise KeyError(sha_id) 

3331 

3332 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

3333 """Check if an object is packed in any base store. 

3334 

3335 Args: 

3336 sha: SHA of the object 

3337 

3338 Returns: 

3339 True if object is packed in any base store 

3340 """ 

3341 for b in self.bases: 

3342 if b.contains_packed(sha): 

3343 return True 

3344 return False 

3345 

3346 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

3347 """Check if an object is loose in any base store. 

3348 

3349 Args: 

3350 sha: SHA of the object 

3351 

3352 Returns: 

3353 True if object is loose in any base store 

3354 """ 

3355 for b in self.bases: 

3356 if b.contains_loose(sha): 

3357 return True 

3358 return False 

3359 

3360 

3361def read_packs_file(f: BinaryIO) -> Iterator[str]: 

3362 """Yield the packs listed in a packs file.""" 

3363 for line in f.read().splitlines(): 

3364 if not line: 

3365 continue 

3366 (kind, name) = line.split(b" ", 1) 

3367 if kind != b"P": 

3368 continue 

3369 yield os.fsdecode(name) 

3370 
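# Illustrative objects/info/packs content as parsed above: only lines of
# the form "P <name>" are yielded; anything else is skipped (the pack
# name below is hypothetical):
#
#   P pack-1234567890abcdef1234567890abcdef12345678.pack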

3371 

3372class BucketBasedObjectStore(PackBasedObjectStore): 

3373 """Object store implementation that uses a bucket store like S3 as backend.""" 

3374 

3375 def _iter_loose_objects(self) -> Iterator[ObjectID]: 

3376 """Iterate over the SHAs of all loose objects.""" 

3377 return iter([]) 

3378 

3379 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None: 

3380 return None 

3381 

3382 def delete_loose_object(self, sha: ObjectID) -> None: 

3383 """Delete a loose object (no-op for bucket stores). 

3384 

3385 Bucket-based stores don't have loose objects, so this is a no-op. 

3386 

3387 Args: 

3388 sha: SHA of the object to delete 

3389 """ 

3390 # Loose objects don't exist in bucket stores; nothing to delete.

3391 

3392 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

3393 """Pack loose objects. Returns number of objects packed. 

3394 

3395 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

3396 

3397 Args: 

3398 progress: Optional progress reporting callback (ignored) 

3399 """ 

3400 return 0 

3401 

3402 def _remove_pack_by_name(self, name: str) -> None: 

3403 """Remove a pack by name. Subclasses should implement this.""" 

3404 raise NotImplementedError(self._remove_pack_by_name) 

3405 

3406 def _iter_pack_names(self) -> Iterator[str]: 

3407 raise NotImplementedError(self._iter_pack_names) 

3408 

3409 def _get_pack(self, name: str) -> Pack: 

3410 raise NotImplementedError(self._get_pack) 

3411 

3412 def _update_pack_cache(self) -> list[Pack]: 

3413 pack_files = set(self._iter_pack_names()) 

3414 

3415 # Open newly appeared pack files 

3416 new_packs = [] 

3417 for f in pack_files: 

3418 if f not in self._pack_cache: 

3419 pack = self._get_pack(f) 

3420 new_packs.append(pack) 

3421 self._pack_cache[f] = pack 

3422 # Remove disappeared pack files 

3423 for f in set(self._pack_cache) - pack_files: 

3424 self._pack_cache.pop(f).close() 

3425 return new_packs 

3426 

3427 def _upload_pack( 

3428 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO 

3429 ) -> None: 

3430 raise NotImplementedError 

3431 

3432 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

3433 """Add a new pack to this object store. 

3434 

3435 Returns: File object to write to, a commit function (returning the

3436 new Pack, or None if nothing was written) to call when the pack

3437 is finished, and an abort function.

3438 """ 

3439 import tempfile 

3440 

3441 pf = tempfile.SpooledTemporaryFile( 

3442 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

3443 ) 

3444 

3445 def commit() -> Pack | None: 

3446 if pf.tell() == 0: 

3447 pf.close() 

3448 return None 

3449 

3450 pf.seek(0) 

3451 

3452 p = PackData(pf.name, file=pf, object_format=self.object_format) 

3453 entries = p.sorted_entries() 

3454 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

3455 idxf = tempfile.SpooledTemporaryFile( 

3456 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

3457 ) 

3458 checksum = p.get_stored_checksum() 

3459 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

3460 idxf.seek(0) 

3461 idx = load_pack_index_file(basename + ".idx", idxf, self.object_format) 

3462 for pack in self.packs: 

3463 if pack.get_stored_checksum() == p.get_stored_checksum(): 

3464 p.close() 

3465 idx.close() 

3466 pf.close() 

3467 idxf.close() 

3468 return pack 

3469 pf.seek(0) 

3470 idxf.seek(0) 

3471 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type] 

3472 final_pack = Pack.from_objects(p, idx) 

3473 self._add_cached_pack(basename, final_pack) 

3474 pf.close() 

3475 idxf.close() 

3476 return final_pack 

3477 

3478 return pf, commit, pf.close # type: ignore[return-value] 

3479 
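A hypothetical sketch of the hooks a concrete subclass would fill in; ``bucket`` and its list/put/delete methods are imagined stand-ins for a real blob-store client, not an API this module provides:

    from typing import BinaryIO

    from dulwich.object_store import BucketBasedObjectStore
    from dulwich.pack import Pack

    class ExampleBucketObjectStore(BucketBasedObjectStore):
        def __init__(self, bucket) -> None:
            super().__init__()
            self.bucket = bucket  # hypothetical blob-store client

        def _iter_pack_names(self):
            # e.g. one key per pack basename under a "packs/" prefix
            return iter(self.bucket.list("packs/"))

        def _upload_pack(
            self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
        ) -> None:
            self.bucket.put(f"packs/{basename}.pack", pack_file.read())
            self.bucket.put(f"packs/{basename}.idx", index_file.read())

        def _get_pack(self, name: str) -> Pack:
            # A real implementation would build a Pack from data fetched
            # out of the bucket (e.g. downloaded to a temporary file).
            raise NotImplementedError

        def _remove_pack_by_name(self, name: str) -> None:
            self.bucket.delete(f"packs/{name}.pack")
            self.bucket.delete(f"packs/{name}.idx")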

3480 

3481def _collect_ancestors( 

3482 store: ObjectContainer, 

3483 heads: Iterable[ObjectID], 

3484 common: frozenset[ObjectID] = frozenset(), 

3485 shallow: frozenset[ObjectID] = frozenset(), 

3486 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents, 

3487) -> tuple[set[ObjectID], set[ObjectID]]: 

3488 """Collect all ancestors of heads up to (excluding) those in common. 

3489 

3490 Args: 

3491 store: Object store to get commits from 

3492 heads: commits to start from 

3493 common: commits to end at, or empty set to walk repository 

3494 completely 

3495 shallow: Set of shallow commits 

3496 get_parents: Optional function for getting the parents of a 

3497 commit. 

3498 Returns: a tuple (commits, bases) where commits is the set of all

3499 commits reachable from heads but not present in common, and bases

3500 is the set of common (shared) commits directly reachable from heads

3501 """ 

3502 bases = set() 

3503 commits = set() 

3504 queue: list[ObjectID] = [] 

3505 queue.extend(heads) 

3506 

3507 # Try to use commit graph if available 

3508 commit_graph = store.get_commit_graph() 

3509 

3510 while queue: 

3511 e = queue.pop(0) 

3512 if e in common: 

3513 bases.add(e) 

3514 elif e not in commits: 

3515 commits.add(e) 

3516 if e in shallow: 

3517 continue 

3518 

3519 # Try to use commit graph for parent lookup 

3520 parents = None 

3521 if commit_graph: 

3522 parents = commit_graph.get_parents(e) 

3523 

3524 if parents is None: 

3525 # Fall back to loading the object 

3526 cmt = store[e] 

3527 assert isinstance(cmt, Commit) 

3528 parents = get_parents(cmt) 

3529 

3530 queue.extend(parents) 

3531 return (commits, bases) 

3532 
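A sketch of the contract on a three-commit linear history (c1 <- c2 <- c3), built in a MemoryObjectStore purely for illustration (_collect_ancestors is module-private, so the import is for demonstration only):

    from dulwich.object_store import MemoryObjectStore, _collect_ancestors
    from dulwich.objects import Commit, Tree

    store = MemoryObjectStore()
    tree = Tree()
    store.add_object(tree)

    def make_commit(parents):
        c = Commit()
        c.tree = tree.id
        c.parents = parents
        c.author = c.committer = b"Example <e@example.com>"
        c.author_time = c.commit_time = 0
        c.author_timezone = c.commit_timezone = 0
        c.message = b"m"
        store.add_object(c)
        return c.id

    c1 = make_commit([])
    c2 = make_commit([c1])
    c3 = make_commit([c2])

    # Traversal from c3 stops at the common commit c1, which is
    # reported as a base rather than as part of the collected set.
    commits, bases = _collect_ancestors(store, [c3], common=frozenset([c1]))
    assert commits == {c3, c2} and bases == {c1}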

3533 

3534def iter_tree_contents( 

3535 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False 

3536) -> Iterator[TreeEntry]: 

3537 """Iterate the contents of a tree and all subtrees. 

3538 

3539 Iteration is depth-first pre-order, as in e.g. os.walk. 

3540 

3541 Args: 

3542 store: Object store to get trees from 

3543 tree_id: SHA1 of the tree. 

3544 include_trees: If True, include tree objects in the iteration. 

3545 

3546 Yields: TreeEntry namedtuples for all the objects in a tree. 

3547 """ 

3548 if tree_id is None: 

3549 return 

3550 # Walk iteratively with an explicit stack, pushing subtree entries

3551 # in reverse so they are yielded in name order.

3552 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

3553 while todo: 

3554 entry = todo.pop() 

3555 assert entry.mode is not None 

3556 if stat.S_ISDIR(entry.mode): 

3557 extra = [] 

3558 assert entry.sha is not None 

3559 tree = store[entry.sha] 

3560 assert isinstance(tree, Tree) 

3561 for subentry in tree.iteritems(name_order=True): 

3562 assert entry.path is not None 

3563 extra.append(subentry.in_path(entry.path)) 

3564 todo.extend(reversed(extra)) 

3565 if not stat.S_ISDIR(entry.mode) or include_trees: 

3566 yield entry 

3567 
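A short sketch of the traversal order, using an in-memory store:

    import stat

    from dulwich.object_store import MemoryObjectStore, iter_tree_contents
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"data")
    sub = Tree()
    sub.add(b"file", stat.S_IFREG | 0o644, blob.id)
    root = Tree()
    root.add(b"dir", stat.S_IFDIR, sub.id)
    for o in (blob, sub, root):
        store.add_object(o)

    # By default only non-tree entries are yielded.
    assert [e.path for e in iter_tree_contents(store, root.id)] == [b"dir/file"]
    # include_trees=True also yields the root (b"") and subtree entries.
    paths = [e.path for e in iter_tree_contents(store, root.id, include_trees=True)]
    assert paths == [b"", b"dir", b"dir/file"]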

3568 

3569def iter_commit_contents( 

3570 store: ObjectContainer, 

3571 commit: Commit | ObjectID | RawObjectID, 

3572 *, 

3573 include: Sequence[str | bytes | Path] | None = None, 

3574) -> Iterator[TreeEntry]: 

3575 """Iterate the contents of the repository at the specified commit. 

3576 

3577 This is a wrapper around iter_tree_contents() and 

3578 tree_lookup_path() to simplify the common task of getting the 

3579 contents of a repo at a particular commit. See also 

3580 dulwich.index.build_file_from_blob() for writing individual files 

3581 to disk. 

3582 

3583 Args: 

3584 store: Object store to get trees from 

3585 commit: Commit object, or SHA1 of a commit 

3586 include: if provided, only the entries whose paths are in the 

3587 list, or whose parent tree is in the list, will be 

3588 included. Note that duplicate or overlapping paths 

3589 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

3590 

3591 Yields: TreeEntry namedtuples for all matching files in a commit. 

3592 """ 

3593 sha = commit.id if isinstance(commit, Commit) else commit 

3594 if not isinstance(obj := store[sha], Commit): 

3595 raise TypeError( 

3596 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

3597 ) 

3598 commit = obj 

3599 encoding = commit.encoding or "utf-8" 

3600 include_bytes: list[bytes] = ( 

3601 [ 

3602 path if isinstance(path, bytes) else str(path).encode(encoding) 

3603 for path in include 

3604 ] 

3605 if include is not None 

3606 else [b""] 

3607 ) 

3608 

3609 for path in include_bytes: 

3610 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

3611 # Iterate all contained files if path points to a dir, otherwise just get that 

3612 # single file 

3613 if isinstance(store[obj_id], Tree): 

3614 for entry in iter_tree_contents(store, obj_id): 

3615 yield entry.in_path(path) 

3616 else: 

3617 yield TreeEntry(path, mode, obj_id) 

3618 
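A self-contained sketch: wrap a small tree in a commit and restrict iteration to one directory:

    import stat

    from dulwich.object_store import MemoryObjectStore, iter_commit_contents
    from dulwich.objects import Blob, Commit, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"data")
    sub = Tree()
    sub.add(b"file", stat.S_IFREG | 0o644, blob.id)
    root = Tree()
    root.add(b"dir", stat.S_IFDIR, sub.id)
    commit = Commit()
    commit.tree = root.id
    commit.author = commit.committer = b"Example <e@example.com>"
    commit.author_time = commit.commit_time = 0
    commit.author_timezone = commit.commit_timezone = 0
    commit.message = b"snapshot"
    for o in (blob, sub, root, commit):
        store.add_object(o)

    # include=[b"dir"] expands the directory to the files beneath it.
    entries = list(iter_commit_contents(store, commit.id, include=[b"dir"]))
    assert [e.path for e in entries] == [b"dir/file"]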

3619 

3620def peel_sha( 

3621 store: ObjectContainer, sha: ObjectID | RawObjectID 

3622) -> tuple[ShaFile, ShaFile]: 

3623 """Peel all tags from a SHA. 

3624 

3625 Args: 

3626 store: Object store to get objects from 

3627 sha: The object SHA to peel. 

3628 Returns: A tuple (unpeeled, peeled): the original object and the

3629 fully-peeled object after following all intermediate tags; if the

3630 original SHA does not point to a tag, both are the same object.

3631 """ 

3632 unpeeled = obj = store[sha] 

3633 obj_class = object_class(obj.type_name) 

3634 while obj_class is Tag: 

3635 assert isinstance(obj, Tag) 

3636 obj_class, sha = obj.object 

3637 obj = store[sha] 

3638 return unpeeled, obj 

3639 
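A sketch with a single annotated tag pointing at a blob:

    from dulwich.object_store import MemoryObjectStore, peel_sha
    from dulwich.objects import Blob, Tag

    store = MemoryObjectStore()
    blob = Blob.from_string(b"content")
    store.add_object(blob)

    tag = Tag()
    tag.name = b"v1"
    tag.object = (Blob, blob.id)
    tag.tagger = b"Example <e@example.com>"
    tag.tag_time = 0
    tag.tag_timezone = 0
    tag.message = b"release"
    store.add_object(tag)

    unpeeled, peeled = peel_sha(store, tag.id)
    assert unpeeled.id == tag.id and peeled.id == blob.id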

3640 

3641class GraphTraversalReachability: 

3642 """Naive graph traversal implementation of ObjectReachabilityProvider. 

3643 

3644 This implementation wraps existing graph traversal functions 

3645 (_collect_ancestors, _collect_filetree_revs) to provide the standard 

3646 reachability interface without any performance optimizations. 

3647 """ 

3648 

3649 def __init__(self, object_store: BaseObjectStore) -> None: 

3650 """Initialize the graph traversal provider. 

3651 

3652 Args: 

3653 object_store: Object store to query 

3654 """ 

3655 self.store = object_store 

3656 

3657 def get_reachable_commits( 

3658 self, 

3659 heads: Iterable[ObjectID], 

3660 exclude: Iterable[ObjectID] | None = None, 

3661 shallow: Set[ObjectID] | None = None, 

3662 ) -> set[ObjectID]: 

3663 """Get all commits reachable from heads, excluding those in exclude. 

3664 

3665 Uses _collect_ancestors for commit traversal. 

3666 

3667 Args: 

3668 heads: Starting commit SHAs 

3669 exclude: Commit SHAs to exclude (and their ancestors) 

3670 shallow: Set of shallow commit boundaries 

3671 

3672 Returns: 

3673 Set of commit SHAs reachable from heads but not from exclude 

3674 """ 

3675 exclude_set = frozenset(exclude) if exclude else frozenset() 

3676 shallow_set = frozenset(shallow) if shallow else frozenset() 

3677 commits, _bases = _collect_ancestors( 

3678 self.store, heads, exclude_set, shallow_set 

3679 ) 

3680 return commits 

3681 

3682 def get_tree_objects( 

3683 self, 

3684 tree_shas: Iterable[ObjectID], 

3685 ) -> set[ObjectID]: 

3686 """Get all trees and blobs reachable from the given trees. 

3687 

3688 Uses _collect_filetree_revs for tree traversal. 

3689 

3690 Args: 

3691 tree_shas: Starting tree SHAs 

3692 

3693 Returns: 

3694 Set of tree and blob SHAs 

3695 """ 

3696 result: set[ObjectID] = set() 

3697 for tree_sha in tree_shas: 

3698 _collect_filetree_revs(self.store, tree_sha, result) 

3699 return result 

3700 

3701 def get_reachable_objects( 

3702 self, 

3703 commits: Iterable[ObjectID], 

3704 exclude_commits: Iterable[ObjectID] | None = None, 

3705 ) -> set[ObjectID]: 

3706 """Get all objects (commits + trees + blobs) reachable from commits. 

3707 

3708 Args: 

3709 commits: Starting commit SHAs 

3710 exclude_commits: Commits whose objects should be excluded 

3711 

3712 Returns: 

3713 Set of all object SHAs (commits, trees, blobs) 

3714 """ 

3715 commits_set = set(commits) 

3716 result = set(commits_set) 

3717 

3718 # Get trees for all commits 

3719 tree_shas = [] 

3720 for commit_sha in commits_set: 

3721 try: 

3722 commit = self.store[commit_sha] 

3723 if isinstance(commit, Commit): 

3724 tree_shas.append(commit.tree) 

3725 except KeyError: 

3726 # Commit not in store, skip 

3727 continue 

3728 

3729 # Collect all tree/blob objects 

3730 result.update(self.get_tree_objects(tree_shas)) 

3731 

3732 # Exclude objects from exclude_commits if needed 

3733 if exclude_commits: 

3734 exclude_objects = self.get_reachable_objects(exclude_commits, None) 

3735 result -= exclude_objects 

3736 

3737 return result 

3738 
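A self-contained sketch on a two-commit history; note that get_reachable_objects() closes over the trees and blobs of exactly the commits it is given, so ancestry should first be expanded via get_reachable_commits():

    from dulwich.object_store import GraphTraversalReachability, MemoryObjectStore
    from dulwich.objects import Commit, Tree

    store = MemoryObjectStore()
    tree = Tree()
    store.add_object(tree)

    def make_commit(parents):
        c = Commit()
        c.tree = tree.id
        c.parents = parents
        c.author = c.committer = b"Example <e@example.com>"
        c.author_time = c.commit_time = 0
        c.author_timezone = c.commit_timezone = 0
        c.message = b"m"
        store.add_object(c)
        return c.id

    c1 = make_commit([])
    c2 = make_commit([c1])

    reach = GraphTraversalReachability(store)
    # Excluded commits (and their ancestors) are subtracted.
    assert reach.get_reachable_commits([c2], exclude=[c1]) == {c2}
    # Only the given commits' trees/blobs are closed over, not ancestry.
    assert reach.get_reachable_objects([c2]) == {c2, tree.id}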

3739 

3740class BitmapReachability: 

3741 """Bitmap-accelerated implementation of ObjectReachabilityProvider. 

3742 

3743 This implementation uses packfile bitmap indexes where available to 

3744 accelerate reachability queries. Falls back to graph traversal when 

3745 bitmaps don't cover the requested commits. 

3746 """ 

3747 

3748 def __init__(self, object_store: "PackBasedObjectStore") -> None: 

3749 """Initialize the bitmap provider. 

3750 

3751 Args: 

3752 object_store: Pack-based object store with bitmap support 

3753 """ 

3754 self.store = object_store 

3755 # Fall back to graph traversal for operations not yet optimized 

3756 self._fallback = GraphTraversalReachability(object_store) 

3757 

3758 def _combine_commit_bitmaps( 

3759 self, 

3760 commit_shas: set[ObjectID], 

3761 exclude_shas: set[ObjectID] | None = None, 

3762 ) -> tuple["EWAHBitmap", "Pack"] | None: 

3763 """Combine bitmaps for multiple commits using OR, with optional exclusion. 

3764 

3765 Args: 

3766 commit_shas: Set of commit SHAs to combine 

3767 exclude_shas: Optional set of commit SHAs to exclude 

3768 

3769 Returns: 

3770 Tuple of (combined_bitmap, pack) or None if bitmaps unavailable 

3771 """ 

3772 from .bitmap import find_commit_bitmaps 

3773 

3774 # Find bitmaps for the commits 

3775 commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs) 

3776 

3777 # If we can't find bitmaps for all commits, return None 

3778 if len(commit_bitmaps) < len(commit_shas): 

3779 return None 

3780 

3781 # Combine bitmaps using OR 

3782 combined_bitmap = None 

3783 result_pack = None 

3784 

3785 for commit_sha in commit_shas: 

3786 pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha] 

3787 commit_bitmap = pack_bitmap.get_bitmap(commit_sha) 

3788 

3789 if commit_bitmap is None: 

3790 return None 

3791 

3792 if combined_bitmap is None: 

3793 combined_bitmap = commit_bitmap 

3794 result_pack = pack 

3795 elif pack == result_pack: 

3796 # Same pack, can OR directly 

3797 combined_bitmap = combined_bitmap | commit_bitmap 

3798 else: 

3799 # Different packs, can't combine 

3800 return None 

3801 

3802 # Handle exclusions if provided 

3803 if exclude_shas and result_pack and combined_bitmap: 

3804 exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack]) 

3805 

3806 if len(exclude_bitmaps) == len(exclude_shas): 

3807 # All excludes have bitmaps, compute exclusion 

3808 exclude_combined = None 

3809 

3810 for commit_sha in exclude_shas: 

3811 _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha] 

3812 exclude_bitmap = pack_bitmap.get_bitmap(commit_sha) 

3813 

3814 if exclude_bitmap is None: 

3815 break 

3816 

3817 if exclude_combined is None: 

3818 exclude_combined = exclude_bitmap 

3819 else: 

3820 exclude_combined = exclude_combined | exclude_bitmap 

3821 

3822 # Subtract excludes using set difference 

3823 if exclude_combined: 

3824 combined_bitmap = combined_bitmap - exclude_combined 

3825 

3826 if combined_bitmap and result_pack: 

3827 return (combined_bitmap, result_pack) 

3828 return None 

3829 

3830 def get_reachable_commits( 

3831 self, 

3832 heads: Iterable[ObjectID], 

3833 exclude: Iterable[ObjectID] | None = None, 

3834 shallow: Set[ObjectID] | None = None, 

3835 ) -> set[ObjectID]: 

3836 """Get all commits reachable from heads using bitmaps where possible. 

3837 

3838 Args: 

3839 heads: Starting commit SHAs 

3840 exclude: Commit SHAs to exclude (and their ancestors) 

3841 shallow: Set of shallow commit boundaries 

3842 

3843 Returns: 

3844 Set of commit SHAs reachable from heads but not from exclude 

3845 """ 

3846 from .bitmap import bitmap_to_object_shas 

3847 

3848 # If shallow is specified, fall back to graph traversal 

3849 # (bitmaps don't support shallow boundaries well) 

3850 if shallow: 

3851 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3852 

3853 heads_set = set(heads) 

3854 exclude_set = set(exclude) if exclude else None 

3855 

3856 # Try to combine bitmaps 

3857 result = self._combine_commit_bitmaps(heads_set, exclude_set) 

3858 if result is None: 

3859 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3860 

3861 combined_bitmap, result_pack = result 

3862 

3863 # Convert bitmap to commit SHAs, filtering for commits only 

3864 pack_bitmap = result_pack.bitmap 

3865 if pack_bitmap is None: 

3866 return self._fallback.get_reachable_commits(heads, exclude, shallow) 

3867 commit_type_filter = pack_bitmap.commit_bitmap 

3868 return bitmap_to_object_shas( 

3869 combined_bitmap, result_pack.index, commit_type_filter 

3870 ) 

3871 

3872 def get_tree_objects( 

3873 self, 

3874 tree_shas: Iterable[ObjectID], 

3875 ) -> set[ObjectID]: 

3876 """Get all trees and blobs reachable from the given trees. 

3877 

3878 Args: 

3879 tree_shas: Starting tree SHAs 

3880 

3881 Returns: 

3882 Set of tree and blob SHAs 

3883 """ 

3884 # Tree traversal doesn't benefit much from bitmaps, use fallback 

3885 return self._fallback.get_tree_objects(tree_shas) 

3886 

3887 def get_reachable_objects( 

3888 self, 

3889 commits: Iterable[ObjectID], 

3890 exclude_commits: Iterable[ObjectID] | None = None, 

3891 ) -> set[ObjectID]: 

3892 """Get all objects reachable from commits using bitmaps. 

3893 

3894 Args: 

3895 commits: Starting commit SHAs 

3896 exclude_commits: Commits whose objects should be excluded 

3897 

3898 Returns: 

3899 Set of all object SHAs (commits, trees, blobs) 

3900 """ 

3901 from .bitmap import bitmap_to_object_shas 

3902 

3903 commits_set = set(commits) 

3904 exclude_set = set(exclude_commits) if exclude_commits else None 

3905 

3906 # Try to combine bitmaps 

3907 result = self._combine_commit_bitmaps(commits_set, exclude_set) 

3908 if result is None: 

3909 return self._fallback.get_reachable_objects(commits, exclude_commits) 

3910 

3911 combined_bitmap, result_pack = result 

3912 

3913 # Convert bitmap to all object SHAs (no type filter) 

3914 return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)
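One plausible way to choose between the two providers; the helper name is illustrative only, and since BitmapReachability already degrades gracefully, this only decides whether bitmap lookups are attempted at all:

    from dulwich.object_store import (
        BaseObjectStore,
        BitmapReachability,
        GraphTraversalReachability,
        PackBasedObjectStore,
    )

    def reachability_for(store: BaseObjectStore):
        """Illustrative helper: prefer bitmaps for pack-based stores."""
        if isinstance(store, PackBasedObjectStore):
            return BitmapReachability(store)
        return GraphTraversalReachability(store)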