Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1516 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

"""Git object store interfaces and implementation."""

# Explicit public API of this module.
__all__ = [
    "DEFAULT_TEMPFILE_GRACE_PERIOD",
    "INFODIR",
    "PACKDIR",
    "PACK_MODE",
    "BaseObjectStore",
    "BitmapReachability",
    "BucketBasedObjectStore",
    "DiskObjectStore",
    "GraphTraversalReachability",
    "GraphWalker",
    "MemoryObjectStore",
    "MissingObjectFinder",
    "ObjectIterator",
    "ObjectReachabilityProvider",
    "ObjectStoreGraphWalker",
    "OverlayObjectStore",
    "PackBasedObjectStore",
    "PackCapableObjectStore",
    "PackContainer",
    "commit_tree_changes",
    "find_shallow",
    "get_depth",
    "iter_commit_contents",
    "iter_tree_contents",
    "peel_sha",
    "read_packs_file",
    "tree_lookup_path",
]

55 

56import binascii 

57import os 

58import stat 

59import sys 

60import time 

61import warnings 

62from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set 

63from contextlib import suppress 

64from io import BytesIO 

65from pathlib import Path 

66from typing import ( 

67 TYPE_CHECKING, 

68 BinaryIO, 

69 Protocol, 

70 cast, 

71) 

72 

73if TYPE_CHECKING: 

74 from .object_format import ObjectFormat 

75 

76from .errors import NotTreeError 

77from .file import GitFile, _GitFile 

78from .midx import MultiPackIndex, load_midx 

79from .objects import ( 

80 S_ISGITLINK, 

81 Blob, 

82 Commit, 

83 ObjectID, 

84 RawObjectID, 

85 ShaFile, 

86 Tag, 

87 Tree, 

88 TreeEntry, 

89 hex_to_filename, 

90 hex_to_sha, 

91 object_class, 

92 sha_to_hex, 

93 valid_hexsha, 

94) 

95from .pack import ( 

96 PACK_SPOOL_FILE_MAX_SIZE, 

97 ObjectContainer, 

98 Pack, 

99 PackData, 

100 PackedObjectContainer, 

101 PackFileDisappeared, 

102 PackHint, 

103 PackIndexer, 

104 PackInflater, 

105 PackStreamCopier, 

106 UnpackedObject, 

107 extend_pack, 

108 full_unpacked_object, 

109 generate_unpacked_objects, 

110 iter_sha1, 

111 load_pack_index_file, 

112 pack_objects_to_data, 

113 write_pack_data, 

114 write_pack_index, 

115) 

116from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX 

117from .refs import Ref 

118 

119if TYPE_CHECKING: 

120 from .bitmap import EWAHBitmap 

121 from .commit_graph import CommitGraph 

122 from .config import Config 

123 from .diff_tree import RenameDetector 

124 from .pack import Pack 

125 

126 

class GraphWalker(Protocol):
    """Protocol for graph walker objects.

    Used by ``BaseObjectStore.find_common_revisions`` to discover commits
    both sides of a transfer already share: ``__next__`` yields candidate
    SHAs, ``ack`` marks one as present, and ``nak`` signals that nothing
    in common was found.
    """

    def __next__(self) -> ObjectID | None:
        """Return the next object SHA to visit, or None when exhausted."""
        ...

    def ack(self, sha: ObjectID) -> None:
        """Acknowledge that an object has been received."""
        ...

    def nak(self) -> None:
        """Nothing in common was found."""
        ...

142 

class ObjectReachabilityProvider(Protocol):
    """Protocol for computing object reachability queries.

    This abstraction allows reachability computations to be backed by either
    naive graph traversal or optimized bitmap indexes, with a consistent
    interface.  Known implementations in this module are
    ``GraphTraversalReachability`` and ``BitmapReachability``.
    """

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries (traversal stops here)

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        ...

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs, tags)
        """
        ...

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        ...

197 

198 

# Standard subdirectory names inside a git "objects" directory.
INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks

210 

211 

def find_shallow(
    store: ObjectContainer, heads: Iterable[ObjectID], depth: int
) -> tuple[set[ObjectID], set[ObjectID]]:
    """Find shallow commits according to a given depth.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
      considered shallow and unshallow according to the arguments. Note that
      these sets may overlap if a commit is reachable along multiple paths.
    """
    parents: dict[ObjectID, list[ObjectID]] = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha: ObjectID) -> list[ObjectID]:
        # Memoized parent lookup.  Detect cache hits by containment rather
        # than truthiness: root commits have an empty parent list, and a
        # truthiness check would treat them as misses and re-fetch them on
        # every visit.
        try:
            return parents[sha]
        except KeyError:
            pass
        result: list[ObjectID] | None = None
        # Prefer the commit graph if available; it avoids loading and
        # parsing the commit object.
        if commit_graph:
            result = commit_graph.get_parents(sha)
        if result is None:
            # Fall back to loading the object itself.
            commit = store[sha]
            assert isinstance(commit, Commit)
            result = commit.parents
        parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel tags (possibly nested) down to the underlying object;
        # only commits participate in the depth walk.
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            # At the depth limit: this commit becomes a shallow boundary.
            shallow.add(sha)

    return shallow, not_shallow

268 

269 

def get_depth(
    store: ObjectContainer,
    head: ObjectID,
    get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    max_depth: int | None = None,
) -> int:
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to search in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search

    Returns: The depth (1 for just the head itself); 0 if head is absent.
    """
    # deque gives O(1) popleft; list.pop(0) made this BFS accidentally
    # quadratic in the number of visited commits.
    from collections import deque

    if head not in store:
        return 0
    current_depth = 1
    queue: deque[tuple[ObjectID, int]] = deque([(head, current_depth)])
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.popleft()
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object (peeling a tag if needed)
            cmt = store[e]
            if isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        # Only follow parents actually present in the store.
        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth

312 

313 

class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack.

        Returns: Tuple of (file, commit_func, abort_func); pack data is
            written to the file, then one of the two callbacks is invoked
            to finalize or discard it.
        """

320 

class BaseObjectStore:
    """Object store interface."""

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize object store.

        Args:
            object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
        """
        # Imported locally, presumably to avoid an import cycle with
        # .object_format.
        from .object_format import DEFAULT_OBJECT_FORMAT

        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT

    def determine_wants_all(
        self, refs: Mapping[Ref, ObjectID], depth: int | None = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs.

        Args:
            refs: Mapping of ref name to object SHA.
            depth: Optional desired depth; when set, objects we already have
                may still be wanted if the local history needs deepening.

        Returns: List of SHAs to fetch.  Peeled-tag refs (``^{}``) are
            always skipped.
        """

        def _want_deepen(sha: ObjectID) -> bool:
            # An already-present object is still wanted when the requested
            # depth exceeds what is currently available locally.
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
        ]

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        return False  # Default implementation for stores that don't support packing

    def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        The base implementation only consults loose storage; pack-based
        subclasses override this to also check packs and alternates.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(
            type_num, uncomp, sha=sha1, object_format=self.object_format
        )

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def get_reachability_provider(
        self, prefer_bitmap: bool = True
    ) -> ObjectReachabilityProvider:
        """Get a reachability provider for this object store.

        Returns an ObjectReachabilityProvider that can efficiently compute
        object reachability queries. Subclasses can override this to provide
        optimized implementations (e.g., using bitmap indexes).

        Args:
            prefer_bitmap: Whether to prefer bitmap-based reachability if
                available.

        Returns:
            ObjectReachabilityProvider instance
        """
        return GraphTraversalReachability(self)

    def tree_changes(
        self,
        source: ObjectID | None,
        target: ObjectID | None,
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: "RenameDetector | None" = None,
        paths: Sequence[bytes] | None = None,
    ) -> Iterator[
        tuple[
            tuple[bytes | None, bytes | None],
            tuple[int | None, int | None],
            tuple[ObjectID | None, ObjectID | None],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
            source: SHA1 of the source tree
            target: SHA1 of the target tree
            want_unchanged: Whether unchanged files should be reported
            include_trees: Whether to include trees
            change_type_same: Whether to report files changing
                type in the same entry.
            rename_detector: RenameDetector object for detecting renames.
            paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Local import, presumably to avoid a module import cycle with
        # .diff_tree.
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            # Flatten each TreeChange into the triple-of-pairs format;
            # either side may be None (additions / deletions).
            old_path = change.old.path if change.old is not None else None
            new_path = change.new.path if change.new is not None else None
            old_mode = change.old.mode if change.old is not None else None
            new_mode = change.new.mode if change.new is not None else None
            old_sha = change.old.sha if change.old is not None else None
            new_sha = change.new.sha if change.new is not None else None
            yield (
                (old_path, new_path),
                (old_mode, new_mode),
                (old_sha, new_sha),
            )

    def iter_tree_contents(
        self, tree_id: ObjectID, include_trees: bool = False
    ) -> Iterator[TreeEntry]:
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Deprecated: use the module-level ``iter_tree_contents`` instead.

        Args:
            tree_id: SHA1 of the tree.
            include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> "Iterator[UnpackedObject]":
        """Iterate over unpacked objects for a subset of SHAs.

        Default implementation that converts ShaFile objects to UnpackedObject.
        Subclasses may override for more efficient unpacked access.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data (ignored in base
                implementation)
            allow_missing: If True, skip missing objects; if False, raise
                KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
                base implementation)

        Returns:
            Iterator of UnpackedObject instances

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        from .pack import UnpackedObject

        for sha in shas:
            try:
                obj = self[sha]
                # Convert ShaFile to UnpackedObject
                unpacked = UnpackedObject(
                    obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
                )
                yield unpacked
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    ) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Find the missing objects required for a set of revisions.

        Deprecated: use ``MissingObjectFinder(store)`` directly instead.

        Args:
            haves: Iterable over SHAs already in common.
            wants: Iterable over SHAs of objects to fetch.
            shallow: Set of shallow commit SHA1s to skip
            progress: Simple progress function that will be called with
                updated progress strings.
            get_tagged: Function that returns a dict of pointed-to sha ->
                tag sha for including tags.
            get_parents: Optional function for getting the parents of a
                commit.
        Returns: Iterator over (sha, path) pairs.
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
        """Find which revisions this store has in common using graphwalker.

        Args:
            graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        # The walker yields a falsy value (None) once it has no more
        # candidates to offer.
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
            have: List of SHA1s of objects that should not be sent
            want: List of SHA1s of objects that should be sent
            shallow: Set of shallow commit SHA1s to skip
            ofs_delta: Whether OFS deltas can be included
            progress: Optional progress reporting method
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Peel all tags from a SHA.

        Deprecated: use the module-level ``peel_sha`` function instead.

        Args:
            sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: ObjectID,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
        max_depth: int | None = None,
    ) -> int:
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
            head: commit to start from
            get_parents: optional function for getting the parents of a commit
            max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: int | None = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
            grace_period: Grace period in seconds for removing temporary files.
                If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> "CommitGraph | None":
        """Get the commit graph for this object store.

        Returns:
            CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
            refs: List of refs to include. If None, includes all refs from object store.
            reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.

        Note:
            The base implementation raises NotImplementedError. Subclasses
            should override this method to provide commit graph writing
            functionality.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: ObjectID) -> float:
        """Get the modification time of an object.

        Args:
            sha: SHA1 of the object

        Returns:
            Modification time as seconds since epoch

        Raises:
            KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)

746 

747 

class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that supports pack operations.

    This is a base class for object stores that can handle pack files,
    including both disk-based and memory-based stores.
    """

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator["UnpackedObject"],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add pack data to this object store.

        Args:
            count: Number of objects
            unpacked_objects: Iterator over unpacked objects
            progress: Optional progress callback
        """
        raise NotImplementedError(self.add_pack_data)

    def get_unpacked_object(
        self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
    ) -> "UnpackedObject":
        """Get a raw unresolved object.

        Args:
            sha1: SHA-1 hash of the object
            include_comp: Whether to include compressed data (unused by this
                generic implementation)

        Returns:
            UnpackedObject instance
        """
        from .pack import UnpackedObject

        # Generic fallback: load the full object and wrap its raw chunks.
        obj = self[sha1]
        return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects.

        Same strategy as the BaseObjectStore implementation: fetch one by
        one and optionally skip missing objects.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects

        Returns:
            Iterator of ShaFile objects
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

813 

class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level: int = -1,
        pack_index_version: int | None = None,
        pack_delta_window_size: int | None = None,
        pack_window_memory: int | None = None,
        pack_delta_cache_size: int | None = None,
        pack_depth: int | None = None,
        pack_threads: int | None = None,
        pack_big_file_threshold: int | None = None,
        *,
        object_format: "ObjectFormat | None" = None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
            pack_compression_level: Compression level for pack files (-1 to 9)
            pack_index_version: Pack index version to use
            pack_delta_window_size: Window size for delta compression
            pack_window_memory: Maximum memory to use for delta window
            pack_delta_cache_size: Cache size for delta operations
            pack_depth: Maximum depth for pack deltas
            pack_threads: Number of threads to use for packing
            pack_big_file_threshold: Threshold for treating files as "big"
            object_format: Hash algorithm to use
        """
        super().__init__(object_format=object_format)
        # Cache of open packs, keyed by pack base name; see _update_pack_cache.
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

858 

859 def get_reachability_provider( 

860 self, 

861 prefer_bitmaps: bool = True, 

862 ) -> ObjectReachabilityProvider: 

863 """Get the best reachability provider for the object store. 

864 

865 Args: 

866 prefer_bitmaps: Whether to use bitmaps if available 

867 

868 Returns: 

869 ObjectReachabilityProvider implementation (either bitmap-accelerated 

870 or graph traversal) 

871 """ 

872 if prefer_bitmaps: 

873 # Check if any packs have bitmaps 

874 has_bitmap = False 

875 for pack in self.packs: 

876 try: 

877 # Try to access bitmap property 

878 if pack.bitmap is not None: 

879 has_bitmap = True 

880 break 

881 except FileNotFoundError: 

882 # Bitmap file doesn't exist for this pack 

883 continue 

884 

885 if has_bitmap: 

886 return BitmapReachability(self) 

887 

888 # Fall back to graph traversal 

889 return GraphTraversalReachability(self) 

890 

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func); see
            ``add_pack_data`` for the intended usage.  Must be implemented
            by subclasses.
        """
        raise NotImplementedError(self.add_pack)

894 

895 def add_pack_data( 

896 self, 

897 count: int, 

898 unpacked_objects: Iterator[UnpackedObject], 

899 progress: Callable[..., None] | None = None, 

900 ) -> "Pack | None": 

901 """Add pack data to this object store. 

902 

903 Args: 

904 count: Number of items to add 

905 unpacked_objects: Iterator of UnpackedObject instances 

906 progress: Optional progress callback 

907 """ 

908 if count == 0: 

909 # Don't bother writing an empty pack file 

910 return None 

911 f, commit, abort = self.add_pack() 

912 try: 

913 write_pack_data( 

914 f.write, 

915 unpacked_objects, 

916 num_records=count, 

917 progress=progress, 

918 compression_level=self.pack_compression_level, 

919 object_format=self.object_format, 

920 ) 

921 except BaseException: 

922 abort() 

923 raise 

924 else: 

925 return commit() 

926 

    @property
    def alternates(self) -> list["BaseObjectStore"]:
        """Return list of alternate object stores.

        Alternates are additional stores consulted by ``__contains__``;
        the base implementation has none.
        """
        return []

931 

932 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

933 """Check if a particular object is present by SHA1 and is packed. 

934 

935 This does not check alternates. 

936 """ 

937 for pack in self.packs: 

938 try: 

939 if sha in pack: 

940 return True 

941 except PackFileDisappeared: 

942 pass 

943 return False 

944 

945 def __contains__(self, sha: ObjectID | RawObjectID) -> bool: 

946 """Check if a particular object is present by SHA1. 

947 

948 This method makes no distinction between loose and packed objects. 

949 """ 

950 if self.contains_packed(sha) or self.contains_loose(sha): 

951 return True 

952 for alternate in self.alternates: 

953 if sha in alternate: 

954 return True 

955 return False 

956 

957 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

958 """Add a newly appeared pack to the cache by path.""" 

959 prev_pack = self._pack_cache.get(base_name) 

960 if prev_pack is not pack: 

961 self._pack_cache[base_name] = pack 

962 if prev_pack: 

963 prev_pack.close() 

964 

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
            have: List of SHA1s of objects that should not be sent
            want: List of SHA1s of objects that should be sent
            shallow: Set of shallow commit SHA1s to skip
            ofs_delta: Whether OFS deltas can be included
            progress: Optional progress reporting method

        Returns:
            Tuple of (object count, iterator over UnpackedObject instances).
        """
        # Walk the object graph to find what the receiver is missing.
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        # Materialized up front so the total count can be returned alongside
        # the (lazy) unpacked-object iterator.
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            self,
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

995 

996 def _clear_cached_packs(self) -> None: 

997 pack_cache = self._pack_cache 

998 self._pack_cache = {} 

999 while pack_cache: 

1000 (_name, pack) = pack_cache.popitem() 

1001 pack.close() 

1002 

    def _iter_cached_packs(self) -> Iterator[Pack]:
        """Iterate over the packs currently held in the in-memory cache.

        Does not scan for new packs on disk; see ``_update_pack_cache``.
        """
        return iter(self._pack_cache.values())

1005 

    def _update_pack_cache(self) -> list[Pack]:
        """Scan for newly appeared packs and add them to the cache.

        Returns:
            List of packs added by this call (not previously cached).
            Must be implemented by subclasses.
        """
        raise NotImplementedError(self._update_pack_cache)

1008 

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        Can be called multiple times safely.
        """
        # Idempotent: the cache is emptied on the first call, so subsequent
        # calls find nothing left to close.
        self._clear_cached_packs()

1016 

    def __del__(self) -> None:
        """Warn if the object store is being deleted with unclosed packs."""
        if self._pack_cache:
            # Imported lazily: __del__ can run during interpreter shutdown,
            # when module globals may already have been torn down.
            import warnings

            warnings.warn(
                f"ObjectStore {self!r} was destroyed with {len(self._pack_cache)} "
                "unclosed pack(s). Please call close() explicitly.",
                ResourceWarning,
                stacklevel=2,
            )
            # Best-effort cleanup so file handles are not leaked.
            self.close()

1029 

1030 @property 

1031 def packs(self) -> list[Pack]: 

1032 """List with pack objects.""" 

1033 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

1034 

1035 def count_pack_files(self) -> int: 

1036 """Count the number of pack files. 

1037 

1038 Returns: 

1039 Number of pack files (excluding those with .keep files) 

1040 """ 

1041 count = 0 

1042 for pack in self.packs: 

1043 # Check if there's a .keep file for this pack 

1044 keep_path = pack._basename + ".keep" 

1045 if not os.path.exists(keep_path): 

1046 count += 1 

1047 return count 

1048 

1049 def _iter_alternate_objects(self) -> Iterator[ObjectID]: 

1050 """Iterate over the SHAs of all the objects in alternate stores.""" 

1051 for alternate in self.alternates: 

1052 yield from alternate 

1053 

    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects.

        Must be implemented by subclasses that store loose objects.
        """
        raise NotImplementedError(self._iter_loose_objects)

1057 

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
        """Return the loose object for ``sha``, or None if it is not present.

        Must be implemented by subclasses.
        """
        raise NotImplementedError(self._get_loose_object)

1060 

    def delete_loose_object(self, sha: ObjectID) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.

        Must be implemented by subclasses.
        """
        raise NotImplementedError(self.delete_loose_object)

1068 

    def _remove_pack(self, pack: "Pack") -> None:
        """Remove ``pack`` from the store; implemented by subclasses."""
        raise NotImplementedError(self._remove_pack)

1071 

1072 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

1073 """Pack loose objects. 

1074 

1075 Args: 

1076 progress: Optional progress reporting callback 

1077 

1078 Returns: Number of objects packed 

1079 """ 

1080 objects: list[tuple[ShaFile, None]] = [] 

1081 for sha in self._iter_loose_objects(): 

1082 obj = self._get_loose_object(sha) 

1083 if obj is not None: 

1084 objects.append((obj, None)) 

1085 self.add_objects(objects, progress=progress) 

1086 for obj, path in objects: 

1087 self.delete_loose_object(obj.id) 

1088 return len(objects) 

1089 

1090 def repack( 

1091 self, 

1092 exclude: Set[bytes] | None = None, 

1093 progress: Callable[[str], None] | None = None, 

1094 ) -> int: 

1095 """Repack the packs in this repository. 

1096 

1097 Note that this implementation is fairly naive and currently keeps all 

1098 objects in memory while it repacks. 

1099 

1100 Args: 

1101 exclude: Optional set of object SHAs to exclude from repacking 

1102 progress: Optional progress reporting callback 

1103 """ 

1104 if exclude is None: 

1105 exclude = set() 

1106 

1107 loose_objects = set() 

1108 excluded_loose_objects = set() 

1109 for sha in self._iter_loose_objects(): 

1110 if sha not in exclude: 

1111 obj = self._get_loose_object(sha) 

1112 if obj is not None: 

1113 loose_objects.add(obj) 

1114 else: 

1115 excluded_loose_objects.add(sha) 

1116 

1117 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

1118 old_packs = {p.name(): p for p in self.packs} 

1119 for name, pack in old_packs.items(): 

1120 objects.update( 

1121 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

1122 ) 

1123 

1124 # Only create a new pack if there are objects to pack 

1125 if objects: 

1126 # The name of the consolidated pack might match the name of a 

1127 # pre-existing pack. Take care not to remove the newly created 

1128 # consolidated pack. 

1129 consolidated = self.add_objects(list(objects), progress=progress) 

1130 if consolidated is not None: 

1131 old_packs.pop(consolidated.name(), None) 

1132 

1133 # Delete loose objects that were packed 

1134 for obj in loose_objects: 

1135 if obj is not None: 

1136 self.delete_loose_object(obj.id) 

1137 # Delete excluded loose objects 

1138 for sha in excluded_loose_objects: 

1139 self.delete_loose_object(sha) 

1140 for name, pack in old_packs.items(): 

1141 self._remove_pack(pack) 

1142 self._update_pack_cache() 

1143 return len(objects) 

1144 

1145 def generate_pack_bitmaps( 

1146 self, 

1147 refs: dict[Ref, ObjectID], 

1148 *, 

1149 commit_interval: int | None = None, 

1150 progress: Callable[[str], None] | None = None, 

1151 ) -> int: 

1152 """Generate bitmap indexes for all packs that don't have them. 

1153 

1154 This generates .bitmap files for packfiles, enabling fast reachability 

1155 queries. Equivalent to the bitmap generation part of 'git repack -b'. 

1156 

1157 Args: 

1158 refs: Dictionary of ref names to commit SHAs 

1159 commit_interval: Include every Nth commit in bitmap index (None for default) 

1160 progress: Optional progress reporting callback 

1161 

1162 Returns: 

1163 Number of bitmaps generated 

1164 """ 

1165 count = 0 

1166 for pack in self.packs: 

1167 pack.ensure_bitmap( 

1168 self, refs, commit_interval=commit_interval, progress=progress 

1169 ) 

1170 count += 1 

1171 

1172 # Update cache to pick up new bitmaps 

1173 self._update_pack_cache() 

1174 

1175 return count 

1176 

1177 def __iter__(self) -> Iterator[ObjectID]: 

1178 """Iterate over the SHAs that are present in this store.""" 

1179 self._update_pack_cache() 

1180 for pack in self._iter_cached_packs(): 

1181 try: 

1182 yield from pack 

1183 except PackFileDisappeared: 

1184 pass 

1185 yield from self._iter_loose_objects() 

1186 yield from self._iter_alternate_objects() 

1187 

1188 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool: 

1189 """Check if a particular object is present by SHA1 and is loose. 

1190 

1191 This does not check alternates. 

1192 """ 

1193 return self._get_loose_object(sha) is not None 

1194 

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw fulltext for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        Raises:
            KeyError: if the object is not found in this store or alternates.
        """
        sha: RawObjectID
        # Accept either the hex or the binary form of the object id: packs
        # are queried with the binary sha, loose objects with the hex sha.
        if len(name) == self.object_format.hex_length:
            sha = hex_to_sha(ObjectID(name))
            hexsha = name
        elif len(name) == self.object_format.oid_length:
            sha = RawObjectID(name)
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        # Compute the hex form lazily, only once it is actually needed.
        if hexsha is None:
            hexsha = sha_to_hex(sha)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

1234 

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
            shas: Set of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of UnpackedObject instances

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        # Each layer is queried with allow_missing=True; whatever it yields
        # is crossed off `todo` so later layers only see what is still missing.
        todo: set[ObjectID | RawObjectID] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                # NOTE(review): this assumes `shas` held hex ids — a raw id in
                # `todo` would make the remove() below raise; confirm callers.
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            assert isinstance(alternate, PackBasedObjectStore)
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # NOTE(review): despite the docstring, nothing here raises KeyError
        # for ids left in `todo` when allow_missing is False — confirm whether
        # that is intentional.

1290 

1291 def iterobjects_subset( 

1292 self, shas: Iterable[ObjectID], *, allow_missing: bool = False 

1293 ) -> Iterator[ShaFile]: 

1294 """Iterate over a subset of objects in the store. 

1295 

1296 This method searches for objects in pack files, alternates, and loose storage. 

1297 

1298 Args: 

1299 shas: Iterable of object SHAs to retrieve 

1300 allow_missing: If True, skip missing objects; if False, raise KeyError 

1301 

1302 Returns: 

1303 Iterator of ShaFile objects 

1304 

1305 Raises: 

1306 KeyError: If an object is missing and allow_missing is False 

1307 """ 

1308 todo: set[ObjectID] = set(shas) 

1309 for p in self._iter_cached_packs(): 

1310 for o in p.iterobjects_subset(todo, allow_missing=True): 

1311 yield o 

1312 todo.remove(o.id) 

1313 # Maybe something else has added a pack with the object 

1314 # in the mean time? 

1315 for p in self._update_pack_cache(): 

1316 for o in p.iterobjects_subset(todo, allow_missing=True): 

1317 yield o 

1318 todo.remove(o.id) 

1319 for alternate in self.alternates: 

1320 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

1321 yield o 

1322 todo.remove(o.id) 

1323 for oid in todo: 

1324 loose_obj: ShaFile | None = self._get_loose_object(oid) 

1325 if loose_obj is not None: 

1326 yield loose_obj 

1327 elif not allow_missing: 

1328 raise KeyError(oid) 

1329 

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
            sha1: sha for the object.
            include_comp: Whether to include compression metadata.

        Raises:
            KeyError: if the object is not found in this store or alternates.
        """
        # Accept either the hex or the binary object id; packs take the
        # binary form, alternates the hex form.
        if len(sha1) == self.object_format.hex_length:
            sha = hex_to_sha(cast(ObjectID, sha1))
            hexsha = cast(ObjectID, sha1)
        elif len(sha1) == self.object_format.oid_length:
            sha = cast(RawObjectID, sha1)
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        # Hex form is only needed past this point; compute it lazily.
        if hexsha is None:
            hexsha = sha_to_hex(sha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            assert isinstance(alternate, PackBasedObjectStore)
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

1368 

1369 def add_objects( 

1370 self, 

1371 objects: Sequence[tuple[ShaFile, str | None]], 

1372 progress: Callable[[str], None] | None = None, 

1373 ) -> "Pack | None": 

1374 """Add a set of objects to this object store. 

1375 

1376 Args: 

1377 objects: Iterable over (object, path) tuples, should support 

1378 __len__. 

1379 progress: Optional progress reporting function. 

1380 Returns: Pack object of the objects written. 

1381 """ 

1382 count = len(objects) 

1383 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1384 return self.add_pack_data(count, record_iter, progress=progress) 

1385 

1386 

1387class DiskObjectStore(PackBasedObjectStore): 

1388 """Git-style object store that exists on disk.""" 

1389 

1390 path: str | os.PathLike[str] 

1391 pack_dir: str | os.PathLike[str] 

1392 _alternates: "list[BaseObjectStore] | None" 

1393 _commit_graph: "CommitGraph | None" 

1394 

    def __init__(
        self,
        path: str | os.PathLike[str],
        *,
        loose_compression_level: int = -1,
        pack_compression_level: int = -1,
        pack_index_version: int | None = None,
        pack_delta_window_size: int | None = None,
        pack_window_memory: int | None = None,
        pack_delta_cache_size: int | None = None,
        pack_depth: int | None = None,
        pack_threads: int | None = None,
        pack_big_file_threshold: int | None = None,
        fsync_object_files: bool = False,
        pack_write_bitmaps: bool = False,
        pack_write_bitmap_hash_cache: bool = True,
        pack_write_bitmap_lookup_table: bool = True,
        file_mode: int | None = None,
        dir_mode: int | None = None,
        object_format: "ObjectFormat | None" = None,
    ) -> None:
        """Open an object store.

        Args:
            path: Path of the object store.
            loose_compression_level: zlib compression level for loose objects
            pack_compression_level: zlib compression level for pack objects
            pack_index_version: pack index version to use (1, 2, or 3)
            pack_delta_window_size: sliding window size for delta compression
            pack_window_memory: memory limit for delta window operations
            pack_delta_cache_size: size of cache for delta operations
            pack_depth: maximum delta chain depth
            pack_threads: number of threads for pack operations
            pack_big_file_threshold: threshold for treating files as big
            fsync_object_files: whether to fsync object files for durability
            pack_write_bitmaps: whether to write bitmap indexes for packs
            pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
            pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
            file_mode: File permission mask for shared repository
            dir_mode: Directory permission mask for shared repository
            object_format: Hash algorithm to use (SHA1 or SHA256)
        """
        # Import here to avoid circular dependency
        from .object_format import DEFAULT_OBJECT_FORMAT

        # Pack-related tuning is handled by the PackBasedObjectStore base.
        super().__init__(
            pack_compression_level=pack_compression_level,
            pack_index_version=pack_index_version,
            pack_delta_window_size=pack_delta_window_size,
            pack_window_memory=pack_window_memory,
            pack_delta_cache_size=pack_delta_cache_size,
            pack_depth=pack_depth,
            pack_threads=pack_threads,
            pack_big_file_threshold=pack_big_file_threshold,
            object_format=object_format if object_format else DEFAULT_OBJECT_FORMAT,
        )
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        # Alternates are resolved lazily on first access (see `alternates`).
        self._alternates = None
        self.loose_compression_level = loose_compression_level
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.fsync_object_files = fsync_object_files
        self.pack_write_bitmaps = pack_write_bitmaps
        self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
        self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
        self.file_mode = file_mode
        self.dir_mode = dir_mode

        # Commit graph support - lazy loaded
        self._commit_graph = None
        self._use_commit_graph = True  # Default to true

        # Multi-pack-index support - lazy loaded
        self._midx: MultiPackIndex | None = None
        self._use_midx = True  # Default to true

1471 

1472 def __repr__(self) -> str: 

1473 """Return string representation of DiskObjectStore. 

1474 

1475 Returns: 

1476 String representation including the store path 

1477 """ 

1478 return f"<{self.__class__.__name__}({self.path!r})>" 

1479 

1480 @classmethod 

1481 def from_config( 

1482 cls, 

1483 path: str | os.PathLike[str], 

1484 config: "Config", 

1485 *, 

1486 file_mode: int | None = None, 

1487 dir_mode: int | None = None, 

1488 ) -> "DiskObjectStore": 

1489 """Create a DiskObjectStore from a configuration object. 

1490 

1491 Args: 

1492 path: Path to the object store directory 

1493 config: Configuration object to read settings from 

1494 file_mode: Optional file permission mask for shared repository 

1495 dir_mode: Optional directory permission mask for shared repository 

1496 

1497 Returns: 

1498 New DiskObjectStore instance configured according to config 

1499 """ 

1500 try: 

1501 default_compression_level = int( 

1502 config.get((b"core",), b"compression").decode() 

1503 ) 

1504 except KeyError: 

1505 default_compression_level = -1 

1506 try: 

1507 loose_compression_level = int( 

1508 config.get((b"core",), b"looseCompression").decode() 

1509 ) 

1510 except KeyError: 

1511 loose_compression_level = default_compression_level 

1512 try: 

1513 pack_compression_level = int( 

1514 config.get((b"core",), "packCompression").decode() 

1515 ) 

1516 except KeyError: 

1517 pack_compression_level = default_compression_level 

1518 try: 

1519 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1520 except KeyError: 

1521 pack_index_version = None 

1522 

1523 # Read pack configuration options 

1524 try: 

1525 pack_delta_window_size = int( 

1526 config.get((b"pack",), b"deltaWindowSize").decode() 

1527 ) 

1528 except KeyError: 

1529 pack_delta_window_size = None 

1530 try: 

1531 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1532 except KeyError: 

1533 pack_window_memory = None 

1534 try: 

1535 pack_delta_cache_size = int( 

1536 config.get((b"pack",), b"deltaCacheSize").decode() 

1537 ) 

1538 except KeyError: 

1539 pack_delta_cache_size = None 

1540 try: 

1541 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1542 except KeyError: 

1543 pack_depth = None 

1544 try: 

1545 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1546 except KeyError: 

1547 pack_threads = None 

1548 try: 

1549 pack_big_file_threshold = int( 

1550 config.get((b"pack",), b"bigFileThreshold").decode() 

1551 ) 

1552 except KeyError: 

1553 pack_big_file_threshold = None 

1554 

1555 # Read core.commitGraph setting 

1556 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1557 

1558 # Read core.multiPackIndex setting 

1559 use_midx = config.get_boolean((b"core",), b"multiPackIndex", True) 

1560 

1561 # Read core.fsyncObjectFiles setting 

1562 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False) 

1563 

1564 # Read bitmap settings 

1565 pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False) 

1566 pack_write_bitmap_hash_cache = config.get_boolean( 

1567 (b"pack",), b"writeBitmapHashCache", True 

1568 ) 

1569 pack_write_bitmap_lookup_table = config.get_boolean( 

1570 (b"pack",), b"writeBitmapLookupTable", True 

1571 ) 

1572 # Also check repack.writeBitmaps for backwards compatibility 

1573 if not pack_write_bitmaps: 

1574 pack_write_bitmaps = config.get_boolean( 

1575 (b"repack",), b"writeBitmaps", False 

1576 ) 

1577 

1578 # Get hash algorithm from config 

1579 from .object_format import get_object_format 

1580 

1581 object_format = None 

1582 try: 

1583 try: 

1584 version = int(config.get((b"core",), b"repositoryformatversion")) 

1585 except KeyError: 

1586 version = 0 

1587 if version == 1: 

1588 try: 

1589 object_format_name = config.get((b"extensions",), b"objectformat") 

1590 except KeyError: 

1591 object_format_name = b"sha1" 

1592 object_format = get_object_format(object_format_name.decode("ascii")) 

1593 except (KeyError, ValueError): 

1594 pass 

1595 

1596 instance = cls( 

1597 path, 

1598 loose_compression_level=loose_compression_level, 

1599 pack_compression_level=pack_compression_level, 

1600 pack_index_version=pack_index_version, 

1601 pack_delta_window_size=pack_delta_window_size, 

1602 pack_window_memory=pack_window_memory, 

1603 pack_delta_cache_size=pack_delta_cache_size, 

1604 pack_depth=pack_depth, 

1605 pack_threads=pack_threads, 

1606 pack_big_file_threshold=pack_big_file_threshold, 

1607 fsync_object_files=fsync_object_files, 

1608 pack_write_bitmaps=pack_write_bitmaps, 

1609 pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache, 

1610 pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table, 

1611 file_mode=file_mode, 

1612 dir_mode=dir_mode, 

1613 object_format=object_format, 

1614 ) 

1615 instance._use_commit_graph = use_commit_graph 

1616 instance._use_midx = use_midx 

1617 return instance 

1618 

1619 @property 

1620 def alternates(self) -> list["BaseObjectStore"]: 

1621 """Get the list of alternate object stores. 

1622 

1623 Reads from .git/objects/info/alternates if not already cached. 

1624 

1625 Returns: 

1626 List of DiskObjectStore instances for alternate object directories 

1627 """ 

1628 if self._alternates is not None: 

1629 return self._alternates 

1630 self._alternates = [] 

1631 for path in self._read_alternate_paths(): 

1632 self._alternates.append(DiskObjectStore(path)) 

1633 return self._alternates 

1634 

1635 def _read_alternate_paths(self) -> Iterator[str]: 

1636 try: 

1637 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1638 except FileNotFoundError: 

1639 return 

1640 with f: 

1641 for line in f.readlines(): 

1642 line = line.rstrip(b"\n") 

1643 if line.startswith(b"#"): 

1644 continue 

1645 if os.path.isabs(line): 

1646 yield os.fsdecode(line) 

1647 else: 

1648 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1649 

    def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
        """Add an alternate path to this object store.

        Appends ``path`` to objects/info/alternates (creating the info dir
        and the file as needed) and registers a store for it immediately.
        """
        info_dir = os.path.join(self.path, INFODIR)
        try:
            os.mkdir(info_dir)
            if self.dir_mode is not None:
                os.chmod(info_dir, self.dir_mode)
        except FileExistsError:
            pass
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        mask = self.file_mode if self.file_mode is not None else 0o644
        # The file is rewritten from scratch via GitFile: copy any existing
        # entries first, then append the new one at the end.
        with GitFile(alternates_path, "wb", mask=mask) as f:
            try:
                orig_f = open(alternates_path, "rb")
            except FileNotFoundError:
                pass
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(os.fsencode(path) + b"\n")

        # Relative alternates are resolved against this object directory.
        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

1674 

    def _update_pack_cache(self) -> list[Pack]:
        """Read and iterate over new pack files and cache them.

        Returns:
            List of packs that were newly added to the cache; packs that
            were already cached are not included.
        """
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except FileNotFoundError:
            # No pack directory yet -> nothing to cache.
            return []
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    # Extract just the hash (remove "pack-" prefix and ".pack" suffix)
                    pack_hash = name[len("pack-") : -len(".pack")]
                    pack_files.add(pack_hash)

        # Open newly appeared pack files
        new_packs = []
        for pack_hash in pack_files:
            if pack_hash not in self._pack_cache:
                pack = Pack(
                    os.path.join(self.pack_dir, "pack-" + pack_hash),
                    object_format=self.object_format,
                    delta_window_size=self.pack_delta_window_size,
                    window_memory=self.pack_window_memory,
                    delta_cache_size=self.pack_delta_cache_size,
                    depth=self.pack_depth,
                    threads=self.pack_threads,
                    big_file_threshold=self.pack_big_file_threshold,
                )
                new_packs.append(pack)
                self._pack_cache[pack_hash] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

1712 

    def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str:
        """Return the on-disk path for the loose object with this SHA."""
        # Check from object dir
        return hex_to_filename(os.fspath(self.path), sha)

1716 

1717 def _iter_loose_objects(self) -> Iterator[ObjectID]: 

1718 for base in os.listdir(self.path): 

1719 if len(base) != 2: 

1720 continue 

1721 for rest in os.listdir(os.path.join(self.path, base)): 

1722 sha = os.fsencode(base + rest) 

1723 if not valid_hexsha(sha): 

1724 continue 

1725 yield ObjectID(sha) 

1726 

1727 def count_loose_objects(self) -> int: 

1728 """Count the number of loose objects in the object store. 

1729 

1730 Returns: 

1731 Number of loose objects 

1732 """ 

1733 # Calculate expected filename length for loose 

1734 # objects (excluding directory) 

1735 fn_length = self.object_format.hex_length - 2 

1736 count = 0 

1737 if not os.path.exists(self.path): 

1738 return 0 

1739 

1740 for i in range(256): 

1741 subdir = os.path.join(self.path, f"{i:02x}") 

1742 try: 

1743 count += len( 

1744 [name for name in os.listdir(subdir) if len(name) == fn_length] 

1745 ) 

1746 except FileNotFoundError: 

1747 # Directory may have been removed or is inaccessible 

1748 continue 

1749 

1750 return count 

1751 

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
        """Load a loose object from disk.

        Returns:
            The parsed ShaFile, or None when no loose file exists for ``sha``.
        """
        path = self._get_shafile_path(sha)
        try:
            # Load the object from path with SHA and hash algorithm from object store
            # Convert to hex ObjectID if needed
            if len(sha) == self.object_format.oid_length:
                hex_sha: ObjectID = sha_to_hex(RawObjectID(sha))
            else:
                hex_sha = ObjectID(sha)
            return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
        except FileNotFoundError:
            return None

1764 

1765 def delete_loose_object(self, sha: ObjectID) -> None: 

1766 """Delete a loose object from disk. 

1767 

1768 Args: 

1769 sha: SHA1 of the object to delete 

1770 

1771 Raises: 

1772 FileNotFoundError: If the object file doesn't exist 

1773 """ 

1774 os.remove(self._get_shafile_path(sha)) 

1775 

1776 def get_object_mtime(self, sha: ObjectID) -> float: 

1777 """Get the modification time of an object. 

1778 

1779 Args: 

1780 sha: SHA1 of the object 

1781 

1782 Returns: 

1783 Modification time as seconds since epoch 

1784 

1785 Raises: 

1786 KeyError: if the object is not found 

1787 """ 

1788 # First check if it's a loose object 

1789 if self.contains_loose(sha): 

1790 path = self._get_shafile_path(sha) 

1791 try: 

1792 return os.path.getmtime(path) 

1793 except FileNotFoundError: 

1794 pass 

1795 

1796 # Check if it's in a pack file 

1797 for pack in self.packs: 

1798 try: 

1799 if sha in pack: 

1800 # Use the pack file's mtime for packed objects 

1801 pack_path = pack._data_path 

1802 try: 

1803 return os.path.getmtime(pack_path) 

1804 except (FileNotFoundError, AttributeError): 

1805 pass 

1806 except PackFileDisappeared: 

1807 pass 

1808 

1809 raise KeyError(sha) 

1810 

1811 def _remove_pack(self, pack: Pack) -> None: 

1812 try: 

1813 del self._pack_cache[os.path.basename(pack._basename)] 

1814 except KeyError: 

1815 pass 

1816 # Store paths before closing to avoid re-opening files on Windows 

1817 data_path = pack._data_path 

1818 idx_path = pack._idx_path 

1819 pack.close() 

1820 os.remove(data_path) 

1821 if os.path.exists(idx_path): 

1822 os.remove(idx_path) 

1823 

1824 def _get_pack_basepath( 

1825 self, entries: Iterable[tuple[bytes, int, int | None]] 

1826 ) -> str: 

1827 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1828 # TODO: Handle self.pack_dir being bytes 

1829 suffix = suffix_bytes.decode("ascii") 

1830 return os.path.join(self.pack_dir, "pack-" + suffix) 

1831 

1832 def _complete_pack( 

1833 self, 

1834 f: BinaryIO, 

1835 path: str, 

1836 num_objects: int, 

1837 indexer: PackIndexer, 

1838 progress: Callable[..., None] | None = None, 

1839 refs: dict[Ref, ObjectID] | None = None, 

1840 ) -> Pack: 

1841 """Move a specific file containing a pack into the pack directory. 

1842 

1843 Note: The file should be on the same file system as the 

1844 packs directory. 

1845 

1846 Args: 

1847 f: Open file object for the pack. 

1848 path: Path to the pack file. 

1849 num_objects: Number of objects in the pack. 

1850 indexer: A PackIndexer for indexing the pack. 

1851 progress: Optional progress reporting function. 

1852 refs: Optional dictionary of refs for bitmap generation. 

1853 """ 

1854 entries = [] 

1855 for i, entry in enumerate(indexer): 

1856 if progress is not None: 

1857 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1858 entries.append(entry) 

1859 

1860 pack_sha, extra_entries = extend_pack( 

1861 f, 

1862 set(indexer.ext_refs()), 

1863 get_raw=self.get_raw, 

1864 compression_level=self.pack_compression_level, 

1865 progress=progress, 

1866 object_format=self.object_format, 

1867 ) 

1868 f.flush() 

1869 if self.fsync_object_files: 

1870 try: 

1871 fileno = f.fileno() 

1872 except AttributeError as e: 

1873 raise OSError("fsync requested but file has no fileno()") from e 

1874 else: 

1875 os.fsync(fileno) 

1876 f.close() 

1877 

1878 entries.extend(extra_entries) 

1879 

1880 # Move the pack in. 

1881 entries.sort() 

1882 pack_base_name = self._get_pack_basepath(entries) 

1883 

1884 for pack in self.packs: 

1885 if pack._basename == pack_base_name: 

1886 return pack 

1887 

1888 target_pack_path = pack_base_name + ".pack" 

1889 target_index_path = pack_base_name + ".idx" 

1890 if sys.platform == "win32": 

1891 # Windows might have the target pack file lingering. Attempt 

1892 # removal, silently passing if the target does not exist. 

1893 with suppress(FileNotFoundError): 

1894 os.remove(target_pack_path) 

1895 os.rename(path, target_pack_path) 

1896 

1897 # Write the index. 

1898 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

1899 with GitFile( 

1900 target_index_path, 

1901 "wb", 

1902 mask=mask, 

1903 fsync=self.fsync_object_files, 

1904 ) as index_file: 

1905 write_pack_index( 

1906 index_file, entries, pack_sha, version=self.pack_index_version 

1907 ) 

1908 

1909 # Generate bitmap if configured and refs are available 

1910 if self.pack_write_bitmaps and refs: 

1911 from .bitmap import generate_bitmap, write_bitmap 

1912 from .pack import load_pack_index_file 

1913 

1914 if progress: 

1915 progress("Generating bitmap index\r".encode("ascii")) 

1916 

1917 # Load the index we just wrote 

1918 with open(target_index_path, "rb") as idx_file: 

1919 pack_index = load_pack_index_file( 

1920 os.path.basename(target_index_path), 

1921 idx_file, 

1922 self.object_format, 

1923 ) 

1924 

1925 # Generate the bitmap 

1926 bitmap = generate_bitmap( 

1927 pack_index=pack_index, 

1928 object_store=self, 

1929 refs=refs, 

1930 pack_checksum=pack_sha, 

1931 include_hash_cache=self.pack_write_bitmap_hash_cache, 

1932 include_lookup_table=self.pack_write_bitmap_lookup_table, 

1933 progress=lambda msg: ( 

1934 progress(msg.encode("ascii")) 

1935 if progress and isinstance(msg, str) 

1936 else None 

1937 ), 

1938 ) 

1939 

1940 # Write the bitmap 

1941 target_bitmap_path = pack_base_name + ".bitmap" 

1942 write_bitmap(target_bitmap_path, bitmap) 

1943 

1944 if progress: 

1945 progress("Bitmap index written\r".encode("ascii")) 

1946 

1947 # Add the pack to the store and return it. 

1948 final_pack = Pack( 

1949 pack_base_name, 

1950 object_format=self.object_format, 

1951 delta_window_size=self.pack_delta_window_size, 

1952 window_memory=self.pack_window_memory, 

1953 delta_cache_size=self.pack_delta_cache_size, 

1954 depth=self.pack_depth, 

1955 threads=self.pack_threads, 

1956 big_file_threshold=self.pack_big_file_threshold, 

1957 ) 

1958 final_pack.check_length_and_checksum() 

1959 # Extract just the hash from pack_base_name (/path/to/pack-HASH -> HASH) 

1960 pack_hash = os.path.basename(pack_base_name)[len("pack-") :] 

1961 self._add_cached_pack(pack_hash, final_pack) 

1962 return final_pack 

1963 

1964 def add_thin_pack( 

1965 self, 

1966 read_all: Callable[[int], bytes], 

1967 read_some: Callable[[int], bytes] | None, 

1968 progress: Callable[..., None] | None = None, 

1969 ) -> "Pack": 

1970 """Add a new thin pack to this object store. 

1971 

1972 Thin packs are packs that contain deltas with parents that exist 

1973 outside the pack. They should never be placed in the object store 

1974 directly, and always indexed and completed as they are copied. 

1975 

1976 Args: 

1977 read_all: Read function that blocks until the number of 

1978 requested bytes are read. 

1979 read_some: Read function that returns at least one byte, but may 

1980 not return the number of bytes requested. 

1981 progress: Optional progress reporting function. 

1982 Returns: A Pack object pointing at the now-completed thin pack in the 

1983 objects/pack directory. 

1984 """ 

1985 import tempfile 

1986 

1987 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1988 with os.fdopen(fd, "w+b") as f: 

1989 os.chmod(path, PACK_MODE) 

1990 indexer = PackIndexer( 

1991 f, 

1992 self.object_format.hash_func, 

1993 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

1994 ) 

1995 copier = PackStreamCopier( 

1996 self.object_format.hash_func, 

1997 read_all, 

1998 read_some, 

1999 f, 

2000 delta_iter=indexer, # type: ignore[arg-type] 

2001 ) 

2002 copier.verify(progress=progress) 

2003 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

2004 

2005 def add_pack( 

2006 self, 

2007 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2008 """Add a new pack to this object store. 

2009 

2010 Returns: Fileobject to write to, a commit function to 

2011 call when the pack is finished and an abort 

2012 function. 

2013 """ 

2014 import tempfile 

2015 

2016 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

2017 f = os.fdopen(fd, "w+b") 

2018 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

2019 os.chmod(path, mask) 

2020 

2021 def commit() -> "Pack | None": 

2022 if f.tell() > 0: 

2023 f.seek(0) 

2024 

2025 with PackData(path, file=f, object_format=self.object_format) as pd: 

2026 indexer = PackIndexer.for_pack_data( 

2027 pd, 

2028 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

2029 ) 

2030 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

2031 else: 

2032 f.close() 

2033 os.remove(path) 

2034 return None 

2035 

2036 def abort() -> None: 

2037 f.close() 

2038 os.remove(path) 

2039 

2040 return f, commit, abort # type: ignore[return-value] 

2041 

2042 def add_object(self, obj: ShaFile) -> None: 

2043 """Add a single object to this object store. 

2044 

2045 Args: 

2046 obj: Object to add 

2047 """ 

2048 # Use the correct hash algorithm for the object ID 

2049 obj_id = ObjectID(obj.get_id(self.object_format)) 

2050 path = self._get_shafile_path(obj_id) 

2051 dir = os.path.dirname(path) 

2052 try: 

2053 os.mkdir(dir) 

2054 if self.dir_mode is not None: 

2055 os.chmod(dir, self.dir_mode) 

2056 except FileExistsError: 

2057 pass 

2058 if os.path.exists(path): 

2059 return # Already there, no need to write again 

2060 mask = self.file_mode if self.file_mode is not None else PACK_MODE 

2061 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f: 

2062 f.write( 

2063 obj.as_legacy_object(compression_level=self.loose_compression_level) 

2064 ) 

2065 

2066 @classmethod 

2067 def init( 

2068 cls, 

2069 path: str | os.PathLike[str], 

2070 *, 

2071 file_mode: int | None = None, 

2072 dir_mode: int | None = None, 

2073 object_format: "ObjectFormat | None" = None, 

2074 ) -> "DiskObjectStore": 

2075 """Initialize a new disk object store. 

2076 

2077 Creates the necessary directory structure for a Git object store. 

2078 

2079 Args: 

2080 path: Path where the object store should be created 

2081 file_mode: Optional file permission mask for shared repository 

2082 dir_mode: Optional directory permission mask for shared repository 

2083 object_format: Hash algorithm to use (SHA1 or SHA256) 

2084 

2085 Returns: 

2086 New DiskObjectStore instance 

2087 """ 

2088 try: 

2089 os.mkdir(path) 

2090 if dir_mode is not None: 

2091 os.chmod(path, dir_mode) 

2092 except FileExistsError: 

2093 pass 

2094 info_path = os.path.join(path, "info") 

2095 pack_path = os.path.join(path, PACKDIR) 

2096 os.mkdir(info_path) 

2097 os.mkdir(pack_path) 

2098 if dir_mode is not None: 

2099 os.chmod(info_path, dir_mode) 

2100 os.chmod(pack_path, dir_mode) 

2101 return cls( 

2102 path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format 

2103 ) 

2104 

2105 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

2106 """Iterate over all object SHAs with the given prefix. 

2107 

2108 Args: 

2109 prefix: Hex prefix to search for (as bytes) 

2110 

2111 Returns: 

2112 Iterator of object SHAs (as ObjectID) matching the prefix 

2113 """ 

2114 if len(prefix) < 2: 

2115 yield from super().iter_prefix(prefix) 

2116 return 

2117 seen = set() 

2118 dir = prefix[:2].decode() 

2119 rest = prefix[2:].decode() 

2120 try: 

2121 for name in os.listdir(os.path.join(self.path, dir)): 

2122 if name.startswith(rest): 

2123 sha = ObjectID(os.fsencode(dir + name)) 

2124 if sha not in seen: 

2125 seen.add(sha) 

2126 yield sha 

2127 except FileNotFoundError: 

2128 pass 

2129 

2130 for p in self.packs: 

2131 bin_prefix = ( 

2132 binascii.unhexlify(prefix) 

2133 if len(prefix) % 2 == 0 

2134 else binascii.unhexlify(prefix[:-1]) 

2135 ) 

2136 for bin_sha in p.index.iter_prefix(bin_prefix): 

2137 sha = sha_to_hex(bin_sha) 

2138 if sha.startswith(prefix) and sha not in seen: 

2139 seen.add(sha) 

2140 yield sha 

2141 for alternate in self.alternates: 

2142 for sha in alternate.iter_prefix(prefix): 

2143 if sha not in seen: 

2144 seen.add(sha) 

2145 yield sha 

2146 

2147 def get_commit_graph(self) -> "CommitGraph | None": 

2148 """Get the commit graph for this object store. 

2149 

2150 Returns: 

2151 CommitGraph object if available, None otherwise 

2152 """ 

2153 if not self._use_commit_graph: 

2154 return None 

2155 

2156 if self._commit_graph is None: 

2157 from .commit_graph import read_commit_graph 

2158 

2159 # Look for commit graph in our objects directory 

2160 graph_file = os.path.join(self.path, "info", "commit-graph") 

2161 if os.path.exists(graph_file): 

2162 self._commit_graph = read_commit_graph(graph_file) 

2163 return self._commit_graph 

2164 

2165 def get_midx(self) -> MultiPackIndex | None: 

2166 """Get the multi-pack-index for this object store. 

2167 

2168 Returns: 

2169 MultiPackIndex object if available, None otherwise 

2170 

2171 Raises: 

2172 ValueError: If MIDX file is corrupt 

2173 OSError: If MIDX file cannot be read 

2174 """ 

2175 if not self._use_midx: 

2176 return None 

2177 

2178 if self._midx is None: 

2179 # Look for MIDX in pack directory 

2180 midx_file = os.path.join(self.pack_dir, "multi-pack-index") 

2181 if os.path.exists(midx_file): 

2182 self._midx = load_midx(midx_file) 

2183 return self._midx 

2184 

2185 def _get_pack_by_name(self, pack_name: str) -> Pack: 

2186 """Get a pack by its base name. 

2187 

2188 Args: 

2189 pack_name: Base name of the pack (e.g., 'pack-abc123.pack' or 'pack-abc123.idx') 

2190 

2191 Returns: 

2192 Pack object 

2193 

2194 Raises: 

2195 KeyError: If pack doesn't exist 

2196 """ 

2197 # Remove .pack or .idx extension if present 

2198 if pack_name.endswith(".pack"): 

2199 base_name = pack_name[:-5] 

2200 elif pack_name.endswith(".idx"): 

2201 base_name = pack_name[:-4] 

2202 else: 

2203 base_name = pack_name 

2204 

2205 # Check if already in cache 

2206 if base_name in self._pack_cache: 

2207 return self._pack_cache[base_name] 

2208 

2209 # Load the pack 

2210 pack_path = os.path.join(self.pack_dir, base_name) 

2211 if not os.path.exists(pack_path + ".pack"): 

2212 raise KeyError(f"Pack {pack_name} not found") 

2213 

2214 pack = Pack( 

2215 pack_path, 

2216 object_format=self.object_format, 

2217 delta_window_size=self.pack_delta_window_size, 

2218 window_memory=self.pack_window_memory, 

2219 delta_cache_size=self.pack_delta_cache_size, 

2220 depth=self.pack_depth, 

2221 threads=self.pack_threads, 

2222 big_file_threshold=self.pack_big_file_threshold, 

2223 ) 

2224 self._pack_cache[base_name] = pack 

2225 return pack 

2226 

2227 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool: 

2228 """Check if a particular object is present by SHA1 and is packed. 

2229 

2230 This checks the MIDX first if available, then falls back to checking 

2231 individual pack indexes. 

2232 

2233 Args: 

2234 sha: Binary SHA of the object 

2235 

2236 Returns: 

2237 True if the object is in a pack file 

2238 """ 

2239 # Check MIDX first for faster lookup 

2240 midx = self.get_midx() 

2241 if midx is not None and sha in midx: 

2242 return True 

2243 

2244 # Fall back to checking individual packs 

2245 return super().contains_packed(sha) 

2246 

2247 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]: 

2248 """Obtain the raw fulltext for an object. 

2249 

2250 This uses the MIDX if available for faster lookups. 

2251 

2252 Args: 

2253 name: SHA for the object (20 bytes binary or 40 bytes hex) 

2254 

2255 Returns: 

2256 Tuple with numeric type and object contents 

2257 

2258 Raises: 

2259 KeyError: If object not found 

2260 """ 

2261 sha: RawObjectID 

2262 if len(name) in (40, 64): 

2263 # name is ObjectID (hex), convert to RawObjectID 

2264 # Support both SHA1 (40) and SHA256 (64) 

2265 sha = hex_to_sha(cast(ObjectID, name)) 

2266 elif len(name) in (20, 32): 

2267 # name is already RawObjectID (binary) 

2268 # Support both SHA1 (20) and SHA256 (32) 

2269 sha = RawObjectID(name) 

2270 else: 

2271 raise AssertionError(f"Invalid object name {name!r}") 

2272 

2273 # Try MIDX first for faster lookup 

2274 midx = self.get_midx() 

2275 if midx is not None: 

2276 result = midx.object_offset(sha) 

2277 if result is not None: 

2278 pack_name, _offset = result 

2279 try: 

2280 pack = self._get_pack_by_name(pack_name) 

2281 return pack.get_raw(sha) 

2282 except (KeyError, PackFileDisappeared): 

2283 # Pack disappeared or object not found, fall through to standard lookup 

2284 pass 

2285 

2286 # Fall back to the standard implementation 

2287 return super().get_raw(name) 

2288 

2289 def write_midx(self) -> bytes: 

2290 """Write a multi-pack-index file for this object store. 

2291 

2292 Creates a MIDX file that indexes all pack files in the pack directory. 

2293 

2294 Returns: 

2295 SHA-1 checksum of the written MIDX file 

2296 

2297 Raises: 

2298 OSError: If the pack directory doesn't exist or MIDX can't be written 

2299 """ 

2300 from .midx import write_midx_file 

2301 

2302 # Get all pack files 

2303 packs = self.packs 

2304 if not packs: 

2305 # No packs to index 

2306 return b"\x00" * 20 

2307 

2308 # Collect entries from all packs 

2309 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = [] 

2310 

2311 for pack in packs: 

2312 # Git stores .idx extension in MIDX, not .pack 

2313 pack_name = os.path.basename(pack._basename) + ".idx" 

2314 entries = list(pack.index.iterentries()) 

2315 pack_entries.append((pack_name, entries)) 

2316 

2317 # Write MIDX file 

2318 midx_path = os.path.join(self.pack_dir, "multi-pack-index") 

2319 return write_midx_file(midx_path, pack_entries) 

2320 

    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        The graph is written atomically to ``<objects>/info/commit-graph``
        and the in-memory cache is invalidated so the next
        ``get_commit_graph()`` call reloads it from disk.

        Args:
            refs: List of refs to include. If None, includes all refs from object store.
            reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.
        """
        from .commit_graph import get_reachable_commits

        if refs is None:
            # Get all commit objects from the object store
            all_refs = []
            # Iterate through all objects to find commits
            for sha in self:
                try:
                    obj = self[sha]
                    if obj.type_name == b"commit":
                        all_refs.append(sha)
                except KeyError:
                    # Object disappeared between listing and lookup; skip it.
                    continue
        else:
            # Use provided refs
            all_refs = list(refs)

        if not all_refs:
            return  # No commits to include

        if reachable:
            # Get all reachable commits
            commit_ids = get_reachable_commits(self, all_refs)
        else:
            # Just use the direct ref targets - ensure they're hex ObjectIDs
            commit_ids = []
            for ref in all_refs:
                if isinstance(ref, bytes) and len(ref) == self.object_format.hex_length:
                    # Already hex ObjectID
                    commit_ids.append(ref)
                elif (
                    isinstance(ref, bytes) and len(ref) == self.object_format.oid_length
                ):
                    # Binary SHA, convert to hex ObjectID
                    from .objects import sha_to_hex

                    commit_ids.append(sha_to_hex(RawObjectID(ref)))
                else:
                    # Assume it's already correct format
                    commit_ids.append(ref)

        if commit_ids:
            # Write commit graph directly to our object store path
            # Generate the commit graph
            from .commit_graph import generate_commit_graph

            graph = generate_commit_graph(self, commit_ids)

            if graph.entries:
                # Ensure the info directory exists
                info_dir = os.path.join(self.path, "info")
                os.makedirs(info_dir, exist_ok=True)
                if self.dir_mode is not None:
                    os.chmod(info_dir, self.dir_mode)

                # Write using GitFile for atomic operation
                graph_path = os.path.join(info_dir, "commit-graph")
                mask = self.file_mode if self.file_mode is not None else 0o644
                with GitFile(graph_path, "wb", mask=mask) as f:
                    assert isinstance(
                        f, _GitFile
                    )  # GitFile in write mode always returns _GitFile
                    graph.write_to_file(f)

        # Clear cached commit graph so it gets reloaded
        self._commit_graph = None

2397 

2398 def prune(self, grace_period: int | None = None) -> None: 

2399 """Prune/clean up this object store. 

2400 

2401 This removes temporary files that were left behind by interrupted 

2402 pack operations. These are files that start with ``tmp_pack_`` in the 

2403 repository directory or files with .pack extension but no corresponding 

2404 .idx file in the pack directory. 

2405 

2406 Args: 

2407 grace_period: Grace period in seconds for removing temporary files. 

2408 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

2409 """ 

2410 import glob 

2411 

2412 if grace_period is None: 

2413 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

2414 

2415 # Clean up tmp_pack_* files in the repository directory 

2416 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

2417 # Check if file is old enough (more than grace period) 

2418 mtime = os.path.getmtime(tmp_file) 

2419 if time.time() - mtime > grace_period: 

2420 os.remove(tmp_file) 

2421 

2422 # Clean up orphaned .pack files without corresponding .idx files 

2423 try: 

2424 pack_dir_contents = os.listdir(self.pack_dir) 

2425 except FileNotFoundError: 

2426 return 

2427 

2428 pack_files = {} 

2429 idx_files = set() 

2430 

2431 for name in pack_dir_contents: 

2432 if name.endswith(".pack"): 

2433 base_name = name[:-5] # Remove .pack extension 

2434 pack_files[base_name] = name 

2435 elif name.endswith(".idx"): 

2436 base_name = name[:-4] # Remove .idx extension 

2437 idx_files.add(base_name) 

2438 

2439 # Remove .pack files without corresponding .idx files 

2440 for base_name, pack_name in pack_files.items(): 

2441 if base_name not in idx_files: 

2442 pack_path = os.path.join(self.pack_dir, pack_name) 

2443 # Check if file is old enough (more than grace period) 

2444 mtime = os.path.getmtime(pack_path) 

2445 if time.time() - mtime > grace_period: 

2446 os.remove(pack_path) 

2447 

2448 def close(self) -> None: 

2449 """Close the object store and release resources. 

2450 

2451 This method closes all cached pack files, MIDX, and frees associated resources. 

2452 Can be called multiple times safely. 

2453 """ 

2454 # Close MIDX if it's loaded 

2455 if self._midx is not None: 

2456 self._midx.close() 

2457 self._midx = None 

2458 

2459 # Close alternates 

2460 if self._alternates is not None: 

2461 for alt in self._alternates: 

2462 alt.close() 

2463 self._alternates = None 

2464 

2465 # Call parent class close to handle pack files 

2466 super().close() 

2467 

2468 

class MemoryObjectStore(PackCapableObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize a MemoryObjectStore.

        Creates an empty in-memory object store.

        Args:
            object_format: Hash algorithm to use (defaults to SHA1)
        """
        super().__init__(object_format=object_format)
        # Hex object ID -> ShaFile.  Objects are stored/returned as copies so
        # callers cannot mutate what the store holds.
        self._data: dict[ObjectID, ShaFile] = {}
        self.pack_compression_level = -1

    def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Normalize a hex or binary SHA to the hex form used as dict key.

        Raises:
            ValueError: If the length matches neither the hex nor the binary
                size for this store's hash algorithm.
        """
        if len(sha) == self.object_format.hex_length:
            return cast(ObjectID, sha)
        elif len(sha) == self.object_format.oid_length:
            return sha_to_hex(cast(RawObjectID, sha))
        else:
            raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        # A memory store never keeps pack files.
        return False

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self) -> list[Pack]:
        """List with pack objects."""
        return []

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile:
        """Retrieve an object by SHA.

        Args:
            name: SHA of the object (as hex string or bytes)

        Returns:
            Copy of the ShaFile object

        Raises:
            KeyError: If the object is not found
        """
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        # Key by the ID under this store's configured hash algorithm so the
        # stored key agrees with _to_hexsha()/__getitem__.  (Previously used
        # obj.id, which always yields the default-algorithm ID and is
        # inconsistent for stores configured with a different hash; compare
        # DiskObjectStore.add_object.)
        self._data[ObjectID(obj.get_id(self.object_format))] = obj.copy()

    def add_objects(
        self,
        objects: Iterable[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress reporting function.
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        from tempfile import SpooledTemporaryFile

        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            size = f.tell()
            if size > 0:
                f.seek(0)
                # Inflate the received pack and store each object loose.
                p = PackData.from_file(f, self.object_format, size)
                for obj in PackInflater.for_pack_data(p, self.get_raw):  # type: ignore[arg-type]
                    self.add_object(obj)
                p.close()
                f.close()
            else:
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort  # type: ignore[return-value]

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator of UnpackedObject instances
            progress: Optional progress reporting function.
        """
        if count == 0:
            return

        # Since MemoryObjectStore doesn't support pack files, we need to
        # extract individual objects. To handle deltas properly, we write
        # to a temporary pack and then use PackInflater to resolve them.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                object_format=self.object_format,
            )
        except BaseException:
            abort()
            raise
        else:
            commit()

    def add_thin_pack(
        self,
        read_all: Callable[[int], bytes],
        read_some: Callable[[int], bytes] | None,
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
            progress: Optional progress reporting function.
        """
        f, commit, abort = self.add_pack()
        try:
            copier = PackStreamCopier(
                self.object_format.hash_func,
                read_all,
                read_some,
                f,
            )
            copier.verify()
        except BaseException:
            abort()
            raise
        else:
            commit()

2653 

2654 

class ObjectIterator(Protocol):
    """Interface for iterating over objects."""

    def iterobjects(self) -> Iterator[ShaFile]:
        """Iterate over all objects.

        Returns:
            Iterator of ShaFile objects
        """
        # Raising here makes a direct call on the protocol itself fail
        # loudly instead of silently returning None.
        raise NotImplementedError(self.iterobjects)

2665 

2666 

def tree_lookup_path(
    lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile],
    root_sha: ObjectID | RawObjectID,
    path: bytes,
) -> tuple[int, ObjectID]:
    """Look up an object in a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup
    Returns: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        # The given SHA must name a tree object.
        raise NotTreeError(root_sha)
    return root.lookup_path(lookup_obj, path)

2684 

2685 

def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Collect SHA1s of files and directories for specified tree.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories
    """
    tree = obj_store[tree_sha]
    assert isinstance(tree, Tree)
    for _name, mode, sha in tree.iteritems():
        assert mode is not None
        assert sha is not None
        # Skip submodule (gitlink) entries and anything already collected.
        if S_ISGITLINK(mode) or sha in kset:
            continue
        kset.add(sha)
        if stat.S_ISDIR(mode):
            # Recurse into subtrees.
            _collect_filetree_revs(obj_store, sha, kset)

2705 

2706 

def _split_commits_and_tags(
    obj_store: ObjectContainer,
    lst: Iterable[ObjectID],
    *,
    unknown: str = "error",
) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]:
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, controlled by the unknown parameter.

    Args:
        obj_store: Object store to get objects by SHA1 from
        lst: Collection of commit and tag SHAs
        unknown: How to handle unknown objects: "error", "warn", or "ignore"
    Returns: A tuple of (commits, tags, others) SHA1s
    """
    import logging

    if unknown not in ("error", "warn", "ignore"):
        raise ValueError(
            f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}"
        )

    commits: set[ObjectID] = set()
    tags: set[ObjectID] = set()
    others: set[ObjectID] = set()
    for e in lst:
        try:
            o = obj_store[e]
        except KeyError:
            if unknown == "error":
                raise
            elif unknown == "warn":
                logging.warning(
                    "Object %s not found in object store", e.decode("ascii")
                )
            # else: ignore
        else:
            if isinstance(o, Commit):
                commits.add(e)
            elif isinstance(o, Tag):
                tags.add(e)
                tagged = o.object[1]
                # Recurse so that a chain of tags ultimately surfaces the
                # commit it points to.  (The recursive result previously
                # unpacked into a local named ``os``, shadowing the os
                # module; renamed to avoid that.)
                sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                    obj_store, [tagged], unknown=unknown
                )
                commits |= sub_commits
                tags |= sub_tags
                others |= sub_others
            else:
                others.add(e)
    return (commits, tags, others)

2759 

2760 

2761class MissingObjectFinder: 

2762 """Find the objects missing from another object store. 

2763 

2764 Args: 

2765 object_store: Object store containing at least all objects to be 

2766 sent 

2767 haves: SHA1s of commits not to send (already present in target) 

2768 wants: SHA1s of commits to send 

2769 progress: Optional function to report progress to. 

2770 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2771 sha for including tags. 

2772 get_parents: Optional function for getting the parents of a commit. 

2773 """ 

2774 

    def __init__(
        self,
        object_store: BaseObjectStore,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[[bytes], None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
    ) -> None:
        """Initialize a MissingObjectFinder.

        Args:
            object_store: Object store containing objects
            haves: SHA1s of objects already present in target
            wants: SHA1s of objects to send
            shallow: Set of shallow commit SHA1s
            progress: Optional progress reporting callback
            get_tagged: Function returning dict of pointed-to sha -> tag sha
            get_parents: Function for getting commit parents
        """
        self.object_store = object_store
        if shallow is None:
            shallow = set()
        self._get_parents = get_parents
        reachability = object_store.get_reachability_provider()
        # process Commits and Tags differently
        # haves may list commits/tags not available locally (silently ignore them).
        # wants should only contain valid SHAs (fail fast if not).
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, unknown="ignore"
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, unknown="error"
        )
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = reachability.get_reachable_commits(
            have_commits, exclude=None, shallow=shallow
        )
        # all_missing - complete set of commits between haves and wants
        # common_commits - boundary commits directly encountered when traversing wants
        # We use _collect_ancestors here because we need the exact boundary behavior:
        # commits that are in all_ancestors and directly reachable from wants,
        # but we don't traverse past them. This is hard to express with the
        # reachability abstraction alone.
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            frozenset(all_ancestors),
            shallow=frozenset(shallow),
            get_parents=self._get_parents,
        )

        self.remote_has: set[ObjectID] = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.remote_has.add(h)
            cmt = object_store[h]
            assert isinstance(cmt, Commit)
            # Get tree objects for this commit
            tree_objects = reachability.get_tree_objects([cmt.tree])
            self.remote_has.update(tree_objects)

        # record tags we have as visited, too
        for t in have_tags:
            self.remote_has.add(t)
        # sha_done starts as everything the remote already has; objects are
        # added to it as they are handed out via __next__.
        self.sha_done = set(self.remote_has)

        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
            (w, None, Commit.type_num, False) for w in missing_commits
        }
        missing_tags = want_tags.difference(have_tags)
        self.objects_to_send.update(
            {(w, None, Tag.type_num, False) for w in missing_tags}
        )
        missing_others = want_others.difference(have_others)
        self.objects_to_send.update({(w, None, None, False) for w in missing_others})

        if progress is None:
            # No-op sink keeps progress call sites unconditional.
            self.progress: Callable[[bytes], None] = lambda x: None
        else:
            self.progress = progress
        self._tagged = (get_tagged and get_tagged()) or {}

2866 def get_remote_has(self) -> set[ObjectID]: 

2867 """Get the set of SHAs the remote has. 

2868 

2869 Returns: 

2870 Set of SHA1s that the remote side already has 

2871 """ 

2872 return self.remote_has 

2873 

2874 def add_todo( 

2875 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]] 

2876 ) -> None: 

2877 """Add objects to the todo list. 

2878 

2879 Args: 

2880 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2881 """ 

2882 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2883 

    def __next__(self) -> tuple[ObjectID, PackHint | None]:
        """Get the next object to send.

        Pops entries off ``objects_to_send`` until an unprocessed SHA is
        found, expands it (queuing any objects it references), marks it
        done, and returns it together with an optional pack hint.

        Returns:
            Tuple of (sha, pack_hint)

        Raises:
            StopIteration: When no more objects to send
        """
        while True:
            if not self.objects_to_send:
                # Queue drained: report the final count and stop iterating.
                self.progress(
                    f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
                )
                raise StopIteration
            (sha, name, type_num, leaf) = self.objects_to_send.pop()
            # Entries may be queued more than once; skip already-done SHAs.
            if sha not in self.sha_done:
                break
        if not leaf:
            # Non-leaf objects can reference further objects; queue those too.
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, b"", Tree.type_num, False)])
            elif isinstance(o, Tree):
                todos = []
                for n, m, s in o.iteritems():
                    assert m is not None
                    assert n is not None
                    assert s is not None
                    # Skip submodule (gitlink) entries: they point at commits
                    # in other repositories and are never sent.
                    if not S_ISGITLINK(m):
                        todos.append(
                            (
                                s,
                                n,
                                (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
                                # Blobs and symlinks are leaves; subtrees are not.
                                not stat.S_ISDIR(m),
                            )
                        )
                self.add_todo(todos)
            elif isinstance(o, Tag):
                # Queue the tag's target object.
                self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
        if sha in self._tagged:
            # Also send the object this tag has been peeled to.
            self.add_todo([(self._tagged[sha], None, None, True)])
        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            # Periodic progress update (carriage return keeps it on one line).
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
        if type_num is None:
            pack_hint = None
        else:
            pack_hint = (type_num, name)
        return (sha, pack_hint)

2934 

    def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Return iterator over objects to send.

        The finder is its own iterator: iteration state lives on the
        instance (``objects_to_send`` / ``sha_done``), so this simply
        returns ``self``.

        Returns:
            Self (this class implements the iterator protocol)
        """
        return self

2942 

2943 

class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    # Revisions without descendants in the local repo.
    heads: set[ObjectID]

    # Function to retrieve parents in the local repo.
    get_parents: Callable[[ObjectID], list[ObjectID]]

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents: Callable[[ObjectID], list[ObjectID]],
        shallow: set[ObjectID] | None = None,
        update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
        | None = None,
    ) -> None:
        """Create a new instance.

        Args:
            local_heads: Heads to start search with
            get_parents: Function for finding the parents of a SHA1.
            shallow: Set of shallow commits.
            update_shallow: Function to update shallow commits.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps visited SHAs to their parent list; a value of None marks a
        # SHA whose ancestry has already been acknowledged.
        self.parents: dict[ObjectID, list[ObjectID] | None] = {}
        self.shallow = shallow if shallow is not None else set()
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            # TODO: support SHA256
            raise ValueError(f"unexpected sha {sha!r} received")
        frontier = {sha}

        # Walk back through known ancestry, dropping acknowledged heads,
        # until either no heads remain or the frontier runs dry.
        while self.heads:
            for node in frontier:
                if node in self.heads:
                    self.heads.remove(node)

            # Advance the frontier to the parents of every current node.
            next_frontier: set[ObjectID] = set()
            for node in frontier:
                known = self.parents.get(node)
                if known is not None:
                    next_frontier.update(known)
                # Mark the node's ancestry as acknowledged.
                self.parents[node] = None

            if not next_frontier:
                break

            frontier = next_frontier

    def next(self) -> ObjectID | None:
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        ret = self.heads.pop()
        try:
            ps = self.get_parents(ret)
        except KeyError:
            return None
        self.parents[ret] = ps
        # Parents not yet visited become new heads to explore.
        self.heads.update(p for p in ps if p not in self.parents)
        return ret

    __next__ = next

3023 

3024 

def commit_tree_changes(
    object_store: BaseObjectStore,
    tree: ObjectID | Tree,
    changes: Sequence[tuple[bytes, int | None, ObjectID | None]],
) -> ObjectID:
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
        object_store: Object store to store new objects in
            and retrieve old ones from.
        tree: Original tree root (SHA or Tree object)
        changes: changes to apply
    Returns: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    # Handle both Tree object and SHA
    if isinstance(tree, Tree):
        # NOTE: when a Tree object is passed, it is mutated in place below.
        tree_obj: Tree = tree
    else:
        sha_obj = object_store[tree]
        assert isinstance(sha_obj, Tree)
        tree_obj = sha_obj
    # Changes under a subdirectory are grouped per top-level directory name
    # and applied recursively below.
    nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {}
    for path, new_mode, new_sha in changes:
        try:
            (dirname, subpath) = path.split(b"/", 1)
        except ValueError:
            # No slash: the change applies directly to this tree level.
            if new_sha is None:
                del tree_obj[path]
            else:
                assert new_mode is not None
                tree_obj[path] = (new_mode, new_sha)
        else:
            nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree_id: ObjectID | Tree = tree_obj[name][1]
        except KeyError:
            # For new directories, pass an empty Tree object
            orig_subtree_id = Tree()
        subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
        subtree = object_store[subtree_id]
        assert isinstance(subtree, Tree)
        if len(subtree) == 0:
            # Git does not track empty directories; drop the entry entirely.
            # NOTE(review): if the subtree was newly created and ended up
            # empty, this `del` presumably raises KeyError — confirm callers
            # never request deletions inside a nonexistent directory.
            del tree_obj[name]
        else:
            tree_obj[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree_obj)
    return tree_obj.id

3086 

3087 

class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores."""

    def __init__(
        self,
        bases: list[BaseObjectStore],
        add_store: BaseObjectStore | None = None,
    ) -> None:
        """Initialize an OverlayObjectStore.

        Args:
            bases: List of base object stores to overlay
            add_store: Optional store to write new objects to

        Raises:
            ValueError: If stores have different hash algorithms
        """
        from .object_format import verify_same_object_format

        # Verify all stores use the same hash algorithm
        store_algorithms = [store.object_format for store in bases]
        if add_store:
            store_algorithms.append(add_store.object_format)

        object_format = verify_same_object_format(*store_algorithms)

        super().__init__(object_format=object_format)
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object: ShaFile) -> None:
        """Add a single object to the store.

        Args:
            object: Object to add

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> Pack | None:
        """Add multiple objects to the store.

        Args:
            objects: Iterator of objects to add
            progress: Optional progress reporting callback

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Fixed: previously referenced self.add_object in the error.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self) -> list[Pack]:
        """Get the list of packs from all overlaid stores.

        Returns:
            Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
            Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[ObjectID] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of unpacked objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo: set[ObjectID | RawObjectID] = set(shas)
        found: set[ObjectID] = set()

        for b in self.bases:
            # Pass a fresh copy for each base instead of mutating `todo`
            # while the base's (possibly lazy) iterator is consuming it —
            # mirrors iterobjects_subset above and avoids a
            # "set changed size during iteration" error. Tracking found
            # SHAs separately also tolerates duplicate yields, which the
            # previous `todo.remove(...)` turned into a KeyError.
            current_todo = todo - found
            for o in b.iter_unpacked_subset(
                current_todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                found.add(o.sha())

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]:
        """Get the raw object data from the overlaid stores.

        Args:
            sha_id: SHA of the object

        Returns:
            Tuple of (type_num, raw_data)

        Raises:
            KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is packed in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is loose in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False

3285 

3286 

def read_packs_file(f: BinaryIO) -> Iterator[str]:
    """Yield the packs listed in a packs file."""
    for entry in f.read().splitlines():
        if not entry:
            continue
        (prefix, pack_name) = entry.split(b" ", 1)
        if prefix == b"P":
            yield os.fsdecode(pack_name)

3296 

3297 

class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend.

    Everything is stored as packs; loose-object operations are no-ops.
    Subclasses implement the actual bucket access via _iter_pack_names,
    _get_pack, _upload_pack and _remove_pack_by_name.
    """

    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects.

        Bucket stores keep everything packed, so this is always empty.
        """
        return iter([])

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None:
        # No loose objects exist in a bucket store.
        return None

    def delete_loose_object(self, sha: ObjectID) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
            sha: SHA of the object to delete
        """
        # Doesn't exist..

    def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
        """Pack loose objects. Returns number of objects packed.

        BucketBasedObjectStore doesn't support loose objects, so this is a no-op.

        Args:
            progress: Optional progress reporting callback (ignored)
        """
        return 0

    def _remove_pack_by_name(self, name: str) -> None:
        """Remove a pack by name. Subclasses should implement this."""
        raise NotImplementedError(self._remove_pack_by_name)

    def _iter_pack_names(self) -> Iterator[str]:
        # Subclasses yield the basenames of packs stored in the bucket.
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name: str) -> Pack:
        # Subclasses open and return the Pack for the given basename.
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self) -> list[Pack]:
        # Synchronize the in-memory pack cache with the bucket contents;
        # returns only the packs that newly appeared.
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(
        self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
    ) -> None:
        # Subclasses upload the pack and its index to the bucket.
        raise NotImplementedError

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        # Spool in memory up to PACK_SPOOL_FILE_MAX_SIZE, then spill to disk.
        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit() -> Pack | None:
            # Nothing was written: treat as an aborted/empty pack.
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)

            # Build the index from the received pack data; the pack's name
            # is derived from the sorted object SHAs it contains.
            p = PackData(pf.name, file=pf, object_format=self.object_format)
            entries = p.sorted_entries()
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf, self.object_format)
            # If an identical pack already exists, skip the upload and
            # return the cached one instead.
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)  # type: ignore[arg-type]
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        # Closing the spooled file without committing discards the data,
        # so pf.close doubles as the abort callback.
        return pf, commit, pf.close  # type: ignore[return-value]

3405 

3406 

3407def _collect_ancestors( 

3408 store: ObjectContainer, 

3409 heads: Iterable[ObjectID], 

3410 common: frozenset[ObjectID] = frozenset(), 

3411 shallow: frozenset[ObjectID] = frozenset(), 

3412 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents, 

3413) -> tuple[set[ObjectID], set[ObjectID]]: 

3414 """Collect all ancestors of heads up to (excluding) those in common. 

3415 

3416 Args: 

3417 store: Object store to get commits from 

3418 heads: commits to start from 

3419 common: commits to end at, or empty set to walk repository 

3420 completely 

3421 shallow: Set of shallow commits 

3422 get_parents: Optional function for getting the parents of a 

3423 commit. 

3424 Returns: a tuple (A, B) where A - all commits reachable 

3425 from heads but not present in common, B - common (shared) elements 

3426 that are directly reachable from heads 

3427 """ 

3428 bases = set() 

3429 commits = set() 

3430 queue: list[ObjectID] = [] 

3431 queue.extend(heads) 

3432 

3433 # Try to use commit graph if available 

3434 commit_graph = store.get_commit_graph() 

3435 

3436 while queue: 

3437 e = queue.pop(0) 

3438 if e in common: 

3439 bases.add(e) 

3440 elif e not in commits: 

3441 commits.add(e) 

3442 if e in shallow: 

3443 continue 

3444 

3445 # Try to use commit graph for parent lookup 

3446 parents = None 

3447 if commit_graph: 

3448 parents = commit_graph.get_parents(e) 

3449 

3450 if parents is None: 

3451 # Fall back to loading the object 

3452 cmt = store[e] 

3453 assert isinstance(cmt, Commit) 

3454 parents = get_parents(cmt) 

3455 

3456 queue.extend(parents) 

3457 return (commits, bases) 

3458 

3459 

def iter_tree_contents(
    store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
) -> Iterator[TreeEntry]:
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
        store: Object store to get trees from
        tree_id: SHA1 of the tree.
        include_trees: If True, include tree objects in the iteration.

    Yields: TreeEntry namedtuples for all the objects in a tree.
    """
    if tree_id is None:
        return
    # Explicit stack for the depth-first walk, seeded with the root tree.
    stack = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while stack:
        current = stack.pop()
        assert current.mode is not None
        is_dir = stat.S_ISDIR(current.mode)
        if is_dir:
            assert current.sha is not None
            tree = store[current.sha]
            assert isinstance(tree, Tree)
            children = []
            for child in tree.iteritems(name_order=True):
                assert current.path is not None
                children.append(child.in_path(current.path))
            # Push in reverse so the stack pops children in name order.
            stack.extend(reversed(children))
        if include_trees or not is_dir:
            yield current

3493 

3494 

def iter_commit_contents(
    store: ObjectContainer,
    commit: Commit | ObjectID | RawObjectID,
    *,
    include: Sequence[str | bytes | Path] | None = None,
) -> Iterator[TreeEntry]:
    """Iterate the contents of the repository at the specified commit.

    This is a wrapper around iter_tree_contents() and
    tree_lookup_path() to simplify the common task of getting the
    contents of a repo at a particular commit. See also
    dulwich.index.build_file_from_blob() for writing individual files
    to disk.

    Args:
        store: Object store to get trees from
        commit: Commit object, or SHA1 of a commit
        include: if provided, only the entries whose paths are in the
            list, or whose parent tree is in the list, will be
            included. Note that duplicate or overlapping paths
            (e.g. ["foo", "foo/bar"]) may result in duplicate entries

    Yields: TreeEntry namedtuples for all matching files in a commit.
    """
    sha = commit.id if isinstance(commit, Commit) else commit
    if not isinstance(obj := store[sha], Commit):
        raise TypeError(
            f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
        )
    commit = obj
    # Paths are encoded with the commit's own encoding, defaulting to UTF-8.
    encoding = commit.encoding or "utf-8"
    # Normalize every include entry to bytes; no include list means the
    # whole tree (b"" resolves to the root).
    include_bytes: list[bytes] = (
        [
            path if isinstance(path, bytes) else str(path).encode(encoding)
            for path in include
        ]
        if include is not None
        else [b""]
    )

    for path in include_bytes:
        mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
        # Iterate all contained files if path points to a dir, otherwise just get that
        # single file
        if isinstance(store[obj_id], Tree):
            for entry in iter_tree_contents(store, obj_id):
                yield entry.in_path(path)
        else:
            yield TreeEntry(path, mode, obj_id)

3544 

3545 

def peel_sha(
    store: ObjectContainer, sha: ObjectID | RawObjectID
) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
        store: Object store to get objects from
        sha: The object SHA to peel.
    Returns: The fully-peeled SHA1 of a tag object, after peeling all
        intermediate tags; if the original ref does not point to a tag,
        this will equal the original SHA1.
    """
    # Keep the unpeeled object for the return value, then follow the
    # chain of tag targets until something other than a Tag is reached.
    unpeeled = pointer = store[sha]
    kind = object_class(pointer.type_name)
    while kind is Tag:
        assert isinstance(pointer, Tag)
        kind, target = pointer.object
        pointer = store[target]
    return unpeeled, pointer

3565 

3566 

class GraphTraversalReachability:
    """Naive graph traversal implementation of ObjectReachabilityProvider.

    This implementation wraps existing graph traversal functions
    (_collect_ancestors, _collect_filetree_revs) to provide the standard
    reachability interface without any performance optimizations.
    """

    def __init__(self, object_store: BaseObjectStore) -> None:
        """Initialize the graph traversal provider.

        Args:
            object_store: Object store to query
        """
        self.store = object_store

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Uses _collect_ancestors for commit traversal.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        excluded = frozenset(exclude) if exclude else frozenset()
        boundaries = frozenset(shallow) if shallow else frozenset()
        reachable, _unused_bases = _collect_ancestors(
            self.store, heads, excluded, boundaries
        )
        return reachable

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Uses _collect_filetree_revs for tree traversal.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        collected: set[ObjectID] = set()
        for sha in tree_shas:
            # Accumulates into the shared set across all starting trees.
            _collect_filetree_revs(self.store, sha, collected)
        return collected

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs)
        """
        wanted = set(commits)
        reachable = set(wanted)

        # Collect the root trees of every available commit; commits that
        # are not present in the store are silently skipped.
        roots = []
        for commit_sha in wanted:
            try:
                obj = self.store[commit_sha]
                if isinstance(obj, Commit):
                    roots.append(obj.tree)
            except KeyError:
                continue

        # Add every tree/blob reachable from those root trees.
        reachable.update(self.get_tree_objects(roots))

        # Remove everything reachable from the excluded commits, if any.
        if exclude_commits:
            reachable -= self.get_reachable_objects(exclude_commits, None)

        return reachable

3665 

class BitmapReachability:
    """Bitmap-accelerated implementation of ObjectReachabilityProvider.

    This implementation uses packfile bitmap indexes where available to
    accelerate reachability queries. Falls back to graph traversal when
    bitmaps don't cover the requested commits.
    """

    def __init__(self, object_store: "PackBasedObjectStore") -> None:
        """Initialize the bitmap provider.

        Args:
            object_store: Pack-based object store with bitmap support
        """
        self.store = object_store
        # Fallback to graph traversal for operations not yet optimized
        self._fallback = GraphTraversalReachability(object_store)

    def _combine_commit_bitmaps(
        self,
        commit_shas: set[ObjectID],
        exclude_shas: set[ObjectID] | None = None,
    ) -> tuple["EWAHBitmap", "Pack"] | None:
        """Combine bitmaps for multiple commits using OR, with optional exclusion.

        All bitmaps must come from a single pack — bit positions are only
        meaningful relative to one pack's index — otherwise this returns
        None and the caller falls back to graph traversal.

        Args:
            commit_shas: Set of commit SHAs to combine
            exclude_shas: Optional set of commit SHAs to exclude

        Returns:
            Tuple of (combined_bitmap, pack) or None if bitmaps unavailable
        """
        from .bitmap import find_commit_bitmaps

        # Find bitmaps for the commits
        commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)

        # If we can't find bitmaps for all commits, return None
        if len(commit_bitmaps) < len(commit_shas):
            return None

        # Combine bitmaps using OR
        combined_bitmap = None
        result_pack = None

        for commit_sha in commit_shas:
            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)

            if commit_bitmap is None:
                return None

            if combined_bitmap is None:
                combined_bitmap = commit_bitmap
                result_pack = pack
            elif pack == result_pack:
                # Same pack, can OR directly
                combined_bitmap = combined_bitmap | commit_bitmap
            else:
                # Different packs, can't combine
                return None

        # Handle exclusions if provided
        if exclude_shas and result_pack and combined_bitmap:
            # Exclusion bitmaps must live in the same pack as the result.
            exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])

            if len(exclude_bitmaps) == len(exclude_shas):
                # All excludes have bitmaps, compute exclusion
                exclude_combined = None

                for commit_sha in exclude_shas:
                    _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)

                    if exclude_bitmap is None:
                        # NOTE(review): a missing exclude bitmap silently
                        # skips the remaining excludes (break, not return) —
                        # confirm partial exclusion is intended here.
                        break

                    if exclude_combined is None:
                        exclude_combined = exclude_bitmap
                    else:
                        exclude_combined = exclude_combined | exclude_bitmap

                # Subtract excludes using set difference
                if exclude_combined:
                    combined_bitmap = combined_bitmap - exclude_combined

        if combined_bitmap and result_pack:
            # NOTE(review): truthiness test — if an all-zero EWAH bitmap is
            # falsy, a legitimately empty result falls through to None
            # (and thus to graph-traversal fallback); confirm that is the
            # desired behavior.
            return (combined_bitmap, result_pack)
        return None

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads using bitmaps where possible.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        from .bitmap import bitmap_to_object_shas

        # If shallow is specified, fall back to graph traversal
        # (bitmaps don't support shallow boundaries well)
        if shallow:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        heads_set = set(heads)
        exclude_set = set(exclude) if exclude else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(heads_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        combined_bitmap, result_pack = result

        # Convert bitmap to commit SHAs, filtering for commits only
        pack_bitmap = result_pack.bitmap
        if pack_bitmap is None:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)
        # The pack's type bitmap restricts the result to commit objects.
        commit_type_filter = pack_bitmap.commit_bitmap
        return bitmap_to_object_shas(
            combined_bitmap, result_pack.index, commit_type_filter
        )

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        # Tree traversal doesn't benefit much from bitmaps, use fallback
        return self._fallback.get_tree_objects(tree_shas)

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects reachable from commits using bitmaps.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs)
        """
        from .bitmap import bitmap_to_object_shas

        commits_set = set(commits)
        exclude_set = set(exclude_commits) if exclude_commits else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(commits_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_objects(commits, exclude_commits)

        combined_bitmap, result_pack = result

        # Convert bitmap to all object SHAs (no type filter)
        return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)