Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%


1236 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set 

33from contextlib import suppress 

34from io import BytesIO 

35from pathlib import Path 

36from typing import ( 

37 TYPE_CHECKING, 

38 BinaryIO, 

39 Optional, 

40 Protocol, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile, _GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

85if TYPE_CHECKING: 

86 from .commit_graph import CommitGraph 

87 from .config import Config 

88 from .diff_tree import RenameDetector 

89 

90 

91class GraphWalker(Protocol): 

92 """Protocol for graph walker objects.""" 

93 

94 def __next__(self) -> bytes | None: 

95 """Return the next object SHA to visit.""" 

96 ... 

97 

98 def ack(self, sha: bytes) -> None: 

99 """Acknowledge that an object has been received.""" 

100 ... 

101 

102 def nak(self) -> None: 

103 """Nothing in common was found.""" 

104 ... 

105 

106 

107INFODIR = "info" 

108PACKDIR = "pack" 

109 

110# use permissions consistent with Git; just readable by everyone 

111# TODO: should packs also be non-writable on Windows? if so, that 

112# would require some rather significant adjustments to the test suite 

113PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

114 

115# Grace period for cleaning up temporary pack files (in seconds) 

116# Matches git's default of 2 weeks 

117DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

118 

119 

120def find_shallow( 

121 store: ObjectContainer, heads: Iterable[bytes], depth: int 

122) -> tuple[set[bytes], set[bytes]]: 

123 """Find shallow commits according to a given depth. 

124 

125 Args: 

126 store: An ObjectStore for looking up objects. 

127 heads: Iterable of head SHAs to start walking from. 

128 depth: The depth of ancestors to include. A depth of one includes 

129 only the heads themselves. 

130 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

131 considered shallow and unshallow according to the arguments. Note that 

132 these sets may overlap if a commit is reachable along multiple paths. 

133 """ 

134 parents: dict[bytes, list[bytes]] = {} 

135 commit_graph = store.get_commit_graph() 

136 

137 def get_parents(sha: bytes) -> list[bytes]: 

138 result = parents.get(sha, None) 

139 if not result: 

140 # Try to use commit graph first if available 

141 if commit_graph: 

142 graph_parents = commit_graph.get_parents(sha) 

143 if graph_parents is not None: 

144 result = graph_parents 

145 parents[sha] = result 

146 return result 

147 # Fall back to loading the object 

148 commit = store[sha] 

149 assert isinstance(commit, Commit) 

150 result = commit.parents 

151 parents[sha] = result 

152 return result 

153 

154 todo = [] # stack of (sha, depth) 

155 for head_sha in heads: 

156 obj = store[head_sha] 

157 # Peel tags if necessary 

158 while isinstance(obj, Tag): 

159 _, sha = obj.object 

160 obj = store[sha] 

161 if isinstance(obj, Commit): 

162 todo.append((obj.id, 1)) 

163 

164 not_shallow = set() 

165 shallow = set() 

166 while todo: 

167 sha, cur_depth = todo.pop() 

168 if cur_depth < depth: 

169 not_shallow.add(sha) 

170 new_depth = cur_depth + 1 

171 todo.extend((p, new_depth) for p in get_parents(sha)) 

172 else: 

173 shallow.add(sha) 

174 

175 return shallow, not_shallow 

176 
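# Editor's illustrative sketch (not part of dulwich): driving find_shallow()
# for a depth-limited fetch. Assumes `store` is any ObjectContainer and `head`
# is the SHA of a commit already present in it.
def _example_find_shallow(store, head):
    # With depth=2 the head is reported as not shallow and its parents form
    # the shallow boundary; the two sets may overlap on merge-heavy histories.
    shallow, not_shallow = find_shallow(store, [head], depth=2)
    return shallow, not_shallow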

177 

178def get_depth( 

179 store: ObjectContainer, 

180 head: bytes, 

181 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

182 max_depth: int | None = None, 

183) -> int: 

184 """Return the current available depth for the given head. 

185 

186 For commits with multiple parents, the largest possible depth will be 

187 returned. 

188 

189 Args: 

190 store: Object store to search in 

191 head: commit to start from 

192 get_parents: optional function for getting the parents of a commit 

193 max_depth: maximum depth to search 

194 """ 

195 if head not in store: 

196 return 0 

197 current_depth = 1 

198 queue = [(head, current_depth)] 

199 commit_graph = store.get_commit_graph() 

200 

201 while queue and (max_depth is None or current_depth < max_depth): 

202 e, depth = queue.pop(0) 

203 current_depth = max(current_depth, depth) 

204 

205 # Try to use commit graph for parent lookup if available 

206 parents = None 

207 if commit_graph: 

208 parents = commit_graph.get_parents(e) 

209 

210 if parents is None: 

211 # Fall back to loading the object 

212 cmt = store[e] 

213 if isinstance(cmt, Tag): 

214 _cls, sha = cmt.object 

215 cmt = store[sha] 

216 parents = get_parents(cmt) 

217 

218 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

219 return current_depth 

220 
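# Editor's illustrative sketch (not part of dulwich): get_depth() reports how
# much history is locally available below a commit, which is what
# determine_wants_all() consults when deciding whether to deepen a shallow
# clone. Assumes `store` and a commit SHA `head`.
def _example_history_depth(store, head):
    # 0 means the commit is absent; otherwise the longest parent chain found,
    # with the walk stopping once max_depth is reached.
    return get_depth(store, head, max_depth=50)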

221 

222class PackContainer(Protocol): 

223 """Protocol for containers that can accept pack files.""" 

224 

225 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

226 """Add a new pack.""" 

227 

228 

229class BaseObjectStore: 

230 """Object store interface.""" 

231 

232 def determine_wants_all( 

233 self, refs: Mapping[Ref, ObjectID], depth: int | None = None 

234 ) -> list[ObjectID]: 

235 """Determine which objects are wanted based on refs.""" 

236 

237 def _want_deepen(sha: bytes) -> bool: 

238 if not depth: 

239 return False 

240 if depth == DEPTH_INFINITE: 

241 return True 

242 return depth > self._get_depth(sha) 

243 

244 return [ 

245 sha 

246 for (ref, sha) in refs.items() 

247 if (sha not in self or _want_deepen(sha)) 

248 and not ref.endswith(PEELED_TAG_SUFFIX) 

249 and not sha == ZERO_SHA 

250 ] 

251 
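# Editor's illustrative sketch (not part of dulwich): determine_wants_all()
# reduces a remote ref dict to the object IDs worth fetching. Assumes `store`
# is a BaseObjectStore subclass and `refs` maps ref names to SHAs.
def _example_wants(store, refs):
    # Already-present SHAs, peeled ^{} refs and the zero SHA are filtered out.
    return store.determine_wants_all(refs)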

252 def contains_loose(self, sha: bytes) -> bool: 

253 """Check if a particular object is present by SHA1 and is loose.""" 

254 raise NotImplementedError(self.contains_loose) 

255 

256 def contains_packed(self, sha: bytes) -> bool: 

257 """Check if a particular object is present by SHA1 and is packed.""" 

258 return False # Default implementation for stores that don't support packing 

259 

260 def __contains__(self, sha1: bytes) -> bool: 

261 """Check if a particular object is present by SHA1. 

262 

263 This method makes no distinction between loose and packed objects. 

264 """ 

265 return self.contains_loose(sha1) 

266 

267 @property 

268 def packs(self) -> list[Pack]: 

269 """Iterable of pack objects.""" 

270 raise NotImplementedError 

271 

272 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

273 """Obtain the raw text for an object. 

274 

275 Args: 

276 name: sha for the object. 

277 Returns: tuple with numeric type and object contents. 

278 """ 

279 raise NotImplementedError(self.get_raw) 

280 

281 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

282 """Obtain an object by SHA1.""" 

283 type_num, uncomp = self.get_raw(sha1) 

284 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

285 

286 def __iter__(self) -> Iterator[bytes]: 

287 """Iterate over the SHAs that are present in this store.""" 

288 raise NotImplementedError(self.__iter__) 

289 

290 def add_object(self, obj: ShaFile) -> None: 

291 """Add a single object to this object store.""" 

292 raise NotImplementedError(self.add_object) 

293 

294 def add_objects( 

295 self, 

296 objects: Sequence[tuple[ShaFile, str | None]], 

297 progress: Callable[..., None] | None = None, 

298 ) -> Optional["Pack"]: 

299 """Add a set of objects to this object store. 

300 

301 Args: 

302 objects: Iterable over a list of (object, path) tuples 

303 progress: Optional progress callback 

304 """ 

305 raise NotImplementedError(self.add_objects) 

306 

307 def tree_changes( 

308 self, 

309 source: bytes | None, 

310 target: bytes | None, 

311 want_unchanged: bool = False, 

312 include_trees: bool = False, 

313 change_type_same: bool = False, 

314 rename_detector: Optional["RenameDetector"] = None, 

315 paths: Sequence[bytes] | None = None, 

316 ) -> Iterator[ 

317 tuple[ 

318 tuple[bytes | None, bytes | None], 

319 tuple[int | None, int | None], 

320 tuple[bytes | None, bytes | None], 

321 ] 

322 ]: 

323 """Find the differences between the contents of two trees. 

324 

325 Args: 

326 source: SHA1 of the source tree 

327 target: SHA1 of the target tree 

328 want_unchanged: Whether unchanged files should be reported 

329 include_trees: Whether to include trees 

330 change_type_same: Whether to report files changing 

331 type in the same entry. 

332 rename_detector: RenameDetector object for detecting renames. 

333 paths: Optional list of paths to filter to (as bytes). 

334 Returns: Iterator over tuples with 

335 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

336 """ 

337 from .diff_tree import tree_changes 

338 

339 for change in tree_changes( 

340 self, 

341 source, 

342 target, 

343 want_unchanged=want_unchanged, 

344 include_trees=include_trees, 

345 change_type_same=change_type_same, 

346 rename_detector=rename_detector, 

347 paths=paths, 

348 ): 

349 old_path = change.old.path if change.old is not None else None 

350 new_path = change.new.path if change.new is not None else None 

351 old_mode = change.old.mode if change.old is not None else None 

352 new_mode = change.new.mode if change.new is not None else None 

353 old_sha = change.old.sha if change.old is not None else None 

354 new_sha = change.new.sha if change.new is not None else None 

355 yield ( 

356 (old_path, new_path), 

357 (old_mode, new_mode), 

358 (old_sha, new_sha), 

359 ) 

360 

361 def iter_tree_contents( 

362 self, tree_id: bytes, include_trees: bool = False 

363 ) -> Iterator[TreeEntry]: 

364 """Iterate the contents of a tree and all subtrees. 

365 

366 Iteration is depth-first pre-order, as in e.g. os.walk. 

367 

368 Args: 

369 tree_id: SHA1 of the tree. 

370 include_trees: If True, include tree objects in the iteration. 

371 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

372 tree. 

373 """ 

374 warnings.warn( 

375 "Please use dulwich.object_store.iter_tree_contents", 

376 DeprecationWarning, 

377 stacklevel=2, 

378 ) 

379 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

380 

381 def iterobjects_subset( 

382 self, shas: Iterable[bytes], *, allow_missing: bool = False 

383 ) -> Iterator[ShaFile]: 

384 """Iterate over a subset of objects in the store. 

385 

386 Args: 

387 shas: Iterable of object SHAs to retrieve 

388 allow_missing: If True, skip missing objects; if False, raise KeyError 

389 

390 Returns: 

391 Iterator of ShaFile objects 

392 

393 Raises: 

394 KeyError: If an object is missing and allow_missing is False 

395 """ 

396 for sha in shas: 

397 try: 

398 yield self[sha] 

399 except KeyError: 

400 if not allow_missing: 

401 raise 

402 

403 def iter_unpacked_subset( 

404 self, 

405 shas: Iterable[bytes], 

406 include_comp: bool = False, 

407 allow_missing: bool = False, 

408 convert_ofs_delta: bool = True, 

409 ) -> "Iterator[UnpackedObject]": 

410 """Iterate over unpacked objects for a subset of SHAs. 

411 

412 Default implementation that converts ShaFile objects to UnpackedObject. 

413 Subclasses may override for more efficient unpacked access. 

414 

415 Args: 

416 shas: Iterable of object SHAs to retrieve 

417 include_comp: Whether to include compressed data (ignored in base implementation) 

418 allow_missing: If True, skip missing objects; if False, raise KeyError 

419 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation) 

420 

421 Returns: 

422 Iterator of UnpackedObject instances 

423 

424 Raises: 

425 KeyError: If an object is missing and allow_missing is False 

426 """ 

427 from .pack import UnpackedObject 

428 

429 for sha in shas: 

430 try: 

431 obj = self[sha] 

432 # Convert ShaFile to UnpackedObject 

433 unpacked = UnpackedObject( 

434 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id 

435 ) 

436 yield unpacked 

437 except KeyError: 

438 if not allow_missing: 

439 raise 

440 

441 def find_missing_objects( 

442 self, 

443 haves: Iterable[bytes], 

444 wants: Iterable[bytes], 

445 shallow: Set[bytes] | None = None, 

446 progress: Callable[..., None] | None = None, 

447 get_tagged: Callable[[], dict[bytes, bytes]] | None = None, 

448 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

449 ) -> Iterator[tuple[bytes, PackHint | None]]: 

450 """Find the missing objects required for a set of revisions. 

451 

452 Args: 

453 haves: Iterable over SHAs already in common. 

454 wants: Iterable over SHAs of objects to fetch. 

455 shallow: Set of shallow commit SHA1s to skip 

456 progress: Simple progress function that will be called with 

457 updated progress strings. 

458 get_tagged: Function that returns a dict of pointed-to sha -> 

459 tag sha for including tags. 

460 get_parents: Optional function for getting the parents of a 

461 commit. 

462 Returns: Iterator over (sha, pack hint) tuples. 

463 """ 

464 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

465 finder = MissingObjectFinder( 

466 self, 

467 haves=haves, 

468 wants=wants, 

469 shallow=shallow, 

470 progress=progress, 

471 get_tagged=get_tagged, 

472 get_parents=get_parents, 

473 ) 

474 return iter(finder) 

475 
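# Editor's illustrative sketch (not part of dulwich): the replacement spelled
# out by the deprecation warning above. Assumes `store` plus `haves`/`wants`
# collections of SHAs.
def _example_missing_objects(store, haves, wants):
    finder = MissingObjectFinder(store, haves=haves, wants=wants)
    # Yields a (sha, pack hint) tuple for every object the other side lacks.
    return list(finder)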

476 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

477 """Find which revisions this store has in common using graphwalker. 

478 

479 Args: 

480 graphwalker: A graphwalker object. 

481 Returns: List of SHAs that are in common 

482 """ 

483 haves = [] 

484 sha = next(graphwalker) 

485 while sha: 

486 if sha in self: 

487 haves.append(sha) 

488 graphwalker.ack(sha) 

489 sha = next(graphwalker) 

490 return haves 

491 
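# Editor's illustrative sketch (not part of dulwich): a minimal object
# satisfying the GraphWalker protocol defined above, walking a fixed list of
# candidate SHAs. Handy for exercising find_common_revisions() in tests.
class _ExampleGraphWalker:
    def __init__(self, candidates):
        self._candidates = list(candidates)
        self.acked = []

    def __next__(self):
        return self._candidates.pop(0) if self._candidates else None

    def ack(self, sha):
        self.acked.append(sha)

    def nak(self):
        pass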

492 def generate_pack_data( 

493 self, 

494 have: Iterable[bytes], 

495 want: Iterable[bytes], 

496 *, 

497 shallow: Set[bytes] | None = None, 

498 progress: Callable[..., None] | None = None, 

499 ofs_delta: bool = True, 

500 ) -> tuple[int, Iterator[UnpackedObject]]: 

501 """Generate pack data objects for a set of wants/haves. 

502 

503 Args: 

504 have: List of SHA1s of objects that should not be sent 

505 want: List of SHA1s of objects that should be sent 

506 shallow: Set of shallow commit SHA1s to skip 

507 ofs_delta: Whether OFS deltas can be included 

508 progress: Optional progress reporting method 

509 """ 

510 # Note that the pack-specific implementation below is more efficient, 

511 # as it reuses deltas 

512 missing_objects = MissingObjectFinder( 

513 self, haves=have, wants=want, shallow=shallow, progress=progress 

514 ) 

515 object_ids = list(missing_objects) 

516 return pack_objects_to_data( 

517 [(self[oid], path) for oid, path in object_ids], 

518 ofs_delta=ofs_delta, 

519 progress=progress, 

520 ) 

521 
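# Editor's illustrative sketch (not part of dulwich): generate_pack_data()
# pairs naturally with write_pack_data() to stream a pack. Assumes `store`,
# iterables `haves` and `wants`, and a writable binary file `outf`.
def _example_stream_pack(store, haves, wants, outf):
    count, records = store.generate_pack_data(haves, wants, ofs_delta=False)
    write_pack_data(outf.write, records, num_records=count)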

522 def peel_sha(self, sha: bytes) -> bytes: 

523 """Peel all tags from a SHA. 

524 

525 Args: 

526 sha: The object SHA to peel. 

527 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

528 intermediate tags; if the original ref does not point to a tag, 

529 this will equal the original SHA1. 

530 """ 

531 warnings.warn( 

532 "Please use dulwich.object_store.peel_sha()", 

533 DeprecationWarning, 

534 stacklevel=2, 

535 ) 

536 return peel_sha(self, sha)[1].id 

537 

538 def _get_depth( 

539 self, 

540 head: bytes, 

541 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

542 max_depth: int | None = None, 

543 ) -> int: 

544 """Return the current available depth for the given head. 

545 

546 For commits with multiple parents, the largest possible depth will be 

547 returned. 

548 

549 Args: 

550 head: commit to start from 

551 get_parents: optional function for getting the parents of a commit 

552 max_depth: maximum depth to search 

553 """ 

554 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

555 

556 def close(self) -> None: 

557 """Close any files opened by this object store.""" 

558 # Default implementation is a NO-OP 

559 

560 def prune(self, grace_period: int | None = None) -> None: 

561 """Prune/clean up this object store. 

562 

563 This includes removing orphaned temporary files and other 

564 housekeeping tasks. Default implementation is a NO-OP. 

565 

566 Args: 

567 grace_period: Grace period in seconds for removing temporary files. 

568 If None, uses the default grace period. 

569 """ 

570 # Default implementation is a NO-OP 

571 

572 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

573 """Iterate over all SHA1s that start with a given prefix. 

574 

575 The default implementation is a naive iteration over all objects. 

576 However, subclasses may override this method with more efficient 

577 implementations. 

578 """ 

579 for sha in self: 

580 if sha.startswith(prefix): 

581 yield sha 

582 
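# Editor's illustrative sketch (not part of dulwich): resolving an abbreviated
# hex SHA via iter_prefix(). Assumes `store` and a short hex prefix such as
# b"deadbe".
def _example_expand_prefix(store, prefix):
    matches = list(store.iter_prefix(prefix))
    if len(matches) != 1:
        raise KeyError(f"ambiguous or unknown prefix {prefix!r}")
    return matches[0]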

583 def get_commit_graph(self) -> Optional["CommitGraph"]: 

584 """Get the commit graph for this object store. 

585 

586 Returns: 

587 CommitGraph object if available, None otherwise 

588 """ 

589 return None 

590 

591 def write_commit_graph( 

592 self, refs: Sequence[bytes] | None = None, reachable: bool = True 

593 ) -> None: 

594 """Write a commit graph file for this object store. 

595 

596 Args: 

597 refs: List of refs to include. If None, includes all refs from object store. 

598 reachable: If True, includes all commits reachable from refs. 

599 If False, only includes the direct ref targets. 

600 

601 Note: 

602 The base implementation raises NotImplementedError; subclasses should override 

603 this method to provide commit graph writing functionality. 

604 """ 

605 raise NotImplementedError(self.write_commit_graph) 

606 

607 def get_object_mtime(self, sha: bytes) -> float: 

608 """Get the modification time of an object. 

609 

610 Args: 

611 sha: SHA1 of the object 

612 

613 Returns: 

614 Modification time as seconds since epoch 

615 

616 Raises: 

617 KeyError: if the object is not found 

618 """ 

619 # Default implementation raises KeyError 

620 # Subclasses should override to provide actual mtime 

621 raise KeyError(sha) 

622 

623 

624class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer): 

625 """Object store that supports pack operations. 

626 

627 This is a base class for object stores that can handle pack files, 

628 including both disk-based and memory-based stores. 

629 """ 

630 

631 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

632 """Add a new pack to this object store. 

633 

634 Returns: Tuple of (file, commit_func, abort_func) 

635 """ 

636 raise NotImplementedError(self.add_pack) 

637 

638 def add_pack_data( 

639 self, 

640 count: int, 

641 unpacked_objects: Iterator["UnpackedObject"], 

642 progress: Callable[..., None] | None = None, 

643 ) -> Optional["Pack"]: 

644 """Add pack data to this object store. 

645 

646 Args: 

647 count: Number of objects 

648 unpacked_objects: Iterator over unpacked objects 

649 progress: Optional progress callback 

650 """ 

651 raise NotImplementedError(self.add_pack_data) 

652 

653 def get_unpacked_object( 

654 self, sha1: bytes, *, include_comp: bool = False 

655 ) -> "UnpackedObject": 

656 """Get a raw unresolved object. 

657 

658 Args: 

659 sha1: SHA-1 hash of the object 

660 include_comp: Whether to include compressed data 

661 

662 Returns: 

663 UnpackedObject instance 

664 """ 

665 from .pack import UnpackedObject 

666 

667 obj = self[sha1] 

668 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks()) 

669 

670 def iterobjects_subset( 

671 self, shas: Iterable[bytes], *, allow_missing: bool = False 

672 ) -> Iterator[ShaFile]: 

673 """Iterate over a subset of objects. 

674 

675 Args: 

676 shas: Iterable of object SHAs to retrieve 

677 allow_missing: If True, skip missing objects 

678 

679 Returns: 

680 Iterator of ShaFile objects 

681 """ 

682 for sha in shas: 

683 try: 

684 yield self[sha] 

685 except KeyError: 

686 if not allow_missing: 

687 raise 

688 

689 

690class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer): 

691 """Object store that uses pack files for storage. 

692 

693 This class provides a base implementation for object stores that use 

694 Git pack files as their primary storage mechanism. It handles caching 

695 of open pack files and provides configuration for pack file operations. 

696 """ 

697 

698 def __init__( 

699 self, 

700 pack_compression_level: int = -1, 

701 pack_index_version: int | None = None, 

702 pack_delta_window_size: int | None = None, 

703 pack_window_memory: int | None = None, 

704 pack_delta_cache_size: int | None = None, 

705 pack_depth: int | None = None, 

706 pack_threads: int | None = None, 

707 pack_big_file_threshold: int | None = None, 

708 ) -> None: 

709 """Initialize a PackBasedObjectStore. 

710 

711 Args: 

712 pack_compression_level: Compression level for pack files (-1 to 9) 

713 pack_index_version: Pack index version to use 

714 pack_delta_window_size: Window size for delta compression 

715 pack_window_memory: Maximum memory to use for delta window 

716 pack_delta_cache_size: Cache size for delta operations 

717 pack_depth: Maximum depth for pack deltas 

718 pack_threads: Number of threads to use for packing 

719 pack_big_file_threshold: Threshold for treating files as "big" 

720 """ 

721 self._pack_cache: dict[str, Pack] = {} 

722 self.pack_compression_level = pack_compression_level 

723 self.pack_index_version = pack_index_version 

724 self.pack_delta_window_size = pack_delta_window_size 

725 self.pack_window_memory = pack_window_memory 

726 self.pack_delta_cache_size = pack_delta_cache_size 

727 self.pack_depth = pack_depth 

728 self.pack_threads = pack_threads 

729 self.pack_big_file_threshold = pack_big_file_threshold 

730 

731 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

732 """Add a new pack to this object store.""" 

733 raise NotImplementedError(self.add_pack) 

734 

735 def add_pack_data( 

736 self, 

737 count: int, 

738 unpacked_objects: Iterator[UnpackedObject], 

739 progress: Callable[..., None] | None = None, 

740 ) -> Optional["Pack"]: 

741 """Add pack data to this object store. 

742 

743 Args: 

744 count: Number of items to add 

745 unpacked_objects: Iterator of UnpackedObject instances 

746 progress: Optional progress callback 

747 """ 

748 if count == 0: 

749 # Don't bother writing an empty pack file 

750 return None 

751 f, commit, abort = self.add_pack() 

752 try: 

753 write_pack_data( 

754 f.write, 

755 unpacked_objects, 

756 num_records=count, 

757 progress=progress, 

758 compression_level=self.pack_compression_level, 

759 ) 

760 except BaseException: 

761 abort() 

762 raise 

763 else: 

764 return commit() 

765 

766 @property 

767 def alternates(self) -> list["BaseObjectStore"]: 

768 """Return list of alternate object stores.""" 

769 return [] 

770 

771 def contains_packed(self, sha: bytes) -> bool: 

772 """Check if a particular object is present by SHA1 and is packed. 

773 

774 This does not check alternates. 

775 """ 

776 for pack in self.packs: 

777 try: 

778 if sha in pack: 

779 return True 

780 except PackFileDisappeared: 

781 pass 

782 return False 

783 

784 def __contains__(self, sha: bytes) -> bool: 

785 """Check if a particular object is present by SHA1. 

786 

787 This method makes no distinction between loose and packed objects. 

788 """ 

789 if self.contains_packed(sha) or self.contains_loose(sha): 

790 return True 

791 for alternate in self.alternates: 

792 if sha in alternate: 

793 return True 

794 return False 

795 

796 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

797 """Add a newly appeared pack to the cache by path.""" 

798 prev_pack = self._pack_cache.get(base_name) 

799 if prev_pack is not pack: 

800 self._pack_cache[base_name] = pack 

801 if prev_pack: 

802 prev_pack.close() 

803 

804 def generate_pack_data( 

805 self, 

806 have: Iterable[bytes], 

807 want: Iterable[bytes], 

808 *, 

809 shallow: Set[bytes] | None = None, 

810 progress: Callable[..., None] | None = None, 

811 ofs_delta: bool = True, 

812 ) -> tuple[int, Iterator[UnpackedObject]]: 

813 """Generate pack data objects for a set of wants/haves. 

814 

815 Args: 

816 have: List of SHA1s of objects that should not be sent 

817 want: List of SHA1s of objects that should be sent 

818 shallow: Set of shallow commit SHA1s to skip 

819 ofs_delta: Whether OFS deltas can be included 

820 progress: Optional progress reporting method 

821 """ 

822 missing_objects = MissingObjectFinder( 

823 self, haves=have, wants=want, shallow=shallow, progress=progress 

824 ) 

825 remote_has = missing_objects.get_remote_has() 

826 object_ids = list(missing_objects) 

827 return len(object_ids), generate_unpacked_objects( 

828 self, 

829 object_ids, 

830 progress=progress, 

831 ofs_delta=ofs_delta, 

832 other_haves=remote_has, 

833 ) 

834 

835 def _clear_cached_packs(self) -> None: 

836 pack_cache = self._pack_cache 

837 self._pack_cache = {} 

838 while pack_cache: 

839 (_name, pack) = pack_cache.popitem() 

840 pack.close() 

841 

842 def _iter_cached_packs(self) -> Iterator[Pack]: 

843 return iter(self._pack_cache.values()) 

844 

845 def _update_pack_cache(self) -> list[Pack]: 

846 raise NotImplementedError(self._update_pack_cache) 

847 

848 def close(self) -> None: 

849 """Close the object store and release resources. 

850 

851 This method closes all cached pack files and frees associated resources. 

852 """ 

853 self._clear_cached_packs() 

854 

855 @property 

856 def packs(self) -> list[Pack]: 

857 """List with pack objects.""" 

858 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

859 

860 def count_pack_files(self) -> int: 

861 """Count the number of pack files. 

862 

863 Returns: 

864 Number of pack files (excluding those with .keep files) 

865 """ 

866 count = 0 

867 for pack in self.packs: 

868 # Check if there's a .keep file for this pack 

869 keep_path = pack._basename + ".keep" 

870 if not os.path.exists(keep_path): 

871 count += 1 

872 return count 

873 

874 def _iter_alternate_objects(self) -> Iterator[bytes]: 

875 """Iterate over the SHAs of all the objects in alternate stores.""" 

876 for alternate in self.alternates: 

877 yield from alternate 

878 

879 def _iter_loose_objects(self) -> Iterator[bytes]: 

880 """Iterate over the SHAs of all loose objects.""" 

881 raise NotImplementedError(self._iter_loose_objects) 

882 

883 def _get_loose_object(self, sha: bytes) -> ShaFile | None: 

884 raise NotImplementedError(self._get_loose_object) 

885 

886 def delete_loose_object(self, sha: bytes) -> None: 

887 """Delete a loose object. 

888 

889 This method only handles loose objects. For packed objects, 

890 use repack(exclude=...) to exclude them during repacking. 

891 """ 

892 raise NotImplementedError(self.delete_loose_object) 

893 

894 def _remove_pack(self, pack: "Pack") -> None: 

895 raise NotImplementedError(self._remove_pack) 

896 

897 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

898 """Pack loose objects. 

899 

900 Args: 

901 progress: Optional progress reporting callback 

902 

903 Returns: Number of objects packed 

904 """ 

905 objects: list[tuple[ShaFile, None]] = [] 

906 for sha in self._iter_loose_objects(): 

907 obj = self._get_loose_object(sha) 

908 if obj is not None: 

909 objects.append((obj, None)) 

910 self.add_objects(objects, progress=progress) 

911 for obj, path in objects: 

912 self.delete_loose_object(obj.id) 

913 return len(objects) 

914 

915 def repack( 

916 self, 

917 exclude: Set[bytes] | None = None, 

918 progress: Callable[[str], None] | None = None, 

919 ) -> int: 

920 """Repack the packs in this repository. 

921 

922 Note that this implementation is fairly naive and currently keeps all 

923 objects in memory while it repacks. 

924 

925 Args: 

926 exclude: Optional set of object SHAs to exclude from repacking 

927 progress: Optional progress reporting callback 

928 """ 

929 if exclude is None: 

930 exclude = set() 

931 

932 loose_objects = set() 

933 excluded_loose_objects = set() 

934 for sha in self._iter_loose_objects(): 

935 if sha not in exclude: 

936 obj = self._get_loose_object(sha) 

937 if obj is not None: 

938 loose_objects.add(obj) 

939 else: 

940 excluded_loose_objects.add(sha) 

941 

942 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

943 old_packs = {p.name(): p for p in self.packs} 

944 for name, pack in old_packs.items(): 

945 objects.update( 

946 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

947 ) 

948 

949 # Only create a new pack if there are objects to pack 

950 if objects: 

951 # The name of the consolidated pack might match the name of a 

952 # pre-existing pack. Take care not to remove the newly created 

953 # consolidated pack. 

954 consolidated = self.add_objects(list(objects), progress=progress) 

955 if consolidated is not None: 

956 old_packs.pop(consolidated.name(), None) 

957 

958 # Delete loose objects that were packed 

959 for obj in loose_objects: 

960 if obj is not None: 

961 self.delete_loose_object(obj.id) 

962 # Delete excluded loose objects 

963 for sha in excluded_loose_objects: 

964 self.delete_loose_object(sha) 

965 for name, pack in old_packs.items(): 

966 self._remove_pack(pack) 

967 self._update_pack_cache() 

968 return len(objects) 

969 
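# Editor's illustrative sketch (not part of dulwich): consolidating a store's
# packs while dropping known-unreachable objects. Assumes `store` is a
# PackBasedObjectStore subclass and `unreachable` is a set of SHAs to exclude.
def _example_repack(store, unreachable):
    # Returns the number of objects written into the consolidated pack;
    # excluded loose objects and the superseded packs are removed afterwards.
    return store.repack(exclude=unreachable)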

970 def __iter__(self) -> Iterator[bytes]: 

971 """Iterate over the SHAs that are present in this store.""" 

972 self._update_pack_cache() 

973 for pack in self._iter_cached_packs(): 

974 try: 

975 yield from pack 

976 except PackFileDisappeared: 

977 pass 

978 yield from self._iter_loose_objects() 

979 yield from self._iter_alternate_objects() 

980 

981 def contains_loose(self, sha: bytes) -> bool: 

982 """Check if a particular object is present by SHA1 and is loose. 

983 

984 This does not check alternates. 

985 """ 

986 return self._get_loose_object(sha) is not None 

987 

988 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

989 """Obtain the raw fulltext for an object. 

990 

991 Args: 

992 name: sha for the object. 

993 Returns: tuple with numeric type and object contents. 

994 """ 

995 if name == ZERO_SHA: 

996 raise KeyError(name) 

997 if len(name) == 40: 

998 sha = hex_to_sha(name) 

999 hexsha = name 

1000 elif len(name) == 20: 

1001 sha = name 

1002 hexsha = None 

1003 else: 

1004 raise AssertionError(f"Invalid object name {name!r}") 

1005 for pack in self._iter_cached_packs(): 

1006 try: 

1007 return pack.get_raw(sha) 

1008 except (KeyError, PackFileDisappeared): 

1009 pass 

1010 if hexsha is None: 

1011 hexsha = sha_to_hex(name) 

1012 ret = self._get_loose_object(hexsha) 

1013 if ret is not None: 

1014 return ret.type_num, ret.as_raw_string() 

1015 # Maybe something else has added a pack with the object 

1016 # in the meantime? 

1017 for pack in self._update_pack_cache(): 

1018 try: 

1019 return pack.get_raw(sha) 

1020 except KeyError: 

1021 pass 

1022 for alternate in self.alternates: 

1023 try: 

1024 return alternate.get_raw(hexsha) 

1025 except KeyError: 

1026 pass 

1027 raise KeyError(hexsha) 

1028 
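# Editor's illustrative sketch (not part of dulwich): the lookup order used by
# get_raw() above -- cached packs, loose objects, freshly appeared packs, then
# alternates -- is what makes plain indexing work transparently. Assumes
# `store` and a 40-byte hex SHA `sha`.
def _example_lookup(store, sha):
    type_num, raw = store.get_raw(sha)
    obj = store[sha]  # convenience form returning a parsed ShaFile
    return type_num, raw, obj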

1029 def iter_unpacked_subset( 

1030 self, 

1031 shas: Iterable[bytes], 

1032 include_comp: bool = False, 

1033 allow_missing: bool = False, 

1034 convert_ofs_delta: bool = True, 

1035 ) -> Iterator[UnpackedObject]: 

1036 """Iterate over a subset of objects, yielding UnpackedObject instances. 

1037 

1038 Args: 

1039 shas: Set of object SHAs to retrieve 

1040 include_comp: Whether to include compressed data 

1041 allow_missing: If True, skip missing objects; if False, raise KeyError 

1042 convert_ofs_delta: Whether to convert OFS_DELTA objects 

1043 

1044 Returns: 

1045 Iterator of UnpackedObject instances 

1046 

1047 Raises: 

1048 KeyError: If an object is missing and allow_missing is False 

1049 """ 

1050 todo: set[bytes] = set(shas) 

1051 for p in self._iter_cached_packs(): 

1052 for unpacked in p.iter_unpacked_subset( 

1053 todo, 

1054 include_comp=include_comp, 

1055 allow_missing=True, 

1056 convert_ofs_delta=convert_ofs_delta, 

1057 ): 

1058 yield unpacked 

1059 hexsha = sha_to_hex(unpacked.sha()) 

1060 todo.remove(hexsha) 

1061 # Maybe something else has added a pack with the object 

1062 # in the meantime? 

1063 for p in self._update_pack_cache(): 

1064 for unpacked in p.iter_unpacked_subset( 

1065 todo, 

1066 include_comp=include_comp, 

1067 allow_missing=True, 

1068 convert_ofs_delta=convert_ofs_delta, 

1069 ): 

1070 yield unpacked 

1071 hexsha = sha_to_hex(unpacked.sha()) 

1072 todo.remove(hexsha) 

1073 for alternate in self.alternates: 

1074 assert isinstance(alternate, PackBasedObjectStore) 

1075 for unpacked in alternate.iter_unpacked_subset( 

1076 todo, 

1077 include_comp=include_comp, 

1078 allow_missing=True, 

1079 convert_ofs_delta=convert_ofs_delta, 

1080 ): 

1081 yield unpacked 

1082 hexsha = sha_to_hex(unpacked.sha()) 

1083 todo.remove(hexsha) 

1084 

1085 def iterobjects_subset( 

1086 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1087 ) -> Iterator[ShaFile]: 

1088 """Iterate over a subset of objects in the store. 

1089 

1090 This method searches for objects in pack files, alternates, and loose storage. 

1091 

1092 Args: 

1093 shas: Iterable of object SHAs to retrieve 

1094 allow_missing: If True, skip missing objects; if False, raise KeyError 

1095 

1096 Returns: 

1097 Iterator of ShaFile objects 

1098 

1099 Raises: 

1100 KeyError: If an object is missing and allow_missing is False 

1101 """ 

1102 todo: set[bytes] = set(shas) 

1103 for p in self._iter_cached_packs(): 

1104 for o in p.iterobjects_subset(todo, allow_missing=True): 

1105 yield o 

1106 todo.remove(o.id) 

1107 # Maybe something else has added a pack with the object 

1108 # in the meantime? 

1109 for p in self._update_pack_cache(): 

1110 for o in p.iterobjects_subset(todo, allow_missing=True): 

1111 yield o 

1112 todo.remove(o.id) 

1113 for alternate in self.alternates: 

1114 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

1115 yield o 

1116 todo.remove(o.id) 

1117 for oid in todo: 

1118 loose_obj: ShaFile | None = self._get_loose_object(oid) 

1119 if loose_obj is not None: 

1120 yield loose_obj 

1121 elif not allow_missing: 

1122 raise KeyError(oid) 

1123 

1124 def get_unpacked_object( 

1125 self, sha1: bytes, *, include_comp: bool = False 

1126 ) -> UnpackedObject: 

1127 """Obtain the unpacked object. 

1128 

1129 Args: 

1130 sha1: sha for the object. 

1131 include_comp: Whether to include compression metadata. 

1132 """ 

1133 if sha1 == ZERO_SHA: 

1134 raise KeyError(sha1) 

1135 if len(sha1) == 40: 

1136 sha = hex_to_sha(sha1) 

1137 hexsha = sha1 

1138 elif len(sha1) == 20: 

1139 sha = sha1 

1140 hexsha = None 

1141 else: 

1142 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1143 for pack in self._iter_cached_packs(): 

1144 try: 

1145 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1146 except (KeyError, PackFileDisappeared): 

1147 pass 

1148 if hexsha is None: 

1149 hexsha = sha_to_hex(sha1) 

1150 # Maybe something else has added a pack with the object 

1151 # in the meantime? 

1152 for pack in self._update_pack_cache(): 

1153 try: 

1154 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1155 except KeyError: 

1156 pass 

1157 for alternate in self.alternates: 

1158 assert isinstance(alternate, PackBasedObjectStore) 

1159 try: 

1160 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1161 except KeyError: 

1162 pass 

1163 raise KeyError(hexsha) 

1164 

1165 def add_objects( 

1166 self, 

1167 objects: Sequence[tuple[ShaFile, str | None]], 

1168 progress: Callable[[str], None] | None = None, 

1169 ) -> Optional["Pack"]: 

1170 """Add a set of objects to this object store. 

1171 

1172 Args: 

1173 objects: Iterable over (object, path) tuples, should support 

1174 __len__. 

1175 progress: Optional progress reporting function. 

1176 Returns: Pack object of the objects written. 

1177 """ 

1178 count = len(objects) 

1179 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1180 return self.add_pack_data(count, record_iter, progress=progress) 

1181 

1182 

1183class DiskObjectStore(PackBasedObjectStore): 

1184 """Git-style object store that exists on disk.""" 

1185 

1186 path: str | os.PathLike[str] 

1187 pack_dir: str | os.PathLike[str] 

1188 _alternates: list["BaseObjectStore"] | None 

1189 _commit_graph: Optional["CommitGraph"] 

1190 

1191 def __init__( 

1192 self, 

1193 path: str | os.PathLike[str], 

1194 loose_compression_level: int = -1, 

1195 pack_compression_level: int = -1, 

1196 pack_index_version: int | None = None, 

1197 pack_delta_window_size: int | None = None, 

1198 pack_window_memory: int | None = None, 

1199 pack_delta_cache_size: int | None = None, 

1200 pack_depth: int | None = None, 

1201 pack_threads: int | None = None, 

1202 pack_big_file_threshold: int | None = None, 

1203 fsync_object_files: bool = False, 

1204 ) -> None: 

1205 """Open an object store. 

1206 

1207 Args: 

1208 path: Path of the object store. 

1209 loose_compression_level: zlib compression level for loose objects 

1210 pack_compression_level: zlib compression level for pack objects 

1211 pack_index_version: pack index version to use (1, 2, or 3) 

1212 pack_delta_window_size: sliding window size for delta compression 

1213 pack_window_memory: memory limit for delta window operations 

1214 pack_delta_cache_size: size of cache for delta operations 

1215 pack_depth: maximum delta chain depth 

1216 pack_threads: number of threads for pack operations 

1217 pack_big_file_threshold: threshold for treating files as big 

1218 fsync_object_files: whether to fsync object files for durability 

1219 """ 

1220 super().__init__( 

1221 pack_compression_level=pack_compression_level, 

1222 pack_index_version=pack_index_version, 

1223 pack_delta_window_size=pack_delta_window_size, 

1224 pack_window_memory=pack_window_memory, 

1225 pack_delta_cache_size=pack_delta_cache_size, 

1226 pack_depth=pack_depth, 

1227 pack_threads=pack_threads, 

1228 pack_big_file_threshold=pack_big_file_threshold, 

1229 ) 

1230 self.path = path 

1231 self.pack_dir = os.path.join(self.path, PACKDIR) 

1232 self._alternates = None 

1233 self.loose_compression_level = loose_compression_level 

1234 self.pack_compression_level = pack_compression_level 

1235 self.pack_index_version = pack_index_version 

1236 self.fsync_object_files = fsync_object_files 

1237 

1238 # Commit graph support - lazy loaded 

1239 self._commit_graph = None 

1240 self._use_commit_graph = True # Default to true 

1241 

1242 def __repr__(self) -> str: 

1243 """Return string representation of DiskObjectStore. 

1244 

1245 Returns: 

1246 String representation including the store path 

1247 """ 

1248 return f"<{self.__class__.__name__}({self.path!r})>" 

1249 

1250 @classmethod 

1251 def from_config( 

1252 cls, path: str | os.PathLike[str], config: "Config" 

1253 ) -> "DiskObjectStore": 

1254 """Create a DiskObjectStore from a configuration object. 

1255 

1256 Args: 

1257 path: Path to the object store directory 

1258 config: Configuration object to read settings from 

1259 

1260 Returns: 

1261 New DiskObjectStore instance configured according to config 

1262 """ 

1263 try: 

1264 default_compression_level = int( 

1265 config.get((b"core",), b"compression").decode() 

1266 ) 

1267 except KeyError: 

1268 default_compression_level = -1 

1269 try: 

1270 loose_compression_level = int( 

1271 config.get((b"core",), b"looseCompression").decode() 

1272 ) 

1273 except KeyError: 

1274 loose_compression_level = default_compression_level 

1275 try: 

1276 pack_compression_level = int( 

1277 config.get((b"core",), b"packCompression").decode() 

1278 ) 

1279 except KeyError: 

1280 pack_compression_level = default_compression_level 

1281 try: 

1282 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1283 except KeyError: 

1284 pack_index_version = None 

1285 

1286 # Read pack configuration options 

1287 try: 

1288 pack_delta_window_size = int( 

1289 config.get((b"pack",), b"deltaWindowSize").decode() 

1290 ) 

1291 except KeyError: 

1292 pack_delta_window_size = None 

1293 try: 

1294 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1295 except KeyError: 

1296 pack_window_memory = None 

1297 try: 

1298 pack_delta_cache_size = int( 

1299 config.get((b"pack",), b"deltaCacheSize").decode() 

1300 ) 

1301 except KeyError: 

1302 pack_delta_cache_size = None 

1303 try: 

1304 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1305 except KeyError: 

1306 pack_depth = None 

1307 try: 

1308 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1309 except KeyError: 

1310 pack_threads = None 

1311 try: 

1312 pack_big_file_threshold = int( 

1313 config.get((b"pack",), b"bigFileThreshold").decode() 

1314 ) 

1315 except KeyError: 

1316 pack_big_file_threshold = None 

1317 

1318 # Read core.commitGraph setting 

1319 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1320 

1321 # Read core.fsyncObjectFiles setting 

1322 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False) 

1323 

1324 instance = cls( 

1325 path, 

1326 loose_compression_level, 

1327 pack_compression_level, 

1328 pack_index_version, 

1329 pack_delta_window_size, 

1330 pack_window_memory, 

1331 pack_delta_cache_size, 

1332 pack_depth, 

1333 pack_threads, 

1334 pack_big_file_threshold, 

1335 fsync_object_files, 

1336 ) 

1337 instance._use_commit_graph = use_commit_graph 

1338 return instance 

1339 
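# Editor's illustrative sketch (not part of dulwich): the configuration keys
# consumed by from_config() above, expressed as a git config fragment. The
# values shown are hypothetical.
#
#   [core]
#       compression = 6
#       looseCompression = 1
#       commitGraph = true
#       fsyncObjectFiles = false
#   [pack]
#       indexVersion = 2
#       depth = 50
#       threads = 4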

1340 @property 

1341 def alternates(self) -> list["BaseObjectStore"]: 

1342 """Get the list of alternate object stores. 

1343 

1344 Reads from .git/objects/info/alternates if not already cached. 

1345 

1346 Returns: 

1347 List of DiskObjectStore instances for alternate object directories 

1348 """ 

1349 if self._alternates is not None: 

1350 return self._alternates 

1351 self._alternates = [] 

1352 for path in self._read_alternate_paths(): 

1353 self._alternates.append(DiskObjectStore(path)) 

1354 return self._alternates 

1355 

1356 def _read_alternate_paths(self) -> Iterator[str]: 

1357 try: 

1358 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1359 except FileNotFoundError: 

1360 return 

1361 with f: 

1362 for line in f.readlines(): 

1363 line = line.rstrip(b"\n") 

1364 if line.startswith(b"#"): 

1365 continue 

1366 if os.path.isabs(line): 

1367 yield os.fsdecode(line) 

1368 else: 

1369 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1370 

1371 def add_alternate_path(self, path: str | os.PathLike[str]) -> None: 

1372 """Add an alternate path to this object store.""" 

1373 try: 

1374 os.mkdir(os.path.join(self.path, INFODIR)) 

1375 except FileExistsError: 

1376 pass 

1377 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1378 with GitFile(alternates_path, "wb") as f: 

1379 try: 

1380 orig_f = open(alternates_path, "rb") 

1381 except FileNotFoundError: 

1382 pass 

1383 else: 

1384 with orig_f: 

1385 f.write(orig_f.read()) 

1386 f.write(os.fsencode(path) + b"\n") 

1387 

1388 if not os.path.isabs(path): 

1389 path = os.path.join(self.path, path) 

1390 self.alternates.append(DiskObjectStore(path)) 

1391 
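# Editor's illustrative sketch (not part of dulwich): sharing objects between
# repositories via the alternates mechanism above. The paths are hypothetical.
def _example_share_objects(primary_objects_dir, shared_objects_dir):
    store = DiskObjectStore(primary_objects_dir)
    # Appends a line to objects/info/alternates; relative paths are resolved
    # against the store's own path.
    store.add_alternate_path(shared_objects_dir)
    return store.alternates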

1392 def _update_pack_cache(self) -> list[Pack]: 

1393 """Read and iterate over new pack files and cache them.""" 

1394 try: 

1395 pack_dir_contents = os.listdir(self.pack_dir) 

1396 except FileNotFoundError: 

1397 self.close() 

1398 return [] 

1399 pack_files = set() 

1400 for name in pack_dir_contents: 

1401 if name.startswith("pack-") and name.endswith(".pack"): 

1402 # verify that idx exists first (otherwise the pack was not yet 

1403 # fully written) 

1404 idx_name = os.path.splitext(name)[0] + ".idx" 

1405 if idx_name in pack_dir_contents: 

1406 pack_name = name[: -len(".pack")] 

1407 pack_files.add(pack_name) 

1408 

1409 # Open newly appeared pack files 

1410 new_packs = [] 

1411 for f in pack_files: 

1412 if f not in self._pack_cache: 

1413 pack = Pack( 

1414 os.path.join(self.pack_dir, f), 

1415 delta_window_size=self.pack_delta_window_size, 

1416 window_memory=self.pack_window_memory, 

1417 delta_cache_size=self.pack_delta_cache_size, 

1418 depth=self.pack_depth, 

1419 threads=self.pack_threads, 

1420 big_file_threshold=self.pack_big_file_threshold, 

1421 ) 

1422 new_packs.append(pack) 

1423 self._pack_cache[f] = pack 

1424 # Remove disappeared pack files 

1425 for f in set(self._pack_cache) - pack_files: 

1426 self._pack_cache.pop(f).close() 

1427 return new_packs 

1428 

1429 def _get_shafile_path(self, sha: bytes) -> str: 

1430 # Check from object dir 

1431 return hex_to_filename(os.fspath(self.path), sha) 

1432 

1433 def _iter_loose_objects(self) -> Iterator[bytes]: 

1434 for base in os.listdir(self.path): 

1435 if len(base) != 2: 

1436 continue 

1437 for rest in os.listdir(os.path.join(self.path, base)): 

1438 sha = os.fsencode(base + rest) 

1439 if not valid_hexsha(sha): 

1440 continue 

1441 yield sha 

1442 

1443 def count_loose_objects(self) -> int: 

1444 """Count the number of loose objects in the object store. 

1445 

1446 Returns: 

1447 Number of loose objects 

1448 """ 

1449 count = 0 

1450 if not os.path.exists(self.path): 

1451 return 0 

1452 

1453 for i in range(256): 

1454 subdir = os.path.join(self.path, f"{i:02x}") 

1455 try: 

1456 count += len( 

1457 [ 

1458 name 

1459 for name in os.listdir(subdir) 

1460 if len(name) == 38 # 40 - 2 for the prefix 

1461 ] 

1462 ) 

1463 except FileNotFoundError: 

1464 # Directory may have been removed or is inaccessible 

1465 continue 

1466 

1467 return count 

1468 

1469 def _get_loose_object(self, sha: bytes) -> ShaFile | None: 

1470 path = self._get_shafile_path(sha) 

1471 try: 

1472 return ShaFile.from_path(path) 

1473 except FileNotFoundError: 

1474 return None 

1475 

1476 def delete_loose_object(self, sha: bytes) -> None: 

1477 """Delete a loose object from disk. 

1478 

1479 Args: 

1480 sha: SHA1 of the object to delete 

1481 

1482 Raises: 

1483 FileNotFoundError: If the object file doesn't exist 

1484 """ 

1485 os.remove(self._get_shafile_path(sha)) 

1486 

1487 def get_object_mtime(self, sha: bytes) -> float: 

1488 """Get the modification time of an object. 

1489 

1490 Args: 

1491 sha: SHA1 of the object 

1492 

1493 Returns: 

1494 Modification time as seconds since epoch 

1495 

1496 Raises: 

1497 KeyError: if the object is not found 

1498 """ 

1499 # First check if it's a loose object 

1500 if self.contains_loose(sha): 

1501 path = self._get_shafile_path(sha) 

1502 try: 

1503 return os.path.getmtime(path) 

1504 except FileNotFoundError: 

1505 pass 

1506 

1507 # Check if it's in a pack file 

1508 for pack in self.packs: 

1509 try: 

1510 if sha in pack: 

1511 # Use the pack file's mtime for packed objects 

1512 pack_path = pack._data_path 

1513 try: 

1514 return os.path.getmtime(pack_path) 

1515 except (FileNotFoundError, AttributeError): 

1516 pass 

1517 except PackFileDisappeared: 

1518 pass 

1519 

1520 raise KeyError(sha) 

1521 

1522 def _remove_pack(self, pack: Pack) -> None: 

1523 try: 

1524 del self._pack_cache[os.path.basename(pack._basename)] 

1525 except KeyError: 

1526 pass 

1527 pack.close() 

1528 os.remove(pack.data.path) 

1529 if hasattr(pack.index, "path"): 

1530 os.remove(pack.index.path) 

1531 

1532 def _get_pack_basepath( 

1533 self, entries: Iterable[tuple[bytes, int, int | None]] 

1534 ) -> str: 

1535 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1536 # TODO: Handle self.pack_dir being bytes 

1537 suffix = suffix_bytes.decode("ascii") 

1538 return os.path.join(self.pack_dir, "pack-" + suffix) 

1539 

1540 def _complete_pack( 

1541 self, 

1542 f: BinaryIO, 

1543 path: str, 

1544 num_objects: int, 

1545 indexer: PackIndexer, 

1546 progress: Callable[..., None] | None = None, 

1547 ) -> Pack: 

1548 """Move a specific file containing a pack into the pack directory. 

1549 

1550 Note: The file should be on the same file system as the 

1551 packs directory. 

1552 

1553 Args: 

1554 f: Open file object for the pack. 

1555 path: Path to the pack file. 

1556 num_objects: Number of objects in the pack. 

1557 indexer: A PackIndexer for indexing the pack. 

1558 progress: Optional progress reporting function. 

1559 """ 

1560 entries = [] 

1561 for i, entry in enumerate(indexer): 

1562 if progress is not None: 

1563 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1564 entries.append(entry) 

1565 

1566 pack_sha, extra_entries = extend_pack( 

1567 f, 

1568 set(indexer.ext_refs()), 

1569 get_raw=self.get_raw, 

1570 compression_level=self.pack_compression_level, 

1571 progress=progress, 

1572 ) 

1573 f.flush() 

1574 if self.fsync_object_files: 

1575 try: 

1576 fileno = f.fileno() 

1577 except AttributeError as e: 

1578 raise OSError("fsync requested but file has no fileno()") from e 

1579 else: 

1580 os.fsync(fileno) 

1581 f.close() 

1582 

1583 entries.extend(extra_entries) 

1584 

1585 # Move the pack in. 

1586 entries.sort() 

1587 pack_base_name = self._get_pack_basepath(entries) 

1588 

1589 for pack in self.packs: 

1590 if pack._basename == pack_base_name: 

1591 return pack 

1592 

1593 target_pack_path = pack_base_name + ".pack" 

1594 target_index_path = pack_base_name + ".idx" 

1595 if sys.platform == "win32": 

1596 # Windows might have the target pack file lingering. Attempt 

1597 # removal, silently passing if the target does not exist. 

1598 with suppress(FileNotFoundError): 

1599 os.remove(target_pack_path) 

1600 os.rename(path, target_pack_path) 

1601 

1602 # Write the index. 

1603 with GitFile( 

1604 target_index_path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files 

1605 ) as index_file: 

1606 write_pack_index( 

1607 index_file, entries, pack_sha, version=self.pack_index_version 

1608 ) 

1609 

1610 # Add the pack to the store and return it. 

1611 final_pack = Pack( 

1612 pack_base_name, 

1613 delta_window_size=self.pack_delta_window_size, 

1614 window_memory=self.pack_window_memory, 

1615 delta_cache_size=self.pack_delta_cache_size, 

1616 depth=self.pack_depth, 

1617 threads=self.pack_threads, 

1618 big_file_threshold=self.pack_big_file_threshold, 

1619 ) 

1620 final_pack.check_length_and_checksum() 

1621 self._add_cached_pack(pack_base_name, final_pack) 

1622 return final_pack 

1623 

1624 def add_thin_pack( 

1625 self, 

1626 read_all: Callable[[int], bytes], 

1627 read_some: Callable[[int], bytes] | None, 

1628 progress: Callable[..., None] | None = None, 

1629 ) -> "Pack": 

1630 """Add a new thin pack to this object store. 

1631 

1632 Thin packs are packs that contain deltas with parents that exist 

1633 outside the pack. They should never be placed in the object store 

1634 directly, and always indexed and completed as they are copied. 

1635 

1636 Args: 

1637 read_all: Read function that blocks until the number of 

1638 requested bytes are read. 

1639 read_some: Read function that returns at least one byte, but may 

1640 not return the number of bytes requested. 

1641 progress: Optional progress reporting function. 

1642 Returns: A Pack object pointing at the now-completed thin pack in the 

1643 objects/pack directory. 

1644 """ 

1645 import tempfile 

1646 

1647 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1648 with os.fdopen(fd, "w+b") as f: 

1649 os.chmod(path, PACK_MODE) 

1650 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type] 

1651 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type] 

1652 copier.verify(progress=progress) 

1653 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1654 

1655 def add_pack( 

1656 self, 

1657 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1658 """Add a new pack to this object store. 

1659 

1660 Returns: Fileobject to write to, a commit function to 

1661 call when the pack is finished and an abort 

1662 function. 

1663 """ 

1664 import tempfile 

1665 

1666 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1667 f = os.fdopen(fd, "w+b") 

1668 os.chmod(path, PACK_MODE) 

1669 

1670 def commit() -> Optional["Pack"]: 

1671 if f.tell() > 0: 

1672 f.seek(0) 

1673 

1674 with PackData(path, f) as pd: 

1675 indexer = PackIndexer.for_pack_data( 

1676 pd, 

1677 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

1678 ) 

1679 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

1680 else: 

1681 f.close() 

1682 os.remove(path) 

1683 return None 

1684 

1685 def abort() -> None: 

1686 f.close() 

1687 os.remove(path) 

1688 

1689 return f, commit, abort # type: ignore[return-value] 

1690 
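# Editor's illustrative sketch (not part of dulwich): the file/commit/abort
# contract returned by add_pack(). Assumes `store` is a DiskObjectStore and
# `pack_bytes` is a complete (non-thin) pack stream held in memory.
def _example_add_pack(store, pack_bytes):
    f, commit, abort = store.add_pack()
    try:
        f.write(pack_bytes)
    except BaseException:
        abort()
        raise
    else:
        # Indexes the temporary file and moves it into objects/pack/.
        return commit()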

1691 def add_object(self, obj: ShaFile) -> None: 

1692 """Add a single object to this object store. 

1693 

1694 Args: 

1695 obj: Object to add 

1696 """ 

1697 path = self._get_shafile_path(obj.id) 

1698 dir = os.path.dirname(path) 

1699 try: 

1700 os.mkdir(dir) 

1701 except FileExistsError: 

1702 pass 

1703 if os.path.exists(path): 

1704 return # Already there, no need to write again 

1705 with GitFile(path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files) as f: 

1706 f.write( 

1707 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1708 ) 

1709 
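# Editor's illustrative sketch (not part of dulwich): writing a loose blob and
# reading it back. Assumes `objects_dir` is an existing object store directory.
def _example_loose_blob(objects_dir):
    store = DiskObjectStore(objects_dir)
    blob = Blob.from_string(b"hello world\n")
    store.add_object(blob)
    # Stored as <objects_dir>/<first two hex chars>/<remaining 38 chars>.
    return store[blob.id]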

1710 @classmethod 

1711 def init(cls, path: str | os.PathLike[str]) -> "DiskObjectStore": 

1712 """Initialize a new disk object store. 

1713 

1714 Creates the necessary directory structure for a Git object store. 

1715 

1716 Args: 

1717 path: Path where the object store should be created 

1718 

1719 Returns: 

1720 New DiskObjectStore instance 

1721 """ 

1722 try: 

1723 os.mkdir(path) 

1724 except FileExistsError: 

1725 pass 

1726 os.mkdir(os.path.join(path, "info")) 

1727 os.mkdir(os.path.join(path, PACKDIR)) 

1728 return cls(path) 

1729 
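# --- Editor's sketch (not part of object_store.py): DiskObjectStore.init()
# creates the bare objects layout; the temporary path is illustrative.
import os
import tempfile
from dulwich.object_store import DiskObjectStore

path = tempfile.mkdtemp()
store = DiskObjectStore.init(path)
assert os.path.isdir(os.path.join(path, "info"))
assert os.path.isdir(os.path.join(path, "pack"))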

1730 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]: 

1731 """Iterate over all object SHAs with the given prefix. 

1732 

1733 Args: 

1734 prefix: Hex prefix to search for (as bytes) 

1735 

1736 Returns: 

1737 Iterator of object SHAs (as bytes) matching the prefix 

1738 """ 

1739 if len(prefix) < 2: 

1740 yield from super().iter_prefix(prefix) 

1741 return 

1742 seen = set() 

1743 dir = prefix[:2].decode() 

1744 rest = prefix[2:].decode() 

1745 try: 

1746 for name in os.listdir(os.path.join(self.path, dir)): 

1747 if name.startswith(rest): 

1748 sha = os.fsencode(dir + name) 

1749 if sha not in seen: 

1750 seen.add(sha) 

1751 yield sha 

1752 except FileNotFoundError: 

1753 pass 

1754 

1755 for p in self.packs: 

1756 bin_prefix = ( 

1757 binascii.unhexlify(prefix) 

1758 if len(prefix) % 2 == 0 

1759 else binascii.unhexlify(prefix[:-1]) 

1760 ) 

1761 for sha in p.index.iter_prefix(bin_prefix): 

1762 sha = sha_to_hex(sha) 

1763 if sha.startswith(prefix) and sha not in seen: 

1764 seen.add(sha) 

1765 yield sha 

1766 for alternate in self.alternates: 

1767 for sha in alternate.iter_prefix(prefix): 

1768 if sha not in seen: 

1769 seen.add(sha) 

1770 yield sha 

1771 
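# --- Editor's sketch (not part of object_store.py): resolving an abbreviated
# object id with iter_prefix(); `store` and the hex prefix are illustrative.
matches = list(store.iter_prefix(b"ab12"))
if len(matches) == 1:
    obj = store[matches[0]]   # unambiguous abbreviation, safe to resolve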

1772 def get_commit_graph(self) -> Optional["CommitGraph"]: 

1773 """Get the commit graph for this object store. 

1774 

1775 Returns: 

1776 CommitGraph object if available, None otherwise 

1777 """ 

1778 if not self._use_commit_graph: 

1779 return None 

1780 

1781 if self._commit_graph is None: 

1782 from .commit_graph import read_commit_graph 

1783 

1784 # Look for commit graph in our objects directory 

1785 graph_file = os.path.join(self.path, "info", "commit-graph") 

1786 if os.path.exists(graph_file): 

1787 self._commit_graph = read_commit_graph(graph_file) 

1788 return self._commit_graph 

1789 

1790 def write_commit_graph( 

1791 self, refs: Iterable[bytes] | None = None, reachable: bool = True 

1792 ) -> None: 

1793 """Write a commit graph file for this object store. 

1794 

1795 Args: 

1796 refs: List of refs to include. If None, includes all refs from object store. 

1797 reachable: If True, includes all commits reachable from refs. 

1798 If False, only includes the direct ref targets. 

1799 """ 

1800 from .commit_graph import get_reachable_commits 

1801 

1802 if refs is None: 

1803 # Get all commit objects from the object store 

1804 all_refs = [] 

1805 # Iterate through all objects to find commits 

1806 for sha in self: 

1807 try: 

1808 obj = self[sha] 

1809 if obj.type_name == b"commit": 

1810 all_refs.append(sha) 

1811 except KeyError: 

1812 continue 

1813 else: 

1814 # Use provided refs 

1815 all_refs = list(refs) 

1816 

1817 if not all_refs: 

1818 return # No commits to include 

1819 

1820 if reachable: 

1821 # Get all reachable commits 

1822 commit_ids = get_reachable_commits(self, all_refs) 

1823 else: 

1824 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1825 commit_ids = [] 

1826 for ref in all_refs: 

1827 if isinstance(ref, bytes) and len(ref) == 40: 

1828 # Already hex ObjectID 

1829 commit_ids.append(ref) 

1830 elif isinstance(ref, bytes) and len(ref) == 20: 

1831 # Binary SHA, convert to hex ObjectID 

1832 from .objects import sha_to_hex 

1833 

1834 commit_ids.append(sha_to_hex(ref)) 

1835 else: 

1836 # Assume it's already correct format 

1837 commit_ids.append(ref) 

1838 

1839 if commit_ids: 

1840 # Write commit graph directly to our object store path 

1841 # Generate the commit graph 

1842 from .commit_graph import generate_commit_graph 

1843 

1844 graph = generate_commit_graph(self, commit_ids) 

1845 

1846 if graph.entries: 

1847 # Ensure the info directory exists 

1848 info_dir = os.path.join(self.path, "info") 

1849 os.makedirs(info_dir, exist_ok=True) 

1850 

1851 # Write using GitFile for atomic operation 

1852 graph_path = os.path.join(info_dir, "commit-graph") 

1853 with GitFile(graph_path, "wb") as f: 

1854 assert isinstance( 

1855 f, _GitFile 

1856 ) # GitFile in write mode always returns _GitFile 

1857 graph.write_to_file(f) 

1858 

1859 # Clear cached commit graph so it gets reloaded 

1860 self._commit_graph = None 

1861 
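# --- Editor's sketch (not part of object_store.py): rebuilding the commit
# graph for everything reachable from a repository's refs; `repo` is assumed
# to be a dulwich.repo.Repo instance.
refs = repo.refs.as_dict().values()
repo.object_store.write_commit_graph(refs=refs, reachable=True)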

1862 def prune(self, grace_period: int | None = None) -> None: 

1863 """Prune/clean up this object store. 

1864 

1865 This removes temporary files that were left behind by interrupted 

1866 pack operations. These are files that start with ``tmp_pack_`` in the 

1867 repository directory or files with .pack extension but no corresponding 

1868 .idx file in the pack directory. 

1869 

1870 Args: 

1871 grace_period: Grace period in seconds for removing temporary files. 

1872 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1873 """ 

1874 import glob 

1875 

1876 if grace_period is None: 

1877 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1878 

1879 # Clean up tmp_pack_* files in the repository directory 

1880 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1881 # Check if file is old enough (more than grace period) 

1882 mtime = os.path.getmtime(tmp_file) 

1883 if time.time() - mtime > grace_period: 

1884 os.remove(tmp_file) 

1885 

1886 # Clean up orphaned .pack files without corresponding .idx files 

1887 try: 

1888 pack_dir_contents = os.listdir(self.pack_dir) 

1889 except FileNotFoundError: 

1890 return 

1891 

1892 pack_files = {} 

1893 idx_files = set() 

1894 

1895 for name in pack_dir_contents: 

1896 if name.endswith(".pack"): 

1897 base_name = name[:-5] # Remove .pack extension 

1898 pack_files[base_name] = name 

1899 elif name.endswith(".idx"): 

1900 base_name = name[:-4] # Remove .idx extension 

1901 idx_files.add(base_name) 

1902 

1903 # Remove .pack files without corresponding .idx files 

1904 for base_name, pack_name in pack_files.items(): 

1905 if base_name not in idx_files: 

1906 pack_path = os.path.join(self.pack_dir, pack_name) 

1907 # Check if file is old enough (more than grace period) 

1908 mtime = os.path.getmtime(pack_path) 

1909 if time.time() - mtime > grace_period: 

1910 os.remove(pack_path) 

1911 
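# --- Editor's sketch (not part of object_store.py): removing leftover
# temporary and orphaned pack files older than one hour instead of the
# default grace period; `store` and the value are illustrative.
store.prune(grace_period=3600)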

1912 

1913class MemoryObjectStore(PackCapableObjectStore): 

1914 """Object store that keeps all objects in memory.""" 

1915 

1916 def __init__(self) -> None: 

1917 """Initialize a MemoryObjectStore. 

1918 

1919 Creates an empty in-memory object store. 

1920 """ 

1921 super().__init__() 

1922 self._data: dict[bytes, ShaFile] = {} 

1923 self.pack_compression_level = -1 

1924 

1925 def _to_hexsha(self, sha: bytes) -> bytes: 

1926 if len(sha) == 40: 

1927 return sha 

1928 elif len(sha) == 20: 

1929 return sha_to_hex(sha) 

1930 else: 

1931 raise ValueError(f"Invalid sha {sha!r}") 

1932 

1933 def contains_loose(self, sha: bytes) -> bool: 

1934 """Check if a particular object is present by SHA1 and is loose.""" 

1935 return self._to_hexsha(sha) in self._data 

1936 

1937 def contains_packed(self, sha: bytes) -> bool: 

1938 """Check if a particular object is present by SHA1 and is packed.""" 

1939 return False 

1940 

1941 def __iter__(self) -> Iterator[bytes]: 

1942 """Iterate over the SHAs that are present in this store.""" 

1943 return iter(self._data.keys()) 

1944 

1945 @property 

1946 def packs(self) -> list[Pack]: 

1947 """List with pack objects.""" 

1948 return [] 

1949 

1950 def get_raw(self, name: ObjectID) -> tuple[int, bytes]: 

1951 """Obtain the raw text for an object. 

1952 

1953 Args: 

1954 name: sha for the object. 

1955 Returns: tuple with numeric type and object contents. 

1956 """ 

1957 obj = self[self._to_hexsha(name)] 

1958 return obj.type_num, obj.as_raw_string() 

1959 

1960 def __getitem__(self, name: ObjectID) -> ShaFile: 

1961 """Retrieve an object by SHA. 

1962 

1963 Args: 

1964 name: SHA of the object (as hex string or bytes) 

1965 

1966 Returns: 

1967 Copy of the ShaFile object 

1968 

1969 Raises: 

1970 KeyError: If the object is not found 

1971 """ 

1972 return self._data[self._to_hexsha(name)].copy() 

1973 

1974 def __delitem__(self, name: ObjectID) -> None: 

1975 """Delete an object from this store, for testing only.""" 

1976 del self._data[self._to_hexsha(name)] 

1977 

1978 def add_object(self, obj: ShaFile) -> None: 

1979 """Add a single object to this object store.""" 

1980 self._data[obj.id] = obj.copy() 

1981 

1982 def add_objects( 

1983 self, 

1984 objects: Iterable[tuple[ShaFile, str | None]], 

1985 progress: Callable[[str], None] | None = None, 

1986 ) -> None: 

1987 """Add a set of objects to this object store. 

1988 

1989 Args: 

1990 objects: Iterable over a list of (object, path) tuples 

1991 progress: Optional progress reporting function. 

1992 """ 

1993 for obj, path in objects: 

1994 self.add_object(obj) 

1995 

1996 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1997 """Add a new pack to this object store. 

1998 

1999 Because this object store doesn't support packs, we extract and add the 

2000 individual objects. 

2001 

2002 Returns: Fileobject to write to, a commit function to call when 

 2003 the pack is finished, and an abort function. 

2004 """ 

2005 from tempfile import SpooledTemporaryFile 

2006 

2007 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

2008 

2009 def commit() -> None: 

2010 size = f.tell() 

2011 if size > 0: 

2012 f.seek(0) 

2013 

2014 p = PackData.from_file(f, size) 

2015 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type] 

2016 self.add_object(obj) 

2017 p.close() 

2018 f.close() 

2019 else: 

2020 f.close() 

2021 

2022 def abort() -> None: 

2023 f.close() 

2024 

2025 return f, commit, abort # type: ignore[return-value] 

2026 

2027 def add_pack_data( 

2028 self, 

2029 count: int, 

2030 unpacked_objects: Iterator[UnpackedObject], 

2031 progress: Callable[[str], None] | None = None, 

2032 ) -> None: 

2033 """Add pack data to this object store. 

2034 

2035 Args: 

2036 count: Number of items to add 

2037 unpacked_objects: Iterator of UnpackedObject instances 

2038 progress: Optional progress reporting function. 

2039 """ 

2040 if count == 0: 

2041 return 

2042 

2043 # Since MemoryObjectStore doesn't support pack files, we need to 

2044 # extract individual objects. To handle deltas properly, we write 

2045 # to a temporary pack and then use PackInflater to resolve them. 

2046 f, commit, abort = self.add_pack() 

2047 try: 

2048 write_pack_data( 

2049 f.write, 

2050 unpacked_objects, 

2051 num_records=count, 

2052 progress=progress, 

2053 ) 

2054 except BaseException: 

2055 abort() 

2056 raise 

2057 else: 

2058 commit() 

2059 

2060 def add_thin_pack( 

2061 self, 

2062 read_all: Callable[[], bytes], 

2063 read_some: Callable[[int], bytes], 

2064 progress: Callable[[str], None] | None = None, 

2065 ) -> None: 

2066 """Add a new thin pack to this object store. 

2067 

2068 Thin packs are packs that contain deltas with parents that exist 

2069 outside the pack. Because this object store doesn't support packs, we 

2070 extract and add the individual objects. 

2071 

2072 Args: 

2073 read_all: Read function that blocks until the number of 

2074 requested bytes are read. 

2075 read_some: Read function that returns at least one byte, but may 

2076 not return the number of bytes requested. 

2077 progress: Optional progress reporting function. 

2078 """ 

2079 f, commit, abort = self.add_pack() 

2080 try: 

2081 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type] 

2082 copier.verify() 

2083 except BaseException: 

2084 abort() 

2085 raise 

2086 else: 

2087 commit() 

2088 
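# --- Editor's sketch (not part of object_store.py): MemoryObjectStore
# round-trip; add_object() and __getitem__ both copy, so callers cannot
# mutate stored objects in place.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

mem = MemoryObjectStore()
b = Blob.from_string(b"in memory")
mem.add_object(b)
assert mem[b.id].data == b"in memory"
assert not mem.contains_packed(b.id)   # memory stores never pack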

2089 

2090class ObjectIterator(Protocol): 

2091 """Interface for iterating over objects.""" 

2092 

2093 def iterobjects(self) -> Iterator[ShaFile]: 

2094 """Iterate over all objects. 

2095 

2096 Returns: 

2097 Iterator of ShaFile objects 

2098 """ 

2099 raise NotImplementedError(self.iterobjects) 

2100 

2101 

2102def tree_lookup_path( 

2103 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes 

2104) -> tuple[int, bytes]: 

2105 """Look up an object in a Git tree. 

2106 

2107 Args: 

2108 lookup_obj: Callback for retrieving object by SHA1 

2109 root_sha: SHA1 of the root tree 

2110 path: Path to lookup 

2111 Returns: A tuple of (mode, SHA) of the resulting path. 

2112 """ 

2113 tree = lookup_obj(root_sha) 

2114 if not isinstance(tree, Tree): 

2115 raise NotTreeError(root_sha) 

2116 return tree.lookup_path(lookup_obj, path) 

2117 
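# --- Editor's sketch (not part of object_store.py): resolving a nested path
# inside a tree; `store`, `root_tree_sha` and the path are illustrative.
mode, sha = tree_lookup_path(store.__getitem__, root_tree_sha, b"docs/README")
blob = store[sha]   # raises NotTreeError if root_tree_sha is not a tree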

2118 

2119def _collect_filetree_revs( 

2120 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

2121) -> None: 

2122 """Collect SHA1s of files and directories for specified tree. 

2123 

2124 Args: 

2125 obj_store: Object store to get objects by SHA from 

2126 tree_sha: tree reference to walk 

2127 kset: set to fill with references to files and directories 

2128 """ 

2129 filetree = obj_store[tree_sha] 

2130 assert isinstance(filetree, Tree) 

2131 for name, mode, sha in filetree.iteritems(): 

2132 assert mode is not None 

2133 assert sha is not None 

2134 if not S_ISGITLINK(mode) and sha not in kset: 

2135 kset.add(sha) 

2136 if stat.S_ISDIR(mode): 

2137 _collect_filetree_revs(obj_store, sha, kset) 

2138 

2139 

2140def _split_commits_and_tags( 

2141 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False 

2142) -> tuple[set[bytes], set[bytes], set[bytes]]: 

2143 """Split object id list into three lists with commit, tag, and other SHAs. 

2144 

2145 Commits referenced by tags are included into commits 

2146 list as well. Only SHA1s known in this repository will get 

2147 through, and unless the ignore_unknown argument is True, KeyError 

 2148 is raised for SHA1s missing from the repository. 

2149 

2150 Args: 

2151 obj_store: Object store to get objects by SHA1 from 

2152 lst: Collection of commit and tag SHAs 

2153 ignore_unknown: True to skip SHA1 missing in the repository 

2154 silently. 

2155 Returns: A tuple of (commits, tags, others) SHA1s 

2156 """ 

2157 commits: set[bytes] = set() 

2158 tags: set[bytes] = set() 

2159 others: set[bytes] = set() 

2160 for e in lst: 

2161 try: 

2162 o = obj_store[e] 

2163 except KeyError: 

2164 if not ignore_unknown: 

2165 raise 

2166 else: 

2167 if isinstance(o, Commit): 

2168 commits.add(e) 

2169 elif isinstance(o, Tag): 

2170 tags.add(e) 

2171 tagged = o.object[1] 

2172 c, t, os = _split_commits_and_tags( 

2173 obj_store, [tagged], ignore_unknown=ignore_unknown 

2174 ) 

2175 commits |= c 

2176 tags |= t 

2177 others |= os 

2178 else: 

2179 others.add(e) 

2180 return (commits, tags, others) 

2181 

2182 

2183class MissingObjectFinder: 

2184 """Find the objects missing from another object store. 

2185 

2186 Args: 

2187 object_store: Object store containing at least all objects to be 

2188 sent 

2189 haves: SHA1s of commits not to send (already present in target) 

2190 wants: SHA1s of commits to send 

2191 progress: Optional function to report progress to. 

2192 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2193 sha for including tags. 

2194 get_parents: Optional function for getting the parents of a commit. 

2195 """ 

2196 

2197 def __init__( 

2198 self, 

2199 object_store: BaseObjectStore, 

2200 haves: Iterable[bytes], 

2201 wants: Iterable[bytes], 

2202 *, 

2203 shallow: Set[bytes] | None = None, 

2204 progress: Callable[[bytes], None] | None = None, 

2205 get_tagged: Callable[[], dict[bytes, bytes]] | None = None, 

2206 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2207 ) -> None: 

2208 """Initialize a MissingObjectFinder. 

2209 

2210 Args: 

2211 object_store: Object store containing objects 

2212 haves: SHA1s of objects already present in target 

2213 wants: SHA1s of objects to send 

2214 shallow: Set of shallow commit SHA1s 

2215 progress: Optional progress reporting callback 

2216 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2217 get_parents: Function for getting commit parents 

2218 """ 

2219 self.object_store = object_store 

2220 if shallow is None: 

2221 shallow = set() 

2222 self._get_parents = get_parents 

2223 # process Commits and Tags differently 

2224 # Note: while haves may list commits/tags not available locally 

 2225 # (such SHAs are filtered out by _split_commits_and_tags), 

 2226 # wants must list only known SHAs; otherwise 

 2227 # _split_commits_and_tags raises KeyError 

2228 have_commits, have_tags, have_others = _split_commits_and_tags( 

2229 object_store, haves, ignore_unknown=True 

2230 ) 

2231 want_commits, want_tags, want_others = _split_commits_and_tags( 

2232 object_store, wants, ignore_unknown=False 

2233 ) 

2234 # all_ancestors is a set of commits that shall not be sent 

2235 # (complete repository up to 'haves') 

2236 all_ancestors = _collect_ancestors( 

2237 object_store, 

2238 have_commits, 

2239 shallow=frozenset(shallow), 

2240 get_parents=self._get_parents, 

2241 )[0] 

2242 # all_missing - complete set of commits between haves and wants 

2243 # common - commits from all_ancestors we hit into while 

2244 # traversing parent hierarchy of wants 

2245 missing_commits, common_commits = _collect_ancestors( 

2246 object_store, 

2247 want_commits, 

2248 frozenset(all_ancestors), 

2249 shallow=frozenset(shallow), 

2250 get_parents=self._get_parents, 

2251 ) 

2252 self.remote_has: set[bytes] = set() 

2253 # Now, fill sha_done with commits and revisions of 

2254 # files and directories known to be present both locally 

 2255 # and on the target. Thus these commits and files 

2256 # won't get selected for fetch 

2257 for h in common_commits: 

2258 self.remote_has.add(h) 

2259 cmt = object_store[h] 

2260 assert isinstance(cmt, Commit) 

2261 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2262 # record tags we have as visited, too 

2263 for t in have_tags: 

2264 self.remote_has.add(t) 

2265 self.sha_done = set(self.remote_has) 

2266 

2267 # in fact, what we 'want' is commits, tags, and others 

2268 # we've found missing 

2269 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = { 

2270 (w, None, Commit.type_num, False) for w in missing_commits 

2271 } 

2272 missing_tags = want_tags.difference(have_tags) 

2273 self.objects_to_send.update( 

2274 {(w, None, Tag.type_num, False) for w in missing_tags} 

2275 ) 

2276 missing_others = want_others.difference(have_others) 

2277 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2278 

2279 if progress is None: 

2280 self.progress: Callable[[bytes], None] = lambda x: None 

2281 else: 

2282 self.progress = progress 

2283 self._tagged = (get_tagged and get_tagged()) or {} 

2284 

2285 def get_remote_has(self) -> set[bytes]: 

2286 """Get the set of SHAs the remote has. 

2287 

2288 Returns: 

2289 Set of SHA1s that the remote side already has 

2290 """ 

2291 return self.remote_has 

2292 

2293 def add_todo( 

2294 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]] 

2295 ) -> None: 

2296 """Add objects to the todo list. 

2297 

2298 Args: 

2299 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2300 """ 

2301 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2302 

2303 def __next__(self) -> tuple[bytes, PackHint | None]: 

2304 """Get the next object to send. 

2305 

2306 Returns: 

2307 Tuple of (sha, pack_hint) 

2308 

2309 Raises: 

2310 StopIteration: When no more objects to send 

2311 """ 

2312 while True: 

2313 if not self.objects_to_send: 

2314 self.progress( 

2315 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2316 ) 

2317 raise StopIteration 

2318 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2319 if sha not in self.sha_done: 

2320 break 

2321 if not leaf: 

2322 o = self.object_store[sha] 

2323 if isinstance(o, Commit): 

2324 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2325 elif isinstance(o, Tree): 

2326 todos = [] 

2327 for n, m, s in o.iteritems(): 

2328 assert m is not None 

2329 assert n is not None 

2330 assert s is not None 

2331 if not S_ISGITLINK(m): 

2332 todos.append( 

2333 ( 

2334 s, 

2335 n, 

2336 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2337 not stat.S_ISDIR(m), 

2338 ) 

2339 ) 

2340 self.add_todo(todos) 

2341 elif isinstance(o, Tag): 

2342 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2343 if sha in self._tagged: 

2344 self.add_todo([(self._tagged[sha], None, None, True)]) 

2345 self.sha_done.add(sha) 

2346 if len(self.sha_done) % 1000 == 0: 

2347 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2348 if type_num is None: 

2349 pack_hint = None 

2350 else: 

2351 pack_hint = (type_num, name) 

2352 return (sha, pack_hint) 

2353 

2354 def __iter__(self) -> Iterator[tuple[bytes, PackHint | None]]: 

2355 """Return iterator over objects to send. 

2356 

2357 Returns: 

2358 Self (this class implements the iterator protocol) 

2359 """ 

2360 return self 

2361 
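# --- Editor's sketch (not part of object_store.py): enumerating the objects a
# client still needs; `store`, `haves` and `wants` (hex commit SHAs known to
# the store) are illustrative.
finder = MissingObjectFinder(store, haves=haves, wants=wants)
for sha, pack_hint in finder:
    pass   # each sha (plus optional pack hint) would be fed to pack generation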

2362 

2363class ObjectStoreGraphWalker: 

2364 """Graph walker that finds what commits are missing from an object store.""" 

2365 

2366 heads: set[ObjectID] 

2367 """Revisions without descendants in the local repo.""" 

2368 

2369 get_parents: Callable[[ObjectID], list[ObjectID]] 

2370 """Function to retrieve parents in the local repo.""" 

2371 

2372 shallow: set[ObjectID] 

2373 

2374 def __init__( 

2375 self, 

2376 local_heads: Iterable[ObjectID], 

2377 get_parents: Callable[[ObjectID], list[ObjectID]], 

2378 shallow: set[ObjectID] | None = None, 

2379 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None] 

2380 | None = None, 

2381 ) -> None: 

2382 """Create a new instance. 

2383 

2384 Args: 

2385 local_heads: Heads to start search with 

2386 get_parents: Function for finding the parents of a SHA1. 

2387 shallow: Set of shallow commits. 

2388 update_shallow: Function to update shallow commits. 

2389 """ 

2390 self.heads = set(local_heads) 

2391 self.get_parents = get_parents 

2392 self.parents: dict[ObjectID, list[ObjectID] | None] = {} 

2393 if shallow is None: 

2394 shallow = set() 

2395 self.shallow = shallow 

2396 self.update_shallow = update_shallow 

2397 

2398 def nak(self) -> None: 

2399 """Nothing in common was found.""" 

2400 

2401 def ack(self, sha: ObjectID) -> None: 

2402 """Ack that a revision and its ancestors are present in the source.""" 

2403 if len(sha) != 40: 

2404 raise ValueError(f"unexpected sha {sha!r} received") 

2405 ancestors = {sha} 

2406 

2407 # stop if we run out of heads to remove 

2408 while self.heads: 

2409 for a in ancestors: 

2410 if a in self.heads: 

2411 self.heads.remove(a) 

2412 

2413 # collect all ancestors 

2414 new_ancestors = set() 

2415 for a in ancestors: 

2416 ps = self.parents.get(a) 

2417 if ps is not None: 

2418 new_ancestors.update(ps) 

2419 self.parents[a] = None 

2420 

2421 # no more ancestors; stop 

2422 if not new_ancestors: 

2423 break 

2424 

2425 ancestors = new_ancestors 

2426 

2427 def next(self) -> ObjectID | None: 

2428 """Iterate over ancestors of heads in the target.""" 

2429 if self.heads: 

2430 ret = self.heads.pop() 

2431 try: 

2432 ps = self.get_parents(ret) 

2433 except KeyError: 

2434 return None 

2435 self.parents[ret] = ps 

2436 self.heads.update([p for p in ps if p not in self.parents]) 

2437 return ret 

2438 return None 

2439 

2440 __next__ = next 

2441 
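# --- Editor's sketch (not part of object_store.py): driving fetch negotiation
# with a graph walker; `store` and `local_heads` (hex commit SHAs) are
# illustrative.
def get_parents(sha):
    return store[sha].parents

walker = ObjectStoreGraphWalker(local_heads, get_parents)
candidate = next(walker)      # next "have" to advertise, or None when exhausted
if candidate is not None:
    walker.ack(candidate)     # remote confirmed it already has this commit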

2442 

2443def commit_tree_changes( 

2444 object_store: BaseObjectStore, 

2445 tree: ObjectID | Tree, 

2446 changes: Sequence[tuple[bytes, int | None, bytes | None]], 

2447) -> ObjectID: 

2448 """Commit a specified set of changes to a tree structure. 

2449 

2450 This will apply a set of changes on top of an existing tree, storing new 

2451 objects in object_store. 

2452 

2453 changes are a list of tuples with (path, mode, object_sha). 

2454 Paths can be both blobs and trees. Setting the mode and 

 2455 object sha to None deletes the path. 

2456 

2457 This method works especially well if there are only a small 

2458 number of changes to a big tree. For a large number of changes 

2459 to a large tree, use e.g. commit_tree. 

2460 

2461 Args: 

2462 object_store: Object store to store new objects in 

2463 and retrieve old ones from. 

2464 tree: Original tree root (SHA or Tree object) 

2465 changes: changes to apply 

2466 Returns: New tree root object 

2467 """ 

2468 # TODO(jelmer): Save up the objects and add them using .add_objects 

2469 # rather than with individual calls to .add_object. 

2470 # Handle both Tree object and SHA 

2471 if isinstance(tree, Tree): 

2472 tree_obj: Tree = tree 

2473 else: 

2474 sha_obj = object_store[tree] 

2475 assert isinstance(sha_obj, Tree) 

2476 tree_obj = sha_obj 

2477 nested_changes: dict[bytes, list[tuple[bytes, int | None, bytes | None]]] = {} 

2478 for path, new_mode, new_sha in changes: 

2479 try: 

2480 (dirname, subpath) = path.split(b"/", 1) 

2481 except ValueError: 

2482 if new_sha is None: 

2483 del tree_obj[path] 

2484 else: 

2485 assert new_mode is not None 

2486 tree_obj[path] = (new_mode, new_sha) 

2487 else: 

2488 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2489 for name, subchanges in nested_changes.items(): 

2490 try: 

2491 orig_subtree_id: bytes | Tree = tree_obj[name][1] 

2492 except KeyError: 

2493 # For new directories, pass an empty Tree object 

2494 orig_subtree_id = Tree() 

2495 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges) 

2496 subtree = object_store[subtree_id] 

2497 assert isinstance(subtree, Tree) 

2498 if len(subtree) == 0: 

2499 del tree_obj[name] 

2500 else: 

2501 tree_obj[name] = (stat.S_IFDIR, subtree.id) 

2502 object_store.add_object(tree_obj) 

2503 return tree_obj.id 

2504 
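# --- Editor's sketch (not part of object_store.py): applying two changes on
# top of an existing tree held in a MemoryObjectStore; paths are illustrative.
import stat
from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b"old\n")
store.add_object(old_blob)
tree = Tree()
tree.add(b"obsolete.txt", stat.S_IFREG | 0o644, old_blob.id)
store.add_object(tree)

new_blob = Blob.from_string(b"new contents\n")
store.add_object(new_blob)
new_tree_id = commit_tree_changes(
    store,
    tree.id,
    [
        (b"docs/readme.txt", stat.S_IFREG | 0o644, new_blob.id),
        (b"obsolete.txt", None, None),  # None mode/sha removes the path
    ],
)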

2505 

2506class OverlayObjectStore(BaseObjectStore): 

2507 """Object store that can overlay multiple object stores.""" 

2508 

2509 def __init__( 

2510 self, 

2511 bases: list[BaseObjectStore], 

2512 add_store: BaseObjectStore | None = None, 

2513 ) -> None: 

2514 """Initialize an OverlayObjectStore. 

2515 

2516 Args: 

2517 bases: List of base object stores to overlay 

2518 add_store: Optional store to write new objects to 

2519 """ 

2520 self.bases = bases 

2521 self.add_store = add_store 

2522 

2523 def add_object(self, object: ShaFile) -> None: 

2524 """Add a single object to the store. 

2525 

2526 Args: 

2527 object: Object to add 

2528 

2529 Raises: 

2530 NotImplementedError: If no add_store was provided 

2531 """ 

2532 if self.add_store is None: 

2533 raise NotImplementedError(self.add_object) 

2534 return self.add_store.add_object(object) 

2535 

2536 def add_objects( 

2537 self, 

2538 objects: Sequence[tuple[ShaFile, str | None]], 

2539 progress: Callable[[str], None] | None = None, 

2540 ) -> Pack | None: 

2541 """Add multiple objects to the store. 

2542 

2543 Args: 

2544 objects: Iterator of objects to add 

2545 progress: Optional progress reporting callback 

2546 

2547 Raises: 

2548 NotImplementedError: If no add_store was provided 

2549 """ 

2550 if self.add_store is None: 

2551 raise NotImplementedError(self.add_object) 

2552 return self.add_store.add_objects(objects, progress) 

2553 

2554 @property 

2555 def packs(self) -> list[Pack]: 

2556 """Get the list of packs from all overlaid stores. 

2557 

2558 Returns: 

2559 Combined list of packs from all base stores 

2560 """ 

2561 ret = [] 

2562 for b in self.bases: 

2563 ret.extend(b.packs) 

2564 return ret 

2565 

2566 def __iter__(self) -> Iterator[ObjectID]: 

2567 """Iterate over all object SHAs in the overlaid stores. 

2568 

2569 Returns: 

2570 Iterator of object SHAs (deduped across stores) 

2571 """ 

2572 done = set() 

2573 for b in self.bases: 

2574 for o_id in b: 

2575 if o_id not in done: 

2576 yield o_id 

2577 done.add(o_id) 

2578 

2579 def iterobjects_subset( 

2580 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2581 ) -> Iterator[ShaFile]: 

2582 """Iterate over a subset of objects from the overlaid stores. 

2583 

2584 Args: 

2585 shas: Iterable of object SHAs to retrieve 

2586 allow_missing: If True, skip missing objects; if False, raise KeyError 

2587 

2588 Returns: 

2589 Iterator of ShaFile objects 

2590 

2591 Raises: 

2592 KeyError: If an object is missing and allow_missing is False 

2593 """ 

2594 todo = set(shas) 

2595 found: set[bytes] = set() 

2596 

2597 for b in self.bases: 

2598 # Create a copy of todo for each base to avoid modifying 

2599 # the set while iterating through it 

2600 current_todo = todo - found 

2601 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2602 yield o 

2603 found.add(o.id) 

2604 

2605 # Check for any remaining objects not found 

2606 missing = todo - found 

2607 if missing and not allow_missing: 

2608 raise KeyError(next(iter(missing))) 

2609 

2610 def iter_unpacked_subset( 

2611 self, 

2612 shas: Iterable[bytes], 

2613 include_comp: bool = False, 

2614 allow_missing: bool = False, 

2615 convert_ofs_delta: bool = True, 

2616 ) -> Iterator[UnpackedObject]: 

2617 """Iterate over unpacked objects from the overlaid stores. 

2618 

2619 Args: 

2620 shas: Iterable of object SHAs to retrieve 

2621 include_comp: Whether to include compressed data 

2622 allow_missing: If True, skip missing objects; if False, raise KeyError 

2623 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2624 

2625 Returns: 

2626 Iterator of unpacked objects 

2627 

2628 Raises: 

2629 KeyError: If an object is missing and allow_missing is False 

2630 """ 

2631 todo = set(shas) 

2632 for b in self.bases: 

2633 for o in b.iter_unpacked_subset( 

2634 todo, 

2635 include_comp=include_comp, 

2636 allow_missing=True, 

2637 convert_ofs_delta=convert_ofs_delta, 

2638 ): 

2639 yield o 

2640 todo.remove(o.sha()) 

2641 if todo and not allow_missing: 

2642 raise KeyError(next(iter(todo))) 

2643 

2644 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]: 

2645 """Get the raw object data from the overlaid stores. 

2646 

2647 Args: 

2648 sha_id: SHA of the object 

2649 

2650 Returns: 

2651 Tuple of (type_num, raw_data) 

2652 

2653 Raises: 

2654 KeyError: If object not found in any base store 

2655 """ 

2656 for b in self.bases: 

2657 try: 

2658 return b.get_raw(sha_id) 

2659 except KeyError: 

2660 pass 

2661 raise KeyError(sha_id) 

2662 

2663 def contains_packed(self, sha: bytes) -> bool: 

2664 """Check if an object is packed in any base store. 

2665 

2666 Args: 

2667 sha: SHA of the object 

2668 

2669 Returns: 

2670 True if object is packed in any base store 

2671 """ 

2672 for b in self.bases: 

2673 if b.contains_packed(sha): 

2674 return True 

2675 return False 

2676 

2677 def contains_loose(self, sha: bytes) -> bool: 

2678 """Check if an object is loose in any base store. 

2679 

2680 Args: 

2681 sha: SHA of the object 

2682 

2683 Returns: 

2684 True if object is loose in any base store 

2685 """ 

2686 for b in self.bases: 

2687 if b.contains_loose(sha): 

2688 return True 

2689 return False 

2690 
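# --- Editor's sketch (not part of object_store.py): overlaying two stores for
# reads while directing writes to the first; `primary`, `alternate` and
# `some_sha` are illustrative.
overlay = OverlayObjectStore([primary, alternate], add_store=primary)
type_num, raw = overlay.get_raw(some_sha)   # tried in order; KeyError if absent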

2691 

2692def read_packs_file(f: BinaryIO) -> Iterator[str]: 

2693 """Yield the packs listed in a packs file.""" 

2694 for line in f.read().splitlines(): 

2695 if not line: 

2696 continue 

2697 (kind, name) = line.split(b" ", 1) 

2698 if kind != b"P": 

2699 continue 

2700 yield os.fsdecode(name) 

2701 

2702 

2703class BucketBasedObjectStore(PackBasedObjectStore): 

2704 """Object store implementation that uses a bucket store like S3 as backend.""" 

2705 

2706 def _iter_loose_objects(self) -> Iterator[bytes]: 

2707 """Iterate over the SHAs of all loose objects.""" 

2708 return iter([]) 

2709 

2710 def _get_loose_object(self, sha: bytes) -> None: 

2711 return None 

2712 

2713 def delete_loose_object(self, sha: bytes) -> None: 

2714 """Delete a loose object (no-op for bucket stores). 

2715 

2716 Bucket-based stores don't have loose objects, so this is a no-op. 

2717 

2718 Args: 

2719 sha: SHA of the object to delete 

2720 """ 

2721 # Doesn't exist.. 

2722 

2723 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int: 

2724 """Pack loose objects. Returns number of objects packed. 

2725 

2726 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2727 

2728 Args: 

2729 progress: Optional progress reporting callback (ignored) 

2730 """ 

2731 return 0 

2732 

2733 def _remove_pack_by_name(self, name: str) -> None: 

2734 """Remove a pack by name. Subclasses should implement this.""" 

2735 raise NotImplementedError(self._remove_pack_by_name) 

2736 

2737 def _iter_pack_names(self) -> Iterator[str]: 

2738 raise NotImplementedError(self._iter_pack_names) 

2739 

2740 def _get_pack(self, name: str) -> Pack: 

2741 raise NotImplementedError(self._get_pack) 

2742 

2743 def _update_pack_cache(self) -> list[Pack]: 

2744 pack_files = set(self._iter_pack_names()) 

2745 

2746 # Open newly appeared pack files 

2747 new_packs = [] 

2748 for f in pack_files: 

2749 if f not in self._pack_cache: 

2750 pack = self._get_pack(f) 

2751 new_packs.append(pack) 

2752 self._pack_cache[f] = pack 

2753 # Remove disappeared pack files 

2754 for f in set(self._pack_cache) - pack_files: 

2755 self._pack_cache.pop(f).close() 

2756 return new_packs 

2757 

2758 def _upload_pack( 

2759 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO 

2760 ) -> None: 

2761 raise NotImplementedError 

2762 

2763 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2764 """Add a new pack to this object store. 

2765 

2766 Returns: Fileobject to write to, a commit function to 

2767 call when the pack is finished and an abort 

2768 function. 

2769 """ 

2770 import tempfile 

2771 

2772 pf = tempfile.SpooledTemporaryFile( 

2773 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2774 ) 

2775 

2776 def commit() -> Pack | None: 

2777 if pf.tell() == 0: 

2778 pf.close() 

2779 return None 

2780 

2781 pf.seek(0) 

2782 

2783 p = PackData(pf.name, pf) 

2784 entries = p.sorted_entries() 

2785 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2786 idxf = tempfile.SpooledTemporaryFile( 

2787 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2788 ) 

2789 checksum = p.get_stored_checksum() 

2790 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2791 idxf.seek(0) 

2792 idx = load_pack_index_file(basename + ".idx", idxf) 

2793 for pack in self.packs: 

2794 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2795 p.close() 

2796 idx.close() 

2797 pf.close() 

2798 idxf.close() 

2799 return pack 

2800 pf.seek(0) 

2801 idxf.seek(0) 

2802 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type] 

2803 final_pack = Pack.from_objects(p, idx) 

2804 self._add_cached_pack(basename, final_pack) 

2805 pf.close() 

2806 idxf.close() 

2807 return final_pack 

2808 

2809 return pf, commit, pf.close # type: ignore[return-value] 

2810 

2811 

2812def _collect_ancestors( 

2813 store: ObjectContainer, 

2814 heads: Iterable[ObjectID], 

2815 common: frozenset[ObjectID] = frozenset(), 

2816 shallow: frozenset[ObjectID] = frozenset(), 

2817 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2818) -> tuple[set[ObjectID], set[ObjectID]]: 

2819 """Collect all ancestors of heads up to (excluding) those in common. 

2820 

2821 Args: 

2822 store: Object store to get commits from 

2823 heads: commits to start from 

2824 common: commits to end at, or empty set to walk repository 

2825 completely 

2826 shallow: Set of shallow commits 

2827 get_parents: Optional function for getting the parents of a 

2828 commit. 

2829 Returns: a tuple (A, B), where A is the set of all commits reachable 

 2830 from heads but not present in common, and B is the set of common 

 2831 (shared) elements that are directly reachable from heads. 

2832 """ 

2833 bases = set() 

2834 commits = set() 

2835 queue: list[ObjectID] = [] 

2836 queue.extend(heads) 

2837 

2838 # Try to use commit graph if available 

2839 commit_graph = store.get_commit_graph() 

2840 

2841 while queue: 

2842 e = queue.pop(0) 

2843 if e in common: 

2844 bases.add(e) 

2845 elif e not in commits: 

2846 commits.add(e) 

2847 if e in shallow: 

2848 continue 

2849 

2850 # Try to use commit graph for parent lookup 

2851 parents = None 

2852 if commit_graph: 

2853 parents = commit_graph.get_parents(e) 

2854 

2855 if parents is None: 

2856 # Fall back to loading the object 

2857 cmt = store[e] 

2858 assert isinstance(cmt, Commit) 

2859 parents = get_parents(cmt) 

2860 

2861 queue.extend(parents) 

2862 return (commits, bases) 

2863 

2864 

2865def iter_tree_contents( 

2866 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False 

2867) -> Iterator[TreeEntry]: 

2868 """Iterate the contents of a tree and all subtrees. 

2869 

2870 Iteration is depth-first pre-order, as in e.g. os.walk. 

2871 

2872 Args: 

2873 store: Object store to get trees from 

2874 tree_id: SHA1 of the tree. 

2875 include_trees: If True, include tree objects in the iteration. 

2876 

2877 Yields: TreeEntry namedtuples for all the objects in a tree. 

2878 """ 

2879 if tree_id is None: 

2880 return 

2881 # This could be fairly easily generalized to >2 trees if we find a use 

2882 # case. 

2883 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2884 while todo: 

2885 entry = todo.pop() 

2886 assert entry.mode is not None 

2887 if stat.S_ISDIR(entry.mode): 

2888 extra = [] 

2889 assert entry.sha is not None 

2890 tree = store[entry.sha] 

2891 assert isinstance(tree, Tree) 

2892 for subentry in tree.iteritems(name_order=True): 

2893 assert entry.path is not None 

2894 extra.append(subentry.in_path(entry.path)) 

2895 todo.extend(reversed(extra)) 

2896 if not stat.S_ISDIR(entry.mode) or include_trees: 

2897 yield entry 

2898 
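# --- Editor's sketch (not part of object_store.py): walking a small tree in
# depth-first pre-order with a MemoryObjectStore.
import stat
from dulwich.object_store import MemoryObjectStore, iter_tree_contents
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"content\n")
sub = Tree()
sub.add(b"file.txt", stat.S_IFREG | 0o644, blob.id)
root = Tree()
root.add(b"dir", stat.S_IFDIR, sub.id)
for obj in (blob, sub, root):
    store.add_object(obj)

for entry in iter_tree_contents(store, root.id):
    print(entry.path.decode(), oct(entry.mode))   # dir/file.txt 0o100644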

2899 

2900def iter_commit_contents( 

2901 store: ObjectContainer, 

2902 commit: Commit | bytes, 

2903 *, 

2904 include: Sequence[str | bytes | Path] | None = None, 

2905) -> Iterator[TreeEntry]: 

2906 """Iterate the contents of the repository at the specified commit. 

2907 

2908 This is a wrapper around iter_tree_contents() and 

2909 tree_lookup_path() to simplify the common task of getting the 

2910 contents of a repo at a particular commit. See also 

2911 dulwich.index.build_file_from_blob() for writing individual files 

2912 to disk. 

2913 

2914 Args: 

2915 store: Object store to get trees from 

2916 commit: Commit object, or SHA1 of a commit 

2917 include: if provided, only the entries whose paths are in the 

2918 list, or whose parent tree is in the list, will be 

2919 included. Note that duplicate or overlapping paths 

2920 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

2921 

2922 Yields: TreeEntry namedtuples for all matching files in a commit. 

2923 """ 

2924 sha = commit.id if isinstance(commit, Commit) else commit 

2925 if not isinstance(obj := store[sha], Commit): 

2926 raise TypeError( 

2927 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

2928 ) 

2929 commit = obj 

2930 encoding = commit.encoding or "utf-8" 

2931 include_bytes: list[bytes] = ( 

2932 [ 

2933 path if isinstance(path, bytes) else str(path).encode(encoding) 

2934 for path in include 

2935 ] 

2936 if include is not None 

2937 else [b""] 

2938 ) 

2939 

2940 for path in include_bytes: 

2941 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

2942 # Iterate all contained files if path points to a dir, otherwise just get that 

2943 # single file 

2944 if isinstance(store[obj_id], Tree): 

2945 for entry in iter_tree_contents(store, obj_id): 

2946 yield entry.in_path(path) 

2947 else: 

2948 yield TreeEntry(path, mode, obj_id) 

2949 
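# --- Editor's sketch (not part of object_store.py): restricting iteration to
# one subdirectory of a commit; `store`, `commit_sha` and the path are
# illustrative.
for entry in iter_commit_contents(store, commit_sha, include=[b"src"]):
    print(entry.path.decode())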

2950 

2951def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2952 """Peel all tags from a SHA. 

2953 

2954 Args: 

2955 store: Object store to get objects from 

2956 sha: The object SHA to peel. 

2957 Returns: A tuple of (unpeeled, peeled) objects, where peeled is the 

 2958 object obtained after peeling all intermediate tags; if the original 

 2959 SHA does not point to a tag, both elements are the same object. 

2960 """ 

2961 unpeeled = obj = store[sha] 

2962 obj_class = object_class(obj.type_name) 

2963 while obj_class is Tag: 

2964 assert isinstance(obj, Tag) 

2965 obj_class, sha = obj.object 

2966 obj = store[sha] 

2967 return unpeeled, obj
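# --- Editor's sketch (not part of object_store.py): peeling an annotated tag
# down to the object it ultimately points at; `store` and `tag_sha` are
# illustrative.
unpeeled, peeled = peel_sha(store, tag_sha)
assert peeled.type_name in (b"commit", b"tree", b"blob")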