Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1233 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Mapping, Sequence, Set 

33from contextlib import suppress 

34from io import BytesIO 

35from pathlib import Path 

36from typing import ( 

37 TYPE_CHECKING, 

38 BinaryIO, 

39 Callable, 

40 Optional, 

41 Protocol, 

42 Union, 

43) 

44 

45from .errors import NotTreeError 

46from .file import GitFile, _GitFile 

47from .objects import ( 

48 S_ISGITLINK, 

49 ZERO_SHA, 

50 Blob, 

51 Commit, 

52 ObjectID, 

53 ShaFile, 

54 Tag, 

55 Tree, 

56 TreeEntry, 

57 hex_to_filename, 

58 hex_to_sha, 

59 object_class, 

60 sha_to_hex, 

61 valid_hexsha, 

62) 

63from .pack import ( 

64 PACK_SPOOL_FILE_MAX_SIZE, 

65 ObjectContainer, 

66 Pack, 

67 PackData, 

68 PackedObjectContainer, 

69 PackFileDisappeared, 

70 PackHint, 

71 PackIndexer, 

72 PackInflater, 

73 PackStreamCopier, 

74 UnpackedObject, 

75 extend_pack, 

76 full_unpacked_object, 

77 generate_unpacked_objects, 

78 iter_sha1, 

79 load_pack_index_file, 

80 pack_objects_to_data, 

81 write_pack_data, 

82 write_pack_index, 

83) 

84from .protocol import DEPTH_INFINITE 

85from .refs import PEELED_TAG_SUFFIX, Ref 

86 

87if TYPE_CHECKING: 

88 from .commit_graph import CommitGraph 

89 from .config import Config 

90 from .diff_tree import RenameDetector 

91 

92 

93class GraphWalker(Protocol): 

94 """Protocol for graph walker objects.""" 

95 

96 def __next__(self) -> Optional[bytes]: 

97 """Return the next object SHA to visit.""" 

98 ... 

99 

100 def ack(self, sha: bytes) -> None: 

101 """Acknowledge that an object has been received.""" 

102 ... 

103 

104 def nak(self) -> None: 

105 """Nothing in common was found.""" 

106 ... 

107 

108 

109INFODIR = "info" 

110PACKDIR = "pack" 

111 

112# use permissions consistent with Git; just readable by everyone 

113# TODO: should packs also be non-writable on Windows? if so, that 

114# would requite some rather significant adjustments to the test suite 

115PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

116 

117# Grace period for cleaning up temporary pack files (in seconds) 

118# Matches git's default of 2 weeks 

119DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

120 

121 

122def find_shallow( 

123 store: ObjectContainer, heads: Iterable[bytes], depth: int 

124) -> tuple[set[bytes], set[bytes]]: 

125 """Find shallow commits according to a given depth. 

126 

127 Args: 

128 store: An ObjectStore for looking up objects. 

129 heads: Iterable of head SHAs to start walking from. 

130 depth: The depth of ancestors to include. A depth of one includes 

131 only the heads themselves. 

132 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

133 considered shallow and unshallow according to the arguments. Note that 

134 these sets may overlap if a commit is reachable along multiple paths. 

135 """ 

136 parents: dict[bytes, list[bytes]] = {} 

137 commit_graph = store.get_commit_graph() 

138 

139 def get_parents(sha: bytes) -> list[bytes]: 

140 result = parents.get(sha, None) 

141 if not result: 

142 # Try to use commit graph first if available 

143 if commit_graph: 

144 graph_parents = commit_graph.get_parents(sha) 

145 if graph_parents is not None: 

146 result = graph_parents 

147 parents[sha] = result 

148 return result 

149 # Fall back to loading the object 

150 commit = store[sha] 

151 assert isinstance(commit, Commit) 

152 result = commit.parents 

153 parents[sha] = result 

154 return result 

155 

156 todo = [] # stack of (sha, depth) 

157 for head_sha in heads: 

158 obj = store[head_sha] 

159 # Peel tags if necessary 

160 while isinstance(obj, Tag): 

161 _, sha = obj.object 

162 obj = store[sha] 

163 if isinstance(obj, Commit): 

164 todo.append((obj.id, 1)) 

165 

166 not_shallow = set() 

167 shallow = set() 

168 while todo: 

169 sha, cur_depth = todo.pop() 

170 if cur_depth < depth: 

171 not_shallow.add(sha) 

172 new_depth = cur_depth + 1 

173 todo.extend((p, new_depth) for p in get_parents(sha)) 

174 else: 

175 shallow.add(sha) 

176 

177 return shallow, not_shallow 

178 

179 

180def get_depth( 

181 store: ObjectContainer, 

182 head: bytes, 

183 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

184 max_depth: Optional[int] = None, 

185) -> int: 

186 """Return the current available depth for the given head. 

187 

188 For commits with multiple parents, the largest possible depth will be 

189 returned. 

190 

191 Args: 

192 store: Object store to search in 

193 head: commit to start from 

194 get_parents: optional function for getting the parents of a commit 

195 max_depth: maximum depth to search 

196 """ 

197 if head not in store: 

198 return 0 

199 current_depth = 1 

200 queue = [(head, current_depth)] 

201 commit_graph = store.get_commit_graph() 

202 

203 while queue and (max_depth is None or current_depth < max_depth): 

204 e, depth = queue.pop(0) 

205 current_depth = max(current_depth, depth) 

206 

207 # Try to use commit graph for parent lookup if available 

208 parents = None 

209 if commit_graph: 

210 parents = commit_graph.get_parents(e) 

211 

212 if parents is None: 

213 # Fall back to loading the object 

214 cmt = store[e] 

215 if isinstance(cmt, Tag): 

216 _cls, sha = cmt.object 

217 cmt = store[sha] 

218 parents = get_parents(cmt) 

219 

220 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

221 return current_depth 

222 

223 

224class PackContainer(Protocol): 

225 """Protocol for containers that can accept pack files.""" 

226 

227 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

228 """Add a new pack.""" 

229 

230 

231class BaseObjectStore: 

232 """Object store interface.""" 

233 

234 def determine_wants_all( 

235 self, refs: Mapping[Ref, ObjectID], depth: Optional[int] = None 

236 ) -> list[ObjectID]: 

237 """Determine which objects are wanted based on refs.""" 

238 

239 def _want_deepen(sha: bytes) -> bool: 

240 if not depth: 

241 return False 

242 if depth == DEPTH_INFINITE: 

243 return True 

244 return depth > self._get_depth(sha) 

245 

246 return [ 

247 sha 

248 for (ref, sha) in refs.items() 

249 if (sha not in self or _want_deepen(sha)) 

250 and not ref.endswith(PEELED_TAG_SUFFIX) 

251 and not sha == ZERO_SHA 

252 ] 

253 

254 def contains_loose(self, sha: bytes) -> bool: 

255 """Check if a particular object is present by SHA1 and is loose.""" 

256 raise NotImplementedError(self.contains_loose) 

257 

258 def contains_packed(self, sha: bytes) -> bool: 

259 """Check if a particular object is present by SHA1 and is packed.""" 

260 return False # Default implementation for stores that don't support packing 

261 

262 def __contains__(self, sha1: bytes) -> bool: 

263 """Check if a particular object is present by SHA1. 

264 

265 This method makes no distinction between loose and packed objects. 

266 """ 

267 return self.contains_loose(sha1) 

268 

269 @property 

270 def packs(self) -> list[Pack]: 

271 """Iterable of pack objects.""" 

272 raise NotImplementedError 

273 

274 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

275 """Obtain the raw text for an object. 

276 

277 Args: 

278 name: sha for the object. 

279 Returns: tuple with numeric type and object contents. 

280 """ 

281 raise NotImplementedError(self.get_raw) 

282 

283 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

284 """Obtain an object by SHA1.""" 

285 type_num, uncomp = self.get_raw(sha1) 

286 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

287 

288 def __iter__(self) -> Iterator[bytes]: 

289 """Iterate over the SHAs that are present in this store.""" 

290 raise NotImplementedError(self.__iter__) 

291 

292 def add_object(self, obj: ShaFile) -> None: 

293 """Add a single object to this object store.""" 

294 raise NotImplementedError(self.add_object) 

295 

296 def add_objects( 

297 self, 

298 objects: Sequence[tuple[ShaFile, Optional[str]]], 

299 progress: Optional[Callable[..., None]] = None, 

300 ) -> Optional["Pack"]: 

301 """Add a set of objects to this object store. 

302 

303 Args: 

304 objects: Iterable over a list of (object, path) tuples 

305 progress: Optional progress callback 

306 """ 

307 raise NotImplementedError(self.add_objects) 

308 

309 def tree_changes( 

310 self, 

311 source: Optional[bytes], 

312 target: Optional[bytes], 

313 want_unchanged: bool = False, 

314 include_trees: bool = False, 

315 change_type_same: bool = False, 

316 rename_detector: Optional["RenameDetector"] = None, 

317 paths: Optional[Sequence[bytes]] = None, 

318 ) -> Iterator[ 

319 tuple[ 

320 tuple[Optional[bytes], Optional[bytes]], 

321 tuple[Optional[int], Optional[int]], 

322 tuple[Optional[bytes], Optional[bytes]], 

323 ] 

324 ]: 

325 """Find the differences between the contents of two trees. 

326 

327 Args: 

328 source: SHA1 of the source tree 

329 target: SHA1 of the target tree 

330 want_unchanged: Whether unchanged files should be reported 

331 include_trees: Whether to include trees 

332 change_type_same: Whether to report files changing 

333 type in the same entry. 

334 rename_detector: RenameDetector object for detecting renames. 

335 paths: Optional list of paths to filter to (as bytes). 

336 Returns: Iterator over tuples with 

337 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

338 """ 

339 from .diff_tree import tree_changes 

340 

341 for change in tree_changes( 

342 self, 

343 source, 

344 target, 

345 want_unchanged=want_unchanged, 

346 include_trees=include_trees, 

347 change_type_same=change_type_same, 

348 rename_detector=rename_detector, 

349 paths=paths, 

350 ): 

351 old_path = change.old.path if change.old is not None else None 

352 new_path = change.new.path if change.new is not None else None 

353 old_mode = change.old.mode if change.old is not None else None 

354 new_mode = change.new.mode if change.new is not None else None 

355 old_sha = change.old.sha if change.old is not None else None 

356 new_sha = change.new.sha if change.new is not None else None 

357 yield ( 

358 (old_path, new_path), 

359 (old_mode, new_mode), 

360 (old_sha, new_sha), 

361 ) 

362 

363 def iter_tree_contents( 

364 self, tree_id: bytes, include_trees: bool = False 

365 ) -> Iterator[TreeEntry]: 

366 """Iterate the contents of a tree and all subtrees. 

367 

368 Iteration is depth-first pre-order, as in e.g. os.walk. 

369 

370 Args: 

371 tree_id: SHA1 of the tree. 

372 include_trees: If True, include tree objects in the iteration. 

373 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

374 tree. 

375 """ 

376 warnings.warn( 

377 "Please use dulwich.object_store.iter_tree_contents", 

378 DeprecationWarning, 

379 stacklevel=2, 

380 ) 

381 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

382 

383 def iterobjects_subset( 

384 self, shas: Iterable[bytes], *, allow_missing: bool = False 

385 ) -> Iterator[ShaFile]: 

386 """Iterate over a subset of objects in the store. 

387 

388 Args: 

389 shas: Iterable of object SHAs to retrieve 

390 allow_missing: If True, skip missing objects; if False, raise KeyError 

391 

392 Returns: 

393 Iterator of ShaFile objects 

394 

395 Raises: 

396 KeyError: If an object is missing and allow_missing is False 

397 """ 

398 for sha in shas: 

399 try: 

400 yield self[sha] 

401 except KeyError: 

402 if not allow_missing: 

403 raise 

404 

405 def iter_unpacked_subset( 

406 self, 

407 shas: Iterable[bytes], 

408 include_comp: bool = False, 

409 allow_missing: bool = False, 

410 convert_ofs_delta: bool = True, 

411 ) -> "Iterator[UnpackedObject]": 

412 """Iterate over unpacked objects for a subset of SHAs. 

413 

414 Default implementation that converts ShaFile objects to UnpackedObject. 

415 Subclasses may override for more efficient unpacked access. 

416 

417 Args: 

418 shas: Iterable of object SHAs to retrieve 

419 include_comp: Whether to include compressed data (ignored in base implementation) 

420 allow_missing: If True, skip missing objects; if False, raise KeyError 

421 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation) 

422 

423 Returns: 

424 Iterator of UnpackedObject instances 

425 

426 Raises: 

427 KeyError: If an object is missing and allow_missing is False 

428 """ 

429 from .pack import UnpackedObject 

430 

431 for sha in shas: 

432 try: 

433 obj = self[sha] 

434 # Convert ShaFile to UnpackedObject 

435 unpacked = UnpackedObject( 

436 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id 

437 ) 

438 yield unpacked 

439 except KeyError: 

440 if not allow_missing: 

441 raise 

442 

443 def find_missing_objects( 

444 self, 

445 haves: Iterable[bytes], 

446 wants: Iterable[bytes], 

447 shallow: Optional[Set[bytes]] = None, 

448 progress: Optional[Callable[..., None]] = None, 

449 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None, 

450 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

451 ) -> Iterator[tuple[bytes, Optional[PackHint]]]: 

452 """Find the missing objects required for a set of revisions. 

453 

454 Args: 

455 haves: Iterable over SHAs already in common. 

456 wants: Iterable over SHAs of objects to fetch. 

457 shallow: Set of shallow commit SHA1s to skip 

458 progress: Simple progress function that will be called with 

459 updated progress strings. 

460 get_tagged: Function that returns a dict of pointed-to sha -> 

461 tag sha for including tags. 

462 get_parents: Optional function for getting the parents of a 

463 commit. 

464 Returns: Iterator over (sha, path) pairs. 

465 """ 

466 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

467 finder = MissingObjectFinder( 

468 self, 

469 haves=haves, 

470 wants=wants, 

471 shallow=shallow, 

472 progress=progress, 

473 get_tagged=get_tagged, 

474 get_parents=get_parents, 

475 ) 

476 return iter(finder) 

477 

478 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

479 """Find which revisions this store has in common using graphwalker. 

480 

481 Args: 

482 graphwalker: A graphwalker object. 

483 Returns: List of SHAs that are in common 

484 """ 

485 haves = [] 

486 sha = next(graphwalker) 

487 while sha: 

488 if sha in self: 

489 haves.append(sha) 

490 graphwalker.ack(sha) 

491 sha = next(graphwalker) 

492 return haves 

493 

494 def generate_pack_data( 

495 self, 

496 have: Iterable[bytes], 

497 want: Iterable[bytes], 

498 shallow: Optional[Set[bytes]] = None, 

499 progress: Optional[Callable[..., None]] = None, 

500 ofs_delta: bool = True, 

501 ) -> tuple[int, Iterator[UnpackedObject]]: 

502 """Generate pack data objects for a set of wants/haves. 

503 

504 Args: 

505 have: List of SHA1s of objects that should not be sent 

506 want: List of SHA1s of objects that should be sent 

507 shallow: Set of shallow commit SHA1s to skip 

508 ofs_delta: Whether OFS deltas can be included 

509 progress: Optional progress reporting method 

510 """ 

511 # Note that the pack-specific implementation below is more efficient, 

512 # as it reuses deltas 

513 missing_objects = MissingObjectFinder( 

514 self, haves=have, wants=want, shallow=shallow, progress=progress 

515 ) 

516 object_ids = list(missing_objects) 

517 return pack_objects_to_data( 

518 [(self[oid], path) for oid, path in object_ids], 

519 ofs_delta=ofs_delta, 

520 progress=progress, 

521 ) 

522 

523 def peel_sha(self, sha: bytes) -> bytes: 

524 """Peel all tags from a SHA. 

525 

526 Args: 

527 sha: The object SHA to peel. 

528 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

529 intermediate tags; if the original ref does not point to a tag, 

530 this will equal the original SHA1. 

531 """ 

532 warnings.warn( 

533 "Please use dulwich.object_store.peel_sha()", 

534 DeprecationWarning, 

535 stacklevel=2, 

536 ) 

537 return peel_sha(self, sha)[1].id 

538 

539 def _get_depth( 

540 self, 

541 head: bytes, 

542 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

543 max_depth: Optional[int] = None, 

544 ) -> int: 

545 """Return the current available depth for the given head. 

546 

547 For commits with multiple parents, the largest possible depth will be 

548 returned. 

549 

550 Args: 

551 head: commit to start from 

552 get_parents: optional function for getting the parents of a commit 

553 max_depth: maximum depth to search 

554 """ 

555 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

556 

557 def close(self) -> None: 

558 """Close any files opened by this object store.""" 

559 # Default implementation is a NO-OP 

560 

561 def prune(self, grace_period: Optional[int] = None) -> None: 

562 """Prune/clean up this object store. 

563 

564 This includes removing orphaned temporary files and other 

565 housekeeping tasks. Default implementation is a NO-OP. 

566 

567 Args: 

568 grace_period: Grace period in seconds for removing temporary files. 

569 If None, uses the default grace period. 

570 """ 

571 # Default implementation is a NO-OP 

572 

573 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

574 """Iterate over all SHA1s that start with a given prefix. 

575 

576 The default implementation is a naive iteration over all objects. 

577 However, subclasses may override this method with more efficient 

578 implementations. 

579 """ 

580 for sha in self: 

581 if sha.startswith(prefix): 

582 yield sha 

583 

584 def get_commit_graph(self) -> Optional["CommitGraph"]: 

585 """Get the commit graph for this object store. 

586 

587 Returns: 

588 CommitGraph object if available, None otherwise 

589 """ 

590 return None 

591 

592 def write_commit_graph( 

593 self, refs: Optional[Sequence[bytes]] = None, reachable: bool = True 

594 ) -> None: 

595 """Write a commit graph file for this object store. 

596 

597 Args: 

598 refs: List of refs to include. If None, includes all refs from object store. 

599 reachable: If True, includes all commits reachable from refs. 

600 If False, only includes the direct ref targets. 

601 

602 Note: 

603 Default implementation does nothing. Subclasses should override 

604 this method to provide commit graph writing functionality. 

605 """ 

606 raise NotImplementedError(self.write_commit_graph) 

607 

608 def get_object_mtime(self, sha: bytes) -> float: 

609 """Get the modification time of an object. 

610 

611 Args: 

612 sha: SHA1 of the object 

613 

614 Returns: 

615 Modification time as seconds since epoch 

616 

617 Raises: 

618 KeyError: if the object is not found 

619 """ 

620 # Default implementation raises KeyError 

621 # Subclasses should override to provide actual mtime 

622 raise KeyError(sha) 

623 

624 

625class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer): 

626 """Object store that supports pack operations. 

627 

628 This is a base class for object stores that can handle pack files, 

629 including both disk-based and memory-based stores. 

630 """ 

631 

632 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

633 """Add a new pack to this object store. 

634 

635 Returns: Tuple of (file, commit_func, abort_func) 

636 """ 

637 raise NotImplementedError(self.add_pack) 

638 

639 def add_pack_data( 

640 self, 

641 count: int, 

642 unpacked_objects: Iterator["UnpackedObject"], 

643 progress: Optional[Callable[..., None]] = None, 

644 ) -> Optional["Pack"]: 

645 """Add pack data to this object store. 

646 

647 Args: 

648 count: Number of objects 

649 unpacked_objects: Iterator over unpacked objects 

650 progress: Optional progress callback 

651 """ 

652 raise NotImplementedError(self.add_pack_data) 

653 

654 def get_unpacked_object( 

655 self, sha1: bytes, *, include_comp: bool = False 

656 ) -> "UnpackedObject": 

657 """Get a raw unresolved object. 

658 

659 Args: 

660 sha1: SHA-1 hash of the object 

661 include_comp: Whether to include compressed data 

662 

663 Returns: 

664 UnpackedObject instance 

665 """ 

666 from .pack import UnpackedObject 

667 

668 obj = self[sha1] 

669 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks()) 

670 

671 def iterobjects_subset( 

672 self, shas: Iterable[bytes], *, allow_missing: bool = False 

673 ) -> Iterator[ShaFile]: 

674 """Iterate over a subset of objects. 

675 

676 Args: 

677 shas: Iterable of object SHAs to retrieve 

678 allow_missing: If True, skip missing objects 

679 

680 Returns: 

681 Iterator of ShaFile objects 

682 """ 

683 for sha in shas: 

684 try: 

685 yield self[sha] 

686 except KeyError: 

687 if not allow_missing: 

688 raise 

689 

690 

691class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer): 

692 """Object store that uses pack files for storage. 

693 

694 This class provides a base implementation for object stores that use 

695 Git pack files as their primary storage mechanism. It handles caching 

696 of open pack files and provides configuration for pack file operations. 

697 """ 

698 

699 def __init__( 

700 self, 

701 pack_compression_level: int = -1, 

702 pack_index_version: Optional[int] = None, 

703 pack_delta_window_size: Optional[int] = None, 

704 pack_window_memory: Optional[int] = None, 

705 pack_delta_cache_size: Optional[int] = None, 

706 pack_depth: Optional[int] = None, 

707 pack_threads: Optional[int] = None, 

708 pack_big_file_threshold: Optional[int] = None, 

709 ) -> None: 

710 """Initialize a PackBasedObjectStore. 

711 

712 Args: 

713 pack_compression_level: Compression level for pack files (-1 to 9) 

714 pack_index_version: Pack index version to use 

715 pack_delta_window_size: Window size for delta compression 

716 pack_window_memory: Maximum memory to use for delta window 

717 pack_delta_cache_size: Cache size for delta operations 

718 pack_depth: Maximum depth for pack deltas 

719 pack_threads: Number of threads to use for packing 

720 pack_big_file_threshold: Threshold for treating files as "big" 

721 """ 

722 self._pack_cache: dict[str, Pack] = {} 

723 self.pack_compression_level = pack_compression_level 

724 self.pack_index_version = pack_index_version 

725 self.pack_delta_window_size = pack_delta_window_size 

726 self.pack_window_memory = pack_window_memory 

727 self.pack_delta_cache_size = pack_delta_cache_size 

728 self.pack_depth = pack_depth 

729 self.pack_threads = pack_threads 

730 self.pack_big_file_threshold = pack_big_file_threshold 

731 

732 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

733 """Add a new pack to this object store.""" 

734 raise NotImplementedError(self.add_pack) 

735 

736 def add_pack_data( 

737 self, 

738 count: int, 

739 unpacked_objects: Iterator[UnpackedObject], 

740 progress: Optional[Callable[..., None]] = None, 

741 ) -> Optional["Pack"]: 

742 """Add pack data to this object store. 

743 

744 Args: 

745 count: Number of items to add 

746 unpacked_objects: Iterator of UnpackedObject instances 

747 progress: Optional progress callback 

748 """ 

749 if count == 0: 

750 # Don't bother writing an empty pack file 

751 return None 

752 f, commit, abort = self.add_pack() 

753 try: 

754 write_pack_data( 

755 f.write, 

756 unpacked_objects, 

757 num_records=count, 

758 progress=progress, 

759 compression_level=self.pack_compression_level, 

760 ) 

761 except BaseException: 

762 abort() 

763 raise 

764 else: 

765 return commit() 

766 

767 @property 

768 def alternates(self) -> list["BaseObjectStore"]: 

769 """Return list of alternate object stores.""" 

770 return [] 

771 

772 def contains_packed(self, sha: bytes) -> bool: 

773 """Check if a particular object is present by SHA1 and is packed. 

774 

775 This does not check alternates. 

776 """ 

777 for pack in self.packs: 

778 try: 

779 if sha in pack: 

780 return True 

781 except PackFileDisappeared: 

782 pass 

783 return False 

784 

785 def __contains__(self, sha: bytes) -> bool: 

786 """Check if a particular object is present by SHA1. 

787 

788 This method makes no distinction between loose and packed objects. 

789 """ 

790 if self.contains_packed(sha) or self.contains_loose(sha): 

791 return True 

792 for alternate in self.alternates: 

793 if sha in alternate: 

794 return True 

795 return False 

796 

797 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

798 """Add a newly appeared pack to the cache by path.""" 

799 prev_pack = self._pack_cache.get(base_name) 

800 if prev_pack is not pack: 

801 self._pack_cache[base_name] = pack 

802 if prev_pack: 

803 prev_pack.close() 

804 

805 def generate_pack_data( 

806 self, 

807 have: Iterable[bytes], 

808 want: Iterable[bytes], 

809 shallow: Optional[Set[bytes]] = None, 

810 progress: Optional[Callable[..., None]] = None, 

811 ofs_delta: bool = True, 

812 ) -> tuple[int, Iterator[UnpackedObject]]: 

813 """Generate pack data objects for a set of wants/haves. 

814 

815 Args: 

816 have: List of SHA1s of objects that should not be sent 

817 want: List of SHA1s of objects that should be sent 

818 shallow: Set of shallow commit SHA1s to skip 

819 ofs_delta: Whether OFS deltas can be included 

820 progress: Optional progress reporting method 

821 """ 

822 missing_objects = MissingObjectFinder( 

823 self, haves=have, wants=want, shallow=shallow, progress=progress 

824 ) 

825 remote_has = missing_objects.get_remote_has() 

826 object_ids = list(missing_objects) 

827 return len(object_ids), generate_unpacked_objects( 

828 self, 

829 object_ids, 

830 progress=progress, 

831 ofs_delta=ofs_delta, 

832 other_haves=remote_has, 

833 ) 

834 

835 def _clear_cached_packs(self) -> None: 

836 pack_cache = self._pack_cache 

837 self._pack_cache = {} 

838 while pack_cache: 

839 (_name, pack) = pack_cache.popitem() 

840 pack.close() 

841 

842 def _iter_cached_packs(self) -> Iterator[Pack]: 

843 return iter(self._pack_cache.values()) 

844 

845 def _update_pack_cache(self) -> list[Pack]: 

846 raise NotImplementedError(self._update_pack_cache) 

847 

848 def close(self) -> None: 

849 """Close the object store and release resources. 

850 

851 This method closes all cached pack files and frees associated resources. 

852 """ 

853 self._clear_cached_packs() 

854 

855 @property 

856 def packs(self) -> list[Pack]: 

857 """List with pack objects.""" 

858 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

859 

860 def count_pack_files(self) -> int: 

861 """Count the number of pack files. 

862 

863 Returns: 

864 Number of pack files (excluding those with .keep files) 

865 """ 

866 count = 0 

867 for pack in self.packs: 

868 # Check if there's a .keep file for this pack 

869 keep_path = pack._basename + ".keep" 

870 if not os.path.exists(keep_path): 

871 count += 1 

872 return count 

873 

874 def _iter_alternate_objects(self) -> Iterator[bytes]: 

875 """Iterate over the SHAs of all the objects in alternate stores.""" 

876 for alternate in self.alternates: 

877 yield from alternate 

878 

879 def _iter_loose_objects(self) -> Iterator[bytes]: 

880 """Iterate over the SHAs of all loose objects.""" 

881 raise NotImplementedError(self._iter_loose_objects) 

882 

883 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

884 raise NotImplementedError(self._get_loose_object) 

885 

886 def delete_loose_object(self, sha: bytes) -> None: 

887 """Delete a loose object. 

888 

889 This method only handles loose objects. For packed objects, 

890 use repack(exclude=...) to exclude them during repacking. 

891 """ 

892 raise NotImplementedError(self.delete_loose_object) 

893 

894 def _remove_pack(self, pack: "Pack") -> None: 

895 raise NotImplementedError(self._remove_pack) 

896 

897 def pack_loose_objects( 

898 self, progress: Optional[Callable[[str], None]] = None 

899 ) -> int: 

900 """Pack loose objects. 

901 

902 Args: 

903 progress: Optional progress reporting callback 

904 

905 Returns: Number of objects packed 

906 """ 

907 objects: list[tuple[ShaFile, None]] = [] 

908 for sha in self._iter_loose_objects(): 

909 obj = self._get_loose_object(sha) 

910 if obj is not None: 

911 objects.append((obj, None)) 

912 self.add_objects(objects, progress=progress) 

913 for obj, path in objects: 

914 self.delete_loose_object(obj.id) 

915 return len(objects) 

916 

917 def repack( 

918 self, 

919 exclude: Optional[Set[bytes]] = None, 

920 progress: Optional[Callable[[str], None]] = None, 

921 ) -> int: 

922 """Repack the packs in this repository. 

923 

924 Note that this implementation is fairly naive and currently keeps all 

925 objects in memory while it repacks. 

926 

927 Args: 

928 exclude: Optional set of object SHAs to exclude from repacking 

929 progress: Optional progress reporting callback 

930 """ 

931 if exclude is None: 

932 exclude = set() 

933 

934 loose_objects = set() 

935 excluded_loose_objects = set() 

936 for sha in self._iter_loose_objects(): 

937 if sha not in exclude: 

938 obj = self._get_loose_object(sha) 

939 if obj is not None: 

940 loose_objects.add(obj) 

941 else: 

942 excluded_loose_objects.add(sha) 

943 

944 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

945 old_packs = {p.name(): p for p in self.packs} 

946 for name, pack in old_packs.items(): 

947 objects.update( 

948 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

949 ) 

950 

951 # Only create a new pack if there are objects to pack 

952 if objects: 

953 # The name of the consolidated pack might match the name of a 

954 # pre-existing pack. Take care not to remove the newly created 

955 # consolidated pack. 

956 consolidated = self.add_objects(list(objects), progress=progress) 

957 if consolidated is not None: 

958 old_packs.pop(consolidated.name(), None) 

959 

960 # Delete loose objects that were packed 

961 for obj in loose_objects: 

962 if obj is not None: 

963 self.delete_loose_object(obj.id) 

964 # Delete excluded loose objects 

965 for sha in excluded_loose_objects: 

966 self.delete_loose_object(sha) 

967 for name, pack in old_packs.items(): 

968 self._remove_pack(pack) 

969 self._update_pack_cache() 

970 return len(objects) 

971 

972 def __iter__(self) -> Iterator[bytes]: 

973 """Iterate over the SHAs that are present in this store.""" 

974 self._update_pack_cache() 

975 for pack in self._iter_cached_packs(): 

976 try: 

977 yield from pack 

978 except PackFileDisappeared: 

979 pass 

980 yield from self._iter_loose_objects() 

981 yield from self._iter_alternate_objects() 

982 

983 def contains_loose(self, sha: bytes) -> bool: 

984 """Check if a particular object is present by SHA1 and is loose. 

985 

986 This does not check alternates. 

987 """ 

988 return self._get_loose_object(sha) is not None 

989 

990 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

991 """Obtain the raw fulltext for an object. 

992 

993 Args: 

994 name: sha for the object. 

995 Returns: tuple with numeric type and object contents. 

996 """ 

997 if name == ZERO_SHA: 

998 raise KeyError(name) 

999 if len(name) == 40: 

1000 sha = hex_to_sha(name) 

1001 hexsha = name 

1002 elif len(name) == 20: 

1003 sha = name 

1004 hexsha = None 

1005 else: 

1006 raise AssertionError(f"Invalid object name {name!r}") 

1007 for pack in self._iter_cached_packs(): 

1008 try: 

1009 return pack.get_raw(sha) 

1010 except (KeyError, PackFileDisappeared): 

1011 pass 

1012 if hexsha is None: 

1013 hexsha = sha_to_hex(name) 

1014 ret = self._get_loose_object(hexsha) 

1015 if ret is not None: 

1016 return ret.type_num, ret.as_raw_string() 

1017 # Maybe something else has added a pack with the object 

1018 # in the mean time? 

1019 for pack in self._update_pack_cache(): 

1020 try: 

1021 return pack.get_raw(sha) 

1022 except KeyError: 

1023 pass 

1024 for alternate in self.alternates: 

1025 try: 

1026 return alternate.get_raw(hexsha) 

1027 except KeyError: 

1028 pass 

1029 raise KeyError(hexsha) 

1030 

1031 def iter_unpacked_subset( 

1032 self, 

1033 shas: Iterable[bytes], 

1034 include_comp: bool = False, 

1035 allow_missing: bool = False, 

1036 convert_ofs_delta: bool = True, 

1037 ) -> Iterator[UnpackedObject]: 

1038 """Iterate over a subset of objects, yielding UnpackedObject instances. 

1039 

1040 Args: 

1041 shas: Set of object SHAs to retrieve 

1042 include_comp: Whether to include compressed data 

1043 allow_missing: If True, skip missing objects; if False, raise KeyError 

1044 convert_ofs_delta: Whether to convert OFS_DELTA objects 

1045 

1046 Returns: 

1047 Iterator of UnpackedObject instances 

1048 

1049 Raises: 

1050 KeyError: If an object is missing and allow_missing is False 

1051 """ 

1052 todo: set[bytes] = set(shas) 

1053 for p in self._iter_cached_packs(): 

1054 for unpacked in p.iter_unpacked_subset( 

1055 todo, 

1056 include_comp=include_comp, 

1057 allow_missing=True, 

1058 convert_ofs_delta=convert_ofs_delta, 

1059 ): 

1060 yield unpacked 

1061 hexsha = sha_to_hex(unpacked.sha()) 

1062 todo.remove(hexsha) 

1063 # Maybe something else has added a pack with the object 

1064 # in the mean time? 

1065 for p in self._update_pack_cache(): 

1066 for unpacked in p.iter_unpacked_subset( 

1067 todo, 

1068 include_comp=include_comp, 

1069 allow_missing=True, 

1070 convert_ofs_delta=convert_ofs_delta, 

1071 ): 

1072 yield unpacked 

1073 hexsha = sha_to_hex(unpacked.sha()) 

1074 todo.remove(hexsha) 

1075 for alternate in self.alternates: 

1076 assert isinstance(alternate, PackBasedObjectStore) 

1077 for unpacked in alternate.iter_unpacked_subset( 

1078 todo, 

1079 include_comp=include_comp, 

1080 allow_missing=True, 

1081 convert_ofs_delta=convert_ofs_delta, 

1082 ): 

1083 yield unpacked 

1084 hexsha = sha_to_hex(unpacked.sha()) 

1085 todo.remove(hexsha) 

1086 

1087 def iterobjects_subset( 

1088 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1089 ) -> Iterator[ShaFile]: 

1090 """Iterate over a subset of objects in the store. 

1091 

1092 This method searches for objects in pack files, alternates, and loose storage. 

1093 

1094 Args: 

1095 shas: Iterable of object SHAs to retrieve 

1096 allow_missing: If True, skip missing objects; if False, raise KeyError 

1097 

1098 Returns: 

1099 Iterator of ShaFile objects 

1100 

1101 Raises: 

1102 KeyError: If an object is missing and allow_missing is False 

1103 """ 

1104 todo: set[bytes] = set(shas) 

1105 for p in self._iter_cached_packs(): 

1106 for o in p.iterobjects_subset(todo, allow_missing=True): 

1107 yield o 

1108 todo.remove(o.id) 

1109 # Maybe something else has added a pack with the object 

1110 # in the mean time? 

1111 for p in self._update_pack_cache(): 

1112 for o in p.iterobjects_subset(todo, allow_missing=True): 

1113 yield o 

1114 todo.remove(o.id) 

1115 for alternate in self.alternates: 

1116 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

1117 yield o 

1118 todo.remove(o.id) 

1119 for oid in todo: 

1120 loose_obj: Optional[ShaFile] = self._get_loose_object(oid) 

1121 if loose_obj is not None: 

1122 yield loose_obj 

1123 elif not allow_missing: 

1124 raise KeyError(oid) 

1125 

1126 def get_unpacked_object( 

1127 self, sha1: bytes, *, include_comp: bool = False 

1128 ) -> UnpackedObject: 

1129 """Obtain the unpacked object. 

1130 

1131 Args: 

1132 sha1: sha for the object. 

1133 include_comp: Whether to include compression metadata. 

1134 """ 

1135 if sha1 == ZERO_SHA: 

1136 raise KeyError(sha1) 

1137 if len(sha1) == 40: 

1138 sha = hex_to_sha(sha1) 

1139 hexsha = sha1 

1140 elif len(sha1) == 20: 

1141 sha = sha1 

1142 hexsha = None 

1143 else: 

1144 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1145 for pack in self._iter_cached_packs(): 

1146 try: 

1147 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1148 except (KeyError, PackFileDisappeared): 

1149 pass 

1150 if hexsha is None: 

1151 hexsha = sha_to_hex(sha1) 

1152 # Maybe something else has added a pack with the object 

1153 # in the mean time? 

1154 for pack in self._update_pack_cache(): 

1155 try: 

1156 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1157 except KeyError: 

1158 pass 

1159 for alternate in self.alternates: 

1160 assert isinstance(alternate, PackBasedObjectStore) 

1161 try: 

1162 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1163 except KeyError: 

1164 pass 

1165 raise KeyError(hexsha) 

1166 

1167 def add_objects( 

1168 self, 

1169 objects: Sequence[tuple[ShaFile, Optional[str]]], 

1170 progress: Optional[Callable[[str], None]] = None, 

1171 ) -> Optional["Pack"]: 

1172 """Add a set of objects to this object store. 

1173 

1174 Args: 

1175 objects: Iterable over (object, path) tuples, should support 

1176 __len__. 

1177 progress: Optional progress reporting function. 

1178 Returns: Pack object of the objects written. 

1179 """ 

1180 count = len(objects) 

1181 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1182 return self.add_pack_data(count, record_iter, progress=progress) 

1183 

1184 

1185class DiskObjectStore(PackBasedObjectStore): 

1186 """Git-style object store that exists on disk.""" 

1187 

1188 path: Union[str, os.PathLike[str]] 

1189 pack_dir: Union[str, os.PathLike[str]] 

1190 _alternates: Optional[list["BaseObjectStore"]] 

1191 _commit_graph: Optional["CommitGraph"] 

1192 

1193 def __init__( 

1194 self, 

1195 path: Union[str, os.PathLike[str]], 

1196 loose_compression_level: int = -1, 

1197 pack_compression_level: int = -1, 

1198 pack_index_version: Optional[int] = None, 

1199 pack_delta_window_size: Optional[int] = None, 

1200 pack_window_memory: Optional[int] = None, 

1201 pack_delta_cache_size: Optional[int] = None, 

1202 pack_depth: Optional[int] = None, 

1203 pack_threads: Optional[int] = None, 

1204 pack_big_file_threshold: Optional[int] = None, 

1205 ) -> None: 

1206 """Open an object store. 

1207 

1208 Args: 

1209 path: Path of the object store. 

1210 loose_compression_level: zlib compression level for loose objects 

1211 pack_compression_level: zlib compression level for pack objects 

1212 pack_index_version: pack index version to use (1, 2, or 3) 

1213 pack_delta_window_size: sliding window size for delta compression 

1214 pack_window_memory: memory limit for delta window operations 

1215 pack_delta_cache_size: size of cache for delta operations 

1216 pack_depth: maximum delta chain depth 

1217 pack_threads: number of threads for pack operations 

1218 pack_big_file_threshold: threshold for treating files as big 

1219 """ 

1220 super().__init__( 

1221 pack_compression_level=pack_compression_level, 

1222 pack_index_version=pack_index_version, 

1223 pack_delta_window_size=pack_delta_window_size, 

1224 pack_window_memory=pack_window_memory, 

1225 pack_delta_cache_size=pack_delta_cache_size, 

1226 pack_depth=pack_depth, 

1227 pack_threads=pack_threads, 

1228 pack_big_file_threshold=pack_big_file_threshold, 

1229 ) 

1230 self.path = path 

1231 self.pack_dir = os.path.join(self.path, PACKDIR) 

1232 self._alternates = None 

1233 self.loose_compression_level = loose_compression_level 

1234 self.pack_compression_level = pack_compression_level 

1235 self.pack_index_version = pack_index_version 

1236 

1237 # Commit graph support - lazy loaded 

1238 self._commit_graph = None 

1239 self._use_commit_graph = True # Default to true 

1240 

1241 def __repr__(self) -> str: 

1242 """Return string representation of DiskObjectStore. 

1243 

1244 Returns: 

1245 String representation including the store path 

1246 """ 

1247 return f"<{self.__class__.__name__}({self.path!r})>" 

1248 

1249 @classmethod 

1250 def from_config( 

1251 cls, path: Union[str, os.PathLike[str]], config: "Config" 

1252 ) -> "DiskObjectStore": 

1253 """Create a DiskObjectStore from a configuration object. 

1254 

1255 Args: 

1256 path: Path to the object store directory 

1257 config: Configuration object to read settings from 

1258 

1259 Returns: 

1260 New DiskObjectStore instance configured according to config 

1261 """ 

1262 try: 

1263 default_compression_level = int( 

1264 config.get((b"core",), b"compression").decode() 

1265 ) 

1266 except KeyError: 

1267 default_compression_level = -1 

1268 try: 

1269 loose_compression_level = int( 

1270 config.get((b"core",), b"looseCompression").decode() 

1271 ) 

1272 except KeyError: 

1273 loose_compression_level = default_compression_level 

1274 try: 

1275 pack_compression_level = int( 

1276 config.get((b"core",), "packCompression").decode() 

1277 ) 

1278 except KeyError: 

1279 pack_compression_level = default_compression_level 

1280 try: 

1281 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1282 except KeyError: 

1283 pack_index_version = None 

1284 

1285 # Read pack configuration options 

1286 try: 

1287 pack_delta_window_size = int( 

1288 config.get((b"pack",), b"deltaWindowSize").decode() 

1289 ) 

1290 except KeyError: 

1291 pack_delta_window_size = None 

1292 try: 

1293 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1294 except KeyError: 

1295 pack_window_memory = None 

1296 try: 

1297 pack_delta_cache_size = int( 

1298 config.get((b"pack",), b"deltaCacheSize").decode() 

1299 ) 

1300 except KeyError: 

1301 pack_delta_cache_size = None 

1302 try: 

1303 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1304 except KeyError: 

1305 pack_depth = None 

1306 try: 

1307 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1308 except KeyError: 

1309 pack_threads = None 

1310 try: 

1311 pack_big_file_threshold = int( 

1312 config.get((b"pack",), b"bigFileThreshold").decode() 

1313 ) 

1314 except KeyError: 

1315 pack_big_file_threshold = None 

1316 

1317 # Read core.commitGraph setting 

1318 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1319 

1320 instance = cls( 

1321 path, 

1322 loose_compression_level, 

1323 pack_compression_level, 

1324 pack_index_version, 

1325 pack_delta_window_size, 

1326 pack_window_memory, 

1327 pack_delta_cache_size, 

1328 pack_depth, 

1329 pack_threads, 

1330 pack_big_file_threshold, 

1331 ) 

1332 instance._use_commit_graph = use_commit_graph 

1333 return instance 

1334 

1335 @property 

1336 def alternates(self) -> list["BaseObjectStore"]: 

1337 """Get the list of alternate object stores. 

1338 

1339 Reads from .git/objects/info/alternates if not already cached. 

1340 

1341 Returns: 

1342 List of DiskObjectStore instances for alternate object directories 

1343 """ 

1344 if self._alternates is not None: 

1345 return self._alternates 

1346 self._alternates = [] 

1347 for path in self._read_alternate_paths(): 

1348 self._alternates.append(DiskObjectStore(path)) 

1349 return self._alternates 

1350 

1351 def _read_alternate_paths(self) -> Iterator[str]: 

1352 try: 

1353 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1354 except FileNotFoundError: 

1355 return 

1356 with f: 

1357 for line in f.readlines(): 

1358 line = line.rstrip(b"\n") 

1359 if line.startswith(b"#"): 

1360 continue 

1361 if os.path.isabs(line): 

1362 yield os.fsdecode(line) 

1363 else: 

1364 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1365 

1366 def add_alternate_path(self, path: Union[str, os.PathLike[str]]) -> None: 

1367 """Add an alternate path to this object store.""" 

1368 try: 

1369 os.mkdir(os.path.join(self.path, INFODIR)) 

1370 except FileExistsError: 

1371 pass 

1372 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1373 with GitFile(alternates_path, "wb") as f: 

1374 try: 

1375 orig_f = open(alternates_path, "rb") 

1376 except FileNotFoundError: 

1377 pass 

1378 else: 

1379 with orig_f: 

1380 f.write(orig_f.read()) 

1381 f.write(os.fsencode(path) + b"\n") 

1382 

1383 if not os.path.isabs(path): 

1384 path = os.path.join(self.path, path) 

1385 self.alternates.append(DiskObjectStore(path)) 

1386 

1387 def _update_pack_cache(self) -> list[Pack]: 

1388 """Read and iterate over new pack files and cache them.""" 

1389 try: 

1390 pack_dir_contents = os.listdir(self.pack_dir) 

1391 except FileNotFoundError: 

1392 self.close() 

1393 return [] 

1394 pack_files = set() 

1395 for name in pack_dir_contents: 

1396 if name.startswith("pack-") and name.endswith(".pack"): 

1397 # verify that idx exists first (otherwise the pack was not yet 

1398 # fully written) 

1399 idx_name = os.path.splitext(name)[0] + ".idx" 

1400 if idx_name in pack_dir_contents: 

1401 pack_name = name[: -len(".pack")] 

1402 pack_files.add(pack_name) 

1403 

1404 # Open newly appeared pack files 

1405 new_packs = [] 

1406 for f in pack_files: 

1407 if f not in self._pack_cache: 

1408 pack = Pack( 

1409 os.path.join(self.pack_dir, f), 

1410 delta_window_size=self.pack_delta_window_size, 

1411 window_memory=self.pack_window_memory, 

1412 delta_cache_size=self.pack_delta_cache_size, 

1413 depth=self.pack_depth, 

1414 threads=self.pack_threads, 

1415 big_file_threshold=self.pack_big_file_threshold, 

1416 ) 

1417 new_packs.append(pack) 

1418 self._pack_cache[f] = pack 

1419 # Remove disappeared pack files 

1420 for f in set(self._pack_cache) - pack_files: 

1421 self._pack_cache.pop(f).close() 

1422 return new_packs 

1423 

1424 def _get_shafile_path(self, sha: bytes) -> str: 

1425 # Check from object dir 

1426 return hex_to_filename(os.fspath(self.path), sha) 

1427 

1428 def _iter_loose_objects(self) -> Iterator[bytes]: 

1429 for base in os.listdir(self.path): 

1430 if len(base) != 2: 

1431 continue 

1432 for rest in os.listdir(os.path.join(self.path, base)): 

1433 sha = os.fsencode(base + rest) 

1434 if not valid_hexsha(sha): 

1435 continue 

1436 yield sha 

1437 

1438 def count_loose_objects(self) -> int: 

1439 """Count the number of loose objects in the object store. 

1440 

1441 Returns: 

1442 Number of loose objects 

1443 """ 

1444 count = 0 

1445 if not os.path.exists(self.path): 

1446 return 0 

1447 

1448 for i in range(256): 

1449 subdir = os.path.join(self.path, f"{i:02x}") 

1450 try: 

1451 count += len( 

1452 [ 

1453 name 

1454 for name in os.listdir(subdir) 

1455 if len(name) == 38 # 40 - 2 for the prefix 

1456 ] 

1457 ) 

1458 except FileNotFoundError: 

1459 # Directory may have been removed or is inaccessible 

1460 continue 

1461 

1462 return count 

1463 

1464 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

1465 path = self._get_shafile_path(sha) 

1466 try: 

1467 return ShaFile.from_path(path) 

1468 except FileNotFoundError: 

1469 return None 

1470 

1471 def delete_loose_object(self, sha: bytes) -> None: 

1472 """Delete a loose object from disk. 

1473 

1474 Args: 

1475 sha: SHA1 of the object to delete 

1476 

1477 Raises: 

1478 FileNotFoundError: If the object file doesn't exist 

1479 """ 

1480 os.remove(self._get_shafile_path(sha)) 

1481 

1482 def get_object_mtime(self, sha: bytes) -> float: 

1483 """Get the modification time of an object. 

1484 

1485 Args: 

1486 sha: SHA1 of the object 

1487 

1488 Returns: 

1489 Modification time as seconds since epoch 

1490 

1491 Raises: 

1492 KeyError: if the object is not found 

1493 """ 

1494 # First check if it's a loose object 

1495 if self.contains_loose(sha): 

1496 path = self._get_shafile_path(sha) 

1497 try: 

1498 return os.path.getmtime(path) 

1499 except FileNotFoundError: 

1500 pass 

1501 

1502 # Check if it's in a pack file 

1503 for pack in self.packs: 

1504 try: 

1505 if sha in pack: 

1506 # Use the pack file's mtime for packed objects 

1507 pack_path = pack._data_path 

1508 try: 

1509 return os.path.getmtime(pack_path) 

1510 except (FileNotFoundError, AttributeError): 

1511 pass 

1512 except PackFileDisappeared: 

1513 pass 

1514 

1515 raise KeyError(sha) 

1516 

1517 def _remove_pack(self, pack: Pack) -> None: 

1518 try: 

1519 del self._pack_cache[os.path.basename(pack._basename)] 

1520 except KeyError: 

1521 pass 

1522 pack.close() 

1523 os.remove(pack.data.path) 

1524 if hasattr(pack.index, "path"): 

1525 os.remove(pack.index.path) 

1526 

1527 def _get_pack_basepath( 

1528 self, entries: Iterable[tuple[bytes, int, Union[int, None]]] 

1529 ) -> str: 

1530 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1531 # TODO: Handle self.pack_dir being bytes 

1532 suffix = suffix_bytes.decode("ascii") 

1533 return os.path.join(self.pack_dir, "pack-" + suffix) 

1534 

1535 def _complete_pack( 

1536 self, 

1537 f: BinaryIO, 

1538 path: str, 

1539 num_objects: int, 

1540 indexer: PackIndexer, 

1541 progress: Optional[Callable[..., None]] = None, 

1542 ) -> Pack: 

1543 """Move a specific file containing a pack into the pack directory. 

1544 

1545 Note: The file should be on the same file system as the 

1546 packs directory. 

1547 

1548 Args: 

1549 f: Open file object for the pack. 

1550 path: Path to the pack file. 

1551 num_objects: Number of objects in the pack. 

1552 indexer: A PackIndexer for indexing the pack. 

1553 progress: Optional progress reporting function. 

1554 """ 

1555 entries = [] 

1556 for i, entry in enumerate(indexer): 

1557 if progress is not None: 

1558 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1559 entries.append(entry) 

1560 

1561 pack_sha, extra_entries = extend_pack( 

1562 f, 

1563 set(indexer.ext_refs()), 

1564 get_raw=self.get_raw, 

1565 compression_level=self.pack_compression_level, 

1566 progress=progress, 

1567 ) 

1568 f.flush() 

1569 try: 

1570 fileno = f.fileno() 

1571 except AttributeError: 

1572 pass 

1573 else: 

1574 os.fsync(fileno) 

1575 f.close() 

1576 

1577 entries.extend(extra_entries) 

1578 

1579 # Move the pack in. 

1580 entries.sort() 

1581 pack_base_name = self._get_pack_basepath(entries) 

1582 

1583 for pack in self.packs: 

1584 if pack._basename == pack_base_name: 

1585 return pack 

1586 

1587 target_pack_path = pack_base_name + ".pack" 

1588 target_index_path = pack_base_name + ".idx" 

1589 if sys.platform == "win32": 

1590 # Windows might have the target pack file lingering. Attempt 

1591 # removal, silently passing if the target does not exist. 

1592 with suppress(FileNotFoundError): 

1593 os.remove(target_pack_path) 

1594 os.rename(path, target_pack_path) 

1595 

1596 # Write the index. 

1597 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1598 write_pack_index( 

1599 index_file, entries, pack_sha, version=self.pack_index_version 

1600 ) 

1601 

1602 # Add the pack to the store and return it. 

1603 final_pack = Pack( 

1604 pack_base_name, 

1605 delta_window_size=self.pack_delta_window_size, 

1606 window_memory=self.pack_window_memory, 

1607 delta_cache_size=self.pack_delta_cache_size, 

1608 depth=self.pack_depth, 

1609 threads=self.pack_threads, 

1610 big_file_threshold=self.pack_big_file_threshold, 

1611 ) 

1612 final_pack.check_length_and_checksum() 

1613 self._add_cached_pack(pack_base_name, final_pack) 

1614 return final_pack 

1615 

1616 def add_thin_pack( 

1617 self, 

1618 read_all: Callable[[int], bytes], 

1619 read_some: Optional[Callable[[int], bytes]], 

1620 progress: Optional[Callable[..., None]] = None, 

1621 ) -> "Pack": 

1622 """Add a new thin pack to this object store. 

1623 

1624 Thin packs are packs that contain deltas with parents that exist 

1625 outside the pack. They should never be placed in the object store 

1626 directly, and always indexed and completed as they are copied. 

1627 

1628 Args: 

1629 read_all: Read function that blocks until the number of 

1630 requested bytes are read. 

1631 read_some: Read function that returns at least one byte, but may 

1632 not return the number of bytes requested. 

1633 progress: Optional progress reporting function. 

1634 Returns: A Pack object pointing at the now-completed thin pack in the 

1635 objects/pack directory. 

1636 """ 

1637 import tempfile 

1638 

1639 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1640 with os.fdopen(fd, "w+b") as f: 

1641 os.chmod(path, PACK_MODE) 

1642 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type] 

1643 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type] 

1644 copier.verify(progress=progress) 

1645 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1646 

1647 def add_pack( 

1648 self, 

1649 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1650 """Add a new pack to this object store. 

1651 

1652 Returns: Fileobject to write to, a commit function to 

1653 call when the pack is finished and an abort 

1654 function. 

1655 """ 

1656 import tempfile 

1657 

1658 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1659 f = os.fdopen(fd, "w+b") 

1660 os.chmod(path, PACK_MODE) 

1661 

1662 def commit() -> Optional["Pack"]: 

1663 if f.tell() > 0: 

1664 f.seek(0) 

1665 

1666 with PackData(path, f) as pd: 

1667 indexer = PackIndexer.for_pack_data( 

1668 pd, 

1669 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

1670 ) 

1671 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

1672 else: 

1673 f.close() 

1674 os.remove(path) 

1675 return None 

1676 

1677 def abort() -> None: 

1678 f.close() 

1679 os.remove(path) 

1680 

1681 return f, commit, abort # type: ignore[return-value] 

1682 

1683 def add_object(self, obj: ShaFile) -> None: 

1684 """Add a single object to this object store. 

1685 

1686 Args: 

1687 obj: Object to add 

1688 """ 

1689 path = self._get_shafile_path(obj.id) 

1690 dir = os.path.dirname(path) 

1691 try: 

1692 os.mkdir(dir) 

1693 except FileExistsError: 

1694 pass 

1695 if os.path.exists(path): 

1696 return # Already there, no need to write again 

1697 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1698 f.write( 

1699 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1700 ) 

1701 

1702 @classmethod 

1703 def init(cls, path: Union[str, os.PathLike[str]]) -> "DiskObjectStore": 

1704 """Initialize a new disk object store. 

1705 

1706 Creates the necessary directory structure for a Git object store. 

1707 

1708 Args: 

1709 path: Path where the object store should be created 

1710 

1711 Returns: 

1712 New DiskObjectStore instance 

1713 """ 

1714 try: 

1715 os.mkdir(path) 

1716 except FileExistsError: 

1717 pass 

1718 os.mkdir(os.path.join(path, "info")) 

1719 os.mkdir(os.path.join(path, PACKDIR)) 

1720 return cls(path) 

1721 

1722 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]: 

1723 """Iterate over all object SHAs with the given prefix. 

1724 

1725 Args: 

1726 prefix: Hex prefix to search for (as bytes) 

1727 

1728 Returns: 

1729 Iterator of object SHAs (as bytes) matching the prefix 

1730 """ 

1731 if len(prefix) < 2: 

1732 yield from super().iter_prefix(prefix) 

1733 return 

1734 seen = set() 

1735 dir = prefix[:2].decode() 

1736 rest = prefix[2:].decode() 

1737 try: 

1738 for name in os.listdir(os.path.join(self.path, dir)): 

1739 if name.startswith(rest): 

1740 sha = os.fsencode(dir + name) 

1741 if sha not in seen: 

1742 seen.add(sha) 

1743 yield sha 

1744 except FileNotFoundError: 

1745 pass 

1746 

1747 for p in self.packs: 

1748 bin_prefix = ( 

1749 binascii.unhexlify(prefix) 

1750 if len(prefix) % 2 == 0 

1751 else binascii.unhexlify(prefix[:-1]) 

1752 ) 

1753 for sha in p.index.iter_prefix(bin_prefix): 

1754 sha = sha_to_hex(sha) 

1755 if sha.startswith(prefix) and sha not in seen: 

1756 seen.add(sha) 

1757 yield sha 

1758 for alternate in self.alternates: 

1759 for sha in alternate.iter_prefix(prefix): 

1760 if sha not in seen: 

1761 seen.add(sha) 

1762 yield sha 

1763 

1764 def get_commit_graph(self) -> Optional["CommitGraph"]: 

1765 """Get the commit graph for this object store. 

1766 

1767 Returns: 

1768 CommitGraph object if available, None otherwise 

1769 """ 

1770 if not self._use_commit_graph: 

1771 return None 

1772 

1773 if self._commit_graph is None: 

1774 from .commit_graph import read_commit_graph 

1775 

1776 # Look for commit graph in our objects directory 

1777 graph_file = os.path.join(self.path, "info", "commit-graph") 

1778 if os.path.exists(graph_file): 

1779 self._commit_graph = read_commit_graph(graph_file) 

1780 return self._commit_graph 

1781 

1782 def write_commit_graph( 

1783 self, refs: Optional[Iterable[bytes]] = None, reachable: bool = True 

1784 ) -> None: 

1785 """Write a commit graph file for this object store. 

1786 

1787 Args: 

1788 refs: List of refs to include. If None, includes all refs from object store. 

1789 reachable: If True, includes all commits reachable from refs. 

1790 If False, only includes the direct ref targets. 

1791 """ 

1792 from .commit_graph import get_reachable_commits 

1793 

1794 if refs is None: 

1795 # Get all commit objects from the object store 

1796 all_refs = [] 

1797 # Iterate through all objects to find commits 

1798 for sha in self: 

1799 try: 

1800 obj = self[sha] 

1801 if obj.type_name == b"commit": 

1802 all_refs.append(sha) 

1803 except KeyError: 

1804 continue 

1805 else: 

1806 # Use provided refs 

1807 all_refs = list(refs) 

1808 

1809 if not all_refs: 

1810 return # No commits to include 

1811 

1812 if reachable: 

1813 # Get all reachable commits 

1814 commit_ids = get_reachable_commits(self, all_refs) 

1815 else: 

1816 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1817 commit_ids = [] 

1818 for ref in all_refs: 

1819 if isinstance(ref, bytes) and len(ref) == 40: 

1820 # Already hex ObjectID 

1821 commit_ids.append(ref) 

1822 elif isinstance(ref, bytes) and len(ref) == 20: 

1823 # Binary SHA, convert to hex ObjectID 

1824 from .objects import sha_to_hex 

1825 

1826 commit_ids.append(sha_to_hex(ref)) 

1827 else: 

1828 # Assume it's already correct format 

1829 commit_ids.append(ref) 

1830 

1831 if commit_ids: 

1832 # Write commit graph directly to our object store path 

1833 # Generate the commit graph 

1834 from .commit_graph import generate_commit_graph 

1835 

1836 graph = generate_commit_graph(self, commit_ids) 

1837 

1838 if graph.entries: 

1839 # Ensure the info directory exists 

1840 info_dir = os.path.join(self.path, "info") 

1841 os.makedirs(info_dir, exist_ok=True) 

1842 

1843 # Write using GitFile for atomic operation 

1844 graph_path = os.path.join(info_dir, "commit-graph") 

1845 with GitFile(graph_path, "wb") as f: 

1846 assert isinstance( 

1847 f, _GitFile 

1848 ) # GitFile in write mode always returns _GitFile 

1849 graph.write_to_file(f) 

1850 

1851 # Clear cached commit graph so it gets reloaded 

1852 self._commit_graph = None 

1853 

1854 def prune(self, grace_period: Optional[int] = None) -> None: 

1855 """Prune/clean up this object store. 

1856 

1857 This removes temporary files that were left behind by interrupted 

1858 pack operations. These are files that start with ``tmp_pack_`` in the 

1859 repository directory or files with .pack extension but no corresponding 

1860 .idx file in the pack directory. 

1861 

1862 Args: 

1863 grace_period: Grace period in seconds for removing temporary files. 

1864 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1865 """ 

1866 import glob 

1867 

1868 if grace_period is None: 

1869 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1870 

1871 # Clean up tmp_pack_* files in the repository directory 

1872 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1873 # Check if file is old enough (more than grace period) 

1874 mtime = os.path.getmtime(tmp_file) 

1875 if time.time() - mtime > grace_period: 

1876 os.remove(tmp_file) 

1877 

1878 # Clean up orphaned .pack files without corresponding .idx files 

1879 try: 

1880 pack_dir_contents = os.listdir(self.pack_dir) 

1881 except FileNotFoundError: 

1882 return 

1883 

1884 pack_files = {} 

1885 idx_files = set() 

1886 

1887 for name in pack_dir_contents: 

1888 if name.endswith(".pack"): 

1889 base_name = name[:-5] # Remove .pack extension 

1890 pack_files[base_name] = name 

1891 elif name.endswith(".idx"): 

1892 base_name = name[:-4] # Remove .idx extension 

1893 idx_files.add(base_name) 

1894 

1895 # Remove .pack files without corresponding .idx files 

1896 for base_name, pack_name in pack_files.items(): 

1897 if base_name not in idx_files: 

1898 pack_path = os.path.join(self.pack_dir, pack_name) 

1899 # Check if file is old enough (more than grace period) 

1900 mtime = os.path.getmtime(pack_path) 

1901 if time.time() - mtime > grace_period: 

1902 os.remove(pack_path) 

1903 

1904 

1905class MemoryObjectStore(PackCapableObjectStore): 

1906 """Object store that keeps all objects in memory.""" 

1907 

1908 def __init__(self) -> None: 

1909 """Initialize a MemoryObjectStore. 

1910 

1911 Creates an empty in-memory object store. 

1912 """ 

1913 super().__init__() 

1914 self._data: dict[bytes, ShaFile] = {} 

1915 self.pack_compression_level = -1 

1916 

1917 def _to_hexsha(self, sha: bytes) -> bytes: 

1918 if len(sha) == 40: 

1919 return sha 

1920 elif len(sha) == 20: 

1921 return sha_to_hex(sha) 

1922 else: 

1923 raise ValueError(f"Invalid sha {sha!r}") 

1924 

1925 def contains_loose(self, sha: bytes) -> bool: 

1926 """Check if a particular object is present by SHA1 and is loose.""" 

1927 return self._to_hexsha(sha) in self._data 

1928 

1929 def contains_packed(self, sha: bytes) -> bool: 

1930 """Check if a particular object is present by SHA1 and is packed.""" 

1931 return False 

1932 

1933 def __iter__(self) -> Iterator[bytes]: 

1934 """Iterate over the SHAs that are present in this store.""" 

1935 return iter(self._data.keys()) 

1936 

1937 @property 

1938 def packs(self) -> list[Pack]: 

1939 """List with pack objects.""" 

1940 return [] 

1941 

1942 def get_raw(self, name: ObjectID) -> tuple[int, bytes]: 

1943 """Obtain the raw text for an object. 

1944 

1945 Args: 

1946 name: sha for the object. 

1947 Returns: tuple with numeric type and object contents. 

1948 """ 

1949 obj = self[self._to_hexsha(name)] 

1950 return obj.type_num, obj.as_raw_string() 

1951 

1952 def __getitem__(self, name: ObjectID) -> ShaFile: 

1953 """Retrieve an object by SHA. 

1954 

1955 Args: 

1956 name: SHA of the object (as hex string or bytes) 

1957 

1958 Returns: 

1959 Copy of the ShaFile object 

1960 

1961 Raises: 

1962 KeyError: If the object is not found 

1963 """ 

1964 return self._data[self._to_hexsha(name)].copy() 

1965 

1966 def __delitem__(self, name: ObjectID) -> None: 

1967 """Delete an object from this store, for testing only.""" 

1968 del self._data[self._to_hexsha(name)] 

1969 

1970 def add_object(self, obj: ShaFile) -> None: 

1971 """Add a single object to this object store.""" 

1972 self._data[obj.id] = obj.copy() 

1973 

1974 def add_objects( 

1975 self, 

1976 objects: Iterable[tuple[ShaFile, Optional[str]]], 

1977 progress: Optional[Callable[[str], None]] = None, 

1978 ) -> None: 

1979 """Add a set of objects to this object store. 

1980 

1981 Args: 

1982 objects: Iterable over a list of (object, path) tuples 

1983 progress: Optional progress reporting function. 

1984 """ 

1985 for obj, path in objects: 

1986 self.add_object(obj) 

1987 

1988 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1989 """Add a new pack to this object store. 

1990 

1991 Because this object store doesn't support packs, we extract and add the 

1992 individual objects. 

1993 

1994 Returns: Fileobject to write to and a commit function to 

1995 call when the pack is finished. 

1996 """ 

1997 from tempfile import SpooledTemporaryFile 

1998 

1999 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

2000 

2001 def commit() -> None: 

2002 size = f.tell() 

2003 if size > 0: 

2004 f.seek(0) 

2005 

2006 p = PackData.from_file(f, size) 

2007 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type] 

2008 self.add_object(obj) 

2009 p.close() 

2010 f.close() 

2011 else: 

2012 f.close() 

2013 

2014 def abort() -> None: 

2015 f.close() 

2016 

2017 return f, commit, abort # type: ignore[return-value] 

2018 

2019 def add_pack_data( 

2020 self, 

2021 count: int, 

2022 unpacked_objects: Iterator[UnpackedObject], 

2023 progress: Optional[Callable[[str], None]] = None, 

2024 ) -> None: 

2025 """Add pack data to this object store. 

2026 

2027 Args: 

2028 count: Number of items to add 

2029 unpacked_objects: Iterator of UnpackedObject instances 

2030 progress: Optional progress reporting function. 

2031 """ 

2032 if count == 0: 

2033 return 

2034 

2035 # Since MemoryObjectStore doesn't support pack files, we need to 

2036 # extract individual objects. To handle deltas properly, we write 

2037 # to a temporary pack and then use PackInflater to resolve them. 

2038 f, commit, abort = self.add_pack() 

2039 try: 

2040 write_pack_data( 

2041 f.write, 

2042 unpacked_objects, 

2043 num_records=count, 

2044 progress=progress, 

2045 ) 

2046 except BaseException: 

2047 abort() 

2048 raise 

2049 else: 

2050 commit() 

2051 

2052 def add_thin_pack( 

2053 self, 

2054 read_all: Callable[[], bytes], 

2055 read_some: Callable[[int], bytes], 

2056 progress: Optional[Callable[[str], None]] = None, 

2057 ) -> None: 

2058 """Add a new thin pack to this object store. 

2059 

2060 Thin packs are packs that contain deltas with parents that exist 

2061 outside the pack. Because this object store doesn't support packs, we 

2062 extract and add the individual objects. 

2063 

2064 Args: 

2065 read_all: Read function that blocks until the number of 

2066 requested bytes are read. 

2067 read_some: Read function that returns at least one byte, but may 

2068 not return the number of bytes requested. 

2069 progress: Optional progress reporting function. 

2070 """ 

2071 f, commit, abort = self.add_pack() 

2072 try: 

2073 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type] 

2074 copier.verify() 

2075 except BaseException: 

2076 abort() 

2077 raise 

2078 else: 

2079 commit() 

2080 

2081 

2082class ObjectIterator(Protocol): 

2083 """Interface for iterating over objects.""" 

2084 

2085 def iterobjects(self) -> Iterator[ShaFile]: 

2086 """Iterate over all objects. 

2087 

2088 Returns: 

2089 Iterator of ShaFile objects 

2090 """ 

2091 raise NotImplementedError(self.iterobjects) 

2092 

2093 

2094def tree_lookup_path( 

2095 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes 

2096) -> tuple[int, bytes]: 

2097 """Look up an object in a Git tree. 

2098 

2099 Args: 

2100 lookup_obj: Callback for retrieving object by SHA1 

2101 root_sha: SHA1 of the root tree 

2102 path: Path to lookup 

2103 Returns: A tuple of (mode, SHA) of the resulting path. 

2104 """ 

2105 tree = lookup_obj(root_sha) 

2106 if not isinstance(tree, Tree): 

2107 raise NotTreeError(root_sha) 

2108 return tree.lookup_path(lookup_obj, path) 

2109 

2110 

2111def _collect_filetree_revs( 

2112 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

2113) -> None: 

2114 """Collect SHA1s of files and directories for specified tree. 

2115 

2116 Args: 

2117 obj_store: Object store to get objects by SHA from 

2118 tree_sha: tree reference to walk 

2119 kset: set to fill with references to files and directories 

2120 """ 

2121 filetree = obj_store[tree_sha] 

2122 assert isinstance(filetree, Tree) 

2123 for name, mode, sha in filetree.iteritems(): 

2124 assert mode is not None 

2125 assert sha is not None 

2126 if not S_ISGITLINK(mode) and sha not in kset: 

2127 kset.add(sha) 

2128 if stat.S_ISDIR(mode): 

2129 _collect_filetree_revs(obj_store, sha, kset) 

2130 

2131 

2132def _split_commits_and_tags( 

2133 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False 

2134) -> tuple[set[bytes], set[bytes], set[bytes]]: 

2135 """Split object id list into three lists with commit, tag, and other SHAs. 

2136 

2137 Commits referenced by tags are included into commits 

2138 list as well. Only SHA1s known in this repository will get 

2139 through, and unless ignore_unknown argument is True, KeyError 

2140 is thrown for SHA1 missing in the repository 

2141 

2142 Args: 

2143 obj_store: Object store to get objects by SHA1 from 

2144 lst: Collection of commit and tag SHAs 

2145 ignore_unknown: True to skip SHA1 missing in the repository 

2146 silently. 

2147 Returns: A tuple of (commits, tags, others) SHA1s 

2148 """ 

2149 commits: set[bytes] = set() 

2150 tags: set[bytes] = set() 

2151 others: set[bytes] = set() 

2152 for e in lst: 

2153 try: 

2154 o = obj_store[e] 

2155 except KeyError: 

2156 if not ignore_unknown: 

2157 raise 

2158 else: 

2159 if isinstance(o, Commit): 

2160 commits.add(e) 

2161 elif isinstance(o, Tag): 

2162 tags.add(e) 

2163 tagged = o.object[1] 

2164 c, t, os = _split_commits_and_tags( 

2165 obj_store, [tagged], ignore_unknown=ignore_unknown 

2166 ) 

2167 commits |= c 

2168 tags |= t 

2169 others |= os 

2170 else: 

2171 others.add(e) 

2172 return (commits, tags, others) 

2173 

2174 

2175class MissingObjectFinder: 

2176 """Find the objects missing from another object store. 

2177 

2178 Args: 

2179 object_store: Object store containing at least all objects to be 

2180 sent 

2181 haves: SHA1s of commits not to send (already present in target) 

2182 wants: SHA1s of commits to send 

2183 progress: Optional function to report progress to. 

2184 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2185 sha for including tags. 

2186 get_parents: Optional function for getting the parents of a commit. 

2187 """ 

2188 

2189 def __init__( 

2190 self, 

2191 object_store: BaseObjectStore, 

2192 haves: Iterable[bytes], 

2193 wants: Iterable[bytes], 

2194 *, 

2195 shallow: Optional[Set[bytes]] = None, 

2196 progress: Optional[Callable[[bytes], None]] = None, 

2197 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None, 

2198 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2199 ) -> None: 

2200 """Initialize a MissingObjectFinder. 

2201 

2202 Args: 

2203 object_store: Object store containing objects 

2204 haves: SHA1s of objects already present in target 

2205 wants: SHA1s of objects to send 

2206 shallow: Set of shallow commit SHA1s 

2207 progress: Optional progress reporting callback 

2208 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2209 get_parents: Function for getting commit parents 

2210 """ 

2211 self.object_store = object_store 

2212 if shallow is None: 

2213 shallow = set() 

2214 self._get_parents = get_parents 

2215 # process Commits and Tags differently 

2216 # Note, while haves may list commits/tags not available locally, 

2217 # and such SHAs would get filtered out by _split_commits_and_tags, 

2218 # wants shall list only known SHAs, and otherwise 

2219 # _split_commits_and_tags fails with KeyError 

2220 have_commits, have_tags, have_others = _split_commits_and_tags( 

2221 object_store, haves, ignore_unknown=True 

2222 ) 

2223 want_commits, want_tags, want_others = _split_commits_and_tags( 

2224 object_store, wants, ignore_unknown=False 

2225 ) 

2226 # all_ancestors is a set of commits that shall not be sent 

2227 # (complete repository up to 'haves') 

2228 all_ancestors = _collect_ancestors( 

2229 object_store, 

2230 have_commits, 

2231 shallow=frozenset(shallow), 

2232 get_parents=self._get_parents, 

2233 )[0] 

2234 # all_missing - complete set of commits between haves and wants 

2235 # common - commits from all_ancestors we hit into while 

2236 # traversing parent hierarchy of wants 

2237 missing_commits, common_commits = _collect_ancestors( 

2238 object_store, 

2239 want_commits, 

2240 frozenset(all_ancestors), 

2241 shallow=frozenset(shallow), 

2242 get_parents=self._get_parents, 

2243 ) 

2244 self.remote_has: set[bytes] = set() 

2245 # Now, fill sha_done with commits and revisions of 

2246 # files and directories known to be both locally 

2247 # and on target. Thus these commits and files 

2248 # won't get selected for fetch 

2249 for h in common_commits: 

2250 self.remote_has.add(h) 

2251 cmt = object_store[h] 

2252 assert isinstance(cmt, Commit) 

2253 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2254 # record tags we have as visited, too 

2255 for t in have_tags: 

2256 self.remote_has.add(t) 

2257 self.sha_done = set(self.remote_has) 

2258 

2259 # in fact, what we 'want' is commits, tags, and others 

2260 # we've found missing 

2261 self.objects_to_send: set[ 

2262 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

2263 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

2264 missing_tags = want_tags.difference(have_tags) 

2265 self.objects_to_send.update( 

2266 {(w, None, Tag.type_num, False) for w in missing_tags} 

2267 ) 

2268 missing_others = want_others.difference(have_others) 

2269 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2270 

2271 if progress is None: 

2272 self.progress: Callable[[bytes], None] = lambda x: None 

2273 else: 

2274 self.progress = progress 

2275 self._tagged = (get_tagged and get_tagged()) or {} 

2276 

2277 def get_remote_has(self) -> set[bytes]: 

2278 """Get the set of SHAs the remote has. 

2279 

2280 Returns: 

2281 Set of SHA1s that the remote side already has 

2282 """ 

2283 return self.remote_has 

2284 

2285 def add_todo( 

2286 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

2287 ) -> None: 

2288 """Add objects to the todo list. 

2289 

2290 Args: 

2291 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2292 """ 

2293 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2294 

2295 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

2296 """Get the next object to send. 

2297 

2298 Returns: 

2299 Tuple of (sha, pack_hint) 

2300 

2301 Raises: 

2302 StopIteration: When no more objects to send 

2303 """ 

2304 while True: 

2305 if not self.objects_to_send: 

2306 self.progress( 

2307 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2308 ) 

2309 raise StopIteration 

2310 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2311 if sha not in self.sha_done: 

2312 break 

2313 if not leaf: 

2314 o = self.object_store[sha] 

2315 if isinstance(o, Commit): 

2316 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2317 elif isinstance(o, Tree): 

2318 todos = [] 

2319 for n, m, s in o.iteritems(): 

2320 assert m is not None 

2321 assert n is not None 

2322 assert s is not None 

2323 if not S_ISGITLINK(m): 

2324 todos.append( 

2325 ( 

2326 s, 

2327 n, 

2328 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2329 not stat.S_ISDIR(m), 

2330 ) 

2331 ) 

2332 self.add_todo(todos) 

2333 elif isinstance(o, Tag): 

2334 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2335 if sha in self._tagged: 

2336 self.add_todo([(self._tagged[sha], None, None, True)]) 

2337 self.sha_done.add(sha) 

2338 if len(self.sha_done) % 1000 == 0: 

2339 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2340 if type_num is None: 

2341 pack_hint = None 

2342 else: 

2343 pack_hint = (type_num, name) 

2344 return (sha, pack_hint) 

2345 

2346 def __iter__(self) -> Iterator[tuple[bytes, Optional[PackHint]]]: 

2347 """Return iterator over objects to send. 

2348 

2349 Returns: 

2350 Self (this class implements the iterator protocol) 

2351 """ 

2352 return self 

2353 

2354 

2355class ObjectStoreGraphWalker: 

2356 """Graph walker that finds what commits are missing from an object store.""" 

2357 

2358 heads: set[ObjectID] 

2359 """Revisions without descendants in the local repo.""" 

2360 

2361 get_parents: Callable[[ObjectID], list[ObjectID]] 

2362 """Function to retrieve parents in the local repo.""" 

2363 

2364 shallow: set[ObjectID] 

2365 

2366 def __init__( 

2367 self, 

2368 local_heads: Iterable[ObjectID], 

2369 get_parents: Callable[[ObjectID], list[ObjectID]], 

2370 shallow: Optional[set[ObjectID]] = None, 

2371 update_shallow: Optional[ 

2372 Callable[[Optional[set[ObjectID]], Optional[set[ObjectID]]], None] 

2373 ] = None, 

2374 ) -> None: 

2375 """Create a new instance. 

2376 

2377 Args: 

2378 local_heads: Heads to start search with 

2379 get_parents: Function for finding the parents of a SHA1. 

2380 shallow: Set of shallow commits. 

2381 update_shallow: Function to update shallow commits. 

2382 """ 

2383 self.heads = set(local_heads) 

2384 self.get_parents = get_parents 

2385 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

2386 if shallow is None: 

2387 shallow = set() 

2388 self.shallow = shallow 

2389 self.update_shallow = update_shallow 

2390 

2391 def nak(self) -> None: 

2392 """Nothing in common was found.""" 

2393 

2394 def ack(self, sha: ObjectID) -> None: 

2395 """Ack that a revision and its ancestors are present in the source.""" 

2396 if len(sha) != 40: 

2397 raise ValueError(f"unexpected sha {sha!r} received") 

2398 ancestors = {sha} 

2399 

2400 # stop if we run out of heads to remove 

2401 while self.heads: 

2402 for a in ancestors: 

2403 if a in self.heads: 

2404 self.heads.remove(a) 

2405 

2406 # collect all ancestors 

2407 new_ancestors = set() 

2408 for a in ancestors: 

2409 ps = self.parents.get(a) 

2410 if ps is not None: 

2411 new_ancestors.update(ps) 

2412 self.parents[a] = None 

2413 

2414 # no more ancestors; stop 

2415 if not new_ancestors: 

2416 break 

2417 

2418 ancestors = new_ancestors 

2419 

2420 def next(self) -> Optional[ObjectID]: 

2421 """Iterate over ancestors of heads in the target.""" 

2422 if self.heads: 

2423 ret = self.heads.pop() 

2424 try: 

2425 ps = self.get_parents(ret) 

2426 except KeyError: 

2427 return None 

2428 self.parents[ret] = ps 

2429 self.heads.update([p for p in ps if p not in self.parents]) 

2430 return ret 

2431 return None 

2432 

2433 __next__ = next 

2434 

2435 

2436def commit_tree_changes( 

2437 object_store: BaseObjectStore, 

2438 tree: Union[ObjectID, Tree], 

2439 changes: Sequence[tuple[bytes, Optional[int], Optional[bytes]]], 

2440) -> ObjectID: 

2441 """Commit a specified set of changes to a tree structure. 

2442 

2443 This will apply a set of changes on top of an existing tree, storing new 

2444 objects in object_store. 

2445 

2446 changes are a list of tuples with (path, mode, object_sha). 

2447 Paths can be both blobs and trees. See the mode and 

2448 object sha to None deletes the path. 

2449 

2450 This method works especially well if there are only a small 

2451 number of changes to a big tree. For a large number of changes 

2452 to a large tree, use e.g. commit_tree. 

2453 

2454 Args: 

2455 object_store: Object store to store new objects in 

2456 and retrieve old ones from. 

2457 tree: Original tree root (SHA or Tree object) 

2458 changes: changes to apply 

2459 Returns: New tree root object 

2460 """ 

2461 # TODO(jelmer): Save up the objects and add them using .add_objects 

2462 # rather than with individual calls to .add_object. 

2463 # Handle both Tree object and SHA 

2464 if isinstance(tree, Tree): 

2465 tree_obj: Tree = tree 

2466 else: 

2467 sha_obj = object_store[tree] 

2468 assert isinstance(sha_obj, Tree) 

2469 tree_obj = sha_obj 

2470 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {} 

2471 for path, new_mode, new_sha in changes: 

2472 try: 

2473 (dirname, subpath) = path.split(b"/", 1) 

2474 except ValueError: 

2475 if new_sha is None: 

2476 del tree_obj[path] 

2477 else: 

2478 assert new_mode is not None 

2479 tree_obj[path] = (new_mode, new_sha) 

2480 else: 

2481 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2482 for name, subchanges in nested_changes.items(): 

2483 try: 

2484 orig_subtree_id: Union[bytes, Tree] = tree_obj[name][1] 

2485 except KeyError: 

2486 # For new directories, pass an empty Tree object 

2487 orig_subtree_id = Tree() 

2488 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges) 

2489 subtree = object_store[subtree_id] 

2490 assert isinstance(subtree, Tree) 

2491 if len(subtree) == 0: 

2492 del tree_obj[name] 

2493 else: 

2494 tree_obj[name] = (stat.S_IFDIR, subtree.id) 

2495 object_store.add_object(tree_obj) 

2496 return tree_obj.id 

2497 

2498 

2499class OverlayObjectStore(BaseObjectStore): 

2500 """Object store that can overlay multiple object stores.""" 

2501 

2502 def __init__( 

2503 self, 

2504 bases: list[BaseObjectStore], 

2505 add_store: Optional[BaseObjectStore] = None, 

2506 ) -> None: 

2507 """Initialize an OverlayObjectStore. 

2508 

2509 Args: 

2510 bases: List of base object stores to overlay 

2511 add_store: Optional store to write new objects to 

2512 """ 

2513 self.bases = bases 

2514 self.add_store = add_store 

2515 

2516 def add_object(self, object: ShaFile) -> None: 

2517 """Add a single object to the store. 

2518 

2519 Args: 

2520 object: Object to add 

2521 

2522 Raises: 

2523 NotImplementedError: If no add_store was provided 

2524 """ 

2525 if self.add_store is None: 

2526 raise NotImplementedError(self.add_object) 

2527 return self.add_store.add_object(object) 

2528 

2529 def add_objects( 

2530 self, 

2531 objects: Sequence[tuple[ShaFile, Optional[str]]], 

2532 progress: Optional[Callable[[str], None]] = None, 

2533 ) -> Optional[Pack]: 

2534 """Add multiple objects to the store. 

2535 

2536 Args: 

2537 objects: Iterator of objects to add 

2538 progress: Optional progress reporting callback 

2539 

2540 Raises: 

2541 NotImplementedError: If no add_store was provided 

2542 """ 

2543 if self.add_store is None: 

2544 raise NotImplementedError(self.add_object) 

2545 return self.add_store.add_objects(objects, progress) 

2546 

2547 @property 

2548 def packs(self) -> list[Pack]: 

2549 """Get the list of packs from all overlaid stores. 

2550 

2551 Returns: 

2552 Combined list of packs from all base stores 

2553 """ 

2554 ret = [] 

2555 for b in self.bases: 

2556 ret.extend(b.packs) 

2557 return ret 

2558 

2559 def __iter__(self) -> Iterator[ObjectID]: 

2560 """Iterate over all object SHAs in the overlaid stores. 

2561 

2562 Returns: 

2563 Iterator of object SHAs (deduped across stores) 

2564 """ 

2565 done = set() 

2566 for b in self.bases: 

2567 for o_id in b: 

2568 if o_id not in done: 

2569 yield o_id 

2570 done.add(o_id) 

2571 

2572 def iterobjects_subset( 

2573 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2574 ) -> Iterator[ShaFile]: 

2575 """Iterate over a subset of objects from the overlaid stores. 

2576 

2577 Args: 

2578 shas: Iterable of object SHAs to retrieve 

2579 allow_missing: If True, skip missing objects; if False, raise KeyError 

2580 

2581 Returns: 

2582 Iterator of ShaFile objects 

2583 

2584 Raises: 

2585 KeyError: If an object is missing and allow_missing is False 

2586 """ 

2587 todo = set(shas) 

2588 found: set[bytes] = set() 

2589 

2590 for b in self.bases: 

2591 # Create a copy of todo for each base to avoid modifying 

2592 # the set while iterating through it 

2593 current_todo = todo - found 

2594 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2595 yield o 

2596 found.add(o.id) 

2597 

2598 # Check for any remaining objects not found 

2599 missing = todo - found 

2600 if missing and not allow_missing: 

2601 raise KeyError(next(iter(missing))) 

2602 

2603 def iter_unpacked_subset( 

2604 self, 

2605 shas: Iterable[bytes], 

2606 include_comp: bool = False, 

2607 allow_missing: bool = False, 

2608 convert_ofs_delta: bool = True, 

2609 ) -> Iterator[UnpackedObject]: 

2610 """Iterate over unpacked objects from the overlaid stores. 

2611 

2612 Args: 

2613 shas: Iterable of object SHAs to retrieve 

2614 include_comp: Whether to include compressed data 

2615 allow_missing: If True, skip missing objects; if False, raise KeyError 

2616 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2617 

2618 Returns: 

2619 Iterator of unpacked objects 

2620 

2621 Raises: 

2622 KeyError: If an object is missing and allow_missing is False 

2623 """ 

2624 todo = set(shas) 

2625 for b in self.bases: 

2626 for o in b.iter_unpacked_subset( 

2627 todo, 

2628 include_comp=include_comp, 

2629 allow_missing=True, 

2630 convert_ofs_delta=convert_ofs_delta, 

2631 ): 

2632 yield o 

2633 todo.remove(o.sha()) 

2634 if todo and not allow_missing: 

2635 raise KeyError(next(iter(todo))) 

2636 

2637 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]: 

2638 """Get the raw object data from the overlaid stores. 

2639 

2640 Args: 

2641 sha_id: SHA of the object 

2642 

2643 Returns: 

2644 Tuple of (type_num, raw_data) 

2645 

2646 Raises: 

2647 KeyError: If object not found in any base store 

2648 """ 

2649 for b in self.bases: 

2650 try: 

2651 return b.get_raw(sha_id) 

2652 except KeyError: 

2653 pass 

2654 raise KeyError(sha_id) 

2655 

2656 def contains_packed(self, sha: bytes) -> bool: 

2657 """Check if an object is packed in any base store. 

2658 

2659 Args: 

2660 sha: SHA of the object 

2661 

2662 Returns: 

2663 True if object is packed in any base store 

2664 """ 

2665 for b in self.bases: 

2666 if b.contains_packed(sha): 

2667 return True 

2668 return False 

2669 

2670 def contains_loose(self, sha: bytes) -> bool: 

2671 """Check if an object is loose in any base store. 

2672 

2673 Args: 

2674 sha: SHA of the object 

2675 

2676 Returns: 

2677 True if object is loose in any base store 

2678 """ 

2679 for b in self.bases: 

2680 if b.contains_loose(sha): 

2681 return True 

2682 return False 

2683 

2684 

2685def read_packs_file(f: BinaryIO) -> Iterator[str]: 

2686 """Yield the packs listed in a packs file.""" 

2687 for line in f.read().splitlines(): 

2688 if not line: 

2689 continue 

2690 (kind, name) = line.split(b" ", 1) 

2691 if kind != b"P": 

2692 continue 

2693 yield os.fsdecode(name) 

2694 

2695 

2696class BucketBasedObjectStore(PackBasedObjectStore): 

2697 """Object store implementation that uses a bucket store like S3 as backend.""" 

2698 

2699 def _iter_loose_objects(self) -> Iterator[bytes]: 

2700 """Iterate over the SHAs of all loose objects.""" 

2701 return iter([]) 

2702 

2703 def _get_loose_object(self, sha: bytes) -> None: 

2704 return None 

2705 

2706 def delete_loose_object(self, sha: bytes) -> None: 

2707 """Delete a loose object (no-op for bucket stores). 

2708 

2709 Bucket-based stores don't have loose objects, so this is a no-op. 

2710 

2711 Args: 

2712 sha: SHA of the object to delete 

2713 """ 

2714 # Doesn't exist.. 

2715 

2716 def pack_loose_objects( 

2717 self, progress: Optional[Callable[[str], None]] = None 

2718 ) -> int: 

2719 """Pack loose objects. Returns number of objects packed. 

2720 

2721 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2722 

2723 Args: 

2724 progress: Optional progress reporting callback (ignored) 

2725 """ 

2726 return 0 

2727 

2728 def _remove_pack_by_name(self, name: str) -> None: 

2729 """Remove a pack by name. Subclasses should implement this.""" 

2730 raise NotImplementedError(self._remove_pack_by_name) 

2731 

2732 def _iter_pack_names(self) -> Iterator[str]: 

2733 raise NotImplementedError(self._iter_pack_names) 

2734 

2735 def _get_pack(self, name: str) -> Pack: 

2736 raise NotImplementedError(self._get_pack) 

2737 

2738 def _update_pack_cache(self) -> list[Pack]: 

2739 pack_files = set(self._iter_pack_names()) 

2740 

2741 # Open newly appeared pack files 

2742 new_packs = [] 

2743 for f in pack_files: 

2744 if f not in self._pack_cache: 

2745 pack = self._get_pack(f) 

2746 new_packs.append(pack) 

2747 self._pack_cache[f] = pack 

2748 # Remove disappeared pack files 

2749 for f in set(self._pack_cache) - pack_files: 

2750 self._pack_cache.pop(f).close() 

2751 return new_packs 

2752 

2753 def _upload_pack( 

2754 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO 

2755 ) -> None: 

2756 raise NotImplementedError 

2757 

2758 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2759 """Add a new pack to this object store. 

2760 

2761 Returns: Fileobject to write to, a commit function to 

2762 call when the pack is finished and an abort 

2763 function. 

2764 """ 

2765 import tempfile 

2766 

2767 pf = tempfile.SpooledTemporaryFile( 

2768 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2769 ) 

2770 

2771 def commit() -> Optional[Pack]: 

2772 if pf.tell() == 0: 

2773 pf.close() 

2774 return None 

2775 

2776 pf.seek(0) 

2777 

2778 p = PackData(pf.name, pf) 

2779 entries = p.sorted_entries() 

2780 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2781 idxf = tempfile.SpooledTemporaryFile( 

2782 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2783 ) 

2784 checksum = p.get_stored_checksum() 

2785 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2786 idxf.seek(0) 

2787 idx = load_pack_index_file(basename + ".idx", idxf) 

2788 for pack in self.packs: 

2789 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2790 p.close() 

2791 idx.close() 

2792 pf.close() 

2793 idxf.close() 

2794 return pack 

2795 pf.seek(0) 

2796 idxf.seek(0) 

2797 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type] 

2798 final_pack = Pack.from_objects(p, idx) 

2799 self._add_cached_pack(basename, final_pack) 

2800 pf.close() 

2801 idxf.close() 

2802 return final_pack 

2803 

2804 return pf, commit, pf.close # type: ignore[return-value] 

2805 

2806 

2807def _collect_ancestors( 

2808 store: ObjectContainer, 

2809 heads: Iterable[ObjectID], 

2810 common: frozenset[ObjectID] = frozenset(), 

2811 shallow: frozenset[ObjectID] = frozenset(), 

2812 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2813) -> tuple[set[ObjectID], set[ObjectID]]: 

2814 """Collect all ancestors of heads up to (excluding) those in common. 

2815 

2816 Args: 

2817 store: Object store to get commits from 

2818 heads: commits to start from 

2819 common: commits to end at, or empty set to walk repository 

2820 completely 

2821 shallow: Set of shallow commits 

2822 get_parents: Optional function for getting the parents of a 

2823 commit. 

2824 Returns: a tuple (A, B) where A - all commits reachable 

2825 from heads but not present in common, B - common (shared) elements 

2826 that are directly reachable from heads 

2827 """ 

2828 bases = set() 

2829 commits = set() 

2830 queue: list[ObjectID] = [] 

2831 queue.extend(heads) 

2832 

2833 # Try to use commit graph if available 

2834 commit_graph = store.get_commit_graph() 

2835 

2836 while queue: 

2837 e = queue.pop(0) 

2838 if e in common: 

2839 bases.add(e) 

2840 elif e not in commits: 

2841 commits.add(e) 

2842 if e in shallow: 

2843 continue 

2844 

2845 # Try to use commit graph for parent lookup 

2846 parents = None 

2847 if commit_graph: 

2848 parents = commit_graph.get_parents(e) 

2849 

2850 if parents is None: 

2851 # Fall back to loading the object 

2852 cmt = store[e] 

2853 assert isinstance(cmt, Commit) 

2854 parents = get_parents(cmt) 

2855 

2856 queue.extend(parents) 

2857 return (commits, bases) 

2858 

2859 

2860def iter_tree_contents( 

2861 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2862) -> Iterator[TreeEntry]: 

2863 """Iterate the contents of a tree and all subtrees. 

2864 

2865 Iteration is depth-first pre-order, as in e.g. os.walk. 

2866 

2867 Args: 

2868 store: Object store to get trees from 

2869 tree_id: SHA1 of the tree. 

2870 include_trees: If True, include tree objects in the iteration. 

2871 

2872 Yields: TreeEntry namedtuples for all the objects in a tree. 

2873 """ 

2874 if tree_id is None: 

2875 return 

2876 # This could be fairly easily generalized to >2 trees if we find a use 

2877 # case. 

2878 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2879 while todo: 

2880 entry = todo.pop() 

2881 assert entry.mode is not None 

2882 if stat.S_ISDIR(entry.mode): 

2883 extra = [] 

2884 assert entry.sha is not None 

2885 tree = store[entry.sha] 

2886 assert isinstance(tree, Tree) 

2887 for subentry in tree.iteritems(name_order=True): 

2888 assert entry.path is not None 

2889 extra.append(subentry.in_path(entry.path)) 

2890 todo.extend(reversed(extra)) 

2891 if not stat.S_ISDIR(entry.mode) or include_trees: 

2892 yield entry 

2893 

2894 

2895def iter_commit_contents( 

2896 store: ObjectContainer, 

2897 commit: Union[Commit, bytes], 

2898 *, 

2899 include: Optional[Sequence[Union[str, bytes, Path]]] = None, 

2900) -> Iterator[TreeEntry]: 

2901 """Iterate the contents of the repository at the specified commit. 

2902 

2903 This is a wrapper around iter_tree_contents() and 

2904 tree_lookup_path() to simplify the common task of getting the 

2905 contest of a repo at a particular commit. See also 

2906 dulwich.index.build_file_from_blob() for writing individual files 

2907 to disk. 

2908 

2909 Args: 

2910 store: Object store to get trees from 

2911 commit: Commit object, or SHA1 of a commit 

2912 include: if provided, only the entries whose paths are in the 

2913 list, or whose parent tree is in the list, will be 

2914 included. Note that duplicate or overlapping paths 

2915 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

2916 

2917 Yields: TreeEntry namedtuples for all matching files in a commit. 

2918 """ 

2919 sha = commit.id if isinstance(commit, Commit) else commit 

2920 if not isinstance(obj := store[sha], Commit): 

2921 raise TypeError( 

2922 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

2923 ) 

2924 commit = obj 

2925 encoding = commit.encoding or "utf-8" 

2926 include_bytes: list[bytes] = ( 

2927 [ 

2928 path if isinstance(path, bytes) else str(path).encode(encoding) 

2929 for path in include 

2930 ] 

2931 if include is not None 

2932 else [b""] 

2933 ) 

2934 

2935 for path in include_bytes: 

2936 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

2937 # Iterate all contained files if path points to a dir, otherwise just get that 

2938 # single file 

2939 if isinstance(store[obj_id], Tree): 

2940 for entry in iter_tree_contents(store, obj_id): 

2941 yield entry.in_path(path) 

2942 else: 

2943 yield TreeEntry(path, mode, obj_id) 

2944 

2945 

2946def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2947 """Peel all tags from a SHA. 

2948 

2949 Args: 

2950 store: Object store to get objects from 

2951 sha: The object SHA to peel. 

2952 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

2953 intermediate tags; if the original ref does not point to a tag, 

2954 this will equal the original SHA1. 

2955 """ 

2956 unpeeled = obj = store[sha] 

2957 obj_class = object_class(obj.type_name) 

2958 while obj_class is Tag: 

2959 assert isinstance(obj, Tag) 

2960 obj_class, sha = obj.object 

2961 obj = store[sha] 

2962 return unpeeled, obj