Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1236 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Mapping, Sequence, Set 

33from contextlib import suppress 

34from io import BytesIO 

35from pathlib import Path 

36from typing import ( 

37 TYPE_CHECKING, 

38 BinaryIO, 

39 Callable, 

40 Optional, 

41 Protocol, 

42 Union, 

43) 

44 

45from .errors import NotTreeError 

46from .file import GitFile, _GitFile 

47from .objects import ( 

48 S_ISGITLINK, 

49 ZERO_SHA, 

50 Blob, 

51 Commit, 

52 ObjectID, 

53 ShaFile, 

54 Tag, 

55 Tree, 

56 TreeEntry, 

57 hex_to_filename, 

58 hex_to_sha, 

59 object_class, 

60 sha_to_hex, 

61 valid_hexsha, 

62) 

63from .pack import ( 

64 PACK_SPOOL_FILE_MAX_SIZE, 

65 ObjectContainer, 

66 Pack, 

67 PackData, 

68 PackedObjectContainer, 

69 PackFileDisappeared, 

70 PackHint, 

71 PackIndexer, 

72 PackInflater, 

73 PackStreamCopier, 

74 UnpackedObject, 

75 extend_pack, 

76 full_unpacked_object, 

77 generate_unpacked_objects, 

78 iter_sha1, 

79 load_pack_index_file, 

80 pack_objects_to_data, 

81 write_pack_data, 

82 write_pack_index, 

83) 

84from .protocol import DEPTH_INFINITE 

85from .refs import PEELED_TAG_SUFFIX, Ref 

86 

87if TYPE_CHECKING: 

88 from .commit_graph import CommitGraph 

89 from .config import Config 

90 from .diff_tree import RenameDetector 

91 

92 

93class GraphWalker(Protocol): 

94 """Protocol for graph walker objects.""" 

95 

96 def __next__(self) -> Optional[bytes]: 

97 """Return the next object SHA to visit.""" 

98 ... 

99 

100 def ack(self, sha: bytes) -> None: 

101 """Acknowledge that an object has been received.""" 

102 ... 

103 

104 def nak(self) -> None: 

105 """Nothing in common was found.""" 

106 ... 

107 

108 

109INFODIR = "info" 

110PACKDIR = "pack" 

111 

112# use permissions consistent with Git; just readable by everyone 

113# TODO: should packs also be non-writable on Windows? if so, that 

114# would requite some rather significant adjustments to the test suite 

115PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

116 

117# Grace period for cleaning up temporary pack files (in seconds) 

118# Matches git's default of 2 weeks 

119DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

120 

121 

122def find_shallow( 

123 store: ObjectContainer, heads: Iterable[bytes], depth: int 

124) -> tuple[set[bytes], set[bytes]]: 

125 """Find shallow commits according to a given depth. 

126 

127 Args: 

128 store: An ObjectStore for looking up objects. 

129 heads: Iterable of head SHAs to start walking from. 

130 depth: The depth of ancestors to include. A depth of one includes 

131 only the heads themselves. 

132 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

133 considered shallow and unshallow according to the arguments. Note that 

134 these sets may overlap if a commit is reachable along multiple paths. 

135 """ 

136 parents: dict[bytes, list[bytes]] = {} 

137 commit_graph = store.get_commit_graph() 

138 

139 def get_parents(sha: bytes) -> list[bytes]: 

140 result = parents.get(sha, None) 

141 if not result: 

142 # Try to use commit graph first if available 

143 if commit_graph: 

144 graph_parents = commit_graph.get_parents(sha) 

145 if graph_parents is not None: 

146 result = graph_parents 

147 parents[sha] = result 

148 return result 

149 # Fall back to loading the object 

150 commit = store[sha] 

151 assert isinstance(commit, Commit) 

152 result = commit.parents 

153 parents[sha] = result 

154 return result 

155 

156 todo = [] # stack of (sha, depth) 

157 for head_sha in heads: 

158 obj = store[head_sha] 

159 # Peel tags if necessary 

160 while isinstance(obj, Tag): 

161 _, sha = obj.object 

162 obj = store[sha] 

163 if isinstance(obj, Commit): 

164 todo.append((obj.id, 1)) 

165 

166 not_shallow = set() 

167 shallow = set() 

168 while todo: 

169 sha, cur_depth = todo.pop() 

170 if cur_depth < depth: 

171 not_shallow.add(sha) 

172 new_depth = cur_depth + 1 

173 todo.extend((p, new_depth) for p in get_parents(sha)) 

174 else: 

175 shallow.add(sha) 

176 

177 return shallow, not_shallow 

178 

179 

180def get_depth( 

181 store: ObjectContainer, 

182 head: bytes, 

183 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

184 max_depth: Optional[int] = None, 

185) -> int: 

186 """Return the current available depth for the given head. 

187 

188 For commits with multiple parents, the largest possible depth will be 

189 returned. 

190 

191 Args: 

192 store: Object store to search in 

193 head: commit to start from 

194 get_parents: optional function for getting the parents of a commit 

195 max_depth: maximum depth to search 

196 """ 

197 if head not in store: 

198 return 0 

199 current_depth = 1 

200 queue = [(head, current_depth)] 

201 commit_graph = store.get_commit_graph() 

202 

203 while queue and (max_depth is None or current_depth < max_depth): 

204 e, depth = queue.pop(0) 

205 current_depth = max(current_depth, depth) 

206 

207 # Try to use commit graph for parent lookup if available 

208 parents = None 

209 if commit_graph: 

210 parents = commit_graph.get_parents(e) 

211 

212 if parents is None: 

213 # Fall back to loading the object 

214 cmt = store[e] 

215 if isinstance(cmt, Tag): 

216 _cls, sha = cmt.object 

217 cmt = store[sha] 

218 parents = get_parents(cmt) 

219 

220 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

221 return current_depth 

222 

223 

224class PackContainer(Protocol): 

225 """Protocol for containers that can accept pack files.""" 

226 

227 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

228 """Add a new pack.""" 

229 

230 

231class BaseObjectStore: 

232 """Object store interface.""" 

233 

234 def determine_wants_all( 

235 self, refs: Mapping[Ref, ObjectID], depth: Optional[int] = None 

236 ) -> list[ObjectID]: 

237 """Determine which objects are wanted based on refs.""" 

238 

239 def _want_deepen(sha: bytes) -> bool: 

240 if not depth: 

241 return False 

242 if depth == DEPTH_INFINITE: 

243 return True 

244 return depth > self._get_depth(sha) 

245 

246 return [ 

247 sha 

248 for (ref, sha) in refs.items() 

249 if (sha not in self or _want_deepen(sha)) 

250 and not ref.endswith(PEELED_TAG_SUFFIX) 

251 and not sha == ZERO_SHA 

252 ] 

253 

254 def contains_loose(self, sha: bytes) -> bool: 

255 """Check if a particular object is present by SHA1 and is loose.""" 

256 raise NotImplementedError(self.contains_loose) 

257 

258 def contains_packed(self, sha: bytes) -> bool: 

259 """Check if a particular object is present by SHA1 and is packed.""" 

260 return False # Default implementation for stores that don't support packing 

261 

262 def __contains__(self, sha1: bytes) -> bool: 

263 """Check if a particular object is present by SHA1. 

264 

265 This method makes no distinction between loose and packed objects. 

266 """ 

267 return self.contains_loose(sha1) 

268 

269 @property 

270 def packs(self) -> list[Pack]: 

271 """Iterable of pack objects.""" 

272 raise NotImplementedError 

273 

274 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

275 """Obtain the raw text for an object. 

276 

277 Args: 

278 name: sha for the object. 

279 Returns: tuple with numeric type and object contents. 

280 """ 

281 raise NotImplementedError(self.get_raw) 

282 

283 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

284 """Obtain an object by SHA1.""" 

285 type_num, uncomp = self.get_raw(sha1) 

286 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

287 

288 def __iter__(self) -> Iterator[bytes]: 

289 """Iterate over the SHAs that are present in this store.""" 

290 raise NotImplementedError(self.__iter__) 

291 

292 def add_object(self, obj: ShaFile) -> None: 

293 """Add a single object to this object store.""" 

294 raise NotImplementedError(self.add_object) 

295 

296 def add_objects( 

297 self, 

298 objects: Sequence[tuple[ShaFile, Optional[str]]], 

299 progress: Optional[Callable[..., None]] = None, 

300 ) -> Optional["Pack"]: 

301 """Add a set of objects to this object store. 

302 

303 Args: 

304 objects: Iterable over a list of (object, path) tuples 

305 progress: Optional progress callback 

306 """ 

307 raise NotImplementedError(self.add_objects) 

308 

309 def tree_changes( 

310 self, 

311 source: Optional[bytes], 

312 target: Optional[bytes], 

313 want_unchanged: bool = False, 

314 include_trees: bool = False, 

315 change_type_same: bool = False, 

316 rename_detector: Optional["RenameDetector"] = None, 

317 paths: Optional[Sequence[bytes]] = None, 

318 ) -> Iterator[ 

319 tuple[ 

320 tuple[Optional[bytes], Optional[bytes]], 

321 tuple[Optional[int], Optional[int]], 

322 tuple[Optional[bytes], Optional[bytes]], 

323 ] 

324 ]: 

325 """Find the differences between the contents of two trees. 

326 

327 Args: 

328 source: SHA1 of the source tree 

329 target: SHA1 of the target tree 

330 want_unchanged: Whether unchanged files should be reported 

331 include_trees: Whether to include trees 

332 change_type_same: Whether to report files changing 

333 type in the same entry. 

334 rename_detector: RenameDetector object for detecting renames. 

335 paths: Optional list of paths to filter to (as bytes). 

336 Returns: Iterator over tuples with 

337 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

338 """ 

339 from .diff_tree import tree_changes 

340 

341 for change in tree_changes( 

342 self, 

343 source, 

344 target, 

345 want_unchanged=want_unchanged, 

346 include_trees=include_trees, 

347 change_type_same=change_type_same, 

348 rename_detector=rename_detector, 

349 paths=paths, 

350 ): 

351 old_path = change.old.path if change.old is not None else None 

352 new_path = change.new.path if change.new is not None else None 

353 old_mode = change.old.mode if change.old is not None else None 

354 new_mode = change.new.mode if change.new is not None else None 

355 old_sha = change.old.sha if change.old is not None else None 

356 new_sha = change.new.sha if change.new is not None else None 

357 yield ( 

358 (old_path, new_path), 

359 (old_mode, new_mode), 

360 (old_sha, new_sha), 

361 ) 

362 

363 def iter_tree_contents( 

364 self, tree_id: bytes, include_trees: bool = False 

365 ) -> Iterator[TreeEntry]: 

366 """Iterate the contents of a tree and all subtrees. 

367 

368 Iteration is depth-first pre-order, as in e.g. os.walk. 

369 

370 Args: 

371 tree_id: SHA1 of the tree. 

372 include_trees: If True, include tree objects in the iteration. 

373 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

374 tree. 

375 """ 

376 warnings.warn( 

377 "Please use dulwich.object_store.iter_tree_contents", 

378 DeprecationWarning, 

379 stacklevel=2, 

380 ) 

381 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

382 

383 def iterobjects_subset( 

384 self, shas: Iterable[bytes], *, allow_missing: bool = False 

385 ) -> Iterator[ShaFile]: 

386 """Iterate over a subset of objects in the store. 

387 

388 Args: 

389 shas: Iterable of object SHAs to retrieve 

390 allow_missing: If True, skip missing objects; if False, raise KeyError 

391 

392 Returns: 

393 Iterator of ShaFile objects 

394 

395 Raises: 

396 KeyError: If an object is missing and allow_missing is False 

397 """ 

398 for sha in shas: 

399 try: 

400 yield self[sha] 

401 except KeyError: 

402 if not allow_missing: 

403 raise 

404 

405 def iter_unpacked_subset( 

406 self, 

407 shas: Iterable[bytes], 

408 include_comp: bool = False, 

409 allow_missing: bool = False, 

410 convert_ofs_delta: bool = True, 

411 ) -> "Iterator[UnpackedObject]": 

412 """Iterate over unpacked objects for a subset of SHAs. 

413 

414 Default implementation that converts ShaFile objects to UnpackedObject. 

415 Subclasses may override for more efficient unpacked access. 

416 

417 Args: 

418 shas: Iterable of object SHAs to retrieve 

419 include_comp: Whether to include compressed data (ignored in base implementation) 

420 allow_missing: If True, skip missing objects; if False, raise KeyError 

421 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation) 

422 

423 Returns: 

424 Iterator of UnpackedObject instances 

425 

426 Raises: 

427 KeyError: If an object is missing and allow_missing is False 

428 """ 

429 from .pack import UnpackedObject 

430 

431 for sha in shas: 

432 try: 

433 obj = self[sha] 

434 # Convert ShaFile to UnpackedObject 

435 unpacked = UnpackedObject( 

436 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id 

437 ) 

438 yield unpacked 

439 except KeyError: 

440 if not allow_missing: 

441 raise 

442 

443 def find_missing_objects( 

444 self, 

445 haves: Iterable[bytes], 

446 wants: Iterable[bytes], 

447 shallow: Optional[Set[bytes]] = None, 

448 progress: Optional[Callable[..., None]] = None, 

449 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None, 

450 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

451 ) -> Iterator[tuple[bytes, Optional[PackHint]]]: 

452 """Find the missing objects required for a set of revisions. 

453 

454 Args: 

455 haves: Iterable over SHAs already in common. 

456 wants: Iterable over SHAs of objects to fetch. 

457 shallow: Set of shallow commit SHA1s to skip 

458 progress: Simple progress function that will be called with 

459 updated progress strings. 

460 get_tagged: Function that returns a dict of pointed-to sha -> 

461 tag sha for including tags. 

462 get_parents: Optional function for getting the parents of a 

463 commit. 

464 Returns: Iterator over (sha, path) pairs. 

465 """ 

466 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

467 finder = MissingObjectFinder( 

468 self, 

469 haves=haves, 

470 wants=wants, 

471 shallow=shallow, 

472 progress=progress, 

473 get_tagged=get_tagged, 

474 get_parents=get_parents, 

475 ) 

476 return iter(finder) 

477 

478 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

479 """Find which revisions this store has in common using graphwalker. 

480 

481 Args: 

482 graphwalker: A graphwalker object. 

483 Returns: List of SHAs that are in common 

484 """ 

485 haves = [] 

486 sha = next(graphwalker) 

487 while sha: 

488 if sha in self: 

489 haves.append(sha) 

490 graphwalker.ack(sha) 

491 sha = next(graphwalker) 

492 return haves 

493 

494 def generate_pack_data( 

495 self, 

496 have: Iterable[bytes], 

497 want: Iterable[bytes], 

498 *, 

499 shallow: Optional[Set[bytes]] = None, 

500 progress: Optional[Callable[..., None]] = None, 

501 ofs_delta: bool = True, 

502 ) -> tuple[int, Iterator[UnpackedObject]]: 

503 """Generate pack data objects for a set of wants/haves. 

504 

505 Args: 

506 have: List of SHA1s of objects that should not be sent 

507 want: List of SHA1s of objects that should be sent 

508 shallow: Set of shallow commit SHA1s to skip 

509 ofs_delta: Whether OFS deltas can be included 

510 progress: Optional progress reporting method 

511 """ 

512 # Note that the pack-specific implementation below is more efficient, 

513 # as it reuses deltas 

514 missing_objects = MissingObjectFinder( 

515 self, haves=have, wants=want, shallow=shallow, progress=progress 

516 ) 

517 object_ids = list(missing_objects) 

518 return pack_objects_to_data( 

519 [(self[oid], path) for oid, path in object_ids], 

520 ofs_delta=ofs_delta, 

521 progress=progress, 

522 ) 

523 

524 def peel_sha(self, sha: bytes) -> bytes: 

525 """Peel all tags from a SHA. 

526 

527 Args: 

528 sha: The object SHA to peel. 

529 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

530 intermediate tags; if the original ref does not point to a tag, 

531 this will equal the original SHA1. 

532 """ 

533 warnings.warn( 

534 "Please use dulwich.object_store.peel_sha()", 

535 DeprecationWarning, 

536 stacklevel=2, 

537 ) 

538 return peel_sha(self, sha)[1].id 

539 

540 def _get_depth( 

541 self, 

542 head: bytes, 

543 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents, 

544 max_depth: Optional[int] = None, 

545 ) -> int: 

546 """Return the current available depth for the given head. 

547 

548 For commits with multiple parents, the largest possible depth will be 

549 returned. 

550 

551 Args: 

552 head: commit to start from 

553 get_parents: optional function for getting the parents of a commit 

554 max_depth: maximum depth to search 

555 """ 

556 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

557 

558 def close(self) -> None: 

559 """Close any files opened by this object store.""" 

560 # Default implementation is a NO-OP 

561 

562 def prune(self, grace_period: Optional[int] = None) -> None: 

563 """Prune/clean up this object store. 

564 

565 This includes removing orphaned temporary files and other 

566 housekeeping tasks. Default implementation is a NO-OP. 

567 

568 Args: 

569 grace_period: Grace period in seconds for removing temporary files. 

570 If None, uses the default grace period. 

571 """ 

572 # Default implementation is a NO-OP 

573 

574 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

575 """Iterate over all SHA1s that start with a given prefix. 

576 

577 The default implementation is a naive iteration over all objects. 

578 However, subclasses may override this method with more efficient 

579 implementations. 

580 """ 

581 for sha in self: 

582 if sha.startswith(prefix): 

583 yield sha 

584 

585 def get_commit_graph(self) -> Optional["CommitGraph"]: 

586 """Get the commit graph for this object store. 

587 

588 Returns: 

589 CommitGraph object if available, None otherwise 

590 """ 

591 return None 

592 

593 def write_commit_graph( 

594 self, refs: Optional[Sequence[bytes]] = None, reachable: bool = True 

595 ) -> None: 

596 """Write a commit graph file for this object store. 

597 

598 Args: 

599 refs: List of refs to include. If None, includes all refs from object store. 

600 reachable: If True, includes all commits reachable from refs. 

601 If False, only includes the direct ref targets. 

602 

603 Note: 

604 Default implementation does nothing. Subclasses should override 

605 this method to provide commit graph writing functionality. 

606 """ 

607 raise NotImplementedError(self.write_commit_graph) 

608 

609 def get_object_mtime(self, sha: bytes) -> float: 

610 """Get the modification time of an object. 

611 

612 Args: 

613 sha: SHA1 of the object 

614 

615 Returns: 

616 Modification time as seconds since epoch 

617 

618 Raises: 

619 KeyError: if the object is not found 

620 """ 

621 # Default implementation raises KeyError 

622 # Subclasses should override to provide actual mtime 

623 raise KeyError(sha) 

624 

625 

626class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer): 

627 """Object store that supports pack operations. 

628 

629 This is a base class for object stores that can handle pack files, 

630 including both disk-based and memory-based stores. 

631 """ 

632 

633 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

634 """Add a new pack to this object store. 

635 

636 Returns: Tuple of (file, commit_func, abort_func) 

637 """ 

638 raise NotImplementedError(self.add_pack) 

639 

640 def add_pack_data( 

641 self, 

642 count: int, 

643 unpacked_objects: Iterator["UnpackedObject"], 

644 progress: Optional[Callable[..., None]] = None, 

645 ) -> Optional["Pack"]: 

646 """Add pack data to this object store. 

647 

648 Args: 

649 count: Number of objects 

650 unpacked_objects: Iterator over unpacked objects 

651 progress: Optional progress callback 

652 """ 

653 raise NotImplementedError(self.add_pack_data) 

654 

655 def get_unpacked_object( 

656 self, sha1: bytes, *, include_comp: bool = False 

657 ) -> "UnpackedObject": 

658 """Get a raw unresolved object. 

659 

660 Args: 

661 sha1: SHA-1 hash of the object 

662 include_comp: Whether to include compressed data 

663 

664 Returns: 

665 UnpackedObject instance 

666 """ 

667 from .pack import UnpackedObject 

668 

669 obj = self[sha1] 

670 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks()) 

671 

672 def iterobjects_subset( 

673 self, shas: Iterable[bytes], *, allow_missing: bool = False 

674 ) -> Iterator[ShaFile]: 

675 """Iterate over a subset of objects. 

676 

677 Args: 

678 shas: Iterable of object SHAs to retrieve 

679 allow_missing: If True, skip missing objects 

680 

681 Returns: 

682 Iterator of ShaFile objects 

683 """ 

684 for sha in shas: 

685 try: 

686 yield self[sha] 

687 except KeyError: 

688 if not allow_missing: 

689 raise 

690 

691 

692class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer): 

693 """Object store that uses pack files for storage. 

694 

695 This class provides a base implementation for object stores that use 

696 Git pack files as their primary storage mechanism. It handles caching 

697 of open pack files and provides configuration for pack file operations. 

698 """ 

699 

700 def __init__( 

701 self, 

702 pack_compression_level: int = -1, 

703 pack_index_version: Optional[int] = None, 

704 pack_delta_window_size: Optional[int] = None, 

705 pack_window_memory: Optional[int] = None, 

706 pack_delta_cache_size: Optional[int] = None, 

707 pack_depth: Optional[int] = None, 

708 pack_threads: Optional[int] = None, 

709 pack_big_file_threshold: Optional[int] = None, 

710 ) -> None: 

711 """Initialize a PackBasedObjectStore. 

712 

713 Args: 

714 pack_compression_level: Compression level for pack files (-1 to 9) 

715 pack_index_version: Pack index version to use 

716 pack_delta_window_size: Window size for delta compression 

717 pack_window_memory: Maximum memory to use for delta window 

718 pack_delta_cache_size: Cache size for delta operations 

719 pack_depth: Maximum depth for pack deltas 

720 pack_threads: Number of threads to use for packing 

721 pack_big_file_threshold: Threshold for treating files as "big" 

722 """ 

723 self._pack_cache: dict[str, Pack] = {} 

724 self.pack_compression_level = pack_compression_level 

725 self.pack_index_version = pack_index_version 

726 self.pack_delta_window_size = pack_delta_window_size 

727 self.pack_window_memory = pack_window_memory 

728 self.pack_delta_cache_size = pack_delta_cache_size 

729 self.pack_depth = pack_depth 

730 self.pack_threads = pack_threads 

731 self.pack_big_file_threshold = pack_big_file_threshold 

732 

733 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

734 """Add a new pack to this object store.""" 

735 raise NotImplementedError(self.add_pack) 

736 

737 def add_pack_data( 

738 self, 

739 count: int, 

740 unpacked_objects: Iterator[UnpackedObject], 

741 progress: Optional[Callable[..., None]] = None, 

742 ) -> Optional["Pack"]: 

743 """Add pack data to this object store. 

744 

745 Args: 

746 count: Number of items to add 

747 unpacked_objects: Iterator of UnpackedObject instances 

748 progress: Optional progress callback 

749 """ 

750 if count == 0: 

751 # Don't bother writing an empty pack file 

752 return None 

753 f, commit, abort = self.add_pack() 

754 try: 

755 write_pack_data( 

756 f.write, 

757 unpacked_objects, 

758 num_records=count, 

759 progress=progress, 

760 compression_level=self.pack_compression_level, 

761 ) 

762 except BaseException: 

763 abort() 

764 raise 

765 else: 

766 return commit() 

767 

768 @property 

769 def alternates(self) -> list["BaseObjectStore"]: 

770 """Return list of alternate object stores.""" 

771 return [] 

772 

773 def contains_packed(self, sha: bytes) -> bool: 

774 """Check if a particular object is present by SHA1 and is packed. 

775 

776 This does not check alternates. 

777 """ 

778 for pack in self.packs: 

779 try: 

780 if sha in pack: 

781 return True 

782 except PackFileDisappeared: 

783 pass 

784 return False 

785 

786 def __contains__(self, sha: bytes) -> bool: 

787 """Check if a particular object is present by SHA1. 

788 

789 This method makes no distinction between loose and packed objects. 

790 """ 

791 if self.contains_packed(sha) or self.contains_loose(sha): 

792 return True 

793 for alternate in self.alternates: 

794 if sha in alternate: 

795 return True 

796 return False 

797 

798 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

799 """Add a newly appeared pack to the cache by path.""" 

800 prev_pack = self._pack_cache.get(base_name) 

801 if prev_pack is not pack: 

802 self._pack_cache[base_name] = pack 

803 if prev_pack: 

804 prev_pack.close() 

805 

806 def generate_pack_data( 

807 self, 

808 have: Iterable[bytes], 

809 want: Iterable[bytes], 

810 *, 

811 shallow: Optional[Set[bytes]] = None, 

812 progress: Optional[Callable[..., None]] = None, 

813 ofs_delta: bool = True, 

814 ) -> tuple[int, Iterator[UnpackedObject]]: 

815 """Generate pack data objects for a set of wants/haves. 

816 

817 Args: 

818 have: List of SHA1s of objects that should not be sent 

819 want: List of SHA1s of objects that should be sent 

820 shallow: Set of shallow commit SHA1s to skip 

821 ofs_delta: Whether OFS deltas can be included 

822 progress: Optional progress reporting method 

823 """ 

824 missing_objects = MissingObjectFinder( 

825 self, haves=have, wants=want, shallow=shallow, progress=progress 

826 ) 

827 remote_has = missing_objects.get_remote_has() 

828 object_ids = list(missing_objects) 

829 return len(object_ids), generate_unpacked_objects( 

830 self, 

831 object_ids, 

832 progress=progress, 

833 ofs_delta=ofs_delta, 

834 other_haves=remote_has, 

835 ) 

836 

837 def _clear_cached_packs(self) -> None: 

838 pack_cache = self._pack_cache 

839 self._pack_cache = {} 

840 while pack_cache: 

841 (_name, pack) = pack_cache.popitem() 

842 pack.close() 

843 

844 def _iter_cached_packs(self) -> Iterator[Pack]: 

845 return iter(self._pack_cache.values()) 

846 

847 def _update_pack_cache(self) -> list[Pack]: 

848 raise NotImplementedError(self._update_pack_cache) 

849 

850 def close(self) -> None: 

851 """Close the object store and release resources. 

852 

853 This method closes all cached pack files and frees associated resources. 

854 """ 

855 self._clear_cached_packs() 

856 

857 @property 

858 def packs(self) -> list[Pack]: 

859 """List with pack objects.""" 

860 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

861 

862 def count_pack_files(self) -> int: 

863 """Count the number of pack files. 

864 

865 Returns: 

866 Number of pack files (excluding those with .keep files) 

867 """ 

868 count = 0 

869 for pack in self.packs: 

870 # Check if there's a .keep file for this pack 

871 keep_path = pack._basename + ".keep" 

872 if not os.path.exists(keep_path): 

873 count += 1 

874 return count 

875 

876 def _iter_alternate_objects(self) -> Iterator[bytes]: 

877 """Iterate over the SHAs of all the objects in alternate stores.""" 

878 for alternate in self.alternates: 

879 yield from alternate 

880 

881 def _iter_loose_objects(self) -> Iterator[bytes]: 

882 """Iterate over the SHAs of all loose objects.""" 

883 raise NotImplementedError(self._iter_loose_objects) 

884 

885 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

886 raise NotImplementedError(self._get_loose_object) 

887 

888 def delete_loose_object(self, sha: bytes) -> None: 

889 """Delete a loose object. 

890 

891 This method only handles loose objects. For packed objects, 

892 use repack(exclude=...) to exclude them during repacking. 

893 """ 

894 raise NotImplementedError(self.delete_loose_object) 

895 

896 def _remove_pack(self, pack: "Pack") -> None: 

897 raise NotImplementedError(self._remove_pack) 

898 

899 def pack_loose_objects( 

900 self, progress: Optional[Callable[[str], None]] = None 

901 ) -> int: 

902 """Pack loose objects. 

903 

904 Args: 

905 progress: Optional progress reporting callback 

906 

907 Returns: Number of objects packed 

908 """ 

909 objects: list[tuple[ShaFile, None]] = [] 

910 for sha in self._iter_loose_objects(): 

911 obj = self._get_loose_object(sha) 

912 if obj is not None: 

913 objects.append((obj, None)) 

914 self.add_objects(objects, progress=progress) 

915 for obj, path in objects: 

916 self.delete_loose_object(obj.id) 

917 return len(objects) 

918 

919 def repack( 

920 self, 

921 exclude: Optional[Set[bytes]] = None, 

922 progress: Optional[Callable[[str], None]] = None, 

923 ) -> int: 

924 """Repack the packs in this repository. 

925 

926 Note that this implementation is fairly naive and currently keeps all 

927 objects in memory while it repacks. 

928 

929 Args: 

930 exclude: Optional set of object SHAs to exclude from repacking 

931 progress: Optional progress reporting callback 

932 """ 

933 if exclude is None: 

934 exclude = set() 

935 

936 loose_objects = set() 

937 excluded_loose_objects = set() 

938 for sha in self._iter_loose_objects(): 

939 if sha not in exclude: 

940 obj = self._get_loose_object(sha) 

941 if obj is not None: 

942 loose_objects.add(obj) 

943 else: 

944 excluded_loose_objects.add(sha) 

945 

946 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

947 old_packs = {p.name(): p for p in self.packs} 

948 for name, pack in old_packs.items(): 

949 objects.update( 

950 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

951 ) 

952 

953 # Only create a new pack if there are objects to pack 

954 if objects: 

955 # The name of the consolidated pack might match the name of a 

956 # pre-existing pack. Take care not to remove the newly created 

957 # consolidated pack. 

958 consolidated = self.add_objects(list(objects), progress=progress) 

959 if consolidated is not None: 

960 old_packs.pop(consolidated.name(), None) 

961 

962 # Delete loose objects that were packed 

963 for obj in loose_objects: 

964 if obj is not None: 

965 self.delete_loose_object(obj.id) 

966 # Delete excluded loose objects 

967 for sha in excluded_loose_objects: 

968 self.delete_loose_object(sha) 

969 for name, pack in old_packs.items(): 

970 self._remove_pack(pack) 

971 self._update_pack_cache() 

972 return len(objects) 

973 

974 def __iter__(self) -> Iterator[bytes]: 

975 """Iterate over the SHAs that are present in this store.""" 

976 self._update_pack_cache() 

977 for pack in self._iter_cached_packs(): 

978 try: 

979 yield from pack 

980 except PackFileDisappeared: 

981 pass 

982 yield from self._iter_loose_objects() 

983 yield from self._iter_alternate_objects() 

984 

985 def contains_loose(self, sha: bytes) -> bool: 

986 """Check if a particular object is present by SHA1 and is loose. 

987 

988 This does not check alternates. 

989 """ 

990 return self._get_loose_object(sha) is not None 

991 

992 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

993 """Obtain the raw fulltext for an object. 

994 

995 Args: 

996 name: sha for the object. 

997 Returns: tuple with numeric type and object contents. 

998 """ 

999 if name == ZERO_SHA: 

1000 raise KeyError(name) 

1001 if len(name) == 40: 

1002 sha = hex_to_sha(name) 

1003 hexsha = name 

1004 elif len(name) == 20: 

1005 sha = name 

1006 hexsha = None 

1007 else: 

1008 raise AssertionError(f"Invalid object name {name!r}") 

1009 for pack in self._iter_cached_packs(): 

1010 try: 

1011 return pack.get_raw(sha) 

1012 except (KeyError, PackFileDisappeared): 

1013 pass 

1014 if hexsha is None: 

1015 hexsha = sha_to_hex(name) 

1016 ret = self._get_loose_object(hexsha) 

1017 if ret is not None: 

1018 return ret.type_num, ret.as_raw_string() 

1019 # Maybe something else has added a pack with the object 

1020 # in the mean time? 

1021 for pack in self._update_pack_cache(): 

1022 try: 

1023 return pack.get_raw(sha) 

1024 except KeyError: 

1025 pass 

1026 for alternate in self.alternates: 

1027 try: 

1028 return alternate.get_raw(hexsha) 

1029 except KeyError: 

1030 pass 

1031 raise KeyError(hexsha) 

1032 

1033 def iter_unpacked_subset( 

1034 self, 

1035 shas: Iterable[bytes], 

1036 include_comp: bool = False, 

1037 allow_missing: bool = False, 

1038 convert_ofs_delta: bool = True, 

1039 ) -> Iterator[UnpackedObject]: 

1040 """Iterate over a subset of objects, yielding UnpackedObject instances. 

1041 

1042 Args: 

1043 shas: Set of object SHAs to retrieve 

1044 include_comp: Whether to include compressed data 

1045 allow_missing: If True, skip missing objects; if False, raise KeyError 

1046 convert_ofs_delta: Whether to convert OFS_DELTA objects 

1047 

1048 Returns: 

1049 Iterator of UnpackedObject instances 

1050 

1051 Raises: 

1052 KeyError: If an object is missing and allow_missing is False 

1053 """ 

1054 todo: set[bytes] = set(shas) 

1055 for p in self._iter_cached_packs(): 

1056 for unpacked in p.iter_unpacked_subset( 

1057 todo, 

1058 include_comp=include_comp, 

1059 allow_missing=True, 

1060 convert_ofs_delta=convert_ofs_delta, 

1061 ): 

1062 yield unpacked 

1063 hexsha = sha_to_hex(unpacked.sha()) 

1064 todo.remove(hexsha) 

1065 # Maybe something else has added a pack with the object 

1066 # in the mean time? 

1067 for p in self._update_pack_cache(): 

1068 for unpacked in p.iter_unpacked_subset( 

1069 todo, 

1070 include_comp=include_comp, 

1071 allow_missing=True, 

1072 convert_ofs_delta=convert_ofs_delta, 

1073 ): 

1074 yield unpacked 

1075 hexsha = sha_to_hex(unpacked.sha()) 

1076 todo.remove(hexsha) 

1077 for alternate in self.alternates: 

1078 assert isinstance(alternate, PackBasedObjectStore) 

1079 for unpacked in alternate.iter_unpacked_subset( 

1080 todo, 

1081 include_comp=include_comp, 

1082 allow_missing=True, 

1083 convert_ofs_delta=convert_ofs_delta, 

1084 ): 

1085 yield unpacked 

1086 hexsha = sha_to_hex(unpacked.sha()) 

1087 todo.remove(hexsha) 

1088 

1089 def iterobjects_subset( 

1090 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1091 ) -> Iterator[ShaFile]: 

1092 """Iterate over a subset of objects in the store. 

1093 

1094 This method searches for objects in pack files, alternates, and loose storage. 

1095 

1096 Args: 

1097 shas: Iterable of object SHAs to retrieve 

1098 allow_missing: If True, skip missing objects; if False, raise KeyError 

1099 

1100 Returns: 

1101 Iterator of ShaFile objects 

1102 

1103 Raises: 

1104 KeyError: If an object is missing and allow_missing is False 

1105 """ 

1106 todo: set[bytes] = set(shas) 

1107 for p in self._iter_cached_packs(): 

1108 for o in p.iterobjects_subset(todo, allow_missing=True): 

1109 yield o 

1110 todo.remove(o.id) 

1111 # Maybe something else has added a pack with the object 

1112 # in the mean time? 

1113 for p in self._update_pack_cache(): 

1114 for o in p.iterobjects_subset(todo, allow_missing=True): 

1115 yield o 

1116 todo.remove(o.id) 

1117 for alternate in self.alternates: 

1118 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

1119 yield o 

1120 todo.remove(o.id) 

1121 for oid in todo: 

1122 loose_obj: Optional[ShaFile] = self._get_loose_object(oid) 

1123 if loose_obj is not None: 

1124 yield loose_obj 

1125 elif not allow_missing: 

1126 raise KeyError(oid) 

1127 

1128 def get_unpacked_object( 

1129 self, sha1: bytes, *, include_comp: bool = False 

1130 ) -> UnpackedObject: 

1131 """Obtain the unpacked object. 

1132 

1133 Args: 

1134 sha1: sha for the object. 

1135 include_comp: Whether to include compression metadata. 

1136 """ 

1137 if sha1 == ZERO_SHA: 

1138 raise KeyError(sha1) 

1139 if len(sha1) == 40: 

1140 sha = hex_to_sha(sha1) 

1141 hexsha = sha1 

1142 elif len(sha1) == 20: 

1143 sha = sha1 

1144 hexsha = None 

1145 else: 

1146 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1147 for pack in self._iter_cached_packs(): 

1148 try: 

1149 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1150 except (KeyError, PackFileDisappeared): 

1151 pass 

1152 if hexsha is None: 

1153 hexsha = sha_to_hex(sha1) 

1154 # Maybe something else has added a pack with the object 

1155 # in the mean time? 

1156 for pack in self._update_pack_cache(): 

1157 try: 

1158 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1159 except KeyError: 

1160 pass 

1161 for alternate in self.alternates: 

1162 assert isinstance(alternate, PackBasedObjectStore) 

1163 try: 

1164 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1165 except KeyError: 

1166 pass 

1167 raise KeyError(hexsha) 

1168 

1169 def add_objects( 

1170 self, 

1171 objects: Sequence[tuple[ShaFile, Optional[str]]], 

1172 progress: Optional[Callable[[str], None]] = None, 

1173 ) -> Optional["Pack"]: 

1174 """Add a set of objects to this object store. 

1175 

1176 Args: 

1177 objects: Iterable over (object, path) tuples, should support 

1178 __len__. 

1179 progress: Optional progress reporting function. 

1180 Returns: Pack object of the objects written. 

1181 """ 

1182 count = len(objects) 

1183 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1184 return self.add_pack_data(count, record_iter, progress=progress) 

1185 

1186 

1187class DiskObjectStore(PackBasedObjectStore): 

1188 """Git-style object store that exists on disk.""" 

1189 

1190 path: Union[str, os.PathLike[str]] 

1191 pack_dir: Union[str, os.PathLike[str]] 

1192 _alternates: Optional[list["BaseObjectStore"]] 

1193 _commit_graph: Optional["CommitGraph"] 

1194 

1195 def __init__( 

1196 self, 

1197 path: Union[str, os.PathLike[str]], 

1198 loose_compression_level: int = -1, 

1199 pack_compression_level: int = -1, 

1200 pack_index_version: Optional[int] = None, 

1201 pack_delta_window_size: Optional[int] = None, 

1202 pack_window_memory: Optional[int] = None, 

1203 pack_delta_cache_size: Optional[int] = None, 

1204 pack_depth: Optional[int] = None, 

1205 pack_threads: Optional[int] = None, 

1206 pack_big_file_threshold: Optional[int] = None, 

1207 fsync_object_files: bool = False, 

1208 ) -> None: 

1209 """Open an object store. 

1210 

1211 Args: 

1212 path: Path of the object store. 

1213 loose_compression_level: zlib compression level for loose objects 

1214 pack_compression_level: zlib compression level for pack objects 

1215 pack_index_version: pack index version to use (1, 2, or 3) 

1216 pack_delta_window_size: sliding window size for delta compression 

1217 pack_window_memory: memory limit for delta window operations 

1218 pack_delta_cache_size: size of cache for delta operations 

1219 pack_depth: maximum delta chain depth 

1220 pack_threads: number of threads for pack operations 

1221 pack_big_file_threshold: threshold for treating files as big 

1222 fsync_object_files: whether to fsync object files for durability 

1223 """ 

1224 super().__init__( 

1225 pack_compression_level=pack_compression_level, 

1226 pack_index_version=pack_index_version, 

1227 pack_delta_window_size=pack_delta_window_size, 

1228 pack_window_memory=pack_window_memory, 

1229 pack_delta_cache_size=pack_delta_cache_size, 

1230 pack_depth=pack_depth, 

1231 pack_threads=pack_threads, 

1232 pack_big_file_threshold=pack_big_file_threshold, 

1233 ) 

1234 self.path = path 

1235 self.pack_dir = os.path.join(self.path, PACKDIR) 

1236 self._alternates = None 

1237 self.loose_compression_level = loose_compression_level 

1238 self.pack_compression_level = pack_compression_level 

1239 self.pack_index_version = pack_index_version 

1240 self.fsync_object_files = fsync_object_files 

1241 

1242 # Commit graph support - lazy loaded 

1243 self._commit_graph = None 

1244 self._use_commit_graph = True # Default to true 

1245 

1246 def __repr__(self) -> str: 

1247 """Return string representation of DiskObjectStore. 

1248 

1249 Returns: 

1250 String representation including the store path 

1251 """ 

1252 return f"<{self.__class__.__name__}({self.path!r})>" 

1253 

1254 @classmethod 

1255 def from_config( 

1256 cls, path: Union[str, os.PathLike[str]], config: "Config" 

1257 ) -> "DiskObjectStore": 

1258 """Create a DiskObjectStore from a configuration object. 

1259 

1260 Args: 

1261 path: Path to the object store directory 

1262 config: Configuration object to read settings from 

1263 

1264 Returns: 

1265 New DiskObjectStore instance configured according to config 

1266 """ 

1267 try: 

1268 default_compression_level = int( 

1269 config.get((b"core",), b"compression").decode() 

1270 ) 

1271 except KeyError: 

1272 default_compression_level = -1 

1273 try: 

1274 loose_compression_level = int( 

1275 config.get((b"core",), b"looseCompression").decode() 

1276 ) 

1277 except KeyError: 

1278 loose_compression_level = default_compression_level 

1279 try: 

1280 pack_compression_level = int( 

1281 config.get((b"core",), "packCompression").decode() 

1282 ) 

1283 except KeyError: 

1284 pack_compression_level = default_compression_level 

1285 try: 

1286 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1287 except KeyError: 

1288 pack_index_version = None 

1289 

1290 # Read pack configuration options 

1291 try: 

1292 pack_delta_window_size = int( 

1293 config.get((b"pack",), b"deltaWindowSize").decode() 

1294 ) 

1295 except KeyError: 

1296 pack_delta_window_size = None 

1297 try: 

1298 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1299 except KeyError: 

1300 pack_window_memory = None 

1301 try: 

1302 pack_delta_cache_size = int( 

1303 config.get((b"pack",), b"deltaCacheSize").decode() 

1304 ) 

1305 except KeyError: 

1306 pack_delta_cache_size = None 

1307 try: 

1308 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1309 except KeyError: 

1310 pack_depth = None 

1311 try: 

1312 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1313 except KeyError: 

1314 pack_threads = None 

1315 try: 

1316 pack_big_file_threshold = int( 

1317 config.get((b"pack",), b"bigFileThreshold").decode() 

1318 ) 

1319 except KeyError: 

1320 pack_big_file_threshold = None 

1321 

1322 # Read core.commitGraph setting 

1323 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1324 

1325 # Read core.fsyncObjectFiles setting 

1326 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False) 

1327 

1328 instance = cls( 

1329 path, 

1330 loose_compression_level, 

1331 pack_compression_level, 

1332 pack_index_version, 

1333 pack_delta_window_size, 

1334 pack_window_memory, 

1335 pack_delta_cache_size, 

1336 pack_depth, 

1337 pack_threads, 

1338 pack_big_file_threshold, 

1339 fsync_object_files, 

1340 ) 

1341 instance._use_commit_graph = use_commit_graph 

1342 return instance 

1343 

1344 @property 

1345 def alternates(self) -> list["BaseObjectStore"]: 

1346 """Get the list of alternate object stores. 

1347 

1348 Reads from .git/objects/info/alternates if not already cached. 

1349 

1350 Returns: 

1351 List of DiskObjectStore instances for alternate object directories 

1352 """ 

1353 if self._alternates is not None: 

1354 return self._alternates 

1355 self._alternates = [] 

1356 for path in self._read_alternate_paths(): 

1357 self._alternates.append(DiskObjectStore(path)) 

1358 return self._alternates 

1359 

1360 def _read_alternate_paths(self) -> Iterator[str]: 

1361 try: 

1362 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1363 except FileNotFoundError: 

1364 return 

1365 with f: 

1366 for line in f.readlines(): 

1367 line = line.rstrip(b"\n") 

1368 if line.startswith(b"#"): 

1369 continue 

1370 if os.path.isabs(line): 

1371 yield os.fsdecode(line) 

1372 else: 

1373 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1374 

1375 def add_alternate_path(self, path: Union[str, os.PathLike[str]]) -> None: 

1376 """Add an alternate path to this object store.""" 

1377 try: 

1378 os.mkdir(os.path.join(self.path, INFODIR)) 

1379 except FileExistsError: 

1380 pass 

1381 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1382 with GitFile(alternates_path, "wb") as f: 

1383 try: 

1384 orig_f = open(alternates_path, "rb") 

1385 except FileNotFoundError: 

1386 pass 

1387 else: 

1388 with orig_f: 

1389 f.write(orig_f.read()) 

1390 f.write(os.fsencode(path) + b"\n") 

1391 

1392 if not os.path.isabs(path): 

1393 path = os.path.join(self.path, path) 

1394 self.alternates.append(DiskObjectStore(path)) 

1395 

1396 def _update_pack_cache(self) -> list[Pack]: 

1397 """Read and iterate over new pack files and cache them.""" 

1398 try: 

1399 pack_dir_contents = os.listdir(self.pack_dir) 

1400 except FileNotFoundError: 

1401 self.close() 

1402 return [] 

1403 pack_files = set() 

1404 for name in pack_dir_contents: 

1405 if name.startswith("pack-") and name.endswith(".pack"): 

1406 # verify that idx exists first (otherwise the pack was not yet 

1407 # fully written) 

1408 idx_name = os.path.splitext(name)[0] + ".idx" 

1409 if idx_name in pack_dir_contents: 

1410 pack_name = name[: -len(".pack")] 

1411 pack_files.add(pack_name) 

1412 

1413 # Open newly appeared pack files 

1414 new_packs = [] 

1415 for f in pack_files: 

1416 if f not in self._pack_cache: 

1417 pack = Pack( 

1418 os.path.join(self.pack_dir, f), 

1419 delta_window_size=self.pack_delta_window_size, 

1420 window_memory=self.pack_window_memory, 

1421 delta_cache_size=self.pack_delta_cache_size, 

1422 depth=self.pack_depth, 

1423 threads=self.pack_threads, 

1424 big_file_threshold=self.pack_big_file_threshold, 

1425 ) 

1426 new_packs.append(pack) 

1427 self._pack_cache[f] = pack 

1428 # Remove disappeared pack files 

1429 for f in set(self._pack_cache) - pack_files: 

1430 self._pack_cache.pop(f).close() 

1431 return new_packs 

1432 

1433 def _get_shafile_path(self, sha: bytes) -> str: 

1434 # Check from object dir 

1435 return hex_to_filename(os.fspath(self.path), sha) 

1436 

1437 def _iter_loose_objects(self) -> Iterator[bytes]: 

1438 for base in os.listdir(self.path): 

1439 if len(base) != 2: 

1440 continue 

1441 for rest in os.listdir(os.path.join(self.path, base)): 

1442 sha = os.fsencode(base + rest) 

1443 if not valid_hexsha(sha): 

1444 continue 

1445 yield sha 

1446 

1447 def count_loose_objects(self) -> int: 

1448 """Count the number of loose objects in the object store. 

1449 

1450 Returns: 

1451 Number of loose objects 

1452 """ 

1453 count = 0 

1454 if not os.path.exists(self.path): 

1455 return 0 

1456 

1457 for i in range(256): 

1458 subdir = os.path.join(self.path, f"{i:02x}") 

1459 try: 

1460 count += len( 

1461 [ 

1462 name 

1463 for name in os.listdir(subdir) 

1464 if len(name) == 38 # 40 - 2 for the prefix 

1465 ] 

1466 ) 

1467 except FileNotFoundError: 

1468 # Directory may have been removed or is inaccessible 

1469 continue 

1470 

1471 return count 

1472 

1473 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

1474 path = self._get_shafile_path(sha) 

1475 try: 

1476 return ShaFile.from_path(path) 

1477 except FileNotFoundError: 

1478 return None 

1479 

1480 def delete_loose_object(self, sha: bytes) -> None: 

1481 """Delete a loose object from disk. 

1482 

1483 Args: 

1484 sha: SHA1 of the object to delete 

1485 

1486 Raises: 

1487 FileNotFoundError: If the object file doesn't exist 

1488 """ 

1489 os.remove(self._get_shafile_path(sha)) 

1490 

1491 def get_object_mtime(self, sha: bytes) -> float: 

1492 """Get the modification time of an object. 

1493 

1494 Args: 

1495 sha: SHA1 of the object 

1496 

1497 Returns: 

1498 Modification time as seconds since epoch 

1499 

1500 Raises: 

1501 KeyError: if the object is not found 

1502 """ 

1503 # First check if it's a loose object 

1504 if self.contains_loose(sha): 

1505 path = self._get_shafile_path(sha) 

1506 try: 

1507 return os.path.getmtime(path) 

1508 except FileNotFoundError: 

1509 pass 

1510 

1511 # Check if it's in a pack file 

1512 for pack in self.packs: 

1513 try: 

1514 if sha in pack: 

1515 # Use the pack file's mtime for packed objects 

1516 pack_path = pack._data_path 

1517 try: 

1518 return os.path.getmtime(pack_path) 

1519 except (FileNotFoundError, AttributeError): 

1520 pass 

1521 except PackFileDisappeared: 

1522 pass 

1523 

1524 raise KeyError(sha) 

1525 

1526 def _remove_pack(self, pack: Pack) -> None: 

1527 try: 

1528 del self._pack_cache[os.path.basename(pack._basename)] 

1529 except KeyError: 

1530 pass 

1531 pack.close() 

1532 os.remove(pack.data.path) 

1533 if hasattr(pack.index, "path"): 

1534 os.remove(pack.index.path) 

1535 

1536 def _get_pack_basepath( 

1537 self, entries: Iterable[tuple[bytes, int, Union[int, None]]] 

1538 ) -> str: 

1539 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1540 # TODO: Handle self.pack_dir being bytes 

1541 suffix = suffix_bytes.decode("ascii") 

1542 return os.path.join(self.pack_dir, "pack-" + suffix) 

1543 

1544 def _complete_pack( 

1545 self, 

1546 f: BinaryIO, 

1547 path: str, 

1548 num_objects: int, 

1549 indexer: PackIndexer, 

1550 progress: Optional[Callable[..., None]] = None, 

1551 ) -> Pack: 

1552 """Move a specific file containing a pack into the pack directory. 

1553 

1554 Note: The file should be on the same file system as the 

1555 packs directory. 

1556 

1557 Args: 

1558 f: Open file object for the pack. 

1559 path: Path to the pack file. 

1560 num_objects: Number of objects in the pack. 

1561 indexer: A PackIndexer for indexing the pack. 

1562 progress: Optional progress reporting function. 

1563 """ 

1564 entries = [] 

1565 for i, entry in enumerate(indexer): 

1566 if progress is not None: 

1567 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1568 entries.append(entry) 

1569 

1570 pack_sha, extra_entries = extend_pack( 

1571 f, 

1572 set(indexer.ext_refs()), 

1573 get_raw=self.get_raw, 

1574 compression_level=self.pack_compression_level, 

1575 progress=progress, 

1576 ) 

1577 f.flush() 

1578 if self.fsync_object_files: 

1579 try: 

1580 fileno = f.fileno() 

1581 except AttributeError as e: 

1582 raise OSError("fsync requested but file has no fileno()") from e 

1583 else: 

1584 os.fsync(fileno) 

1585 f.close() 

1586 

1587 entries.extend(extra_entries) 

1588 

1589 # Move the pack in. 

1590 entries.sort() 

1591 pack_base_name = self._get_pack_basepath(entries) 

1592 

1593 for pack in self.packs: 

1594 if pack._basename == pack_base_name: 

1595 return pack 

1596 

1597 target_pack_path = pack_base_name + ".pack" 

1598 target_index_path = pack_base_name + ".idx" 

1599 if sys.platform == "win32": 

1600 # Windows might have the target pack file lingering. Attempt 

1601 # removal, silently passing if the target does not exist. 

1602 with suppress(FileNotFoundError): 

1603 os.remove(target_pack_path) 

1604 os.rename(path, target_pack_path) 

1605 

1606 # Write the index. 

1607 with GitFile( 

1608 target_index_path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files 

1609 ) as index_file: 

1610 write_pack_index( 

1611 index_file, entries, pack_sha, version=self.pack_index_version 

1612 ) 

1613 

1614 # Add the pack to the store and return it. 

1615 final_pack = Pack( 

1616 pack_base_name, 

1617 delta_window_size=self.pack_delta_window_size, 

1618 window_memory=self.pack_window_memory, 

1619 delta_cache_size=self.pack_delta_cache_size, 

1620 depth=self.pack_depth, 

1621 threads=self.pack_threads, 

1622 big_file_threshold=self.pack_big_file_threshold, 

1623 ) 

1624 final_pack.check_length_and_checksum() 

1625 self._add_cached_pack(pack_base_name, final_pack) 

1626 return final_pack 

1627 

1628 def add_thin_pack( 

1629 self, 

1630 read_all: Callable[[int], bytes], 

1631 read_some: Optional[Callable[[int], bytes]], 

1632 progress: Optional[Callable[..., None]] = None, 

1633 ) -> "Pack": 

1634 """Add a new thin pack to this object store. 

1635 

1636 Thin packs are packs that contain deltas with parents that exist 

1637 outside the pack. They should never be placed in the object store 

1638 directly, and always indexed and completed as they are copied. 

1639 

1640 Args: 

1641 read_all: Read function that blocks until the number of 

1642 requested bytes are read. 

1643 read_some: Read function that returns at least one byte, but may 

1644 not return the number of bytes requested. 

1645 progress: Optional progress reporting function. 

1646 Returns: A Pack object pointing at the now-completed thin pack in the 

1647 objects/pack directory. 

1648 """ 

1649 import tempfile 

1650 

1651 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1652 with os.fdopen(fd, "w+b") as f: 

1653 os.chmod(path, PACK_MODE) 

1654 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type] 

1655 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type] 

1656 copier.verify(progress=progress) 

1657 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1658 

1659 def add_pack( 

1660 self, 

1661 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

1662 """Add a new pack to this object store. 

1663 

1664 Returns: Fileobject to write to, a commit function to 

1665 call when the pack is finished and an abort 

1666 function. 

1667 """ 

1668 import tempfile 

1669 

1670 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1671 f = os.fdopen(fd, "w+b") 

1672 os.chmod(path, PACK_MODE) 

1673 

1674 def commit() -> Optional["Pack"]: 

1675 if f.tell() > 0: 

1676 f.seek(0) 

1677 

1678 with PackData(path, f) as pd: 

1679 indexer = PackIndexer.for_pack_data( 

1680 pd, 

1681 resolve_ext_ref=self.get_raw, # type: ignore[arg-type] 

1682 ) 

1683 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type] 

1684 else: 

1685 f.close() 

1686 os.remove(path) 

1687 return None 

1688 

1689 def abort() -> None: 

1690 f.close() 

1691 os.remove(path) 

1692 

1693 return f, commit, abort # type: ignore[return-value] 

1694 

1695 def add_object(self, obj: ShaFile) -> None: 

1696 """Add a single object to this object store. 

1697 

1698 Args: 

1699 obj: Object to add 

1700 """ 

1701 path = self._get_shafile_path(obj.id) 

1702 dir = os.path.dirname(path) 

1703 try: 

1704 os.mkdir(dir) 

1705 except FileExistsError: 

1706 pass 

1707 if os.path.exists(path): 

1708 return # Already there, no need to write again 

1709 with GitFile(path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files) as f: 

1710 f.write( 

1711 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1712 ) 

1713 

1714 @classmethod 

1715 def init(cls, path: Union[str, os.PathLike[str]]) -> "DiskObjectStore": 

1716 """Initialize a new disk object store. 

1717 

1718 Creates the necessary directory structure for a Git object store. 

1719 

1720 Args: 

1721 path: Path where the object store should be created 

1722 

1723 Returns: 

1724 New DiskObjectStore instance 

1725 """ 

1726 try: 

1727 os.mkdir(path) 

1728 except FileExistsError: 

1729 pass 

1730 os.mkdir(os.path.join(path, "info")) 

1731 os.mkdir(os.path.join(path, PACKDIR)) 

1732 return cls(path) 

1733 

1734 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]: 

1735 """Iterate over all object SHAs with the given prefix. 

1736 

1737 Args: 

1738 prefix: Hex prefix to search for (as bytes) 

1739 

1740 Returns: 

1741 Iterator of object SHAs (as bytes) matching the prefix 

1742 """ 

1743 if len(prefix) < 2: 

1744 yield from super().iter_prefix(prefix) 

1745 return 

1746 seen = set() 

1747 dir = prefix[:2].decode() 

1748 rest = prefix[2:].decode() 

1749 try: 

1750 for name in os.listdir(os.path.join(self.path, dir)): 

1751 if name.startswith(rest): 

1752 sha = os.fsencode(dir + name) 

1753 if sha not in seen: 

1754 seen.add(sha) 

1755 yield sha 

1756 except FileNotFoundError: 

1757 pass 

1758 

1759 for p in self.packs: 

1760 bin_prefix = ( 

1761 binascii.unhexlify(prefix) 

1762 if len(prefix) % 2 == 0 

1763 else binascii.unhexlify(prefix[:-1]) 

1764 ) 

1765 for sha in p.index.iter_prefix(bin_prefix): 

1766 sha = sha_to_hex(sha) 

1767 if sha.startswith(prefix) and sha not in seen: 

1768 seen.add(sha) 

1769 yield sha 

1770 for alternate in self.alternates: 

1771 for sha in alternate.iter_prefix(prefix): 

1772 if sha not in seen: 

1773 seen.add(sha) 

1774 yield sha 

1775 

1776 def get_commit_graph(self) -> Optional["CommitGraph"]: 

1777 """Get the commit graph for this object store. 

1778 

1779 Returns: 

1780 CommitGraph object if available, None otherwise 

1781 """ 

1782 if not self._use_commit_graph: 

1783 return None 

1784 

1785 if self._commit_graph is None: 

1786 from .commit_graph import read_commit_graph 

1787 

1788 # Look for commit graph in our objects directory 

1789 graph_file = os.path.join(self.path, "info", "commit-graph") 

1790 if os.path.exists(graph_file): 

1791 self._commit_graph = read_commit_graph(graph_file) 

1792 return self._commit_graph 

1793 

1794 def write_commit_graph( 

1795 self, refs: Optional[Iterable[bytes]] = None, reachable: bool = True 

1796 ) -> None: 

1797 """Write a commit graph file for this object store. 

1798 

1799 Args: 

1800 refs: List of refs to include. If None, includes all refs from object store. 

1801 reachable: If True, includes all commits reachable from refs. 

1802 If False, only includes the direct ref targets. 

1803 """ 

1804 from .commit_graph import get_reachable_commits 

1805 

1806 if refs is None: 

1807 # Get all commit objects from the object store 

1808 all_refs = [] 

1809 # Iterate through all objects to find commits 

1810 for sha in self: 

1811 try: 

1812 obj = self[sha] 

1813 if obj.type_name == b"commit": 

1814 all_refs.append(sha) 

1815 except KeyError: 

1816 continue 

1817 else: 

1818 # Use provided refs 

1819 all_refs = list(refs) 

1820 

1821 if not all_refs: 

1822 return # No commits to include 

1823 

1824 if reachable: 

1825 # Get all reachable commits 

1826 commit_ids = get_reachable_commits(self, all_refs) 

1827 else: 

1828 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1829 commit_ids = [] 

1830 for ref in all_refs: 

1831 if isinstance(ref, bytes) and len(ref) == 40: 

1832 # Already hex ObjectID 

1833 commit_ids.append(ref) 

1834 elif isinstance(ref, bytes) and len(ref) == 20: 

1835 # Binary SHA, convert to hex ObjectID 

1836 from .objects import sha_to_hex 

1837 

1838 commit_ids.append(sha_to_hex(ref)) 

1839 else: 

1840 # Assume it's already correct format 

1841 commit_ids.append(ref) 

1842 

1843 if commit_ids: 

1844 # Write commit graph directly to our object store path 

1845 # Generate the commit graph 

1846 from .commit_graph import generate_commit_graph 

1847 

1848 graph = generate_commit_graph(self, commit_ids) 

1849 

1850 if graph.entries: 

1851 # Ensure the info directory exists 

1852 info_dir = os.path.join(self.path, "info") 

1853 os.makedirs(info_dir, exist_ok=True) 

1854 

1855 # Write using GitFile for atomic operation 

1856 graph_path = os.path.join(info_dir, "commit-graph") 

1857 with GitFile(graph_path, "wb") as f: 

1858 assert isinstance( 

1859 f, _GitFile 

1860 ) # GitFile in write mode always returns _GitFile 

1861 graph.write_to_file(f) 

1862 

1863 # Clear cached commit graph so it gets reloaded 

1864 self._commit_graph = None 

1865 

1866 def prune(self, grace_period: Optional[int] = None) -> None: 

1867 """Prune/clean up this object store. 

1868 

1869 This removes temporary files that were left behind by interrupted 

1870 pack operations. These are files that start with ``tmp_pack_`` in the 

1871 repository directory or files with .pack extension but no corresponding 

1872 .idx file in the pack directory. 

1873 

1874 Args: 

1875 grace_period: Grace period in seconds for removing temporary files. 

1876 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1877 """ 

1878 import glob 

1879 

1880 if grace_period is None: 

1881 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1882 

1883 # Clean up tmp_pack_* files in the repository directory 

1884 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1885 # Check if file is old enough (more than grace period) 

1886 mtime = os.path.getmtime(tmp_file) 

1887 if time.time() - mtime > grace_period: 

1888 os.remove(tmp_file) 

1889 

1890 # Clean up orphaned .pack files without corresponding .idx files 

1891 try: 

1892 pack_dir_contents = os.listdir(self.pack_dir) 

1893 except FileNotFoundError: 

1894 return 

1895 

1896 pack_files = {} 

1897 idx_files = set() 

1898 

1899 for name in pack_dir_contents: 

1900 if name.endswith(".pack"): 

1901 base_name = name[:-5] # Remove .pack extension 

1902 pack_files[base_name] = name 

1903 elif name.endswith(".idx"): 

1904 base_name = name[:-4] # Remove .idx extension 

1905 idx_files.add(base_name) 

1906 

1907 # Remove .pack files without corresponding .idx files 

1908 for base_name, pack_name in pack_files.items(): 

1909 if base_name not in idx_files: 

1910 pack_path = os.path.join(self.pack_dir, pack_name) 

1911 # Check if file is old enough (more than grace period) 

1912 mtime = os.path.getmtime(pack_path) 

1913 if time.time() - mtime > grace_period: 

1914 os.remove(pack_path) 

1915 

1916 

1917class MemoryObjectStore(PackCapableObjectStore): 

1918 """Object store that keeps all objects in memory.""" 

1919 

1920 def __init__(self) -> None: 

1921 """Initialize a MemoryObjectStore. 

1922 

1923 Creates an empty in-memory object store. 

1924 """ 

1925 super().__init__() 

1926 self._data: dict[bytes, ShaFile] = {} 

1927 self.pack_compression_level = -1 

1928 

1929 def _to_hexsha(self, sha: bytes) -> bytes: 

1930 if len(sha) == 40: 

1931 return sha 

1932 elif len(sha) == 20: 

1933 return sha_to_hex(sha) 

1934 else: 

1935 raise ValueError(f"Invalid sha {sha!r}") 

1936 

1937 def contains_loose(self, sha: bytes) -> bool: 

1938 """Check if a particular object is present by SHA1 and is loose.""" 

1939 return self._to_hexsha(sha) in self._data 

1940 

1941 def contains_packed(self, sha: bytes) -> bool: 

1942 """Check if a particular object is present by SHA1 and is packed.""" 

1943 return False 

1944 

1945 def __iter__(self) -> Iterator[bytes]: 

1946 """Iterate over the SHAs that are present in this store.""" 

1947 return iter(self._data.keys()) 

1948 

1949 @property 

1950 def packs(self) -> list[Pack]: 

1951 """List with pack objects.""" 

1952 return [] 

1953 

1954 def get_raw(self, name: ObjectID) -> tuple[int, bytes]: 

1955 """Obtain the raw text for an object. 

1956 

1957 Args: 

1958 name: sha for the object. 

1959 Returns: tuple with numeric type and object contents. 

1960 """ 

1961 obj = self[self._to_hexsha(name)] 

1962 return obj.type_num, obj.as_raw_string() 

1963 

1964 def __getitem__(self, name: ObjectID) -> ShaFile: 

1965 """Retrieve an object by SHA. 

1966 

1967 Args: 

1968 name: SHA of the object (as hex string or bytes) 

1969 

1970 Returns: 

1971 Copy of the ShaFile object 

1972 

1973 Raises: 

1974 KeyError: If the object is not found 

1975 """ 

1976 return self._data[self._to_hexsha(name)].copy() 

1977 

1978 def __delitem__(self, name: ObjectID) -> None: 

1979 """Delete an object from this store, for testing only.""" 

1980 del self._data[self._to_hexsha(name)] 

1981 

1982 def add_object(self, obj: ShaFile) -> None: 

1983 """Add a single object to this object store.""" 

1984 self._data[obj.id] = obj.copy() 

1985 

1986 def add_objects( 

1987 self, 

1988 objects: Iterable[tuple[ShaFile, Optional[str]]], 

1989 progress: Optional[Callable[[str], None]] = None, 

1990 ) -> None: 

1991 """Add a set of objects to this object store. 

1992 

1993 Args: 

1994 objects: Iterable over a list of (object, path) tuples 

1995 progress: Optional progress reporting function. 

1996 """ 

1997 for obj, path in objects: 

1998 self.add_object(obj) 

1999 

2000 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2001 """Add a new pack to this object store. 

2002 

2003 Because this object store doesn't support packs, we extract and add the 

2004 individual objects. 

2005 

2006 Returns: Fileobject to write to and a commit function to 

2007 call when the pack is finished. 

2008 """ 

2009 from tempfile import SpooledTemporaryFile 

2010 

2011 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

2012 

2013 def commit() -> None: 

2014 size = f.tell() 

2015 if size > 0: 

2016 f.seek(0) 

2017 

2018 p = PackData.from_file(f, size) 

2019 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type] 

2020 self.add_object(obj) 

2021 p.close() 

2022 f.close() 

2023 else: 

2024 f.close() 

2025 

2026 def abort() -> None: 

2027 f.close() 

2028 

2029 return f, commit, abort # type: ignore[return-value] 

2030 

2031 def add_pack_data( 

2032 self, 

2033 count: int, 

2034 unpacked_objects: Iterator[UnpackedObject], 

2035 progress: Optional[Callable[[str], None]] = None, 

2036 ) -> None: 

2037 """Add pack data to this object store. 

2038 

2039 Args: 

2040 count: Number of items to add 

2041 unpacked_objects: Iterator of UnpackedObject instances 

2042 progress: Optional progress reporting function. 

2043 """ 

2044 if count == 0: 

2045 return 

2046 

2047 # Since MemoryObjectStore doesn't support pack files, we need to 

2048 # extract individual objects. To handle deltas properly, we write 

2049 # to a temporary pack and then use PackInflater to resolve them. 

2050 f, commit, abort = self.add_pack() 

2051 try: 

2052 write_pack_data( 

2053 f.write, 

2054 unpacked_objects, 

2055 num_records=count, 

2056 progress=progress, 

2057 ) 

2058 except BaseException: 

2059 abort() 

2060 raise 

2061 else: 

2062 commit() 

2063 

2064 def add_thin_pack( 

2065 self, 

2066 read_all: Callable[[], bytes], 

2067 read_some: Callable[[int], bytes], 

2068 progress: Optional[Callable[[str], None]] = None, 

2069 ) -> None: 

2070 """Add a new thin pack to this object store. 

2071 

2072 Thin packs are packs that contain deltas with parents that exist 

2073 outside the pack. Because this object store doesn't support packs, we 

2074 extract and add the individual objects. 

2075 

2076 Args: 

2077 read_all: Read function that blocks until the number of 

2078 requested bytes are read. 

2079 read_some: Read function that returns at least one byte, but may 

2080 not return the number of bytes requested. 

2081 progress: Optional progress reporting function. 

2082 """ 

2083 f, commit, abort = self.add_pack() 

2084 try: 

2085 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type] 

2086 copier.verify() 

2087 except BaseException: 

2088 abort() 

2089 raise 

2090 else: 

2091 commit() 

2092 

2093 

2094class ObjectIterator(Protocol): 

2095 """Interface for iterating over objects.""" 

2096 

2097 def iterobjects(self) -> Iterator[ShaFile]: 

2098 """Iterate over all objects. 

2099 

2100 Returns: 

2101 Iterator of ShaFile objects 

2102 """ 

2103 raise NotImplementedError(self.iterobjects) 

2104 

2105 

2106def tree_lookup_path( 

2107 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes 

2108) -> tuple[int, bytes]: 

2109 """Look up an object in a Git tree. 

2110 

2111 Args: 

2112 lookup_obj: Callback for retrieving object by SHA1 

2113 root_sha: SHA1 of the root tree 

2114 path: Path to lookup 

2115 Returns: A tuple of (mode, SHA) of the resulting path. 

2116 """ 

2117 tree = lookup_obj(root_sha) 

2118 if not isinstance(tree, Tree): 

2119 raise NotTreeError(root_sha) 

2120 return tree.lookup_path(lookup_obj, path) 

2121 

2122 

2123def _collect_filetree_revs( 

2124 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

2125) -> None: 

2126 """Collect SHA1s of files and directories for specified tree. 

2127 

2128 Args: 

2129 obj_store: Object store to get objects by SHA from 

2130 tree_sha: tree reference to walk 

2131 kset: set to fill with references to files and directories 

2132 """ 

2133 filetree = obj_store[tree_sha] 

2134 assert isinstance(filetree, Tree) 

2135 for name, mode, sha in filetree.iteritems(): 

2136 assert mode is not None 

2137 assert sha is not None 

2138 if not S_ISGITLINK(mode) and sha not in kset: 

2139 kset.add(sha) 

2140 if stat.S_ISDIR(mode): 

2141 _collect_filetree_revs(obj_store, sha, kset) 

2142 

2143 

2144def _split_commits_and_tags( 

2145 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False 

2146) -> tuple[set[bytes], set[bytes], set[bytes]]: 

2147 """Split object id list into three lists with commit, tag, and other SHAs. 

2148 

2149 Commits referenced by tags are included into commits 

2150 list as well. Only SHA1s known in this repository will get 

2151 through, and unless ignore_unknown argument is True, KeyError 

2152 is thrown for SHA1 missing in the repository 

2153 

2154 Args: 

2155 obj_store: Object store to get objects by SHA1 from 

2156 lst: Collection of commit and tag SHAs 

2157 ignore_unknown: True to skip SHA1 missing in the repository 

2158 silently. 

2159 Returns: A tuple of (commits, tags, others) SHA1s 

2160 """ 

2161 commits: set[bytes] = set() 

2162 tags: set[bytes] = set() 

2163 others: set[bytes] = set() 

2164 for e in lst: 

2165 try: 

2166 o = obj_store[e] 

2167 except KeyError: 

2168 if not ignore_unknown: 

2169 raise 

2170 else: 

2171 if isinstance(o, Commit): 

2172 commits.add(e) 

2173 elif isinstance(o, Tag): 

2174 tags.add(e) 

2175 tagged = o.object[1] 

2176 c, t, os = _split_commits_and_tags( 

2177 obj_store, [tagged], ignore_unknown=ignore_unknown 

2178 ) 

2179 commits |= c 

2180 tags |= t 

2181 others |= os 

2182 else: 

2183 others.add(e) 

2184 return (commits, tags, others) 

2185 

2186 

2187class MissingObjectFinder: 

2188 """Find the objects missing from another object store. 

2189 

2190 Args: 

2191 object_store: Object store containing at least all objects to be 

2192 sent 

2193 haves: SHA1s of commits not to send (already present in target) 

2194 wants: SHA1s of commits to send 

2195 progress: Optional function to report progress to. 

2196 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2197 sha for including tags. 

2198 get_parents: Optional function for getting the parents of a commit. 

2199 """ 

2200 

2201 def __init__( 

2202 self, 

2203 object_store: BaseObjectStore, 

2204 haves: Iterable[bytes], 

2205 wants: Iterable[bytes], 

2206 *, 

2207 shallow: Optional[Set[bytes]] = None, 

2208 progress: Optional[Callable[[bytes], None]] = None, 

2209 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None, 

2210 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2211 ) -> None: 

2212 """Initialize a MissingObjectFinder. 

2213 

2214 Args: 

2215 object_store: Object store containing objects 

2216 haves: SHA1s of objects already present in target 

2217 wants: SHA1s of objects to send 

2218 shallow: Set of shallow commit SHA1s 

2219 progress: Optional progress reporting callback 

2220 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2221 get_parents: Function for getting commit parents 

2222 """ 

2223 self.object_store = object_store 

2224 if shallow is None: 

2225 shallow = set() 

2226 self._get_parents = get_parents 

2227 # process Commits and Tags differently 

2228 # Note, while haves may list commits/tags not available locally, 

2229 # and such SHAs would get filtered out by _split_commits_and_tags, 

2230 # wants shall list only known SHAs, and otherwise 

2231 # _split_commits_and_tags fails with KeyError 

2232 have_commits, have_tags, have_others = _split_commits_and_tags( 

2233 object_store, haves, ignore_unknown=True 

2234 ) 

2235 want_commits, want_tags, want_others = _split_commits_and_tags( 

2236 object_store, wants, ignore_unknown=False 

2237 ) 

2238 # all_ancestors is a set of commits that shall not be sent 

2239 # (complete repository up to 'haves') 

2240 all_ancestors = _collect_ancestors( 

2241 object_store, 

2242 have_commits, 

2243 shallow=frozenset(shallow), 

2244 get_parents=self._get_parents, 

2245 )[0] 

2246 # all_missing - complete set of commits between haves and wants 

2247 # common - commits from all_ancestors we hit into while 

2248 # traversing parent hierarchy of wants 

2249 missing_commits, common_commits = _collect_ancestors( 

2250 object_store, 

2251 want_commits, 

2252 frozenset(all_ancestors), 

2253 shallow=frozenset(shallow), 

2254 get_parents=self._get_parents, 

2255 ) 

2256 self.remote_has: set[bytes] = set() 

2257 # Now, fill sha_done with commits and revisions of 

2258 # files and directories known to be both locally 

2259 # and on target. Thus these commits and files 

2260 # won't get selected for fetch 

2261 for h in common_commits: 

2262 self.remote_has.add(h) 

2263 cmt = object_store[h] 

2264 assert isinstance(cmt, Commit) 

2265 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2266 # record tags we have as visited, too 

2267 for t in have_tags: 

2268 self.remote_has.add(t) 

2269 self.sha_done = set(self.remote_has) 

2270 

2271 # in fact, what we 'want' is commits, tags, and others 

2272 # we've found missing 

2273 self.objects_to_send: set[ 

2274 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

2275 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

2276 missing_tags = want_tags.difference(have_tags) 

2277 self.objects_to_send.update( 

2278 {(w, None, Tag.type_num, False) for w in missing_tags} 

2279 ) 

2280 missing_others = want_others.difference(have_others) 

2281 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2282 

2283 if progress is None: 

2284 self.progress: Callable[[bytes], None] = lambda x: None 

2285 else: 

2286 self.progress = progress 

2287 self._tagged = (get_tagged and get_tagged()) or {} 

2288 

2289 def get_remote_has(self) -> set[bytes]: 

2290 """Get the set of SHAs the remote has. 

2291 

2292 Returns: 

2293 Set of SHA1s that the remote side already has 

2294 """ 

2295 return self.remote_has 

2296 

2297 def add_todo( 

2298 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

2299 ) -> None: 

2300 """Add objects to the todo list. 

2301 

2302 Args: 

2303 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2304 """ 

2305 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2306 

2307 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

2308 """Get the next object to send. 

2309 

2310 Returns: 

2311 Tuple of (sha, pack_hint) 

2312 

2313 Raises: 

2314 StopIteration: When no more objects to send 

2315 """ 

2316 while True: 

2317 if not self.objects_to_send: 

2318 self.progress( 

2319 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2320 ) 

2321 raise StopIteration 

2322 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2323 if sha not in self.sha_done: 

2324 break 

2325 if not leaf: 

2326 o = self.object_store[sha] 

2327 if isinstance(o, Commit): 

2328 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2329 elif isinstance(o, Tree): 

2330 todos = [] 

2331 for n, m, s in o.iteritems(): 

2332 assert m is not None 

2333 assert n is not None 

2334 assert s is not None 

2335 if not S_ISGITLINK(m): 

2336 todos.append( 

2337 ( 

2338 s, 

2339 n, 

2340 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2341 not stat.S_ISDIR(m), 

2342 ) 

2343 ) 

2344 self.add_todo(todos) 

2345 elif isinstance(o, Tag): 

2346 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2347 if sha in self._tagged: 

2348 self.add_todo([(self._tagged[sha], None, None, True)]) 

2349 self.sha_done.add(sha) 

2350 if len(self.sha_done) % 1000 == 0: 

2351 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2352 if type_num is None: 

2353 pack_hint = None 

2354 else: 

2355 pack_hint = (type_num, name) 

2356 return (sha, pack_hint) 

2357 

2358 def __iter__(self) -> Iterator[tuple[bytes, Optional[PackHint]]]: 

2359 """Return iterator over objects to send. 

2360 

2361 Returns: 

2362 Self (this class implements the iterator protocol) 

2363 """ 

2364 return self 

2365 

2366 

2367class ObjectStoreGraphWalker: 

2368 """Graph walker that finds what commits are missing from an object store.""" 

2369 

2370 heads: set[ObjectID] 

2371 """Revisions without descendants in the local repo.""" 

2372 

2373 get_parents: Callable[[ObjectID], list[ObjectID]] 

2374 """Function to retrieve parents in the local repo.""" 

2375 

2376 shallow: set[ObjectID] 

2377 

2378 def __init__( 

2379 self, 

2380 local_heads: Iterable[ObjectID], 

2381 get_parents: Callable[[ObjectID], list[ObjectID]], 

2382 shallow: Optional[set[ObjectID]] = None, 

2383 update_shallow: Optional[ 

2384 Callable[[Optional[set[ObjectID]], Optional[set[ObjectID]]], None] 

2385 ] = None, 

2386 ) -> None: 

2387 """Create a new instance. 

2388 

2389 Args: 

2390 local_heads: Heads to start search with 

2391 get_parents: Function for finding the parents of a SHA1. 

2392 shallow: Set of shallow commits. 

2393 update_shallow: Function to update shallow commits. 

2394 """ 

2395 self.heads = set(local_heads) 

2396 self.get_parents = get_parents 

2397 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

2398 if shallow is None: 

2399 shallow = set() 

2400 self.shallow = shallow 

2401 self.update_shallow = update_shallow 

2402 

2403 def nak(self) -> None: 

2404 """Nothing in common was found.""" 

2405 

2406 def ack(self, sha: ObjectID) -> None: 

2407 """Ack that a revision and its ancestors are present in the source.""" 

2408 if len(sha) != 40: 

2409 raise ValueError(f"unexpected sha {sha!r} received") 

2410 ancestors = {sha} 

2411 

2412 # stop if we run out of heads to remove 

2413 while self.heads: 

2414 for a in ancestors: 

2415 if a in self.heads: 

2416 self.heads.remove(a) 

2417 

2418 # collect all ancestors 

2419 new_ancestors = set() 

2420 for a in ancestors: 

2421 ps = self.parents.get(a) 

2422 if ps is not None: 

2423 new_ancestors.update(ps) 

2424 self.parents[a] = None 

2425 

2426 # no more ancestors; stop 

2427 if not new_ancestors: 

2428 break 

2429 

2430 ancestors = new_ancestors 

2431 

2432 def next(self) -> Optional[ObjectID]: 

2433 """Iterate over ancestors of heads in the target.""" 

2434 if self.heads: 

2435 ret = self.heads.pop() 

2436 try: 

2437 ps = self.get_parents(ret) 

2438 except KeyError: 

2439 return None 

2440 self.parents[ret] = ps 

2441 self.heads.update([p for p in ps if p not in self.parents]) 

2442 return ret 

2443 return None 

2444 

2445 __next__ = next 

2446 

2447 

2448def commit_tree_changes( 

2449 object_store: BaseObjectStore, 

2450 tree: Union[ObjectID, Tree], 

2451 changes: Sequence[tuple[bytes, Optional[int], Optional[bytes]]], 

2452) -> ObjectID: 

2453 """Commit a specified set of changes to a tree structure. 

2454 

2455 This will apply a set of changes on top of an existing tree, storing new 

2456 objects in object_store. 

2457 

2458 changes are a list of tuples with (path, mode, object_sha). 

2459 Paths can be both blobs and trees. See the mode and 

2460 object sha to None deletes the path. 

2461 

2462 This method works especially well if there are only a small 

2463 number of changes to a big tree. For a large number of changes 

2464 to a large tree, use e.g. commit_tree. 

2465 

2466 Args: 

2467 object_store: Object store to store new objects in 

2468 and retrieve old ones from. 

2469 tree: Original tree root (SHA or Tree object) 

2470 changes: changes to apply 

2471 Returns: New tree root object 

2472 """ 

2473 # TODO(jelmer): Save up the objects and add them using .add_objects 

2474 # rather than with individual calls to .add_object. 

2475 # Handle both Tree object and SHA 

2476 if isinstance(tree, Tree): 

2477 tree_obj: Tree = tree 

2478 else: 

2479 sha_obj = object_store[tree] 

2480 assert isinstance(sha_obj, Tree) 

2481 tree_obj = sha_obj 

2482 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {} 

2483 for path, new_mode, new_sha in changes: 

2484 try: 

2485 (dirname, subpath) = path.split(b"/", 1) 

2486 except ValueError: 

2487 if new_sha is None: 

2488 del tree_obj[path] 

2489 else: 

2490 assert new_mode is not None 

2491 tree_obj[path] = (new_mode, new_sha) 

2492 else: 

2493 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2494 for name, subchanges in nested_changes.items(): 

2495 try: 

2496 orig_subtree_id: Union[bytes, Tree] = tree_obj[name][1] 

2497 except KeyError: 

2498 # For new directories, pass an empty Tree object 

2499 orig_subtree_id = Tree() 

2500 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges) 

2501 subtree = object_store[subtree_id] 

2502 assert isinstance(subtree, Tree) 

2503 if len(subtree) == 0: 

2504 del tree_obj[name] 

2505 else: 

2506 tree_obj[name] = (stat.S_IFDIR, subtree.id) 

2507 object_store.add_object(tree_obj) 

2508 return tree_obj.id 

2509 

2510 

2511class OverlayObjectStore(BaseObjectStore): 

2512 """Object store that can overlay multiple object stores.""" 

2513 

2514 def __init__( 

2515 self, 

2516 bases: list[BaseObjectStore], 

2517 add_store: Optional[BaseObjectStore] = None, 

2518 ) -> None: 

2519 """Initialize an OverlayObjectStore. 

2520 

2521 Args: 

2522 bases: List of base object stores to overlay 

2523 add_store: Optional store to write new objects to 

2524 """ 

2525 self.bases = bases 

2526 self.add_store = add_store 

2527 

2528 def add_object(self, object: ShaFile) -> None: 

2529 """Add a single object to the store. 

2530 

2531 Args: 

2532 object: Object to add 

2533 

2534 Raises: 

2535 NotImplementedError: If no add_store was provided 

2536 """ 

2537 if self.add_store is None: 

2538 raise NotImplementedError(self.add_object) 

2539 return self.add_store.add_object(object) 

2540 

2541 def add_objects( 

2542 self, 

2543 objects: Sequence[tuple[ShaFile, Optional[str]]], 

2544 progress: Optional[Callable[[str], None]] = None, 

2545 ) -> Optional[Pack]: 

2546 """Add multiple objects to the store. 

2547 

2548 Args: 

2549 objects: Iterator of objects to add 

2550 progress: Optional progress reporting callback 

2551 

2552 Raises: 

2553 NotImplementedError: If no add_store was provided 

2554 """ 

2555 if self.add_store is None: 

2556 raise NotImplementedError(self.add_object) 

2557 return self.add_store.add_objects(objects, progress) 

2558 

2559 @property 

2560 def packs(self) -> list[Pack]: 

2561 """Get the list of packs from all overlaid stores. 

2562 

2563 Returns: 

2564 Combined list of packs from all base stores 

2565 """ 

2566 ret = [] 

2567 for b in self.bases: 

2568 ret.extend(b.packs) 

2569 return ret 

2570 

2571 def __iter__(self) -> Iterator[ObjectID]: 

2572 """Iterate over all object SHAs in the overlaid stores. 

2573 

2574 Returns: 

2575 Iterator of object SHAs (deduped across stores) 

2576 """ 

2577 done = set() 

2578 for b in self.bases: 

2579 for o_id in b: 

2580 if o_id not in done: 

2581 yield o_id 

2582 done.add(o_id) 

2583 

2584 def iterobjects_subset( 

2585 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2586 ) -> Iterator[ShaFile]: 

2587 """Iterate over a subset of objects from the overlaid stores. 

2588 

2589 Args: 

2590 shas: Iterable of object SHAs to retrieve 

2591 allow_missing: If True, skip missing objects; if False, raise KeyError 

2592 

2593 Returns: 

2594 Iterator of ShaFile objects 

2595 

2596 Raises: 

2597 KeyError: If an object is missing and allow_missing is False 

2598 """ 

2599 todo = set(shas) 

2600 found: set[bytes] = set() 

2601 

2602 for b in self.bases: 

2603 # Create a copy of todo for each base to avoid modifying 

2604 # the set while iterating through it 

2605 current_todo = todo - found 

2606 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2607 yield o 

2608 found.add(o.id) 

2609 

2610 # Check for any remaining objects not found 

2611 missing = todo - found 

2612 if missing and not allow_missing: 

2613 raise KeyError(next(iter(missing))) 

2614 

2615 def iter_unpacked_subset( 

2616 self, 

2617 shas: Iterable[bytes], 

2618 include_comp: bool = False, 

2619 allow_missing: bool = False, 

2620 convert_ofs_delta: bool = True, 

2621 ) -> Iterator[UnpackedObject]: 

2622 """Iterate over unpacked objects from the overlaid stores. 

2623 

2624 Args: 

2625 shas: Iterable of object SHAs to retrieve 

2626 include_comp: Whether to include compressed data 

2627 allow_missing: If True, skip missing objects; if False, raise KeyError 

2628 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2629 

2630 Returns: 

2631 Iterator of unpacked objects 

2632 

2633 Raises: 

2634 KeyError: If an object is missing and allow_missing is False 

2635 """ 

2636 todo = set(shas) 

2637 for b in self.bases: 

2638 for o in b.iter_unpacked_subset( 

2639 todo, 

2640 include_comp=include_comp, 

2641 allow_missing=True, 

2642 convert_ofs_delta=convert_ofs_delta, 

2643 ): 

2644 yield o 

2645 todo.remove(o.sha()) 

2646 if todo and not allow_missing: 

2647 raise KeyError(next(iter(todo))) 

2648 

2649 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]: 

2650 """Get the raw object data from the overlaid stores. 

2651 

2652 Args: 

2653 sha_id: SHA of the object 

2654 

2655 Returns: 

2656 Tuple of (type_num, raw_data) 

2657 

2658 Raises: 

2659 KeyError: If object not found in any base store 

2660 """ 

2661 for b in self.bases: 

2662 try: 

2663 return b.get_raw(sha_id) 

2664 except KeyError: 

2665 pass 

2666 raise KeyError(sha_id) 

2667 

2668 def contains_packed(self, sha: bytes) -> bool: 

2669 """Check if an object is packed in any base store. 

2670 

2671 Args: 

2672 sha: SHA of the object 

2673 

2674 Returns: 

2675 True if object is packed in any base store 

2676 """ 

2677 for b in self.bases: 

2678 if b.contains_packed(sha): 

2679 return True 

2680 return False 

2681 

2682 def contains_loose(self, sha: bytes) -> bool: 

2683 """Check if an object is loose in any base store. 

2684 

2685 Args: 

2686 sha: SHA of the object 

2687 

2688 Returns: 

2689 True if object is loose in any base store 

2690 """ 

2691 for b in self.bases: 

2692 if b.contains_loose(sha): 

2693 return True 

2694 return False 

2695 

2696 

2697def read_packs_file(f: BinaryIO) -> Iterator[str]: 

2698 """Yield the packs listed in a packs file.""" 

2699 for line in f.read().splitlines(): 

2700 if not line: 

2701 continue 

2702 (kind, name) = line.split(b" ", 1) 

2703 if kind != b"P": 

2704 continue 

2705 yield os.fsdecode(name) 

2706 

2707 

2708class BucketBasedObjectStore(PackBasedObjectStore): 

2709 """Object store implementation that uses a bucket store like S3 as backend.""" 

2710 

2711 def _iter_loose_objects(self) -> Iterator[bytes]: 

2712 """Iterate over the SHAs of all loose objects.""" 

2713 return iter([]) 

2714 

2715 def _get_loose_object(self, sha: bytes) -> None: 

2716 return None 

2717 

2718 def delete_loose_object(self, sha: bytes) -> None: 

2719 """Delete a loose object (no-op for bucket stores). 

2720 

2721 Bucket-based stores don't have loose objects, so this is a no-op. 

2722 

2723 Args: 

2724 sha: SHA of the object to delete 

2725 """ 

2726 # Doesn't exist.. 

2727 

2728 def pack_loose_objects( 

2729 self, progress: Optional[Callable[[str], None]] = None 

2730 ) -> int: 

2731 """Pack loose objects. Returns number of objects packed. 

2732 

2733 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2734 

2735 Args: 

2736 progress: Optional progress reporting callback (ignored) 

2737 """ 

2738 return 0 

2739 

2740 def _remove_pack_by_name(self, name: str) -> None: 

2741 """Remove a pack by name. Subclasses should implement this.""" 

2742 raise NotImplementedError(self._remove_pack_by_name) 

2743 

2744 def _iter_pack_names(self) -> Iterator[str]: 

2745 raise NotImplementedError(self._iter_pack_names) 

2746 

2747 def _get_pack(self, name: str) -> Pack: 

2748 raise NotImplementedError(self._get_pack) 

2749 

2750 def _update_pack_cache(self) -> list[Pack]: 

2751 pack_files = set(self._iter_pack_names()) 

2752 

2753 # Open newly appeared pack files 

2754 new_packs = [] 

2755 for f in pack_files: 

2756 if f not in self._pack_cache: 

2757 pack = self._get_pack(f) 

2758 new_packs.append(pack) 

2759 self._pack_cache[f] = pack 

2760 # Remove disappeared pack files 

2761 for f in set(self._pack_cache) - pack_files: 

2762 self._pack_cache.pop(f).close() 

2763 return new_packs 

2764 

2765 def _upload_pack( 

2766 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO 

2767 ) -> None: 

2768 raise NotImplementedError 

2769 

2770 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]: 

2771 """Add a new pack to this object store. 

2772 

2773 Returns: Fileobject to write to, a commit function to 

2774 call when the pack is finished and an abort 

2775 function. 

2776 """ 

2777 import tempfile 

2778 

2779 pf = tempfile.SpooledTemporaryFile( 

2780 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2781 ) 

2782 

2783 def commit() -> Optional[Pack]: 

2784 if pf.tell() == 0: 

2785 pf.close() 

2786 return None 

2787 

2788 pf.seek(0) 

2789 

2790 p = PackData(pf.name, pf) 

2791 entries = p.sorted_entries() 

2792 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2793 idxf = tempfile.SpooledTemporaryFile( 

2794 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2795 ) 

2796 checksum = p.get_stored_checksum() 

2797 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2798 idxf.seek(0) 

2799 idx = load_pack_index_file(basename + ".idx", idxf) 

2800 for pack in self.packs: 

2801 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2802 p.close() 

2803 idx.close() 

2804 pf.close() 

2805 idxf.close() 

2806 return pack 

2807 pf.seek(0) 

2808 idxf.seek(0) 

2809 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type] 

2810 final_pack = Pack.from_objects(p, idx) 

2811 self._add_cached_pack(basename, final_pack) 

2812 pf.close() 

2813 idxf.close() 

2814 return final_pack 

2815 

2816 return pf, commit, pf.close # type: ignore[return-value] 

2817 

2818 

2819def _collect_ancestors( 

2820 store: ObjectContainer, 

2821 heads: Iterable[ObjectID], 

2822 common: frozenset[ObjectID] = frozenset(), 

2823 shallow: frozenset[ObjectID] = frozenset(), 

2824 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents, 

2825) -> tuple[set[ObjectID], set[ObjectID]]: 

2826 """Collect all ancestors of heads up to (excluding) those in common. 

2827 

2828 Args: 

2829 store: Object store to get commits from 

2830 heads: commits to start from 

2831 common: commits to end at, or empty set to walk repository 

2832 completely 

2833 shallow: Set of shallow commits 

2834 get_parents: Optional function for getting the parents of a 

2835 commit. 

2836 Returns: a tuple (A, B) where A - all commits reachable 

2837 from heads but not present in common, B - common (shared) elements 

2838 that are directly reachable from heads 

2839 """ 

2840 bases = set() 

2841 commits = set() 

2842 queue: list[ObjectID] = [] 

2843 queue.extend(heads) 

2844 

2845 # Try to use commit graph if available 

2846 commit_graph = store.get_commit_graph() 

2847 

2848 while queue: 

2849 e = queue.pop(0) 

2850 if e in common: 

2851 bases.add(e) 

2852 elif e not in commits: 

2853 commits.add(e) 

2854 if e in shallow: 

2855 continue 

2856 

2857 # Try to use commit graph for parent lookup 

2858 parents = None 

2859 if commit_graph: 

2860 parents = commit_graph.get_parents(e) 

2861 

2862 if parents is None: 

2863 # Fall back to loading the object 

2864 cmt = store[e] 

2865 assert isinstance(cmt, Commit) 

2866 parents = get_parents(cmt) 

2867 

2868 queue.extend(parents) 

2869 return (commits, bases) 

2870 

2871 

2872def iter_tree_contents( 

2873 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2874) -> Iterator[TreeEntry]: 

2875 """Iterate the contents of a tree and all subtrees. 

2876 

2877 Iteration is depth-first pre-order, as in e.g. os.walk. 

2878 

2879 Args: 

2880 store: Object store to get trees from 

2881 tree_id: SHA1 of the tree. 

2882 include_trees: If True, include tree objects in the iteration. 

2883 

2884 Yields: TreeEntry namedtuples for all the objects in a tree. 

2885 """ 

2886 if tree_id is None: 

2887 return 

2888 # This could be fairly easily generalized to >2 trees if we find a use 

2889 # case. 

2890 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2891 while todo: 

2892 entry = todo.pop() 

2893 assert entry.mode is not None 

2894 if stat.S_ISDIR(entry.mode): 

2895 extra = [] 

2896 assert entry.sha is not None 

2897 tree = store[entry.sha] 

2898 assert isinstance(tree, Tree) 

2899 for subentry in tree.iteritems(name_order=True): 

2900 assert entry.path is not None 

2901 extra.append(subentry.in_path(entry.path)) 

2902 todo.extend(reversed(extra)) 

2903 if not stat.S_ISDIR(entry.mode) or include_trees: 

2904 yield entry 

2905 

2906 

2907def iter_commit_contents( 

2908 store: ObjectContainer, 

2909 commit: Union[Commit, bytes], 

2910 *, 

2911 include: Optional[Sequence[Union[str, bytes, Path]]] = None, 

2912) -> Iterator[TreeEntry]: 

2913 """Iterate the contents of the repository at the specified commit. 

2914 

2915 This is a wrapper around iter_tree_contents() and 

2916 tree_lookup_path() to simplify the common task of getting the 

2917 contest of a repo at a particular commit. See also 

2918 dulwich.index.build_file_from_blob() for writing individual files 

2919 to disk. 

2920 

2921 Args: 

2922 store: Object store to get trees from 

2923 commit: Commit object, or SHA1 of a commit 

2924 include: if provided, only the entries whose paths are in the 

2925 list, or whose parent tree is in the list, will be 

2926 included. Note that duplicate or overlapping paths 

2927 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

2928 

2929 Yields: TreeEntry namedtuples for all matching files in a commit. 

2930 """ 

2931 sha = commit.id if isinstance(commit, Commit) else commit 

2932 if not isinstance(obj := store[sha], Commit): 

2933 raise TypeError( 

2934 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

2935 ) 

2936 commit = obj 

2937 encoding = commit.encoding or "utf-8" 

2938 include_bytes: list[bytes] = ( 

2939 [ 

2940 path if isinstance(path, bytes) else str(path).encode(encoding) 

2941 for path in include 

2942 ] 

2943 if include is not None 

2944 else [b""] 

2945 ) 

2946 

2947 for path in include_bytes: 

2948 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

2949 # Iterate all contained files if path points to a dir, otherwise just get that 

2950 # single file 

2951 if isinstance(store[obj_id], Tree): 

2952 for entry in iter_tree_contents(store, obj_id): 

2953 yield entry.in_path(path) 

2954 else: 

2955 yield TreeEntry(path, mode, obj_id) 

2956 

2957 

2958def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2959 """Peel all tags from a SHA. 

2960 

2961 Args: 

2962 store: Object store to get objects from 

2963 sha: The object SHA to peel. 

2964 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

2965 intermediate tags; if the original ref does not point to a tag, 

2966 this will equal the original SHA1. 

2967 """ 

2968 unpeeled = obj = store[sha] 

2969 obj_class = object_class(obj.type_name) 

2970 while obj_class is Tag: 

2971 assert isinstance(obj, Tag) 

2972 obj_class, sha = obj.object 

2973 obj = store[sha] 

2974 return unpeeled, obj