Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1171 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from pathlib import Path 

36from typing import ( 

37 TYPE_CHECKING, 

38 Callable, 

39 Optional, 

40 Protocol, 

41 Union, 

42) 

43 

44from .errors import NotTreeError 

45from .file import GitFile, _GitFile 

46from .objects import ( 

47 S_ISGITLINK, 

48 ZERO_SHA, 

49 Blob, 

50 Commit, 

51 ObjectID, 

52 ShaFile, 

53 Tag, 

54 Tree, 

55 TreeEntry, 

56 hex_to_filename, 

57 hex_to_sha, 

58 object_class, 

59 sha_to_hex, 

60 valid_hexsha, 

61) 

62from .pack import ( 

63 PACK_SPOOL_FILE_MAX_SIZE, 

64 ObjectContainer, 

65 Pack, 

66 PackData, 

67 PackedObjectContainer, 

68 PackFileDisappeared, 

69 PackHint, 

70 PackIndexer, 

71 PackInflater, 

72 PackStreamCopier, 

73 UnpackedObject, 

74 extend_pack, 

75 full_unpacked_object, 

76 generate_unpacked_objects, 

77 iter_sha1, 

78 load_pack_index_file, 

79 pack_objects_to_data, 

80 write_pack_data, 

81 write_pack_index, 

82) 

83from .protocol import DEPTH_INFINITE 

84from .refs import PEELED_TAG_SUFFIX, Ref 

85 

86if TYPE_CHECKING: 

87 from .commit_graph import CommitGraph 

88 from .diff_tree import RenameDetector 

89 

90 

91class GraphWalker(Protocol): 

92 """Protocol for graph walker objects.""" 

93 

94 def __next__(self) -> Optional[bytes]: 

95 """Return the next object SHA to visit.""" 

96 ... 

97 

98 def ack(self, sha: bytes) -> None: 

99 """Acknowledge that an object has been received.""" 

100 ... 

101 

102 

103INFODIR = "info" 

104PACKDIR = "pack" 

105 

106# use permissions consistent with Git; just readable by everyone 

107# TODO: should packs also be non-writable on Windows? if so, that 

108# would requite some rather significant adjustments to the test suite 

109PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

110 

111# Grace period for cleaning up temporary pack files (in seconds) 

112# Matches git's default of 2 weeks 

113DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

114 

115 

116def find_shallow( 

117 store: ObjectContainer, heads: Iterable[bytes], depth: int 

118) -> tuple[set[bytes], set[bytes]]: 

119 """Find shallow commits according to a given depth. 

120 

121 Args: 

122 store: An ObjectStore for looking up objects. 

123 heads: Iterable of head SHAs to start walking from. 

124 depth: The depth of ancestors to include. A depth of one includes 

125 only the heads themselves. 

126 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

127 considered shallow and unshallow according to the arguments. Note that 

128 these sets may overlap if a commit is reachable along multiple paths. 

129 """ 

130 parents: dict[bytes, list[bytes]] = {} 

131 commit_graph = store.get_commit_graph() 

132 

133 def get_parents(sha: bytes) -> list[bytes]: 

134 result = parents.get(sha, None) 

135 if not result: 

136 # Try to use commit graph first if available 

137 if commit_graph: 

138 graph_parents = commit_graph.get_parents(sha) 

139 if graph_parents is not None: 

140 result = graph_parents 

141 parents[sha] = result 

142 return result 

143 # Fall back to loading the object 

144 commit = store[sha] 

145 assert isinstance(commit, Commit) 

146 result = commit.parents 

147 parents[sha] = result 

148 return result 

149 

150 todo = [] # stack of (sha, depth) 

151 for head_sha in heads: 

152 obj = store[head_sha] 

153 # Peel tags if necessary 

154 while isinstance(obj, Tag): 

155 _, sha = obj.object 

156 obj = store[sha] 

157 if isinstance(obj, Commit): 

158 todo.append((obj.id, 1)) 

159 

160 not_shallow = set() 

161 shallow = set() 

162 while todo: 

163 sha, cur_depth = todo.pop() 

164 if cur_depth < depth: 

165 not_shallow.add(sha) 

166 new_depth = cur_depth + 1 

167 todo.extend((p, new_depth) for p in get_parents(sha)) 

168 else: 

169 shallow.add(sha) 

170 

171 return shallow, not_shallow 

172 

173 

174def get_depth( 

175 store: ObjectContainer, 

176 head: bytes, 

177 get_parents: Callable = lambda commit: commit.parents, 

178 max_depth: Optional[int] = None, 

179) -> int: 

180 """Return the current available depth for the given head. 

181 

182 For commits with multiple parents, the largest possible depth will be 

183 returned. 

184 

185 Args: 

186 store: Object store to search in 

187 head: commit to start from 

188 get_parents: optional function for getting the parents of a commit 

189 max_depth: maximum depth to search 

190 """ 

191 if head not in store: 

192 return 0 

193 current_depth = 1 

194 queue = [(head, current_depth)] 

195 commit_graph = store.get_commit_graph() 

196 

197 while queue and (max_depth is None or current_depth < max_depth): 

198 e, depth = queue.pop(0) 

199 current_depth = max(current_depth, depth) 

200 

201 # Try to use commit graph for parent lookup if available 

202 parents = None 

203 if commit_graph: 

204 parents = commit_graph.get_parents(e) 

205 

206 if parents is None: 

207 # Fall back to loading the object 

208 cmt = store[e] 

209 if isinstance(cmt, Tag): 

210 _cls, sha = cmt.object 

211 cmt = store[sha] 

212 parents = get_parents(cmt) 

213 

214 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

215 return current_depth 

216 

217 

218class PackContainer(Protocol): 

219 """Protocol for containers that can accept pack files.""" 

220 

221 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

222 """Add a new pack.""" 

223 

224 

225class BaseObjectStore: 

226 """Object store interface.""" 

227 

228 def determine_wants_all( 

229 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

230 ) -> list[ObjectID]: 

231 """Determine which objects are wanted based on refs.""" 

232 

233 def _want_deepen(sha: bytes) -> bool: 

234 if not depth: 

235 return False 

236 if depth == DEPTH_INFINITE: 

237 return True 

238 return depth > self._get_depth(sha) 

239 

240 return [ 

241 sha 

242 for (ref, sha) in refs.items() 

243 if (sha not in self or _want_deepen(sha)) 

244 and not ref.endswith(PEELED_TAG_SUFFIX) 

245 and not sha == ZERO_SHA 

246 ] 

247 

248 def contains_loose(self, sha: bytes) -> bool: 

249 """Check if a particular object is present by SHA1 and is loose.""" 

250 raise NotImplementedError(self.contains_loose) 

251 

252 def __contains__(self, sha1: bytes) -> bool: 

253 """Check if a particular object is present by SHA1. 

254 

255 This method makes no distinction between loose and packed objects. 

256 """ 

257 return self.contains_loose(sha1) 

258 

259 @property 

260 def packs(self) -> list[Pack]: 

261 """Iterable of pack objects.""" 

262 raise NotImplementedError 

263 

264 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

265 """Obtain the raw text for an object. 

266 

267 Args: 

268 name: sha for the object. 

269 Returns: tuple with numeric type and object contents. 

270 """ 

271 raise NotImplementedError(self.get_raw) 

272 

273 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

274 """Obtain an object by SHA1.""" 

275 type_num, uncomp = self.get_raw(sha1) 

276 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

277 

278 def __iter__(self) -> Iterator[bytes]: 

279 """Iterate over the SHAs that are present in this store.""" 

280 raise NotImplementedError(self.__iter__) 

281 

282 def add_object(self, obj: ShaFile) -> None: 

283 """Add a single object to this object store.""" 

284 raise NotImplementedError(self.add_object) 

285 

286 def add_objects( 

287 self, 

288 objects: Sequence[tuple[ShaFile, Optional[str]]], 

289 progress: Optional[Callable] = None, 

290 ) -> Optional["Pack"]: 

291 """Add a set of objects to this object store. 

292 

293 Args: 

294 objects: Iterable over a list of (object, path) tuples 

295 progress: Optional progress callback 

296 """ 

297 raise NotImplementedError(self.add_objects) 

298 

299 def tree_changes( 

300 self, 

301 source: Optional[bytes], 

302 target: Optional[bytes], 

303 want_unchanged: bool = False, 

304 include_trees: bool = False, 

305 change_type_same: bool = False, 

306 rename_detector: Optional["RenameDetector"] = None, 

307 paths: Optional[list[bytes]] = None, 

308 ) -> Iterator[ 

309 tuple[ 

310 tuple[Optional[bytes], Optional[bytes]], 

311 tuple[Optional[int], Optional[int]], 

312 tuple[Optional[bytes], Optional[bytes]], 

313 ] 

314 ]: 

315 """Find the differences between the contents of two trees. 

316 

317 Args: 

318 source: SHA1 of the source tree 

319 target: SHA1 of the target tree 

320 want_unchanged: Whether unchanged files should be reported 

321 include_trees: Whether to include trees 

322 change_type_same: Whether to report files changing 

323 type in the same entry. 

324 rename_detector: RenameDetector object for detecting renames. 

325 paths: Optional list of paths to filter to (as bytes). 

326 Returns: Iterator over tuples with 

327 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

328 """ 

329 from .diff_tree import tree_changes 

330 

331 for change in tree_changes( 

332 self, 

333 source, 

334 target, 

335 want_unchanged=want_unchanged, 

336 include_trees=include_trees, 

337 change_type_same=change_type_same, 

338 rename_detector=rename_detector, 

339 paths=paths, 

340 ): 

341 yield ( 

342 (change.old.path, change.new.path), 

343 (change.old.mode, change.new.mode), 

344 (change.old.sha, change.new.sha), 

345 ) 

346 

347 def iter_tree_contents( 

348 self, tree_id: bytes, include_trees: bool = False 

349 ) -> Iterator[tuple[bytes, int, bytes]]: 

350 """Iterate the contents of a tree and all subtrees. 

351 

352 Iteration is depth-first pre-order, as in e.g. os.walk. 

353 

354 Args: 

355 tree_id: SHA1 of the tree. 

356 include_trees: If True, include tree objects in the iteration. 

357 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

358 tree. 

359 """ 

360 warnings.warn( 

361 "Please use dulwich.object_store.iter_tree_contents", 

362 DeprecationWarning, 

363 stacklevel=2, 

364 ) 

365 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

366 

367 def iterobjects_subset( 

368 self, shas: Iterable[bytes], *, allow_missing: bool = False 

369 ) -> Iterator[ShaFile]: 

370 """Iterate over a subset of objects in the store. 

371 

372 Args: 

373 shas: Iterable of object SHAs to retrieve 

374 allow_missing: If True, skip missing objects; if False, raise KeyError 

375 

376 Returns: 

377 Iterator of ShaFile objects 

378 

379 Raises: 

380 KeyError: If an object is missing and allow_missing is False 

381 """ 

382 for sha in shas: 

383 try: 

384 yield self[sha] 

385 except KeyError: 

386 if not allow_missing: 

387 raise 

388 

389 def find_missing_objects( 

390 self, 

391 haves: Iterable[bytes], 

392 wants: Iterable[bytes], 

393 shallow: Optional[set[bytes]] = None, 

394 progress: Optional[Callable] = None, 

395 get_tagged: Optional[Callable] = None, 

396 get_parents: Callable = lambda commit: commit.parents, 

397 ) -> Iterator[tuple[bytes, Optional[bytes]]]: 

398 """Find the missing objects required for a set of revisions. 

399 

400 Args: 

401 haves: Iterable over SHAs already in common. 

402 wants: Iterable over SHAs of objects to fetch. 

403 shallow: Set of shallow commit SHA1s to skip 

404 progress: Simple progress function that will be called with 

405 updated progress strings. 

406 get_tagged: Function that returns a dict of pointed-to sha -> 

407 tag sha for including tags. 

408 get_parents: Optional function for getting the parents of a 

409 commit. 

410 Returns: Iterator over (sha, path) pairs. 

411 """ 

412 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

413 finder = MissingObjectFinder( 

414 self, 

415 haves=haves, 

416 wants=wants, 

417 shallow=shallow, 

418 progress=progress, 

419 get_tagged=get_tagged, 

420 get_parents=get_parents, 

421 ) 

422 return iter(finder) 

423 

424 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

425 """Find which revisions this store has in common using graphwalker. 

426 

427 Args: 

428 graphwalker: A graphwalker object. 

429 Returns: List of SHAs that are in common 

430 """ 

431 haves = [] 

432 sha = next(graphwalker) 

433 while sha: 

434 if sha in self: 

435 haves.append(sha) 

436 graphwalker.ack(sha) 

437 sha = next(graphwalker) 

438 return haves 

439 

440 def generate_pack_data( 

441 self, 

442 have: Iterable[bytes], 

443 want: Iterable[bytes], 

444 shallow: Optional[set[bytes]] = None, 

445 progress: Optional[Callable] = None, 

446 ofs_delta: bool = True, 

447 ) -> tuple[int, Iterator[UnpackedObject]]: 

448 """Generate pack data objects for a set of wants/haves. 

449 

450 Args: 

451 have: List of SHA1s of objects that should not be sent 

452 want: List of SHA1s of objects that should be sent 

453 shallow: Set of shallow commit SHA1s to skip 

454 ofs_delta: Whether OFS deltas can be included 

455 progress: Optional progress reporting method 

456 """ 

457 # Note that the pack-specific implementation below is more efficient, 

458 # as it reuses deltas 

459 missing_objects = MissingObjectFinder( 

460 self, haves=have, wants=want, shallow=shallow, progress=progress 

461 ) 

462 object_ids = list(missing_objects) 

463 return pack_objects_to_data( 

464 [(self[oid], path) for oid, path in object_ids], 

465 ofs_delta=ofs_delta, 

466 progress=progress, 

467 ) 

468 

469 def peel_sha(self, sha: bytes) -> bytes: 

470 """Peel all tags from a SHA. 

471 

472 Args: 

473 sha: The object SHA to peel. 

474 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

475 intermediate tags; if the original ref does not point to a tag, 

476 this will equal the original SHA1. 

477 """ 

478 warnings.warn( 

479 "Please use dulwich.object_store.peel_sha()", 

480 DeprecationWarning, 

481 stacklevel=2, 

482 ) 

483 return peel_sha(self, sha)[1].id 

484 

485 def _get_depth( 

486 self, 

487 head: bytes, 

488 get_parents: Callable = lambda commit: commit.parents, 

489 max_depth: Optional[int] = None, 

490 ) -> int: 

491 """Return the current available depth for the given head. 

492 

493 For commits with multiple parents, the largest possible depth will be 

494 returned. 

495 

496 Args: 

497 head: commit to start from 

498 get_parents: optional function for getting the parents of a commit 

499 max_depth: maximum depth to search 

500 """ 

501 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

502 

503 def close(self) -> None: 

504 """Close any files opened by this object store.""" 

505 # Default implementation is a NO-OP 

506 

507 def prune(self, grace_period: Optional[int] = None) -> None: 

508 """Prune/clean up this object store. 

509 

510 This includes removing orphaned temporary files and other 

511 housekeeping tasks. Default implementation is a NO-OP. 

512 

513 Args: 

514 grace_period: Grace period in seconds for removing temporary files. 

515 If None, uses the default grace period. 

516 """ 

517 # Default implementation is a NO-OP 

518 

519 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

520 """Iterate over all SHA1s that start with a given prefix. 

521 

522 The default implementation is a naive iteration over all objects. 

523 However, subclasses may override this method with more efficient 

524 implementations. 

525 """ 

526 for sha in self: 

527 if sha.startswith(prefix): 

528 yield sha 

529 

530 def get_commit_graph(self) -> Optional["CommitGraph"]: 

531 """Get the commit graph for this object store. 

532 

533 Returns: 

534 CommitGraph object if available, None otherwise 

535 """ 

536 return None 

537 

538 def write_commit_graph( 

539 self, refs: Optional[list[bytes]] = None, reachable: bool = True 

540 ) -> None: 

541 """Write a commit graph file for this object store. 

542 

543 Args: 

544 refs: List of refs to include. If None, includes all refs from object store. 

545 reachable: If True, includes all commits reachable from refs. 

546 If False, only includes the direct ref targets. 

547 

548 Note: 

549 Default implementation does nothing. Subclasses should override 

550 this method to provide commit graph writing functionality. 

551 """ 

552 raise NotImplementedError(self.write_commit_graph) 

553 

554 def get_object_mtime(self, sha: bytes) -> float: 

555 """Get the modification time of an object. 

556 

557 Args: 

558 sha: SHA1 of the object 

559 

560 Returns: 

561 Modification time as seconds since epoch 

562 

563 Raises: 

564 KeyError: if the object is not found 

565 """ 

566 # Default implementation raises KeyError 

567 # Subclasses should override to provide actual mtime 

568 raise KeyError(sha) 

569 

570 

571class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

572 """Object store that uses pack files for storage. 

573 

574 This class provides a base implementation for object stores that use 

575 Git pack files as their primary storage mechanism. It handles caching 

576 of open pack files and provides configuration for pack file operations. 

577 """ 

578 

579 def __init__( 

580 self, 

581 pack_compression_level: int = -1, 

582 pack_index_version: Optional[int] = None, 

583 pack_delta_window_size: Optional[int] = None, 

584 pack_window_memory: Optional[int] = None, 

585 pack_delta_cache_size: Optional[int] = None, 

586 pack_depth: Optional[int] = None, 

587 pack_threads: Optional[int] = None, 

588 pack_big_file_threshold: Optional[int] = None, 

589 ) -> None: 

590 """Initialize a PackBasedObjectStore. 

591 

592 Args: 

593 pack_compression_level: Compression level for pack files (-1 to 9) 

594 pack_index_version: Pack index version to use 

595 pack_delta_window_size: Window size for delta compression 

596 pack_window_memory: Maximum memory to use for delta window 

597 pack_delta_cache_size: Cache size for delta operations 

598 pack_depth: Maximum depth for pack deltas 

599 pack_threads: Number of threads to use for packing 

600 pack_big_file_threshold: Threshold for treating files as "big" 

601 """ 

602 self._pack_cache: dict[str, Pack] = {} 

603 self.pack_compression_level = pack_compression_level 

604 self.pack_index_version = pack_index_version 

605 self.pack_delta_window_size = pack_delta_window_size 

606 self.pack_window_memory = pack_window_memory 

607 self.pack_delta_cache_size = pack_delta_cache_size 

608 self.pack_depth = pack_depth 

609 self.pack_threads = pack_threads 

610 self.pack_big_file_threshold = pack_big_file_threshold 

611 

612 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

613 """Add a new pack to this object store.""" 

614 raise NotImplementedError(self.add_pack) 

615 

616 def add_pack_data( 

617 self, 

618 count: int, 

619 unpacked_objects: Iterator[UnpackedObject], 

620 progress: Optional[Callable] = None, 

621 ) -> Optional["Pack"]: 

622 """Add pack data to this object store. 

623 

624 Args: 

625 count: Number of items to add 

626 unpacked_objects: Iterator of UnpackedObject instances 

627 progress: Optional progress callback 

628 """ 

629 if count == 0: 

630 # Don't bother writing an empty pack file 

631 return None 

632 f, commit, abort = self.add_pack() 

633 try: 

634 write_pack_data( 

635 f.write, 

636 unpacked_objects, 

637 num_records=count, 

638 progress=progress, 

639 compression_level=self.pack_compression_level, 

640 ) 

641 except BaseException: 

642 abort() 

643 raise 

644 else: 

645 return commit() 

646 

647 @property 

648 def alternates(self) -> list: 

649 """Return list of alternate object stores.""" 

650 return [] 

651 

652 def contains_packed(self, sha: bytes) -> bool: 

653 """Check if a particular object is present by SHA1 and is packed. 

654 

655 This does not check alternates. 

656 """ 

657 for pack in self.packs: 

658 try: 

659 if sha in pack: 

660 return True 

661 except PackFileDisappeared: 

662 pass 

663 return False 

664 

665 def __contains__(self, sha) -> bool: 

666 """Check if a particular object is present by SHA1. 

667 

668 This method makes no distinction between loose and packed objects. 

669 """ 

670 if self.contains_packed(sha) or self.contains_loose(sha): 

671 return True 

672 for alternate in self.alternates: 

673 if sha in alternate: 

674 return True 

675 return False 

676 

677 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

678 """Add a newly appeared pack to the cache by path.""" 

679 prev_pack = self._pack_cache.get(base_name) 

680 if prev_pack is not pack: 

681 self._pack_cache[base_name] = pack 

682 if prev_pack: 

683 prev_pack.close() 

684 

685 def generate_pack_data( 

686 self, have, want, shallow=None, progress=None, ofs_delta=True 

687 ) -> tuple[int, Iterator[UnpackedObject]]: 

688 """Generate pack data objects for a set of wants/haves. 

689 

690 Args: 

691 have: List of SHA1s of objects that should not be sent 

692 want: List of SHA1s of objects that should be sent 

693 shallow: Set of shallow commit SHA1s to skip 

694 ofs_delta: Whether OFS deltas can be included 

695 progress: Optional progress reporting method 

696 """ 

697 missing_objects = MissingObjectFinder( 

698 self, haves=have, wants=want, shallow=shallow, progress=progress 

699 ) 

700 remote_has = missing_objects.get_remote_has() 

701 object_ids = list(missing_objects) 

702 return len(object_ids), generate_unpacked_objects( 

703 self, 

704 object_ids, 

705 progress=progress, 

706 ofs_delta=ofs_delta, 

707 other_haves=remote_has, 

708 ) 

709 

710 def _clear_cached_packs(self) -> None: 

711 pack_cache = self._pack_cache 

712 self._pack_cache = {} 

713 while pack_cache: 

714 (name, pack) = pack_cache.popitem() 

715 pack.close() 

716 

717 def _iter_cached_packs(self) -> Iterator[Pack]: 

718 return iter(self._pack_cache.values()) 

719 

720 def _update_pack_cache(self) -> list[Pack]: 

721 raise NotImplementedError(self._update_pack_cache) 

722 

723 def close(self) -> None: 

724 """Close the object store and release resources. 

725 

726 This method closes all cached pack files and frees associated resources. 

727 """ 

728 self._clear_cached_packs() 

729 

730 @property 

731 def packs(self) -> list[Pack]: 

732 """List with pack objects.""" 

733 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

734 

735 def count_pack_files(self) -> int: 

736 """Count the number of pack files. 

737 

738 Returns: 

739 Number of pack files (excluding those with .keep files) 

740 """ 

741 count = 0 

742 for pack in self.packs: 

743 # Check if there's a .keep file for this pack 

744 keep_path = pack._basename + ".keep" 

745 if not os.path.exists(keep_path): 

746 count += 1 

747 return count 

748 

749 def _iter_alternate_objects(self) -> Iterator[bytes]: 

750 """Iterate over the SHAs of all the objects in alternate stores.""" 

751 for alternate in self.alternates: 

752 yield from alternate 

753 

754 def _iter_loose_objects(self) -> Iterator[bytes]: 

755 """Iterate over the SHAs of all loose objects.""" 

756 raise NotImplementedError(self._iter_loose_objects) 

757 

758 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

759 raise NotImplementedError(self._get_loose_object) 

760 

761 def delete_loose_object(self, sha: bytes) -> None: 

762 """Delete a loose object. 

763 

764 This method only handles loose objects. For packed objects, 

765 use repack(exclude=...) to exclude them during repacking. 

766 """ 

767 raise NotImplementedError(self.delete_loose_object) 

768 

769 def _remove_pack(self, pack: "Pack") -> None: 

770 raise NotImplementedError(self._remove_pack) 

771 

772 def pack_loose_objects(self) -> int: 

773 """Pack loose objects. 

774 

775 Returns: Number of objects packed 

776 """ 

777 objects: list[tuple[ShaFile, None]] = [] 

778 for sha in self._iter_loose_objects(): 

779 obj = self._get_loose_object(sha) 

780 if obj is not None: 

781 objects.append((obj, None)) 

782 self.add_objects(objects) 

783 for obj, path in objects: 

784 self.delete_loose_object(obj.id) 

785 return len(objects) 

786 

787 def repack(self, exclude: Optional[set] = None) -> int: 

788 """Repack the packs in this repository. 

789 

790 Note that this implementation is fairly naive and currently keeps all 

791 objects in memory while it repacks. 

792 

793 Args: 

794 exclude: Optional set of object SHAs to exclude from repacking 

795 """ 

796 if exclude is None: 

797 exclude = set() 

798 

799 loose_objects = set() 

800 excluded_loose_objects = set() 

801 for sha in self._iter_loose_objects(): 

802 if sha not in exclude: 

803 obj = self._get_loose_object(sha) 

804 if obj is not None: 

805 loose_objects.add(obj) 

806 else: 

807 excluded_loose_objects.add(sha) 

808 

809 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

810 old_packs = {p.name(): p for p in self.packs} 

811 for name, pack in old_packs.items(): 

812 objects.update( 

813 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

814 ) 

815 

816 # Only create a new pack if there are objects to pack 

817 if objects: 

818 # The name of the consolidated pack might match the name of a 

819 # pre-existing pack. Take care not to remove the newly created 

820 # consolidated pack. 

821 consolidated = self.add_objects(list(objects)) 

822 if consolidated is not None: 

823 old_packs.pop(consolidated.name(), None) 

824 

825 # Delete loose objects that were packed 

826 for obj in loose_objects: 

827 if obj is not None: 

828 self.delete_loose_object(obj.id) 

829 # Delete excluded loose objects 

830 for sha in excluded_loose_objects: 

831 self.delete_loose_object(sha) 

832 for name, pack in old_packs.items(): 

833 self._remove_pack(pack) 

834 self._update_pack_cache() 

835 return len(objects) 

836 

837 def __iter__(self): 

838 """Iterate over the SHAs that are present in this store.""" 

839 self._update_pack_cache() 

840 for pack in self._iter_cached_packs(): 

841 try: 

842 yield from pack 

843 except PackFileDisappeared: 

844 pass 

845 yield from self._iter_loose_objects() 

846 yield from self._iter_alternate_objects() 

847 

848 def contains_loose(self, sha): 

849 """Check if a particular object is present by SHA1 and is loose. 

850 

851 This does not check alternates. 

852 """ 

853 return self._get_loose_object(sha) is not None 

854 

855 def get_raw(self, name): 

856 """Obtain the raw fulltext for an object. 

857 

858 Args: 

859 name: sha for the object. 

860 Returns: tuple with numeric type and object contents. 

861 """ 

862 if name == ZERO_SHA: 

863 raise KeyError(name) 

864 if len(name) == 40: 

865 sha = hex_to_sha(name) 

866 hexsha = name 

867 elif len(name) == 20: 

868 sha = name 

869 hexsha = None 

870 else: 

871 raise AssertionError(f"Invalid object name {name!r}") 

872 for pack in self._iter_cached_packs(): 

873 try: 

874 return pack.get_raw(sha) 

875 except (KeyError, PackFileDisappeared): 

876 pass 

877 if hexsha is None: 

878 hexsha = sha_to_hex(name) 

879 ret = self._get_loose_object(hexsha) 

880 if ret is not None: 

881 return ret.type_num, ret.as_raw_string() 

882 # Maybe something else has added a pack with the object 

883 # in the mean time? 

884 for pack in self._update_pack_cache(): 

885 try: 

886 return pack.get_raw(sha) 

887 except KeyError: 

888 pass 

889 for alternate in self.alternates: 

890 try: 

891 return alternate.get_raw(hexsha) 

892 except KeyError: 

893 pass 

894 raise KeyError(hexsha) 

895 

896 def iter_unpacked_subset( 

897 self, 

898 shas: set[bytes], 

899 include_comp: bool = False, 

900 allow_missing: bool = False, 

901 convert_ofs_delta: bool = True, 

902 ) -> Iterator[UnpackedObject]: 

903 """Iterate over a subset of objects, yielding UnpackedObject instances. 

904 

905 Args: 

906 shas: Set of object SHAs to retrieve 

907 include_comp: Whether to include compressed data 

908 allow_missing: If True, skip missing objects; if False, raise KeyError 

909 convert_ofs_delta: Whether to convert OFS_DELTA objects 

910 

911 Returns: 

912 Iterator of UnpackedObject instances 

913 

914 Raises: 

915 KeyError: If an object is missing and allow_missing is False 

916 """ 

917 todo: set[bytes] = set(shas) 

918 for p in self._iter_cached_packs(): 

919 for unpacked in p.iter_unpacked_subset( 

920 todo, 

921 include_comp=include_comp, 

922 allow_missing=True, 

923 convert_ofs_delta=convert_ofs_delta, 

924 ): 

925 yield unpacked 

926 hexsha = sha_to_hex(unpacked.sha()) 

927 todo.remove(hexsha) 

928 # Maybe something else has added a pack with the object 

929 # in the mean time? 

930 for p in self._update_pack_cache(): 

931 for unpacked in p.iter_unpacked_subset( 

932 todo, 

933 include_comp=include_comp, 

934 allow_missing=True, 

935 convert_ofs_delta=convert_ofs_delta, 

936 ): 

937 yield unpacked 

938 hexsha = sha_to_hex(unpacked.sha()) 

939 todo.remove(hexsha) 

940 for alternate in self.alternates: 

941 for unpacked in alternate.iter_unpacked_subset( 

942 todo, 

943 include_comp=include_comp, 

944 allow_missing=True, 

945 convert_ofs_delta=convert_ofs_delta, 

946 ): 

947 yield unpacked 

948 hexsha = sha_to_hex(unpacked.sha()) 

949 todo.remove(hexsha) 

950 

951 def iterobjects_subset( 

952 self, shas: Iterable[bytes], *, allow_missing: bool = False 

953 ) -> Iterator[ShaFile]: 

954 """Iterate over a subset of objects in the store. 

955 

956 This method searches for objects in pack files, alternates, and loose storage. 

957 

958 Args: 

959 shas: Iterable of object SHAs to retrieve 

960 allow_missing: If True, skip missing objects; if False, raise KeyError 

961 

962 Returns: 

963 Iterator of ShaFile objects 

964 

965 Raises: 

966 KeyError: If an object is missing and allow_missing is False 

967 """ 

968 todo: set[bytes] = set(shas) 

969 for p in self._iter_cached_packs(): 

970 for o in p.iterobjects_subset(todo, allow_missing=True): 

971 yield o 

972 todo.remove(o.id) 

973 # Maybe something else has added a pack with the object 

974 # in the mean time? 

975 for p in self._update_pack_cache(): 

976 for o in p.iterobjects_subset(todo, allow_missing=True): 

977 yield o 

978 todo.remove(o.id) 

979 for alternate in self.alternates: 

980 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

981 yield o 

982 todo.remove(o.id) 

983 for oid in todo: 

984 loose_obj: Optional[ShaFile] = self._get_loose_object(oid) 

985 if loose_obj is not None: 

986 yield loose_obj 

987 elif not allow_missing: 

988 raise KeyError(oid) 

989 

990 def get_unpacked_object( 

991 self, sha1: bytes, *, include_comp: bool = False 

992 ) -> UnpackedObject: 

993 """Obtain the unpacked object. 

994 

995 Args: 

996 sha1: sha for the object. 

997 include_comp: Whether to include compression metadata. 

998 """ 

999 if sha1 == ZERO_SHA: 

1000 raise KeyError(sha1) 

1001 if len(sha1) == 40: 

1002 sha = hex_to_sha(sha1) 

1003 hexsha = sha1 

1004 elif len(sha1) == 20: 

1005 sha = sha1 

1006 hexsha = None 

1007 else: 

1008 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1009 for pack in self._iter_cached_packs(): 

1010 try: 

1011 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1012 except (KeyError, PackFileDisappeared): 

1013 pass 

1014 if hexsha is None: 

1015 hexsha = sha_to_hex(sha1) 

1016 # Maybe something else has added a pack with the object 

1017 # in the mean time? 

1018 for pack in self._update_pack_cache(): 

1019 try: 

1020 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1021 except KeyError: 

1022 pass 

1023 for alternate in self.alternates: 

1024 try: 

1025 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1026 except KeyError: 

1027 pass 

1028 raise KeyError(hexsha) 

1029 

1030 def add_objects( 

1031 self, 

1032 objects: Sequence[tuple[ShaFile, Optional[str]]], 

1033 progress: Optional[Callable[[str], None]] = None, 

1034 ) -> Optional["Pack"]: 

1035 """Add a set of objects to this object store. 

1036 

1037 Args: 

1038 objects: Iterable over (object, path) tuples, should support 

1039 __len__. 

1040 progress: Optional progress reporting function. 

1041 Returns: Pack object of the objects written. 

1042 """ 

1043 count = len(objects) 

1044 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1045 return self.add_pack_data(count, record_iter, progress=progress) 

1046 

1047 

1048class DiskObjectStore(PackBasedObjectStore): 

1049 """Git-style object store that exists on disk.""" 

1050 

1051 path: Union[str, os.PathLike] 

1052 pack_dir: Union[str, os.PathLike] 

1053 _alternates: Optional[list["DiskObjectStore"]] 

1054 _commit_graph: Optional["CommitGraph"] 

1055 

1056 def __init__( 

1057 self, 

1058 path: Union[str, os.PathLike], 

1059 loose_compression_level=-1, 

1060 pack_compression_level=-1, 

1061 pack_index_version=None, 

1062 pack_delta_window_size=None, 

1063 pack_window_memory=None, 

1064 pack_delta_cache_size=None, 

1065 pack_depth=None, 

1066 pack_threads=None, 

1067 pack_big_file_threshold=None, 

1068 ) -> None: 

1069 """Open an object store. 

1070 

1071 Args: 

1072 path: Path of the object store. 

1073 loose_compression_level: zlib compression level for loose objects 

1074 pack_compression_level: zlib compression level for pack objects 

1075 pack_index_version: pack index version to use (1, 2, or 3) 

1076 pack_delta_window_size: sliding window size for delta compression 

1077 pack_window_memory: memory limit for delta window operations 

1078 pack_delta_cache_size: size of cache for delta operations 

1079 pack_depth: maximum delta chain depth 

1080 pack_threads: number of threads for pack operations 

1081 pack_big_file_threshold: threshold for treating files as big 

1082 """ 

1083 super().__init__( 

1084 pack_compression_level=pack_compression_level, 

1085 pack_index_version=pack_index_version, 

1086 pack_delta_window_size=pack_delta_window_size, 

1087 pack_window_memory=pack_window_memory, 

1088 pack_delta_cache_size=pack_delta_cache_size, 

1089 pack_depth=pack_depth, 

1090 pack_threads=pack_threads, 

1091 pack_big_file_threshold=pack_big_file_threshold, 

1092 ) 

1093 self.path = path 

1094 self.pack_dir = os.path.join(self.path, PACKDIR) 

1095 self._alternates = None 

1096 self.loose_compression_level = loose_compression_level 

1097 self.pack_compression_level = pack_compression_level 

1098 self.pack_index_version = pack_index_version 

1099 

1100 # Commit graph support - lazy loaded 

1101 self._commit_graph = None 

1102 self._use_commit_graph = True # Default to true 

1103 

1104 def __repr__(self) -> str: 

1105 """Return string representation of DiskObjectStore. 

1106 

1107 Returns: 

1108 String representation including the store path 

1109 """ 

1110 return f"<{self.__class__.__name__}({self.path!r})>" 

1111 

1112 @classmethod 

1113 def from_config(cls, path: Union[str, os.PathLike], config): 

1114 """Create a DiskObjectStore from a configuration object. 

1115 

1116 Args: 

1117 path: Path to the object store directory 

1118 config: Configuration object to read settings from 

1119 

1120 Returns: 

1121 New DiskObjectStore instance configured according to config 

1122 """ 

1123 try: 

1124 default_compression_level = int( 

1125 config.get((b"core",), b"compression").decode() 

1126 ) 

1127 except KeyError: 

1128 default_compression_level = -1 

1129 try: 

1130 loose_compression_level = int( 

1131 config.get((b"core",), b"looseCompression").decode() 

1132 ) 

1133 except KeyError: 

1134 loose_compression_level = default_compression_level 

1135 try: 

1136 pack_compression_level = int( 

1137 config.get((b"core",), "packCompression").decode() 

1138 ) 

1139 except KeyError: 

1140 pack_compression_level = default_compression_level 

1141 try: 

1142 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1143 except KeyError: 

1144 pack_index_version = None 

1145 

1146 # Read pack configuration options 

1147 try: 

1148 pack_delta_window_size = int( 

1149 config.get((b"pack",), b"deltaWindowSize").decode() 

1150 ) 

1151 except KeyError: 

1152 pack_delta_window_size = None 

1153 try: 

1154 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1155 except KeyError: 

1156 pack_window_memory = None 

1157 try: 

1158 pack_delta_cache_size = int( 

1159 config.get((b"pack",), b"deltaCacheSize").decode() 

1160 ) 

1161 except KeyError: 

1162 pack_delta_cache_size = None 

1163 try: 

1164 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1165 except KeyError: 

1166 pack_depth = None 

1167 try: 

1168 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1169 except KeyError: 

1170 pack_threads = None 

1171 try: 

1172 pack_big_file_threshold = int( 

1173 config.get((b"pack",), b"bigFileThreshold").decode() 

1174 ) 

1175 except KeyError: 

1176 pack_big_file_threshold = None 

1177 

1178 # Read core.commitGraph setting 

1179 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1180 

1181 instance = cls( 

1182 path, 

1183 loose_compression_level, 

1184 pack_compression_level, 

1185 pack_index_version, 

1186 pack_delta_window_size, 

1187 pack_window_memory, 

1188 pack_delta_cache_size, 

1189 pack_depth, 

1190 pack_threads, 

1191 pack_big_file_threshold, 

1192 ) 

1193 instance._use_commit_graph = use_commit_graph 

1194 return instance 

1195 

1196 @property 

1197 def alternates(self): 

1198 """Get the list of alternate object stores. 

1199 

1200 Reads from .git/objects/info/alternates if not already cached. 

1201 

1202 Returns: 

1203 List of DiskObjectStore instances for alternate object directories 

1204 """ 

1205 if self._alternates is not None: 

1206 return self._alternates 

1207 self._alternates = [] 

1208 for path in self._read_alternate_paths(): 

1209 self._alternates.append(DiskObjectStore(path)) 

1210 return self._alternates 

1211 

1212 def _read_alternate_paths(self): 

1213 try: 

1214 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1215 except FileNotFoundError: 

1216 return 

1217 with f: 

1218 for line in f.readlines(): 

1219 line = line.rstrip(b"\n") 

1220 if line.startswith(b"#"): 

1221 continue 

1222 if os.path.isabs(line): 

1223 yield os.fsdecode(line) 

1224 else: 

1225 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1226 

1227 def add_alternate_path(self, path) -> None: 

1228 """Add an alternate path to this object store.""" 

1229 try: 

1230 os.mkdir(os.path.join(self.path, INFODIR)) 

1231 except FileExistsError: 

1232 pass 

1233 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1234 with GitFile(alternates_path, "wb") as f: 

1235 try: 

1236 orig_f = open(alternates_path, "rb") 

1237 except FileNotFoundError: 

1238 pass 

1239 else: 

1240 with orig_f: 

1241 f.write(orig_f.read()) 

1242 f.write(os.fsencode(path) + b"\n") 

1243 

1244 if not os.path.isabs(path): 

1245 path = os.path.join(self.path, path) 

1246 self.alternates.append(DiskObjectStore(path)) 

1247 

1248 def _update_pack_cache(self): 

1249 """Read and iterate over new pack files and cache them.""" 

1250 try: 

1251 pack_dir_contents = os.listdir(self.pack_dir) 

1252 except FileNotFoundError: 

1253 self.close() 

1254 return [] 

1255 pack_files = set() 

1256 for name in pack_dir_contents: 

1257 if name.startswith("pack-") and name.endswith(".pack"): 

1258 # verify that idx exists first (otherwise the pack was not yet 

1259 # fully written) 

1260 idx_name = os.path.splitext(name)[0] + ".idx" 

1261 if idx_name in pack_dir_contents: 

1262 pack_name = name[: -len(".pack")] 

1263 pack_files.add(pack_name) 

1264 

1265 # Open newly appeared pack files 

1266 new_packs = [] 

1267 for f in pack_files: 

1268 if f not in self._pack_cache: 

1269 pack = Pack( 

1270 os.path.join(self.pack_dir, f), 

1271 delta_window_size=self.pack_delta_window_size, 

1272 window_memory=self.pack_window_memory, 

1273 delta_cache_size=self.pack_delta_cache_size, 

1274 depth=self.pack_depth, 

1275 threads=self.pack_threads, 

1276 big_file_threshold=self.pack_big_file_threshold, 

1277 ) 

1278 new_packs.append(pack) 

1279 self._pack_cache[f] = pack 

1280 # Remove disappeared pack files 

1281 for f in set(self._pack_cache) - pack_files: 

1282 self._pack_cache.pop(f).close() 

1283 return new_packs 

1284 

1285 def _get_shafile_path(self, sha): 

1286 # Check from object dir 

1287 return hex_to_filename(os.fspath(self.path), sha) 

1288 

1289 def _iter_loose_objects(self): 

1290 for base in os.listdir(self.path): 

1291 if len(base) != 2: 

1292 continue 

1293 for rest in os.listdir(os.path.join(self.path, base)): 

1294 sha = os.fsencode(base + rest) 

1295 if not valid_hexsha(sha): 

1296 continue 

1297 yield sha 

1298 

1299 def count_loose_objects(self) -> int: 

1300 """Count the number of loose objects in the object store. 

1301 

1302 Returns: 

1303 Number of loose objects 

1304 """ 

1305 count = 0 

1306 if not os.path.exists(self.path): 

1307 return 0 

1308 

1309 for i in range(256): 

1310 subdir = os.path.join(self.path, f"{i:02x}") 

1311 try: 

1312 count += len( 

1313 [ 

1314 name 

1315 for name in os.listdir(subdir) 

1316 if len(name) == 38 # 40 - 2 for the prefix 

1317 ] 

1318 ) 

1319 except FileNotFoundError: 

1320 # Directory may have been removed or is inaccessible 

1321 continue 

1322 

1323 return count 

1324 

1325 def _get_loose_object(self, sha): 

1326 path = self._get_shafile_path(sha) 

1327 try: 

1328 return ShaFile.from_path(path) 

1329 except FileNotFoundError: 

1330 return None 

1331 

1332 def delete_loose_object(self, sha) -> None: 

1333 """Delete a loose object from disk. 

1334 

1335 Args: 

1336 sha: SHA1 of the object to delete 

1337 

1338 Raises: 

1339 FileNotFoundError: If the object file doesn't exist 

1340 """ 

1341 os.remove(self._get_shafile_path(sha)) 

1342 

1343 def get_object_mtime(self, sha): 

1344 """Get the modification time of an object. 

1345 

1346 Args: 

1347 sha: SHA1 of the object 

1348 

1349 Returns: 

1350 Modification time as seconds since epoch 

1351 

1352 Raises: 

1353 KeyError: if the object is not found 

1354 """ 

1355 # First check if it's a loose object 

1356 if self.contains_loose(sha): 

1357 path = self._get_shafile_path(sha) 

1358 try: 

1359 return os.path.getmtime(path) 

1360 except FileNotFoundError: 

1361 pass 

1362 

1363 # Check if it's in a pack file 

1364 for pack in self.packs: 

1365 try: 

1366 if sha in pack: 

1367 # Use the pack file's mtime for packed objects 

1368 pack_path = pack._data_path 

1369 try: 

1370 return os.path.getmtime(pack_path) 

1371 except (FileNotFoundError, AttributeError): 

1372 pass 

1373 except PackFileDisappeared: 

1374 pass 

1375 

1376 raise KeyError(sha) 

1377 

1378 def _remove_pack(self, pack) -> None: 

1379 try: 

1380 del self._pack_cache[os.path.basename(pack._basename)] 

1381 except KeyError: 

1382 pass 

1383 pack.close() 

1384 os.remove(pack.data.path) 

1385 os.remove(pack.index.path) 

1386 

1387 def _get_pack_basepath(self, entries): 

1388 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1389 # TODO: Handle self.pack_dir being bytes 

1390 suffix = suffix_bytes.decode("ascii") 

1391 return os.path.join(self.pack_dir, "pack-" + suffix) 

1392 

1393 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1394 """Move a specific file containing a pack into the pack directory. 

1395 

1396 Note: The file should be on the same file system as the 

1397 packs directory. 

1398 

1399 Args: 

1400 f: Open file object for the pack. 

1401 path: Path to the pack file. 

1402 num_objects: Number of objects in the pack. 

1403 indexer: A PackIndexer for indexing the pack. 

1404 progress: Optional progress reporting function. 

1405 """ 

1406 entries = [] 

1407 for i, entry in enumerate(indexer): 

1408 if progress is not None: 

1409 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1410 entries.append(entry) 

1411 

1412 pack_sha, extra_entries = extend_pack( 

1413 f, 

1414 indexer.ext_refs(), 

1415 get_raw=self.get_raw, 

1416 compression_level=self.pack_compression_level, 

1417 progress=progress, 

1418 ) 

1419 f.flush() 

1420 try: 

1421 fileno = f.fileno() 

1422 except AttributeError: 

1423 pass 

1424 else: 

1425 os.fsync(fileno) 

1426 f.close() 

1427 

1428 entries.extend(extra_entries) 

1429 

1430 # Move the pack in. 

1431 entries.sort() 

1432 pack_base_name = self._get_pack_basepath(entries) 

1433 

1434 for pack in self.packs: 

1435 if pack._basename == pack_base_name: 

1436 return pack 

1437 

1438 target_pack_path = pack_base_name + ".pack" 

1439 target_index_path = pack_base_name + ".idx" 

1440 if sys.platform == "win32": 

1441 # Windows might have the target pack file lingering. Attempt 

1442 # removal, silently passing if the target does not exist. 

1443 with suppress(FileNotFoundError): 

1444 os.remove(target_pack_path) 

1445 os.rename(path, target_pack_path) 

1446 

1447 # Write the index. 

1448 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1449 write_pack_index( 

1450 index_file, entries, pack_sha, version=self.pack_index_version 

1451 ) 

1452 

1453 # Add the pack to the store and return it. 

1454 final_pack = Pack( 

1455 pack_base_name, 

1456 delta_window_size=self.pack_delta_window_size, 

1457 window_memory=self.pack_window_memory, 

1458 delta_cache_size=self.pack_delta_cache_size, 

1459 depth=self.pack_depth, 

1460 threads=self.pack_threads, 

1461 big_file_threshold=self.pack_big_file_threshold, 

1462 ) 

1463 final_pack.check_length_and_checksum() 

1464 self._add_cached_pack(pack_base_name, final_pack) 

1465 return final_pack 

1466 

1467 def add_thin_pack(self, read_all, read_some, progress=None): 

1468 """Add a new thin pack to this object store. 

1469 

1470 Thin packs are packs that contain deltas with parents that exist 

1471 outside the pack. They should never be placed in the object store 

1472 directly, and always indexed and completed as they are copied. 

1473 

1474 Args: 

1475 read_all: Read function that blocks until the number of 

1476 requested bytes are read. 

1477 read_some: Read function that returns at least one byte, but may 

1478 not return the number of bytes requested. 

1479 progress: Optional progress reporting function. 

1480 Returns: A Pack object pointing at the now-completed thin pack in the 

1481 objects/pack directory. 

1482 """ 

1483 import tempfile 

1484 

1485 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1486 with os.fdopen(fd, "w+b") as f: 

1487 os.chmod(path, PACK_MODE) 

1488 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1489 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1490 copier.verify(progress=progress) 

1491 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1492 

1493 def add_pack(self): 

1494 """Add a new pack to this object store. 

1495 

1496 Returns: Fileobject to write to, a commit function to 

1497 call when the pack is finished and an abort 

1498 function. 

1499 """ 

1500 import tempfile 

1501 

1502 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1503 f = os.fdopen(fd, "w+b") 

1504 os.chmod(path, PACK_MODE) 

1505 

1506 def commit(): 

1507 if f.tell() > 0: 

1508 f.seek(0) 

1509 

1510 with PackData(path, f) as pd: 

1511 indexer = PackIndexer.for_pack_data( 

1512 pd, resolve_ext_ref=self.get_raw 

1513 ) 

1514 return self._complete_pack(f, path, len(pd), indexer) 

1515 else: 

1516 f.close() 

1517 os.remove(path) 

1518 return None 

1519 

1520 def abort() -> None: 

1521 f.close() 

1522 os.remove(path) 

1523 

1524 return f, commit, abort 

1525 

1526 def add_object(self, obj) -> None: 

1527 """Add a single object to this object store. 

1528 

1529 Args: 

1530 obj: Object to add 

1531 """ 

1532 path = self._get_shafile_path(obj.id) 

1533 dir = os.path.dirname(path) 

1534 try: 

1535 os.mkdir(dir) 

1536 except FileExistsError: 

1537 pass 

1538 if os.path.exists(path): 

1539 return # Already there, no need to write again 

1540 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1541 f.write( 

1542 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1543 ) 

1544 

1545 @classmethod 

1546 def init(cls, path: Union[str, os.PathLike]): 

1547 """Initialize a new disk object store. 

1548 

1549 Creates the necessary directory structure for a Git object store. 

1550 

1551 Args: 

1552 path: Path where the object store should be created 

1553 

1554 Returns: 

1555 New DiskObjectStore instance 

1556 """ 

1557 try: 

1558 os.mkdir(path) 

1559 except FileExistsError: 

1560 pass 

1561 os.mkdir(os.path.join(path, "info")) 

1562 os.mkdir(os.path.join(path, PACKDIR)) 

1563 return cls(path) 

1564 

1565 def iter_prefix(self, prefix): 

1566 """Iterate over all object SHAs with the given prefix. 

1567 

1568 Args: 

1569 prefix: Hex prefix to search for (as bytes) 

1570 

1571 Returns: 

1572 Iterator of object SHAs (as bytes) matching the prefix 

1573 """ 

1574 if len(prefix) < 2: 

1575 yield from super().iter_prefix(prefix) 

1576 return 

1577 seen = set() 

1578 dir = prefix[:2].decode() 

1579 rest = prefix[2:].decode() 

1580 try: 

1581 for name in os.listdir(os.path.join(self.path, dir)): 

1582 if name.startswith(rest): 

1583 sha = os.fsencode(dir + name) 

1584 if sha not in seen: 

1585 seen.add(sha) 

1586 yield sha 

1587 except FileNotFoundError: 

1588 pass 

1589 

1590 for p in self.packs: 

1591 bin_prefix = ( 

1592 binascii.unhexlify(prefix) 

1593 if len(prefix) % 2 == 0 

1594 else binascii.unhexlify(prefix[:-1]) 

1595 ) 

1596 for sha in p.index.iter_prefix(bin_prefix): 

1597 sha = sha_to_hex(sha) 

1598 if sha.startswith(prefix) and sha not in seen: 

1599 seen.add(sha) 

1600 yield sha 

1601 for alternate in self.alternates: 

1602 for sha in alternate.iter_prefix(prefix): 

1603 if sha not in seen: 

1604 seen.add(sha) 

1605 yield sha 

1606 

1607 def get_commit_graph(self): 

1608 """Get the commit graph for this object store. 

1609 

1610 Returns: 

1611 CommitGraph object if available, None otherwise 

1612 """ 

1613 if not self._use_commit_graph: 

1614 return None 

1615 

1616 if self._commit_graph is None: 

1617 from .commit_graph import read_commit_graph 

1618 

1619 # Look for commit graph in our objects directory 

1620 graph_file = os.path.join(self.path, "info", "commit-graph") 

1621 if os.path.exists(graph_file): 

1622 self._commit_graph = read_commit_graph(graph_file) 

1623 return self._commit_graph 

1624 

1625 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1626 """Write a commit graph file for this object store. 

1627 

1628 Args: 

1629 refs: List of refs to include. If None, includes all refs from object store. 

1630 reachable: If True, includes all commits reachable from refs. 

1631 If False, only includes the direct ref targets. 

1632 """ 

1633 from .commit_graph import get_reachable_commits 

1634 

1635 if refs is None: 

1636 # Get all commit objects from the object store 

1637 all_refs = [] 

1638 # Iterate through all objects to find commits 

1639 for sha in self: 

1640 try: 

1641 obj = self[sha] 

1642 if obj.type_name == b"commit": 

1643 all_refs.append(sha) 

1644 except KeyError: 

1645 continue 

1646 else: 

1647 # Use provided refs 

1648 all_refs = refs 

1649 

1650 if not all_refs: 

1651 return # No commits to include 

1652 

1653 if reachable: 

1654 # Get all reachable commits 

1655 commit_ids = get_reachable_commits(self, all_refs) 

1656 else: 

1657 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1658 commit_ids = [] 

1659 for ref in all_refs: 

1660 if isinstance(ref, bytes) and len(ref) == 40: 

1661 # Already hex ObjectID 

1662 commit_ids.append(ref) 

1663 elif isinstance(ref, bytes) and len(ref) == 20: 

1664 # Binary SHA, convert to hex ObjectID 

1665 from .objects import sha_to_hex 

1666 

1667 commit_ids.append(sha_to_hex(ref)) 

1668 else: 

1669 # Assume it's already correct format 

1670 commit_ids.append(ref) 

1671 

1672 if commit_ids: 

1673 # Write commit graph directly to our object store path 

1674 # Generate the commit graph 

1675 from .commit_graph import generate_commit_graph 

1676 

1677 graph = generate_commit_graph(self, commit_ids) 

1678 

1679 if graph.entries: 

1680 # Ensure the info directory exists 

1681 info_dir = os.path.join(self.path, "info") 

1682 os.makedirs(info_dir, exist_ok=True) 

1683 

1684 # Write using GitFile for atomic operation 

1685 graph_path = os.path.join(info_dir, "commit-graph") 

1686 with GitFile(graph_path, "wb") as f: 

1687 assert isinstance( 

1688 f, _GitFile 

1689 ) # GitFile in write mode always returns _GitFile 

1690 graph.write_to_file(f) 

1691 

1692 # Clear cached commit graph so it gets reloaded 

1693 self._commit_graph = None 

1694 

1695 def prune(self, grace_period: Optional[int] = None) -> None: 

1696 """Prune/clean up this object store. 

1697 

1698 This removes temporary files that were left behind by interrupted 

1699 pack operations. These are files that start with ``tmp_pack_`` in the 

1700 repository directory or files with .pack extension but no corresponding 

1701 .idx file in the pack directory. 

1702 

1703 Args: 

1704 grace_period: Grace period in seconds for removing temporary files. 

1705 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1706 """ 

1707 import glob 

1708 

1709 if grace_period is None: 

1710 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1711 

1712 # Clean up tmp_pack_* files in the repository directory 

1713 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1714 # Check if file is old enough (more than grace period) 

1715 mtime = os.path.getmtime(tmp_file) 

1716 if time.time() - mtime > grace_period: 

1717 os.remove(tmp_file) 

1718 

1719 # Clean up orphaned .pack files without corresponding .idx files 

1720 try: 

1721 pack_dir_contents = os.listdir(self.pack_dir) 

1722 except FileNotFoundError: 

1723 return 

1724 

1725 pack_files = {} 

1726 idx_files = set() 

1727 

1728 for name in pack_dir_contents: 

1729 if name.endswith(".pack"): 

1730 base_name = name[:-5] # Remove .pack extension 

1731 pack_files[base_name] = name 

1732 elif name.endswith(".idx"): 

1733 base_name = name[:-4] # Remove .idx extension 

1734 idx_files.add(base_name) 

1735 

1736 # Remove .pack files without corresponding .idx files 

1737 for base_name, pack_name in pack_files.items(): 

1738 if base_name not in idx_files: 

1739 pack_path = os.path.join(self.pack_dir, pack_name) 

1740 # Check if file is old enough (more than grace period) 

1741 mtime = os.path.getmtime(pack_path) 

1742 if time.time() - mtime > grace_period: 

1743 os.remove(pack_path) 

1744 

1745 

1746class MemoryObjectStore(BaseObjectStore): 

1747 """Object store that keeps all objects in memory.""" 

1748 

1749 def __init__(self) -> None: 

1750 """Initialize a MemoryObjectStore. 

1751 

1752 Creates an empty in-memory object store. 

1753 """ 

1754 super().__init__() 

1755 self._data: dict[str, ShaFile] = {} 

1756 self.pack_compression_level = -1 

1757 

1758 def _to_hexsha(self, sha): 

1759 if len(sha) == 40: 

1760 return sha 

1761 elif len(sha) == 20: 

1762 return sha_to_hex(sha) 

1763 else: 

1764 raise ValueError(f"Invalid sha {sha!r}") 

1765 

1766 def contains_loose(self, sha): 

1767 """Check if a particular object is present by SHA1 and is loose.""" 

1768 return self._to_hexsha(sha) in self._data 

1769 

1770 def contains_packed(self, sha) -> bool: 

1771 """Check if a particular object is present by SHA1 and is packed.""" 

1772 return False 

1773 

1774 def __iter__(self): 

1775 """Iterate over the SHAs that are present in this store.""" 

1776 return iter(self._data.keys()) 

1777 

1778 @property 

1779 def packs(self): 

1780 """List with pack objects.""" 

1781 return [] 

1782 

1783 def get_raw(self, name: ObjectID): 

1784 """Obtain the raw text for an object. 

1785 

1786 Args: 

1787 name: sha for the object. 

1788 Returns: tuple with numeric type and object contents. 

1789 """ 

1790 obj = self[self._to_hexsha(name)] 

1791 return obj.type_num, obj.as_raw_string() 

1792 

1793 def __getitem__(self, name: ObjectID): 

1794 """Retrieve an object by SHA. 

1795 

1796 Args: 

1797 name: SHA of the object (as hex string or bytes) 

1798 

1799 Returns: 

1800 Copy of the ShaFile object 

1801 

1802 Raises: 

1803 KeyError: If the object is not found 

1804 """ 

1805 return self._data[self._to_hexsha(name)].copy() 

1806 

1807 def __delitem__(self, name: ObjectID) -> None: 

1808 """Delete an object from this store, for testing only.""" 

1809 del self._data[self._to_hexsha(name)] 

1810 

1811 def add_object(self, obj) -> None: 

1812 """Add a single object to this object store.""" 

1813 self._data[obj.id] = obj.copy() 

1814 

1815 def add_objects(self, objects, progress=None) -> None: 

1816 """Add a set of objects to this object store. 

1817 

1818 Args: 

1819 objects: Iterable over a list of (object, path) tuples 

1820 progress: Optional progress reporting function. 

1821 """ 

1822 for obj, path in objects: 

1823 self.add_object(obj) 

1824 

1825 def add_pack(self): 

1826 """Add a new pack to this object store. 

1827 

1828 Because this object store doesn't support packs, we extract and add the 

1829 individual objects. 

1830 

1831 Returns: Fileobject to write to and a commit function to 

1832 call when the pack is finished. 

1833 """ 

1834 from tempfile import SpooledTemporaryFile 

1835 

1836 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1837 

1838 def commit() -> None: 

1839 size = f.tell() 

1840 if size > 0: 

1841 f.seek(0) 

1842 

1843 p = PackData.from_file(f, size) 

1844 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1845 self.add_object(obj) 

1846 p.close() 

1847 f.close() 

1848 else: 

1849 f.close() 

1850 

1851 def abort() -> None: 

1852 f.close() 

1853 

1854 return f, commit, abort 

1855 

1856 def add_pack_data( 

1857 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1858 ) -> None: 

1859 """Add pack data to this object store. 

1860 

1861 Args: 

1862 count: Number of items to add 

1863 unpacked_objects: Iterator of UnpackedObject instances 

1864 progress: Optional progress reporting function. 

1865 """ 

1866 if count == 0: 

1867 return 

1868 

1869 # Since MemoryObjectStore doesn't support pack files, we need to 

1870 # extract individual objects. To handle deltas properly, we write 

1871 # to a temporary pack and then use PackInflater to resolve them. 

1872 f, commit, abort = self.add_pack() 

1873 try: 

1874 write_pack_data( 

1875 f.write, 

1876 unpacked_objects, 

1877 num_records=count, 

1878 progress=progress, 

1879 ) 

1880 except BaseException: 

1881 abort() 

1882 raise 

1883 else: 

1884 commit() 

1885 

1886 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1887 """Add a new thin pack to this object store. 

1888 

1889 Thin packs are packs that contain deltas with parents that exist 

1890 outside the pack. Because this object store doesn't support packs, we 

1891 extract and add the individual objects. 

1892 

1893 Args: 

1894 read_all: Read function that blocks until the number of 

1895 requested bytes are read. 

1896 read_some: Read function that returns at least one byte, but may 

1897 not return the number of bytes requested. 

1898 progress: Optional progress reporting function. 

1899 """ 

1900 f, commit, abort = self.add_pack() 

1901 try: 

1902 copier = PackStreamCopier(read_all, read_some, f) 

1903 copier.verify() 

1904 except BaseException: 

1905 abort() 

1906 raise 

1907 else: 

1908 commit() 

1909 

1910 

1911class ObjectIterator(Protocol): 

1912 """Interface for iterating over objects.""" 

1913 

1914 def iterobjects(self) -> Iterator[ShaFile]: 

1915 """Iterate over all objects. 

1916 

1917 Returns: 

1918 Iterator of ShaFile objects 

1919 """ 

1920 raise NotImplementedError(self.iterobjects) 

1921 

1922 

1923def tree_lookup_path(lookup_obj, root_sha, path): 

1924 """Look up an object in a Git tree. 

1925 

1926 Args: 

1927 lookup_obj: Callback for retrieving object by SHA1 

1928 root_sha: SHA1 of the root tree 

1929 path: Path to lookup 

1930 Returns: A tuple of (mode, SHA) of the resulting path. 

1931 """ 

1932 tree = lookup_obj(root_sha) 

1933 if not isinstance(tree, Tree): 

1934 raise NotTreeError(root_sha) 

1935 return tree.lookup_path(lookup_obj, path) 

1936 

1937 

1938def _collect_filetree_revs( 

1939 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1940) -> None: 

1941 """Collect SHA1s of files and directories for specified tree. 

1942 

1943 Args: 

1944 obj_store: Object store to get objects by SHA from 

1945 tree_sha: tree reference to walk 

1946 kset: set to fill with references to files and directories 

1947 """ 

1948 filetree = obj_store[tree_sha] 

1949 assert isinstance(filetree, Tree) 

1950 for name, mode, sha in filetree.iteritems(): 

1951 if not S_ISGITLINK(mode) and sha not in kset: 

1952 kset.add(sha) 

1953 if stat.S_ISDIR(mode): 

1954 _collect_filetree_revs(obj_store, sha, kset) 

1955 

1956 

1957def _split_commits_and_tags( 

1958 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1959) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1960 """Split object id list into three lists with commit, tag, and other SHAs. 

1961 

1962 Commits referenced by tags are included into commits 

1963 list as well. Only SHA1s known in this repository will get 

1964 through, and unless ignore_unknown argument is True, KeyError 

1965 is thrown for SHA1 missing in the repository 

1966 

1967 Args: 

1968 obj_store: Object store to get objects by SHA1 from 

1969 lst: Collection of commit and tag SHAs 

1970 ignore_unknown: True to skip SHA1 missing in the repository 

1971 silently. 

1972 Returns: A tuple of (commits, tags, others) SHA1s 

1973 """ 

1974 commits: set[bytes] = set() 

1975 tags: set[bytes] = set() 

1976 others: set[bytes] = set() 

1977 for e in lst: 

1978 try: 

1979 o = obj_store[e] 

1980 except KeyError: 

1981 if not ignore_unknown: 

1982 raise 

1983 else: 

1984 if isinstance(o, Commit): 

1985 commits.add(e) 

1986 elif isinstance(o, Tag): 

1987 tags.add(e) 

1988 tagged = o.object[1] 

1989 c, t, os = _split_commits_and_tags( 

1990 obj_store, [tagged], ignore_unknown=ignore_unknown 

1991 ) 

1992 commits |= c 

1993 tags |= t 

1994 others |= os 

1995 else: 

1996 others.add(e) 

1997 return (commits, tags, others) 

1998 

1999 

2000class MissingObjectFinder: 

2001 """Find the objects missing from another object store. 

2002 

2003 Args: 

2004 object_store: Object store containing at least all objects to be 

2005 sent 

2006 haves: SHA1s of commits not to send (already present in target) 

2007 wants: SHA1s of commits to send 

2008 progress: Optional function to report progress to. 

2009 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2010 sha for including tags. 

2011 get_parents: Optional function for getting the parents of a commit. 

2012 """ 

2013 

2014 def __init__( 

2015 self, 

2016 object_store, 

2017 haves, 

2018 wants, 

2019 *, 

2020 shallow=None, 

2021 progress=None, 

2022 get_tagged=None, 

2023 get_parents=lambda commit: commit.parents, 

2024 ) -> None: 

2025 """Initialize a MissingObjectFinder. 

2026 

2027 Args: 

2028 object_store: Object store containing objects 

2029 haves: SHA1s of objects already present in target 

2030 wants: SHA1s of objects to send 

2031 shallow: Set of shallow commit SHA1s 

2032 progress: Optional progress reporting callback 

2033 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2034 get_parents: Function for getting commit parents 

2035 """ 

2036 self.object_store = object_store 

2037 if shallow is None: 

2038 shallow = set() 

2039 self._get_parents = get_parents 

2040 # process Commits and Tags differently 

2041 # Note, while haves may list commits/tags not available locally, 

2042 # and such SHAs would get filtered out by _split_commits_and_tags, 

2043 # wants shall list only known SHAs, and otherwise 

2044 # _split_commits_and_tags fails with KeyError 

2045 have_commits, have_tags, have_others = _split_commits_and_tags( 

2046 object_store, haves, ignore_unknown=True 

2047 ) 

2048 want_commits, want_tags, want_others = _split_commits_and_tags( 

2049 object_store, wants, ignore_unknown=False 

2050 ) 

2051 # all_ancestors is a set of commits that shall not be sent 

2052 # (complete repository up to 'haves') 

2053 all_ancestors = _collect_ancestors( 

2054 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

2055 )[0] 

2056 # all_missing - complete set of commits between haves and wants 

2057 # common - commits from all_ancestors we hit into while 

2058 # traversing parent hierarchy of wants 

2059 missing_commits, common_commits = _collect_ancestors( 

2060 object_store, 

2061 want_commits, 

2062 all_ancestors, 

2063 shallow=shallow, 

2064 get_parents=self._get_parents, 

2065 ) 

2066 self.remote_has: set[bytes] = set() 

2067 # Now, fill sha_done with commits and revisions of 

2068 # files and directories known to be both locally 

2069 # and on target. Thus these commits and files 

2070 # won't get selected for fetch 

2071 for h in common_commits: 

2072 self.remote_has.add(h) 

2073 cmt = object_store[h] 

2074 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2075 # record tags we have as visited, too 

2076 for t in have_tags: 

2077 self.remote_has.add(t) 

2078 self.sha_done = set(self.remote_has) 

2079 

2080 # in fact, what we 'want' is commits, tags, and others 

2081 # we've found missing 

2082 self.objects_to_send: set[ 

2083 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

2084 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

2085 missing_tags = want_tags.difference(have_tags) 

2086 self.objects_to_send.update( 

2087 {(w, None, Tag.type_num, False) for w in missing_tags} 

2088 ) 

2089 missing_others = want_others.difference(have_others) 

2090 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2091 

2092 if progress is None: 

2093 self.progress = lambda x: None 

2094 else: 

2095 self.progress = progress 

2096 self._tagged = (get_tagged and get_tagged()) or {} 

2097 

2098 def get_remote_has(self): 

2099 """Get the set of SHAs the remote has. 

2100 

2101 Returns: 

2102 Set of SHA1s that the remote side already has 

2103 """ 

2104 return self.remote_has 

2105 

2106 def add_todo( 

2107 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

2108 ) -> None: 

2109 """Add objects to the todo list. 

2110 

2111 Args: 

2112 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2113 """ 

2114 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2115 

2116 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

2117 """Get the next object to send. 

2118 

2119 Returns: 

2120 Tuple of (sha, pack_hint) 

2121 

2122 Raises: 

2123 StopIteration: When no more objects to send 

2124 """ 

2125 while True: 

2126 if not self.objects_to_send: 

2127 self.progress( 

2128 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2129 ) 

2130 raise StopIteration 

2131 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2132 if sha not in self.sha_done: 

2133 break 

2134 if not leaf: 

2135 o = self.object_store[sha] 

2136 if isinstance(o, Commit): 

2137 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2138 elif isinstance(o, Tree): 

2139 self.add_todo( 

2140 [ 

2141 ( 

2142 s, 

2143 n, 

2144 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2145 not stat.S_ISDIR(m), 

2146 ) 

2147 for n, m, s in o.iteritems() 

2148 if not S_ISGITLINK(m) 

2149 ] 

2150 ) 

2151 elif isinstance(o, Tag): 

2152 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2153 if sha in self._tagged: 

2154 self.add_todo([(self._tagged[sha], None, None, True)]) 

2155 self.sha_done.add(sha) 

2156 if len(self.sha_done) % 1000 == 0: 

2157 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2158 if type_num is None: 

2159 pack_hint = None 

2160 else: 

2161 pack_hint = (type_num, name) 

2162 return (sha, pack_hint) 

2163 

2164 def __iter__(self): 

2165 """Return iterator over objects to send. 

2166 

2167 Returns: 

2168 Self (this class implements the iterator protocol) 

2169 """ 

2170 return self 

2171 

2172 

2173class ObjectStoreGraphWalker: 

2174 """Graph walker that finds what commits are missing from an object store.""" 

2175 

2176 heads: set[ObjectID] 

2177 """Revisions without descendants in the local repo.""" 

2178 

2179 get_parents: Callable[[ObjectID], list[ObjectID]] 

2180 """Function to retrieve parents in the local repo.""" 

2181 

2182 shallow: set[ObjectID] 

2183 

2184 def __init__( 

2185 self, 

2186 local_heads: Iterable[ObjectID], 

2187 get_parents, 

2188 shallow: Optional[set[ObjectID]] = None, 

2189 update_shallow=None, 

2190 ) -> None: 

2191 """Create a new instance. 

2192 

2193 Args: 

2194 local_heads: Heads to start search with 

2195 get_parents: Function for finding the parents of a SHA1. 

2196 shallow: Set of shallow commits. 

2197 update_shallow: Function to update shallow commits. 

2198 """ 

2199 self.heads = set(local_heads) 

2200 self.get_parents = get_parents 

2201 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

2202 if shallow is None: 

2203 shallow = set() 

2204 self.shallow = shallow 

2205 self.update_shallow = update_shallow 

2206 

2207 def nak(self) -> None: 

2208 """Nothing in common was found.""" 

2209 

2210 def ack(self, sha: ObjectID) -> None: 

2211 """Ack that a revision and its ancestors are present in the source.""" 

2212 if len(sha) != 40: 

2213 raise ValueError(f"unexpected sha {sha!r} received") 

2214 ancestors = {sha} 

2215 

2216 # stop if we run out of heads to remove 

2217 while self.heads: 

2218 for a in ancestors: 

2219 if a in self.heads: 

2220 self.heads.remove(a) 

2221 

2222 # collect all ancestors 

2223 new_ancestors = set() 

2224 for a in ancestors: 

2225 ps = self.parents.get(a) 

2226 if ps is not None: 

2227 new_ancestors.update(ps) 

2228 self.parents[a] = None 

2229 

2230 # no more ancestors; stop 

2231 if not new_ancestors: 

2232 break 

2233 

2234 ancestors = new_ancestors 

2235 

2236 def next(self): 

2237 """Iterate over ancestors of heads in the target.""" 

2238 if self.heads: 

2239 ret = self.heads.pop() 

2240 try: 

2241 ps = self.get_parents(ret) 

2242 except KeyError: 

2243 return None 

2244 self.parents[ret] = ps 

2245 self.heads.update([p for p in ps if p not in self.parents]) 

2246 return ret 

2247 return None 

2248 

2249 __next__ = next 

2250 

2251 

2252def commit_tree_changes(object_store, tree, changes): 

2253 """Commit a specified set of changes to a tree structure. 

2254 

2255 This will apply a set of changes on top of an existing tree, storing new 

2256 objects in object_store. 

2257 

2258 changes are a list of tuples with (path, mode, object_sha). 

2259 Paths can be both blobs and trees. See the mode and 

2260 object sha to None deletes the path. 

2261 

2262 This method works especially well if there are only a small 

2263 number of changes to a big tree. For a large number of changes 

2264 to a large tree, use e.g. commit_tree. 

2265 

2266 Args: 

2267 object_store: Object store to store new objects in 

2268 and retrieve old ones from. 

2269 tree: Original tree root 

2270 changes: changes to apply 

2271 Returns: New tree root object 

2272 """ 

2273 # TODO(jelmer): Save up the objects and add them using .add_objects 

2274 # rather than with individual calls to .add_object. 

2275 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {} 

2276 for path, new_mode, new_sha in changes: 

2277 try: 

2278 (dirname, subpath) = path.split(b"/", 1) 

2279 except ValueError: 

2280 if new_sha is None: 

2281 del tree[path] 

2282 else: 

2283 tree[path] = (new_mode, new_sha) 

2284 else: 

2285 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2286 for name, subchanges in nested_changes.items(): 

2287 try: 

2288 orig_subtree = object_store[tree[name][1]] 

2289 except KeyError: 

2290 orig_subtree = Tree() 

2291 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

2292 if len(subtree) == 0: 

2293 del tree[name] 

2294 else: 

2295 tree[name] = (stat.S_IFDIR, subtree.id) 

2296 object_store.add_object(tree) 

2297 return tree 

2298 

2299 

2300class OverlayObjectStore(BaseObjectStore): 

2301 """Object store that can overlay multiple object stores.""" 

2302 

2303 def __init__(self, bases, add_store=None) -> None: 

2304 """Initialize an OverlayObjectStore. 

2305 

2306 Args: 

2307 bases: List of base object stores to overlay 

2308 add_store: Optional store to write new objects to 

2309 """ 

2310 self.bases = bases 

2311 self.add_store = add_store 

2312 

2313 def add_object(self, object): 

2314 """Add a single object to the store. 

2315 

2316 Args: 

2317 object: Object to add 

2318 

2319 Raises: 

2320 NotImplementedError: If no add_store was provided 

2321 """ 

2322 if self.add_store is None: 

2323 raise NotImplementedError(self.add_object) 

2324 return self.add_store.add_object(object) 

2325 

2326 def add_objects(self, objects, progress=None): 

2327 """Add multiple objects to the store. 

2328 

2329 Args: 

2330 objects: Iterator of objects to add 

2331 progress: Optional progress reporting callback 

2332 

2333 Raises: 

2334 NotImplementedError: If no add_store was provided 

2335 """ 

2336 if self.add_store is None: 

2337 raise NotImplementedError(self.add_object) 

2338 return self.add_store.add_objects(objects, progress) 

2339 

2340 @property 

2341 def packs(self): 

2342 """Get the list of packs from all overlaid stores. 

2343 

2344 Returns: 

2345 Combined list of packs from all base stores 

2346 """ 

2347 ret = [] 

2348 for b in self.bases: 

2349 ret.extend(b.packs) 

2350 return ret 

2351 

2352 def __iter__(self): 

2353 """Iterate over all object SHAs in the overlaid stores. 

2354 

2355 Returns: 

2356 Iterator of object SHAs (deduped across stores) 

2357 """ 

2358 done = set() 

2359 for b in self.bases: 

2360 for o_id in b: 

2361 if o_id not in done: 

2362 yield o_id 

2363 done.add(o_id) 

2364 

2365 def iterobjects_subset( 

2366 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2367 ) -> Iterator[ShaFile]: 

2368 """Iterate over a subset of objects from the overlaid stores. 

2369 

2370 Args: 

2371 shas: Iterable of object SHAs to retrieve 

2372 allow_missing: If True, skip missing objects; if False, raise KeyError 

2373 

2374 Returns: 

2375 Iterator of ShaFile objects 

2376 

2377 Raises: 

2378 KeyError: If an object is missing and allow_missing is False 

2379 """ 

2380 todo = set(shas) 

2381 found: set[bytes] = set() 

2382 

2383 for b in self.bases: 

2384 # Create a copy of todo for each base to avoid modifying 

2385 # the set while iterating through it 

2386 current_todo = todo - found 

2387 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2388 yield o 

2389 found.add(o.id) 

2390 

2391 # Check for any remaining objects not found 

2392 missing = todo - found 

2393 if missing and not allow_missing: 

2394 raise KeyError(next(iter(missing))) 

2395 

2396 def iter_unpacked_subset( 

2397 self, 

2398 shas: Iterable[bytes], 

2399 *, 

2400 include_comp=False, 

2401 allow_missing: bool = False, 

2402 convert_ofs_delta=True, 

2403 ) -> Iterator[ShaFile]: 

2404 """Iterate over unpacked objects from the overlaid stores. 

2405 

2406 Args: 

2407 shas: Iterable of object SHAs to retrieve 

2408 include_comp: Whether to include compressed data 

2409 allow_missing: If True, skip missing objects; if False, raise KeyError 

2410 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2411 

2412 Returns: 

2413 Iterator of unpacked objects 

2414 

2415 Raises: 

2416 KeyError: If an object is missing and allow_missing is False 

2417 """ 

2418 todo = set(shas) 

2419 for b in self.bases: 

2420 for o in b.iter_unpacked_subset( 

2421 todo, 

2422 include_comp=include_comp, 

2423 allow_missing=True, 

2424 convert_ofs_delta=convert_ofs_delta, 

2425 ): 

2426 yield o 

2427 todo.remove(o.id) 

2428 if todo and not allow_missing: 

2429 raise KeyError(o.id) 

2430 

2431 def get_raw(self, sha_id): 

2432 """Get the raw object data from the overlaid stores. 

2433 

2434 Args: 

2435 sha_id: SHA of the object 

2436 

2437 Returns: 

2438 Tuple of (type_num, raw_data) 

2439 

2440 Raises: 

2441 KeyError: If object not found in any base store 

2442 """ 

2443 for b in self.bases: 

2444 try: 

2445 return b.get_raw(sha_id) 

2446 except KeyError: 

2447 pass 

2448 raise KeyError(sha_id) 

2449 

2450 def contains_packed(self, sha) -> bool: 

2451 """Check if an object is packed in any base store. 

2452 

2453 Args: 

2454 sha: SHA of the object 

2455 

2456 Returns: 

2457 True if object is packed in any base store 

2458 """ 

2459 for b in self.bases: 

2460 if b.contains_packed(sha): 

2461 return True 

2462 return False 

2463 

2464 def contains_loose(self, sha) -> bool: 

2465 """Check if an object is loose in any base store. 

2466 

2467 Args: 

2468 sha: SHA of the object 

2469 

2470 Returns: 

2471 True if object is loose in any base store 

2472 """ 

2473 for b in self.bases: 

2474 if b.contains_loose(sha): 

2475 return True 

2476 return False 

2477 

2478 

2479def read_packs_file(f): 

2480 """Yield the packs listed in a packs file.""" 

2481 for line in f.read().splitlines(): 

2482 if not line: 

2483 continue 

2484 (kind, name) = line.split(b" ", 1) 

2485 if kind != b"P": 

2486 continue 

2487 yield os.fsdecode(name) 

2488 

2489 

2490class BucketBasedObjectStore(PackBasedObjectStore): 

2491 """Object store implementation that uses a bucket store like S3 as backend.""" 

2492 

2493 def _iter_loose_objects(self): 

2494 """Iterate over the SHAs of all loose objects.""" 

2495 return iter([]) 

2496 

2497 def _get_loose_object(self, sha) -> None: 

2498 return None 

2499 

2500 def delete_loose_object(self, sha) -> None: 

2501 """Delete a loose object (no-op for bucket stores). 

2502 

2503 Bucket-based stores don't have loose objects, so this is a no-op. 

2504 

2505 Args: 

2506 sha: SHA of the object to delete 

2507 """ 

2508 # Doesn't exist.. 

2509 

2510 def pack_loose_objects(self) -> int: 

2511 """Pack loose objects. Returns number of objects packed. 

2512 

2513 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2514 """ 

2515 return 0 

2516 

2517 def _remove_pack_by_name(self, name: str) -> None: 

2518 """Remove a pack by name. Subclasses should implement this.""" 

2519 raise NotImplementedError(self._remove_pack_by_name) 

2520 

2521 def _iter_pack_names(self) -> Iterator[str]: 

2522 raise NotImplementedError(self._iter_pack_names) 

2523 

2524 def _get_pack(self, name) -> Pack: 

2525 raise NotImplementedError(self._get_pack) 

2526 

2527 def _update_pack_cache(self): 

2528 pack_files = set(self._iter_pack_names()) 

2529 

2530 # Open newly appeared pack files 

2531 new_packs = [] 

2532 for f in pack_files: 

2533 if f not in self._pack_cache: 

2534 pack = self._get_pack(f) 

2535 new_packs.append(pack) 

2536 self._pack_cache[f] = pack 

2537 # Remove disappeared pack files 

2538 for f in set(self._pack_cache) - pack_files: 

2539 self._pack_cache.pop(f).close() 

2540 return new_packs 

2541 

2542 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2543 raise NotImplementedError 

2544 

2545 def add_pack(self): 

2546 """Add a new pack to this object store. 

2547 

2548 Returns: Fileobject to write to, a commit function to 

2549 call when the pack is finished and an abort 

2550 function. 

2551 """ 

2552 import tempfile 

2553 

2554 pf = tempfile.SpooledTemporaryFile( 

2555 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2556 ) 

2557 

2558 def commit(): 

2559 if pf.tell() == 0: 

2560 pf.close() 

2561 return None 

2562 

2563 pf.seek(0) 

2564 

2565 p = PackData(pf.name, pf) 

2566 entries = p.sorted_entries() 

2567 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2568 idxf = tempfile.SpooledTemporaryFile( 

2569 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2570 ) 

2571 checksum = p.get_stored_checksum() 

2572 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2573 idxf.seek(0) 

2574 idx = load_pack_index_file(basename + ".idx", idxf) 

2575 for pack in self.packs: 

2576 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2577 p.close() 

2578 idx.close() 

2579 pf.close() 

2580 idxf.close() 

2581 return pack 

2582 pf.seek(0) 

2583 idxf.seek(0) 

2584 self._upload_pack(basename, pf, idxf) 

2585 final_pack = Pack.from_objects(p, idx) 

2586 self._add_cached_pack(basename, final_pack) 

2587 pf.close() 

2588 idxf.close() 

2589 return final_pack 

2590 

2591 return pf, commit, pf.close 

2592 

2593 

2594def _collect_ancestors( 

2595 store: ObjectContainer, 

2596 heads, 

2597 common: frozenset[ObjectID] = frozenset(), 

2598 shallow: frozenset[ObjectID] = frozenset(), 

2599 get_parents=lambda commit: commit.parents, 

2600): 

2601 """Collect all ancestors of heads up to (excluding) those in common. 

2602 

2603 Args: 

2604 store: Object store to get commits from 

2605 heads: commits to start from 

2606 common: commits to end at, or empty set to walk repository 

2607 completely 

2608 shallow: Set of shallow commits 

2609 get_parents: Optional function for getting the parents of a 

2610 commit. 

2611 Returns: a tuple (A, B) where A - all commits reachable 

2612 from heads but not present in common, B - common (shared) elements 

2613 that are directly reachable from heads 

2614 """ 

2615 bases = set() 

2616 commits = set() 

2617 queue = [] 

2618 queue.extend(heads) 

2619 

2620 # Try to use commit graph if available 

2621 commit_graph = store.get_commit_graph() 

2622 

2623 while queue: 

2624 e = queue.pop(0) 

2625 if e in common: 

2626 bases.add(e) 

2627 elif e not in commits: 

2628 commits.add(e) 

2629 if e in shallow: 

2630 continue 

2631 

2632 # Try to use commit graph for parent lookup 

2633 parents = None 

2634 if commit_graph: 

2635 parents = commit_graph.get_parents(e) 

2636 

2637 if parents is None: 

2638 # Fall back to loading the object 

2639 cmt = store[e] 

2640 parents = get_parents(cmt) 

2641 

2642 queue.extend(parents) 

2643 return (commits, bases) 

2644 

2645 

2646def iter_tree_contents( 

2647 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2648): 

2649 """Iterate the contents of a tree and all subtrees. 

2650 

2651 Iteration is depth-first pre-order, as in e.g. os.walk. 

2652 

2653 Args: 

2654 store: Object store to get trees from 

2655 tree_id: SHA1 of the tree. 

2656 include_trees: If True, include tree objects in the iteration. 

2657 

2658 Yields: TreeEntry namedtuples for all the objects in a tree. 

2659 """ 

2660 if tree_id is None: 

2661 return 

2662 # This could be fairly easily generalized to >2 trees if we find a use 

2663 # case. 

2664 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2665 while todo: 

2666 entry = todo.pop() 

2667 if stat.S_ISDIR(entry.mode): 

2668 extra = [] 

2669 tree = store[entry.sha] 

2670 assert isinstance(tree, Tree) 

2671 for subentry in tree.iteritems(name_order=True): 

2672 extra.append(subentry.in_path(entry.path)) 

2673 todo.extend(reversed(extra)) 

2674 if not stat.S_ISDIR(entry.mode) or include_trees: 

2675 yield entry 

2676 

2677 

2678def iter_commit_contents( 

2679 store: ObjectContainer, 

2680 commit: Union[Commit, bytes], 

2681 *, 

2682 include: Optional[Sequence[Union[str, bytes, Path]]] = None, 

2683): 

2684 """Iterate the contents of the repository at the specified commit. 

2685 

2686 This is a wrapper around iter_tree_contents() and 

2687 tree_lookup_path() to simplify the common task of getting the 

2688 contest of a repo at a particular commit. See also 

2689 dulwich.index.build_file_from_blob() for writing individual files 

2690 to disk. 

2691 

2692 Args: 

2693 store: Object store to get trees from 

2694 commit: Commit object, or SHA1 of a commit 

2695 include: if provided, only the entries whose paths are in the 

2696 list, or whose parent tree is in the list, will be 

2697 included. Note that duplicate or overlapping paths 

2698 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

2699 

2700 Yields: TreeEntry namedtuples for all matching files in a commit. 

2701 """ 

2702 sha = commit.id if isinstance(commit, Commit) else commit 

2703 if not isinstance(obj := store[sha], Commit): 

2704 raise TypeError( 

2705 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

2706 ) 

2707 commit = obj 

2708 encoding = commit.encoding or "utf-8" 

2709 include = ( 

2710 [ 

2711 path if isinstance(path, bytes) else str(path).encode(encoding) 

2712 for path in include 

2713 ] 

2714 if include is not None 

2715 else [b""] 

2716 ) 

2717 

2718 for path in include: 

2719 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

2720 # Iterate all contained files if path points to a dir, otherwise just get that 

2721 # single file 

2722 if isinstance(store[obj_id], Tree): 

2723 for entry in iter_tree_contents(store, obj_id): 

2724 yield entry.in_path(path) 

2725 else: 

2726 yield TreeEntry(path, mode, obj_id) 

2727 

2728 

2729def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2730 """Peel all tags from a SHA. 

2731 

2732 Args: 

2733 store: Object store to get objects from 

2734 sha: The object SHA to peel. 

2735 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

2736 intermediate tags; if the original ref does not point to a tag, 

2737 this will equal the original SHA1. 

2738 """ 

2739 unpeeled = obj = store[sha] 

2740 obj_class = object_class(obj.type_name) 

2741 while obj_class is Tag: 

2742 assert isinstance(obj, Tag) 

2743 obj_class, sha = obj.object 

2744 obj = store[sha] 

2745 return unpeeled, obj